X-Git-Url: https://gerrit.fd.io/r/gitweb?p=govpp.git;a=blobdiff_plain;f=core%2Fconnection.go;h=67c7e1d9c60f348e1bcea5c29e9db96fe3ce45d2;hp=c77358f76cb0c9f3543461550cdc8808c07dc7a3;hb=6fe52d72255456e7d73df9d2f6b4a8f724ed447d;hpb=a3bb834db727a3ac9a1ffcfeae9265e5dead851f diff --git a/core/connection.go b/core/connection.go index c77358f..67c7e1d 100644 --- a/core/connection.go +++ b/core/connection.go @@ -30,40 +30,22 @@ import ( ) const ( - requestChannelBufSize = 100 // default size of the request channel buffer - replyChannelBufSize = 100 // default size of the reply channel buffer - notificationChannelBufSize = 100 // default size of the notification channel buffer - - defaultReplyTimeout = time.Second * 1 // default timeout for replies from VPP, can be changed with SetReplyTimeout + DefaultReconnectInterval = time.Second // default interval between reconnect attempts + DefaultMaxReconnectAttempts = 3 // default maximum number of reconnect attempts ) var ( - healthCheckInterval = time.Second * 1 // default health check interval - healthCheckReplyTimeout = time.Millisecond * 100 // timeout for reply to a health check - healthCheckThreshold = 1 // number of failed health checks until the error is reported + RequestChanBufSize = 100 // default size of the request channel buffer + ReplyChanBufSize = 100 // default size of the reply channel buffer + NotificationChanBufSize = 100 // default size of the notification channel buffer ) -// SetHealthCheckProbeInterval sets health check probe interval. -// Beware: Function is not thread-safe. It is recommended to setup this parameter -// before connecting to vpp. -func SetHealthCheckProbeInterval(interval time.Duration) { - healthCheckInterval = interval -} - -// SetHealthCheckReplyTimeout sets timeout for reply to a health check probe. -// If reply arrives after the timeout, check is considered as failed. -// Beware: Function is not thread-safe. It is recommended to setup this parameter -// before connecting to vpp. -func SetHealthCheckReplyTimeout(timeout time.Duration) { - healthCheckReplyTimeout = timeout -} - -// SetHealthCheckThreshold sets the number of failed healthProbe checks until the error is reported. -// Beware: Function is not thread-safe. It is recommended to setup this parameter -// before connecting to vpp. -func SetHealthCheckThreshold(threshold int) { - healthCheckThreshold = threshold -} +var ( + HealthCheckProbeInterval = time.Second // default health check probe interval + HealthCheckReplyTimeout = time.Millisecond * 100 // timeout for reply to a health check probe + HealthCheckThreshold = 1 // number of failed health checks until the error is reported + DefaultReplyTimeout = time.Second // default timeout for replies from VPP +) // ConnectionState represents the current state of the connection to VPP. type ConnectionState int @@ -74,8 +56,24 @@ const ( // Disconnected represents state in which the connection has been dropped. Disconnected + + // Failed represents state in which the reconnecting failed after exceeding maximum number of attempts. + Failed ) +func (s ConnectionState) String() string { + switch s { + case Connected: + return "Connected" + case Disconnected: + return "Disconnected" + case Failed: + return "Failed" + default: + return fmt.Sprintf("UnknownState(%d)", s) + } +} + // ConnectionEvent is a notification about change in the VPP connection state. type ConnectionEvent struct { // Timestamp holds the time when the event has been created. @@ -88,15 +86,15 @@ type ConnectionEvent struct { Error error } -var ( - connLock sync.RWMutex // lock for the global connection - conn *Connection // global handle to the Connection (used in the message receive callback) -) - // Connection represents a shared memory connection to VPP via vppAdapter. type Connection struct { - vpp adapter.VppAdapter // VPP adapter - connected uint32 // non-zero if the adapter is connected to VPP + vppClient adapter.VppAPI // VPP binary API client + //statsClient adapter.StatsAPI // VPP stats API client + + maxAttempts int // interval for reconnect attempts + recInterval time.Duration // maximum number of reconnect attempts + + vppConnected uint32 // non-zero if the adapter is connected to VPP codec *codec.MsgCodec // message codec msgIDs map[string]uint16 // map of message IDs indexed by message name + CRC @@ -104,10 +102,10 @@ type Connection struct { maxChannelID uint32 // maximum used channel ID (the real limit is 2^15, 32-bit is used for atomic operations) channelsLock sync.RWMutex // lock for the channels map - channels map[uint16]*channel // map of all API channels indexed by the channel ID + channels map[uint16]*Channel // map of all API channels indexed by the channel ID - notifSubscriptionsLock sync.RWMutex // lock for the subscriptions map - notifSubscriptions map[uint16][]*api.NotifSubscription // map od all notification subscriptions indexed by message ID + subscriptionsLock sync.RWMutex // lock for the subscriptions map + subscriptions map[uint16][]*subscriptionCtx // map od all notification subscriptions indexed by message ID pingReqID uint16 // ID if the ControlPing message pingReplyID uint16 // ID of the ControlPingReply message @@ -116,18 +114,37 @@ type Connection struct { lastReply time.Time // time of the last received reply from VPP } -// Connect connects to VPP using specified VPP adapter and returns the connection handle. -// This call blocks until VPP is connected, or an error occurs. Only one connection attempt will be performed. -func Connect(vppAdapter adapter.VppAdapter) (*Connection, error) { - // create new connection handle - c, err := newConnection(vppAdapter) - if err != nil { - return nil, err +func newConnection(binapi adapter.VppAPI, attempts int, interval time.Duration) *Connection { + if attempts == 0 { + attempts = DefaultMaxReconnectAttempts + } + if interval == 0 { + interval = DefaultReconnectInterval } + c := &Connection{ + vppClient: binapi, + maxAttempts: attempts, + recInterval: interval, + codec: &codec.MsgCodec{}, + msgIDs: make(map[string]uint16), + msgMap: make(map[uint16]api.Message), + channels: make(map[uint16]*Channel), + subscriptions: make(map[uint16][]*subscriptionCtx), + } + binapi.SetMsgCallback(c.msgCallback) + return c +} + +// Connect connects to VPP API using specified adapter and returns a connection handle. +// This call blocks until it is either connected, or an error occurs. +// Only one connection attempt will be performed. +func Connect(binapi adapter.VppAPI) (*Connection, error) { + // create new connection handle + c := newConnection(binapi, DefaultMaxReconnectAttempts, DefaultReconnectInterval) + // blocking attempt to connect to VPP - err = c.connectVPP() - if err != nil { + if err := c.connectVPP(); err != nil { return nil, err } @@ -138,184 +155,120 @@ func Connect(vppAdapter adapter.VppAdapter) (*Connection, error) { // and ConnectionState channel. This call does not block until connection is established, it // returns immediately. The caller is supposed to watch the returned ConnectionState channel for // Connected/Disconnected events. In case of disconnect, the library will asynchronously try to reconnect. -func AsyncConnect(vppAdapter adapter.VppAdapter) (*Connection, chan ConnectionEvent, error) { +func AsyncConnect(binapi adapter.VppAPI, attempts int, interval time.Duration) (*Connection, chan ConnectionEvent, error) { // create new connection handle - c, err := newConnection(vppAdapter) - if err != nil { - return nil, nil, err - } + c := newConnection(binapi, attempts, interval) // asynchronously attempt to connect to VPP - connChan := make(chan ConnectionEvent, notificationChannelBufSize) + connChan := make(chan ConnectionEvent, NotificationChanBufSize) go c.connectLoop(connChan) return c, connChan, nil } -// Disconnect disconnects from VPP and releases all connection-related resources. -func (c *Connection) Disconnect() { - if c == nil { - return - } - - connLock.Lock() - defer connLock.Unlock() - - if c.vpp != nil { - c.disconnectVPP() - } - conn = nil -} - -// newConnection returns new connection handle. -func newConnection(vppAdapter adapter.VppAdapter) (*Connection, error) { - connLock.Lock() - defer connLock.Unlock() - - if conn != nil { - return nil, errors.New("only one connection per process is supported") - } - - conn = &Connection{ - vpp: vppAdapter, - codec: &codec.MsgCodec{}, - channels: make(map[uint16]*channel), - msgIDs: make(map[string]uint16), - msgMap: make(map[uint16]api.Message), - notifSubscriptions: make(map[uint16][]*api.NotifSubscription), - } - conn.vpp.SetMsgCallback(conn.msgCallback) - - return conn, nil -} - // connectVPP performs blocking attempt to connect to VPP. func (c *Connection) connectVPP() error { log.Debug("Connecting to VPP..") // blocking connect - if err := c.vpp.Connect(); err != nil { + if err := c.vppClient.Connect(); err != nil { return err } log.Debugf("Connected to VPP.") if err := c.retrieveMessageIDs(); err != nil { - c.vpp.Disconnect() + c.vppClient.Disconnect() return fmt.Errorf("VPP is incompatible: %v", err) } // store connected state - atomic.StoreUint32(&c.connected, 1) + atomic.StoreUint32(&c.vppConnected, 1) return nil } -func getMsgNameWithCrc(x api.Message) string { - return x.GetMessageName() + "_" + x.GetCrcString() -} - -// retrieveMessageIDs retrieves IDs for all registered messages and stores them in map -func (c *Connection) retrieveMessageIDs() (err error) { - t := time.Now() - - var addMsg = func(msgID uint16, msg api.Message) { - c.msgIDs[getMsgNameWithCrc(msg)] = msgID - c.msgMap[msgID] = msg - } - - msgs := api.GetAllMessages() - - for name, msg := range msgs { - msgID, err := c.vpp.GetMsgID(msg.GetMessageName(), msg.GetCrcString()) - if err != nil { - return err - } - - addMsg(msgID, msg) - - if msg.GetMessageName() == msgControlPing.GetMessageName() { - c.pingReqID = msgID - msgControlPing = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message) - } else if msg.GetMessageName() == msgControlPingReply.GetMessageName() { - c.pingReplyID = msgID - msgControlPingReply = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message) - } - - if debugMsgIDs { - log.Debugf("message %q (%s) has ID: %d", name, getMsgNameWithCrc(msg), msgID) - } +// Disconnect disconnects from VPP API and releases all connection-related resources. +func (c *Connection) Disconnect() { + if c == nil { + return } - log.Debugf("retrieving %d message IDs took %s", len(msgs), time.Since(t)) - - // fallback for control ping when vpe package is not imported - if c.pingReqID == 0 { - c.pingReqID, err = c.vpp.GetMsgID(msgControlPing.GetMessageName(), msgControlPing.GetCrcString()) - if err != nil { - return err - } - addMsg(c.pingReqID, msgControlPing) - } - if c.pingReplyID == 0 { - c.pingReplyID, err = c.vpp.GetMsgID(msgControlPingReply.GetMessageName(), msgControlPingReply.GetCrcString()) - if err != nil { - return err - } - addMsg(c.pingReplyID, msgControlPingReply) + if c.vppClient != nil { + c.disconnectVPP() } - - return nil } -// GetMessageID returns message identifier of given API message. -func (c *Connection) GetMessageID(msg api.Message) (uint16, error) { - if c == nil { - return 0, errors.New("nil connection passed in") +// disconnectVPP disconnects from VPP in case it is connected. +func (c *Connection) disconnectVPP() { + if atomic.CompareAndSwapUint32(&c.vppConnected, 1, 0) { + c.vppClient.Disconnect() } +} - if msgID, ok := c.msgIDs[getMsgNameWithCrc(msg)]; ok { - return msgID, nil - } +func (c *Connection) NewAPIChannel() (api.Channel, error) { + return c.newAPIChannel(RequestChanBufSize, ReplyChanBufSize) +} - return 0, fmt.Errorf("unknown message: %s (%s)", msg.GetMessageName(), msg.GetCrcString()) +func (c *Connection) NewAPIChannelBuffered(reqChanBufSize, replyChanBufSize int) (api.Channel, error) { + return c.newAPIChannel(reqChanBufSize, replyChanBufSize) } -// LookupByID looks up message name and crc by ID. -func (c *Connection) LookupByID(msgID uint16) (api.Message, error) { +// NewAPIChannelBuffered returns a new API channel for communication with VPP via govpp core. +// It allows to specify custom buffer sizes for the request and reply Go channels. +func (c *Connection) newAPIChannel(reqChanBufSize, replyChanBufSize int) (*Channel, error) { if c == nil { return nil, errors.New("nil connection passed in") } - if msg, ok := c.msgMap[msgID]; ok { - return msg, nil - } + // create new channel + chID := uint16(atomic.AddUint32(&c.maxChannelID, 1) & 0x7fff) + channel := newChannel(chID, c, c.codec, c, reqChanBufSize, replyChanBufSize) - return nil, fmt.Errorf("unknown message ID: %d", msgID) + // store API channel within the client + c.channelsLock.Lock() + c.channels[chID] = channel + c.channelsLock.Unlock() + + // start watching on the request channel + go c.watchRequests(channel) + + return channel, nil } -// disconnectVPP disconnects from VPP in case it is connected. -func (c *Connection) disconnectVPP() { - if atomic.CompareAndSwapUint32(&c.connected, 1, 0) { - c.vpp.Disconnect() - } +// releaseAPIChannel releases API channel that needs to be closed. +func (c *Connection) releaseAPIChannel(ch *Channel) { + log.WithFields(logger.Fields{ + "channel": ch.id, + }).Debug("API channel released") + + // delete the channel from channels map + c.channelsLock.Lock() + delete(c.channels, ch.id) + c.channelsLock.Unlock() } // connectLoop attempts to connect to VPP until it succeeds. // Then it continues with healthCheckLoop. func (c *Connection) connectLoop(connChan chan ConnectionEvent) { + reconnectAttempts := 0 + // loop until connected for { - if err := c.vpp.WaitReady(); err != nil { + if err := c.vppClient.WaitReady(); err != nil { log.Warnf("wait ready failed: %v", err) } if err := c.connectVPP(); err == nil { // signal connected event connChan <- ConnectionEvent{Timestamp: time.Now(), State: Connected} break + } else if reconnectAttempts < c.maxAttempts { + reconnectAttempts++ + log.Errorf("connecting failed (attempt %d/%d): %v", reconnectAttempts, c.maxAttempts, err) + time.Sleep(c.recInterval) } else { - log.Errorf("connecting to VPP failed: %v", err) - time.Sleep(time.Second) + connChan <- ConnectionEvent{Timestamp: time.Now(), State: Failed, Error: err} + return } } @@ -341,9 +294,9 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) { // send health check probes until an error or timeout occurs for { // sleep until next health check probe period - time.Sleep(healthCheckInterval) + time.Sleep(HealthCheckProbeInterval) - if atomic.LoadUint32(&c.connected) == 0 { + if atomic.LoadUint32(&c.vppConnected) == 0 { // Disconnect has been called in the meantime, return the healthcheck - reconnect loop log.Debug("Disconnected on request, exiting health check loop.") return @@ -365,7 +318,7 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) { case vppReply := <-ch.replyChan: err = vppReply.err - case <-time.After(healthCheckReplyTimeout): + case <-time.After(HealthCheckReplyTimeout): err = ErrProbeTimeout // check if time since last reply from any other @@ -374,7 +327,7 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) { sinceLastReply = time.Since(c.lastReply) c.lastReplyLock.Unlock() - if sinceLastReply < healthCheckReplyTimeout { + if sinceLastReply < HealthCheckReplyTimeout { log.Warnf("VPP health check probe timing out, but some request on other channel was received %v ago, continue waiting!", sinceLastReply) continue } @@ -384,10 +337,10 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) { if err == ErrProbeTimeout { failedChecks++ - log.Warnf("VPP health check probe timed out after %v (%d. timeout)", healthCheckReplyTimeout, failedChecks) - if failedChecks > healthCheckThreshold { + log.Warnf("VPP health check probe timed out after %v (%d. timeout)", HealthCheckReplyTimeout, failedChecks) + if failedChecks > HealthCheckThreshold { // in case of exceeded failed check treshold, assume VPP disconnected - log.Errorf("VPP health check exceeded treshold for timeouts (>%d), assuming disconnect", healthCheckThreshold) + log.Errorf("VPP health check exceeded treshold for timeouts (>%d), assuming disconnect", HealthCheckThreshold) connChan <- ConnectionEvent{Timestamp: time.Now(), State: Disconnected} break } @@ -411,52 +364,78 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) { c.connectLoop(connChan) } -func (c *Connection) NewAPIChannel() (api.Channel, error) { - return c.newAPIChannel(requestChannelBufSize, replyChannelBufSize) +func getMsgNameWithCrc(x api.Message) string { + return x.GetMessageName() + "_" + x.GetCrcString() } -func (c *Connection) NewAPIChannelBuffered(reqChanBufSize, replyChanBufSize int) (api.Channel, error) { - return c.newAPIChannel(reqChanBufSize, replyChanBufSize) +func getMsgFactory(msg api.Message) func() api.Message { + return func() api.Message { + return reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message) + } } -// NewAPIChannelBuffered returns a new API channel for communication with VPP via govpp core. -// It allows to specify custom buffer sizes for the request and reply Go channels. -func (c *Connection) newAPIChannel(reqChanBufSize, replyChanBufSize int) (*channel, error) { +// GetMessageID returns message identifier of given API message. +func (c *Connection) GetMessageID(msg api.Message) (uint16, error) { if c == nil { - return nil, errors.New("nil connection passed in") + return 0, errors.New("nil connection passed in") } - chID := uint16(atomic.AddUint32(&c.maxChannelID, 1) & 0x7fff) - ch := &channel{ - id: chID, - replyTimeout: defaultReplyTimeout, - msgDecoder: c.codec, - msgIdentifier: c, - reqChan: make(chan *vppRequest, reqChanBufSize), - replyChan: make(chan *vppReply, replyChanBufSize), - notifSubsChan: make(chan *subscriptionRequest, reqChanBufSize), - notifSubsReplyChan: make(chan error, replyChanBufSize), + if msgID, ok := c.msgIDs[getMsgNameWithCrc(msg)]; ok { + return msgID, nil } - // store API channel within the client - c.channelsLock.Lock() - c.channels[chID] = ch - c.channelsLock.Unlock() + msgID, err := c.vppClient.GetMsgID(msg.GetMessageName(), msg.GetCrcString()) + if err != nil { + return 0, err + } - // start watching on the request channel - go c.watchRequests(ch) + c.msgIDs[getMsgNameWithCrc(msg)] = msgID + c.msgMap[msgID] = msg - return ch, nil + return msgID, nil } -// releaseAPIChannel releases API channel that needs to be closed. -func (c *Connection) releaseAPIChannel(ch *channel) { - log.WithFields(logger.Fields{ - "channel": ch.id, - }).Debug("API channel released") +// LookupByID looks up message name and crc by ID. +func (c *Connection) LookupByID(msgID uint16) (api.Message, error) { + if c == nil { + return nil, errors.New("nil connection passed in") + } - // delete the channel from channels map - c.channelsLock.Lock() - delete(c.channels, ch.id) - c.channelsLock.Unlock() + if msg, ok := c.msgMap[msgID]; ok { + return msg, nil + } + + return nil, fmt.Errorf("unknown message ID: %d", msgID) +} + +// retrieveMessageIDs retrieves IDs for all registered messages and stores them in map +func (c *Connection) retrieveMessageIDs() (err error) { + t := time.Now() + + msgs := api.GetRegisteredMessages() + + var n int + for name, msg := range msgs { + msgID, err := c.GetMessageID(msg) + if err != nil { + log.Debugf("retrieving msgID for %s failed: %v", name, err) + continue + } + n++ + + if c.pingReqID == 0 && msg.GetMessageName() == msgControlPing.GetMessageName() { + c.pingReqID = msgID + msgControlPing = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message) + } else if c.pingReplyID == 0 && msg.GetMessageName() == msgControlPingReply.GetMessageName() { + c.pingReplyID = msgID + msgControlPingReply = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message) + } + + if debugMsgIDs { + log.Debugf("message %q (%s) has ID: %d", name, getMsgNameWithCrc(msg), msgID) + } + } + log.Debugf("retrieved %d/%d msgIDs (took %s)", n, len(msgs), time.Since(t)) + + return nil }