X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;ds=sidebyside;f=core%2Fconnection.go;h=67c7e1d9c60f348e1bcea5c29e9db96fe3ce45d2;hb=6fe52d72255456e7d73df9d2f6b4a8f724ed447d;hp=a44d0c4d80597996d30f70ce34c34359a8159599;hpb=da815585c3f75c4ac073b0766dd668abf83844d8;p=govpp.git diff --git a/core/connection.go b/core/connection.go index a44d0c4..67c7e1d 100644 --- a/core/connection.go +++ b/core/connection.go @@ -12,13 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:generate binapi-generator --input-dir=bin_api --output-dir=bin_api - package core import ( "errors" - "os" + "fmt" + "reflect" "sync" "sync/atomic" "time" @@ -28,236 +27,248 @@ import ( "git.fd.io/govpp.git/adapter" "git.fd.io/govpp.git/api" "git.fd.io/govpp.git/codec" - "git.fd.io/govpp.git/core/bin_api/vpe" ) -var ( - msgControlPing api.Message = &vpe.ControlPing{} - msgControlPingReply api.Message = &vpe.ControlPingReply{} +const ( + DefaultReconnectInterval = time.Second // default interval between reconnect attempts + DefaultMaxReconnectAttempts = 3 // default maximum number of reconnect attempts ) -const ( - requestChannelBufSize = 100 // default size of the request channel buffers - replyChannelBufSize = 100 // default size of the reply channel buffers - notificationChannelBufSize = 100 // default size of the notification channel buffers +var ( + RequestChanBufSize = 100 // default size of the request channel buffer + ReplyChanBufSize = 100 // default size of the reply channel buffer + NotificationChanBufSize = 100 // default size of the notification channel buffer ) var ( - healthCheckProbeInterval = time.Second * 1 // default health check probe interval - healthCheckReplyTimeout = time.Millisecond * 100 // timeout for reply to a health check probe - healthCheckThreshold = 1 // number of failed healthProbe until the error is reported + HealthCheckProbeInterval = time.Second // default health check probe interval + HealthCheckReplyTimeout = time.Millisecond * 100 // timeout for reply to a health check probe + HealthCheckThreshold = 1 // number of failed health checks until the error is reported + DefaultReplyTimeout = time.Second // default timeout for replies from VPP ) -// ConnectionState holds the current state of the connection to VPP. +// ConnectionState represents the current state of the connection to VPP. type ConnectionState int const ( - // Connected connection state means that the connection to VPP has been successfully established. + // Connected represents state in which the connection has been successfully established. Connected ConnectionState = iota - // Disconnected connection state means that the connection to VPP has been lost. + // Disconnected represents state in which the connection has been dropped. Disconnected + + // Failed represents state in which the reconnecting failed after exceeding maximum number of attempts. + Failed ) +func (s ConnectionState) String() string { + switch s { + case Connected: + return "Connected" + case Disconnected: + return "Disconnected" + case Failed: + return "Failed" + default: + return fmt.Sprintf("UnknownState(%d)", s) + } +} + // ConnectionEvent is a notification about change in the VPP connection state. type ConnectionEvent struct { - // Timestamp holds the time when the event has been generated. + // Timestamp holds the time when the event has been created. Timestamp time.Time - // State holds the new state of the connection to VPP at the time when the event has been generated. + // State holds the new state of the connection at the time when the event has been created. State ConnectionState + + // Error holds error if any encountered. + Error error } // Connection represents a shared memory connection to VPP via vppAdapter. type Connection struct { - vpp adapter.VppAdapter // VPP adapter - connected uint32 // non-zero if the adapter is connected to VPP - codec *codec.MsgCodec // message codec + vppClient adapter.VppAPI // VPP binary API client + //statsClient adapter.StatsAPI // VPP stats API client + + maxAttempts int // interval for reconnect attempts + recInterval time.Duration // maximum number of reconnect attempts + + vppConnected uint32 // non-zero if the adapter is connected to VPP - msgIDsLock sync.RWMutex // lock for the message IDs map - msgIDs map[string]uint16 // map of message IDs indexed by message name + CRC + codec *codec.MsgCodec // message codec + msgIDs map[string]uint16 // map of message IDs indexed by message name + CRC + msgMap map[uint16]api.Message // map of messages indexed by message ID + maxChannelID uint32 // maximum used channel ID (the real limit is 2^15, 32-bit is used for atomic operations) channelsLock sync.RWMutex // lock for the channels map - channels map[uint16]*channel // map of all API channels indexed by the channel ID + channels map[uint16]*Channel // map of all API channels indexed by the channel ID - notifSubscriptionsLock sync.RWMutex // lock for the subscriptions map - notifSubscriptions map[uint16][]*api.NotifSubscription // map od all notification subscriptions indexed by message ID + subscriptionsLock sync.RWMutex // lock for the subscriptions map + subscriptions map[uint16][]*subscriptionCtx // map od all notification subscriptions indexed by message ID - maxChannelID uint32 // maximum used channel ID (the real limit is 2^15, 32-bit is used for atomic operations) - pingReqID uint16 // ID if the ControlPing message - pingReplyID uint16 // ID of the ControlPingReply message + pingReqID uint16 // ID if the ControlPing message + pingReplyID uint16 // ID of the ControlPingReply message lastReplyLock sync.Mutex // lock for the last reply lastReply time.Time // time of the last received reply from VPP } -var ( - log *logger.Logger // global logger - conn *Connection // global handle to the Connection (used in the message receive callback) - connLock sync.RWMutex // lock for the global connection -) - -// init initializes global logger, which logs debug level messages to stdout. -func init() { - log = logger.New() - log.Out = os.Stdout - log.Level = logger.DebugLevel -} - -// SetLogger sets global logger to provided one. -func SetLogger(l *logger.Logger) { - log = l -} - -// SetHealthCheckProbeInterval sets health check probe interval. -// Beware: Function is not thread-safe. It is recommended to setup this parameter -// before connecting to vpp. -func SetHealthCheckProbeInterval(interval time.Duration) { - healthCheckProbeInterval = interval -} - -// SetHealthCheckReplyTimeout sets timeout for reply to a health check probe. -// If reply arrives after the timeout, check is considered as failed. -// Beware: Function is not thread-safe. It is recommended to setup this parameter -// before connecting to vpp. -func SetHealthCheckReplyTimeout(timeout time.Duration) { - healthCheckReplyTimeout = timeout -} - -// SetHealthCheckThreshold sets the number of failed healthProbe checks until the error is reported. -// Beware: Function is not thread-safe. It is recommended to setup this parameter -// before connecting to vpp. -func SetHealthCheckThreshold(threshold int) { - healthCheckThreshold = threshold -} +func newConnection(binapi adapter.VppAPI, attempts int, interval time.Duration) *Connection { + if attempts == 0 { + attempts = DefaultMaxReconnectAttempts + } + if interval == 0 { + interval = DefaultReconnectInterval + } -// SetControlPingMessages sets the messages for ControlPing and ControlPingReply -func SetControlPingMessages(controPing, controlPingReply api.Message) { - msgControlPing = controPing - msgControlPingReply = controlPingReply + c := &Connection{ + vppClient: binapi, + maxAttempts: attempts, + recInterval: interval, + codec: &codec.MsgCodec{}, + msgIDs: make(map[string]uint16), + msgMap: make(map[uint16]api.Message), + channels: make(map[uint16]*Channel), + subscriptions: make(map[uint16][]*subscriptionCtx), + } + binapi.SetMsgCallback(c.msgCallback) + return c } -// Connect connects to VPP using specified VPP adapter and returns the connection handle. -// This call blocks until VPP is connected, or an error occurs. Only one connection attempt will be performed. -func Connect(vppAdapter adapter.VppAdapter) (*Connection, error) { +// Connect connects to VPP API using specified adapter and returns a connection handle. +// This call blocks until it is either connected, or an error occurs. +// Only one connection attempt will be performed. +func Connect(binapi adapter.VppAPI) (*Connection, error) { // create new connection handle - c, err := newConnection(vppAdapter) - if err != nil { - return nil, err - } + c := newConnection(binapi, DefaultMaxReconnectAttempts, DefaultReconnectInterval) // blocking attempt to connect to VPP - err = c.connectVPP() - if err != nil { + if err := c.connectVPP(); err != nil { return nil, err } - return conn, nil + return c, nil } // AsyncConnect asynchronously connects to VPP using specified VPP adapter and returns the connection handle // and ConnectionState channel. This call does not block until connection is established, it // returns immediately. The caller is supposed to watch the returned ConnectionState channel for // Connected/Disconnected events. In case of disconnect, the library will asynchronously try to reconnect. -func AsyncConnect(vppAdapter adapter.VppAdapter) (*Connection, chan ConnectionEvent, error) { +func AsyncConnect(binapi adapter.VppAPI, attempts int, interval time.Duration) (*Connection, chan ConnectionEvent, error) { // create new connection handle - c, err := newConnection(vppAdapter) - if err != nil { - return nil, nil, err - } + c := newConnection(binapi, attempts, interval) // asynchronously attempt to connect to VPP - connChan := make(chan ConnectionEvent, notificationChannelBufSize) + connChan := make(chan ConnectionEvent, NotificationChanBufSize) go c.connectLoop(connChan) - return conn, connChan, nil + return c, connChan, nil +} + +// connectVPP performs blocking attempt to connect to VPP. +func (c *Connection) connectVPP() error { + log.Debug("Connecting to VPP..") + + // blocking connect + if err := c.vppClient.Connect(); err != nil { + return err + } + + log.Debugf("Connected to VPP.") + + if err := c.retrieveMessageIDs(); err != nil { + c.vppClient.Disconnect() + return fmt.Errorf("VPP is incompatible: %v", err) + } + + // store connected state + atomic.StoreUint32(&c.vppConnected, 1) + + return nil } -// Disconnect disconnects from VPP and releases all connection-related resources. +// Disconnect disconnects from VPP API and releases all connection-related resources. func (c *Connection) Disconnect() { if c == nil { return } - connLock.Lock() - defer connLock.Unlock() - if c != nil && c.vpp != nil { + if c.vppClient != nil { c.disconnectVPP() } - conn = nil } -// newConnection returns new connection handle. -func newConnection(vppAdapter adapter.VppAdapter) (*Connection, error) { - connLock.Lock() - defer connLock.Unlock() - - if conn != nil { - return nil, errors.New("only one connection per process is supported") - } - - conn = &Connection{ - vpp: vppAdapter, - codec: &codec.MsgCodec{}, - channels: make(map[uint16]*channel), - msgIDs: make(map[string]uint16), - notifSubscriptions: make(map[uint16][]*api.NotifSubscription), +// disconnectVPP disconnects from VPP in case it is connected. +func (c *Connection) disconnectVPP() { + if atomic.CompareAndSwapUint32(&c.vppConnected, 1, 0) { + c.vppClient.Disconnect() } +} - conn.vpp.SetMsgCallback(msgCallback) - return conn, nil +func (c *Connection) NewAPIChannel() (api.Channel, error) { + return c.newAPIChannel(RequestChanBufSize, ReplyChanBufSize) } -// connectVPP performs one blocking attempt to connect to VPP. -func (c *Connection) connectVPP() error { - log.Debug("Connecting to VPP...") +func (c *Connection) NewAPIChannelBuffered(reqChanBufSize, replyChanBufSize int) (api.Channel, error) { + return c.newAPIChannel(reqChanBufSize, replyChanBufSize) +} - // blocking connect - err := c.vpp.Connect() - if err != nil { - log.Warn(err) - return err +// NewAPIChannelBuffered returns a new API channel for communication with VPP via govpp core. +// It allows to specify custom buffer sizes for the request and reply Go channels. +func (c *Connection) newAPIChannel(reqChanBufSize, replyChanBufSize int) (*Channel, error) { + if c == nil { + return nil, errors.New("nil connection passed in") } - // store control ping IDs - if c.pingReqID, err = c.GetMessageID(msgControlPing); err != nil { - c.vpp.Disconnect() - return err - } - if c.pingReplyID, err = c.GetMessageID(msgControlPingReply); err != nil { - c.vpp.Disconnect() - return err - } + // create new channel + chID := uint16(atomic.AddUint32(&c.maxChannelID, 1) & 0x7fff) + channel := newChannel(chID, c, c.codec, c, reqChanBufSize, replyChanBufSize) - // store connected state - atomic.StoreUint32(&c.connected, 1) + // store API channel within the client + c.channelsLock.Lock() + c.channels[chID] = channel + c.channelsLock.Unlock() - log.Info("Connected to VPP.") - return nil + // start watching on the request channel + go c.watchRequests(channel) + + return channel, nil } -// disconnectVPP disconnects from VPP in case it is connected. -func (c *Connection) disconnectVPP() { - if atomic.CompareAndSwapUint32(&c.connected, 1, 0) { - c.vpp.Disconnect() - } +// releaseAPIChannel releases API channel that needs to be closed. +func (c *Connection) releaseAPIChannel(ch *Channel) { + log.WithFields(logger.Fields{ + "channel": ch.id, + }).Debug("API channel released") + + // delete the channel from channels map + c.channelsLock.Lock() + delete(c.channels, ch.id) + c.channelsLock.Unlock() } // connectLoop attempts to connect to VPP until it succeeds. // Then it continues with healthCheckLoop. func (c *Connection) connectLoop(connChan chan ConnectionEvent) { + reconnectAttempts := 0 + // loop until connected for { - if err := c.vpp.WaitReady(); err != nil { + if err := c.vppClient.WaitReady(); err != nil { log.Warnf("wait ready failed: %v", err) } if err := c.connectVPP(); err == nil { // signal connected event connChan <- ConnectionEvent{Timestamp: time.Now(), State: Connected} break + } else if reconnectAttempts < c.maxAttempts { + reconnectAttempts++ + log.Errorf("connecting failed (attempt %d/%d): %v", reconnectAttempts, c.maxAttempts, err) + time.Sleep(c.recInterval) } else { - log.Errorf("connecting to VPP failed: %v", err) - time.Sleep(time.Second) + connChan <- ConnectionEvent{Timestamp: time.Now(), State: Failed, Error: err} + return } } @@ -269,21 +280,23 @@ func (c *Connection) connectLoop(connChan chan ConnectionEvent) { // it continues with connectLoop and tries to reconnect. func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) { // create a separate API channel for health check probes - ch, err := conn.newAPIChannelBuffered(1, 1) + ch, err := c.newAPIChannel(1, 1) if err != nil { log.Error("Failed to create health check API channel, health check will be disabled:", err) return } - var sinceLastReply time.Duration - var failedChecks int + var ( + sinceLastReply time.Duration + failedChecks int + ) // send health check probes until an error or timeout occurs for { // sleep until next health check probe period - time.Sleep(healthCheckProbeInterval) + time.Sleep(HealthCheckProbeInterval) - if atomic.LoadUint32(&c.connected) == 0 { + if atomic.LoadUint32(&c.vppConnected) == 0 { // Disconnect has been called in the meantime, return the healthcheck - reconnect loop log.Debug("Disconnected on request, exiting health check loop.") return @@ -297,24 +310,24 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) { } // send the control ping request - ch.reqChan <- &api.VppRequest{Message: msgControlPing} + ch.reqChan <- &vppRequest{msg: msgControlPing} for { // expect response within timeout period select { case vppReply := <-ch.replyChan: - err = vppReply.Error + err = vppReply.err - case <-time.After(healthCheckReplyTimeout): + case <-time.After(HealthCheckReplyTimeout): err = ErrProbeTimeout // check if time since last reply from any other // channel is less than health check reply timeout - conn.lastReplyLock.Lock() + c.lastReplyLock.Lock() sinceLastReply = time.Since(c.lastReply) - conn.lastReplyLock.Unlock() + c.lastReplyLock.Unlock() - if sinceLastReply < healthCheckReplyTimeout { + if sinceLastReply < HealthCheckReplyTimeout { log.Warnf("VPP health check probe timing out, but some request on other channel was received %v ago, continue waiting!", sinceLastReply) continue } @@ -324,19 +337,20 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) { if err == ErrProbeTimeout { failedChecks++ - log.Warnf("VPP health check probe timed out after %v (%d. timeout)", healthCheckReplyTimeout, failedChecks) - if failedChecks > healthCheckThreshold { - // in case of exceeded treshold disconnect - log.Errorf("VPP health check exceeded treshold for timeouts (>%d), assuming disconnect", healthCheckThreshold) + log.Warnf("VPP health check probe timed out after %v (%d. timeout)", HealthCheckReplyTimeout, failedChecks) + if failedChecks > HealthCheckThreshold { + // in case of exceeded failed check treshold, assume VPP disconnected + log.Errorf("VPP health check exceeded treshold for timeouts (>%d), assuming disconnect", HealthCheckThreshold) connChan <- ConnectionEvent{Timestamp: time.Now(), State: Disconnected} break } } else if err != nil { - // in case of error disconnect + // in case of error, assume VPP disconnected log.Errorf("VPP health check probe failed: %v", err) - connChan <- ConnectionEvent{Timestamp: time.Now(), State: Disconnected} + connChan <- ConnectionEvent{Timestamp: time.Now(), State: Disconnected, Error: err} break } else if failedChecks > 0 { + // in case of success after failed checks, clear failed check counter failedChecks = 0 log.Infof("VPP health check probe OK") } @@ -350,54 +364,78 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) { c.connectLoop(connChan) } -func (c *Connection) NewAPIChannel() (api.Channel, error) { - return c.newAPIChannelBuffered(requestChannelBufSize, replyChannelBufSize) +func getMsgNameWithCrc(x api.Message) string { + return x.GetMessageName() + "_" + x.GetCrcString() } -func (c *Connection) NewAPIChannelBuffered(reqChanBufSize, replyChanBufSize int) (api.Channel, error) { - return c.newAPIChannelBuffered(reqChanBufSize, replyChanBufSize) +func getMsgFactory(msg api.Message) func() api.Message { + return func() api.Message { + return reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message) + } } -// NewAPIChannelBuffered returns a new API channel for communication with VPP via govpp core. -// It allows to specify custom buffer sizes for the request and reply Go channels. -func (c *Connection) newAPIChannelBuffered(reqChanBufSize, replyChanBufSize int) (*channel, error) { +// GetMessageID returns message identifier of given API message. +func (c *Connection) GetMessageID(msg api.Message) (uint16, error) { if c == nil { - return nil, errors.New("nil connection passed in") + return 0, errors.New("nil connection passed in") } - chID := uint16(atomic.AddUint32(&c.maxChannelID, 1) & 0x7fff) - ch := &channel{ - id: chID, - replyTimeout: defaultReplyTimeout, + if msgID, ok := c.msgIDs[getMsgNameWithCrc(msg)]; ok { + return msgID, nil } - ch.msgDecoder = c.codec - ch.msgIdentifier = c - // create the communication channels - ch.reqChan = make(chan *api.VppRequest, reqChanBufSize) - ch.replyChan = make(chan *api.VppReply, replyChanBufSize) - ch.notifSubsChan = make(chan *api.NotifSubscribeRequest, reqChanBufSize) - ch.notifSubsReplyChan = make(chan error, replyChanBufSize) + msgID, err := c.vppClient.GetMsgID(msg.GetMessageName(), msg.GetCrcString()) + if err != nil { + return 0, err + } - // store API channel within the client - c.channelsLock.Lock() - c.channels[chID] = ch - c.channelsLock.Unlock() + c.msgIDs[getMsgNameWithCrc(msg)] = msgID + c.msgMap[msgID] = msg - // start watching on the request channel - go c.watchRequests(ch) + return msgID, nil +} + +// LookupByID looks up message name and crc by ID. +func (c *Connection) LookupByID(msgID uint16) (api.Message, error) { + if c == nil { + return nil, errors.New("nil connection passed in") + } + + if msg, ok := c.msgMap[msgID]; ok { + return msg, nil + } - return ch, nil + return nil, fmt.Errorf("unknown message ID: %d", msgID) } -// releaseAPIChannel releases API channel that needs to be closed. -func (c *Connection) releaseAPIChannel(ch *channel) { - log.WithFields(logger.Fields{ - "ID": ch.id, - }).Debug("API channel closed.") +// retrieveMessageIDs retrieves IDs for all registered messages and stores them in map +func (c *Connection) retrieveMessageIDs() (err error) { + t := time.Now() - // delete the channel from channels map - c.channelsLock.Lock() - delete(c.channels, ch.id) - c.channelsLock.Unlock() + msgs := api.GetRegisteredMessages() + + var n int + for name, msg := range msgs { + msgID, err := c.GetMessageID(msg) + if err != nil { + log.Debugf("retrieving msgID for %s failed: %v", name, err) + continue + } + n++ + + if c.pingReqID == 0 && msg.GetMessageName() == msgControlPing.GetMessageName() { + c.pingReqID = msgID + msgControlPing = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message) + } else if c.pingReplyID == 0 && msg.GetMessageName() == msgControlPingReply.GetMessageName() { + c.pingReplyID = msgID + msgControlPingReply = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message) + } + + if debugMsgIDs { + log.Debugf("message %q (%s) has ID: %d", name, getMsgNameWithCrc(msg), msgID) + } + } + log.Debugf("retrieved %d/%d msgIDs (took %s)", n, len(msgs), time.Since(t)) + + return nil }