socketclient: wait for socket to be created
[govpp.git] / core / connection.go
index a44d0c4..67c7e1d 100644 (file)
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//go:generate binapi-generator --input-dir=bin_api --output-dir=bin_api
-
 package core
 
 import (
        "errors"
-       "os"
+       "fmt"
+       "reflect"
        "sync"
        "sync/atomic"
        "time"
@@ -28,236 +27,248 @@ import (
        "git.fd.io/govpp.git/adapter"
        "git.fd.io/govpp.git/api"
        "git.fd.io/govpp.git/codec"
-       "git.fd.io/govpp.git/core/bin_api/vpe"
 )
 
-var (
-       msgControlPing      api.Message = &vpe.ControlPing{}
-       msgControlPingReply api.Message = &vpe.ControlPingReply{}
+// Reconnect defaults. newConnection substitutes them when it is given zero
+// for the number of attempts or for the interval.
+const (
+       DefaultReconnectInterval    = time.Second // default interval between reconnect attempts
+       DefaultMaxReconnectAttempts = 3           // default maximum number of reconnect attempts
 )
 
-const (
-       requestChannelBufSize      = 100 // default size of the request channel buffers
-       replyChannelBufSize        = 100 // default size of the reply channel buffers
-       notificationChannelBufSize = 100 // default size of the notification channel buffers
+// Buffer sizes of the Go channels created by this package. RequestChanBufSize
+// and ReplyChanBufSize are the sizes NewAPIChannel passes on; NotificationChanBufSize
+// is the capacity of the event channel created by AsyncConnect.
+var (
+       RequestChanBufSize      = 100 // default size of the request channel buffer
+       ReplyChanBufSize        = 100 // default size of the reply channel buffer
+       NotificationChanBufSize = 100 // default size of the notification channel buffer
 )
 
+// Timing parameters. The HealthCheck* values are read by healthCheckLoop on
+// every probe.
+// NOTE(review): these are plain package variables with no synchronization;
+// presumably they should be set before connecting - confirm.
 var (
-       healthCheckProbeInterval = time.Second * 1        // default health check probe interval
-       healthCheckReplyTimeout  = time.Millisecond * 100 // timeout for reply to a health check probe
-       healthCheckThreshold     = 1                      // number of failed healthProbe until the error is reported
+       HealthCheckProbeInterval = time.Second            // default health check probe interval
+       HealthCheckReplyTimeout  = time.Millisecond * 100 // timeout for reply to a health check probe
+       HealthCheckThreshold     = 1                      // number of failed health checks until the error is reported
+       DefaultReplyTimeout      = time.Second            // default timeout for replies from VPP
 )
 
-// ConnectionState holds the current state of the connection to VPP.
+// ConnectionState represents the current state of the connection to VPP.
+// It is carried in ConnectionEvent.State.
 type ConnectionState int
 
 const (
-       // Connected connection state means that the connection to VPP has been successfully established.
+       // Connected represents the state in which the connection has been successfully established.
+       // It is the zero value of ConnectionState.
        Connected ConnectionState = iota
 
-       // Disconnected connection state means that the connection to VPP has been lost.
+       // Disconnected represents the state in which the connection has been dropped.
        Disconnected
+
+       // Failed represents the state in which connecting was given up after
+       // exceeding the maximum number of attempts.
+       Failed
 )
 
+// String returns the name of the connection state. A value that is not one
+// of the declared states is rendered as "UnknownState(<number>)".
+func (s ConnectionState) String() string {
+       // names is indexed by the state value itself
+       names := [...]string{
+               Connected:    "Connected",
+               Disconnected: "Disconnected",
+               Failed:       "Failed",
+       }
+       if s >= Connected && int(s) < len(names) {
+               return names[s]
+       }
+       return fmt.Sprintf("UnknownState(%d)", s)
+}
+
 // ConnectionEvent is a notification about change in the VPP connection state.
 type ConnectionEvent struct {
-       // Timestamp holds the time when the event has been generated.
+       // Timestamp holds the time when the event has been created.
        Timestamp time.Time
 
-       // State holds the new state of the connection to VPP at the time when the event has been generated.
+       // State holds the new state of the connection at the time when the event has been created.
        State ConnectionState
+
+       // Error holds the error that led to this event, if any.
+       // It is left nil when the event is created without one (e.g. for Connected).
+       Error error
 }
 
 // Connection represents a shared memory connection to VPP via vppAdapter.
 type Connection struct {
-       vpp       adapter.VppAdapter // VPP adapter
-       connected uint32             // non-zero if the adapter is connected to VPP
-       codec     *codec.MsgCodec    // message codec
+       vppClient adapter.VppAPI // VPP binary API client
+       //statsClient adapter.StatsAPI // VPP stats API client
+
+       maxAttempts int           // maximum number of reconnect attempts
+       recInterval time.Duration // interval between reconnect attempts
+
+       vppConnected uint32 // non-zero if the adapter is connected to VPP (accessed atomically)
 
-       msgIDsLock sync.RWMutex      // lock for the message IDs map
-       msgIDs     map[string]uint16 // map of message IDs indexed by message name + CRC
+       codec  *codec.MsgCodec        // message codec
+       msgIDs map[string]uint16      // map of message IDs indexed by message name + CRC
+       msgMap map[uint16]api.Message // map of messages indexed by message ID
 
+       maxChannelID uint32              // maximum used channel ID (the real limit is 2^15, 32-bit is used for atomic operations)
        channelsLock sync.RWMutex        // lock for the channels map
-       channels     map[uint16]*channel // map of all API channels indexed by the channel ID
+       channels     map[uint16]*Channel // map of all API channels indexed by the channel ID
 
-       notifSubscriptionsLock sync.RWMutex                        // lock for the subscriptions map
-       notifSubscriptions     map[uint16][]*api.NotifSubscription // map od all notification subscriptions indexed by message ID
+       subscriptionsLock sync.RWMutex                  // lock for the subscriptions map
+       subscriptions     map[uint16][]*subscriptionCtx // map of all notification subscriptions indexed by message ID
 
-       maxChannelID uint32 // maximum used channel ID (the real limit is 2^15, 32-bit is used for atomic operations)
-       pingReqID    uint16 // ID if the ControlPing message
-       pingReplyID  uint16 // ID of the ControlPingReply message
+       pingReqID   uint16 // ID of the ControlPing message
+       pingReplyID uint16 // ID of the ControlPingReply message
 
        lastReplyLock sync.Mutex // lock for the last reply
        lastReply     time.Time  // time of the last received reply from VPP
 }
 
-var (
-       log      *logger.Logger // global logger
-       conn     *Connection    // global handle to the Connection (used in the message receive callback)
-       connLock sync.RWMutex   // lock for the global connection
-)
-
-// init initializes global logger, which logs debug level messages to stdout.
-func init() {
-       log = logger.New()
-       log.Out = os.Stdout
-       log.Level = logger.DebugLevel
-}
-
-// SetLogger sets global logger to provided one.
-func SetLogger(l *logger.Logger) {
-       log = l
-}
-
-// SetHealthCheckProbeInterval sets health check probe interval.
-// Beware: Function is not thread-safe. It is recommended to setup this parameter
-// before connecting to vpp.
-func SetHealthCheckProbeInterval(interval time.Duration) {
-       healthCheckProbeInterval = interval
-}
-
-// SetHealthCheckReplyTimeout sets timeout for reply to a health check probe.
-// If reply arrives after the timeout, check is considered as failed.
-// Beware: Function is not thread-safe. It is recommended to setup this parameter
-// before connecting to vpp.
-func SetHealthCheckReplyTimeout(timeout time.Duration) {
-       healthCheckReplyTimeout = timeout
-}
-
-// SetHealthCheckThreshold sets the number of failed healthProbe checks until the error is reported.
-// Beware: Function is not thread-safe. It is recommended to setup this parameter
-// before connecting to vpp.
-func SetHealthCheckThreshold(threshold int) {
-       healthCheckThreshold = threshold
-}
+// newConnection returns a new connection handle that uses binapi as its VPP
+// binary API client. It does not connect.
+//
+// A zero attempts is replaced by DefaultMaxReconnectAttempts and a zero
+// interval by DefaultReconnectInterval. All maps of the connection are
+// initialized empty, and the connection's msgCallback is registered with the
+// adapter via SetMsgCallback.
+func newConnection(binapi adapter.VppAPI, attempts int, interval time.Duration) *Connection {
+       if attempts == 0 {
+               attempts = DefaultMaxReconnectAttempts
+       }
+       if interval == 0 {
+               interval = DefaultReconnectInterval
+       }
 
-// SetControlPingMessages sets the messages for ControlPing and ControlPingReply
-func SetControlPingMessages(controPing, controlPingReply api.Message) {
-       msgControlPing = controPing
-       msgControlPingReply = controlPingReply
+       c := &Connection{
+               vppClient:     binapi,
+               maxAttempts:   attempts,
+               recInterval:   interval,
+               codec:         &codec.MsgCodec{},
+               msgIDs:        make(map[string]uint16),
+               msgMap:        make(map[uint16]api.Message),
+               channels:      make(map[uint16]*Channel),
+               subscriptions: make(map[uint16][]*subscriptionCtx),
+       }
+       // messages received by the adapter are handed to this connection
+       binapi.SetMsgCallback(c.msgCallback)
+       return c
 }
 
-// Connect connects to VPP using specified VPP adapter and returns the connection handle.
-// This call blocks until VPP is connected, or an error occurs. Only one connection attempt will be performed.
-func Connect(vppAdapter adapter.VppAdapter) (*Connection, error) {
+// Connect connects to VPP API using the specified adapter and returns a connection handle.
+// This call blocks until it is either connected, or an error occurs.
+// Only one connection attempt will be performed; on failure the error from
+// that attempt is returned together with a nil connection.
+func Connect(binapi adapter.VppAPI) (*Connection, error) {
        // create new connection handle
-       c, err := newConnection(vppAdapter)
-       if err != nil {
-               return nil, err
-       }
+       c := newConnection(binapi, DefaultMaxReconnectAttempts, DefaultReconnectInterval)
 
        // blocking attempt to connect to VPP
-       err = c.connectVPP()
-       if err != nil {
+       if err := c.connectVPP(); err != nil {
                return nil, err
        }
 
-       return conn, nil
+       return c, nil
 }
 
 // AsyncConnect asynchronously connects to VPP using specified VPP adapter and returns the connection handle
 // and ConnectionState channel. This call does not block until connection is established, it
 // returns immediately. The caller is supposed to watch the returned ConnectionState channel for
 // Connected/Disconnected events. In case of disconnect, the library will asynchronously try to reconnect.
-func AsyncConnect(vppAdapter adapter.VppAdapter) (*Connection, chan ConnectionEvent, error) {
+//
+// attempts is the maximum number of reconnect attempts and interval is the pause
+// between them; passing zero for either selects DefaultMaxReconnectAttempts or
+// DefaultReconnectInterval. When the attempts are exhausted, a Failed event
+// carrying the last error is sent on the channel and connecting stops.
+// The returned error is always nil.
+func AsyncConnect(binapi adapter.VppAPI, attempts int, interval time.Duration) (*Connection, chan ConnectionEvent, error) {
        // create new connection handle
-       c, err := newConnection(vppAdapter)
-       if err != nil {
-               return nil, nil, err
-       }
+       c := newConnection(binapi, attempts, interval)
 
        // asynchronously attempt to connect to VPP
-       connChan := make(chan ConnectionEvent, notificationChannelBufSize)
+       connChan := make(chan ConnectionEvent, NotificationChanBufSize)
        go c.connectLoop(connChan)
 
-       return conn, connChan, nil
+       return c, connChan, nil
+}
+
+// connectVPP makes a single blocking connection attempt through the adapter
+// and, once connected, retrieves the IDs of the registered messages.
+// The connection is flagged as connected only after both steps succeeded;
+// if the message IDs cannot be retrieved, the adapter is disconnected again.
+func (c *Connection) connectVPP() error {
+       log.Debug("Connecting to VPP..")
+
+       // the adapter call blocks until it connects or fails
+       connErr := c.vppClient.Connect()
+       if connErr != nil {
+               return connErr
+       }
+       log.Debugf("Connected to VPP.")
+
+       // message IDs have to be known before the connection is put to use
+       idsErr := c.retrieveMessageIDs()
+       if idsErr != nil {
+               c.vppClient.Disconnect()
+               return fmt.Errorf("VPP is incompatible: %v", idsErr)
+       }
+
+       // mark the connection as established
+       atomic.StoreUint32(&c.vppConnected, 1)
+       return nil
 }
 
-// Disconnect disconnects from VPP and releases all connection-related resources.
+// Disconnect disconnects from VPP API and releases all connection-related resources.
+// It does nothing when called on a nil connection or on a connection without
+// an adapter; the actual disconnect is delegated to disconnectVPP.
 func (c *Connection) Disconnect() {
        if c == nil {
                return
        }
-       connLock.Lock()
-       defer connLock.Unlock()
 
-       if c != nil && c.vpp != nil {
+       if c.vppClient != nil {
                c.disconnectVPP()
        }
-       conn = nil
 }
 
-// newConnection returns new connection handle.
-func newConnection(vppAdapter adapter.VppAdapter) (*Connection, error) {
-       connLock.Lock()
-       defer connLock.Unlock()
-
-       if conn != nil {
-               return nil, errors.New("only one connection per process is supported")
-       }
-
-       conn = &Connection{
-               vpp:                vppAdapter,
-               codec:              &codec.MsgCodec{},
-               channels:           make(map[uint16]*channel),
-               msgIDs:             make(map[string]uint16),
-               notifSubscriptions: make(map[uint16][]*api.NotifSubscription),
+// disconnectVPP disconnects from VPP in case it is connected.
+// The connected flag is cleared with an atomic compare-and-swap, so the
+// adapter's Disconnect is invoked only by the caller that actually flips
+// the flag from 1 to 0.
+func (c *Connection) disconnectVPP() {
+       if atomic.CompareAndSwapUint32(&c.vppConnected, 1, 0) {
+               c.vppClient.Disconnect()
        }
+}
 
-       conn.vpp.SetMsgCallback(msgCallback)
-       return conn, nil
+// NewAPIChannel returns a new API channel for communication with VPP via govpp core.
+// The request and reply Go channels are buffered with RequestChanBufSize and
+// ReplyChanBufSize. On failure the returned channel is a nil interface value.
+func (c *Connection) NewAPIChannel() (api.Channel, error) {
+       ch, err := c.newAPIChannel(RequestChanBufSize, ReplyChanBufSize)
+       if err != nil {
+               // return an untyped nil: a nil *Channel stored in api.Channel
+               // would make the interface value compare as non-nil
+               return nil, err
+       }
+       return ch, nil
 }
 
-// connectVPP performs one blocking attempt to connect to VPP.
-func (c *Connection) connectVPP() error {
-       log.Debug("Connecting to VPP...")
+// NewAPIChannelBuffered returns a new API channel for communication with VPP via govpp core.
+// It allows to specify custom buffer sizes for the request and reply Go channels.
+// On failure the returned channel is a nil interface value.
+func (c *Connection) NewAPIChannelBuffered(reqChanBufSize, replyChanBufSize int) (api.Channel, error) {
+       ch, err := c.newAPIChannel(reqChanBufSize, replyChanBufSize)
+       if err != nil {
+               // return an untyped nil: a nil *Channel stored in api.Channel
+               // would make the interface value compare as non-nil
+               return nil, err
+       }
+       return ch, nil
+}
 
-       // blocking connect
-       err := c.vpp.Connect()
-       if err != nil {
-               log.Warn(err)
-               return err
+// newAPIChannel creates a new API channel with the given buffer sizes for the
+// request and reply Go channels, registers it in the connection's channels map
+// and starts a goroutine watching its requests.
+// It returns an error when called on a nil connection.
+func (c *Connection) newAPIChannel(reqChanBufSize, replyChanBufSize int) (*Channel, error) {
+       if c == nil {
+               return nil, errors.New("nil connection passed in")
        }
 
-       // store control ping IDs
-       if c.pingReqID, err = c.GetMessageID(msgControlPing); err != nil {
-               c.vpp.Disconnect()
-               return err
-       }
-       if c.pingReplyID, err = c.GetMessageID(msgControlPingReply); err != nil {
-               c.vpp.Disconnect()
-               return err
-       }
+       // create new channel; the ID is the atomically incremented counter
+       // truncated to its low 15 bits
+       // NOTE(review): once the counter wraps, an ID could coincide with that
+       // of a channel still present in the map - confirm this cannot happen.
+       chID := uint16(atomic.AddUint32(&c.maxChannelID, 1) & 0x7fff)
+       channel := newChannel(chID, c, c.codec, c, reqChanBufSize, replyChanBufSize)
 
-       // store connected state
-       atomic.StoreUint32(&c.connected, 1)
+       // store API channel within the client
+       c.channelsLock.Lock()
+       c.channels[chID] = channel
+       c.channelsLock.Unlock()
 
-       log.Info("Connected to VPP.")
-       return nil
+       // start watching on the request channel
+       go c.watchRequests(channel)
+
+       return channel, nil
 }
 
-// disconnectVPP disconnects from VPP in case it is connected.
-func (c *Connection) disconnectVPP() {
-       if atomic.CompareAndSwapUint32(&c.connected, 1, 0) {
-               c.vpp.Disconnect()
-       }
+// releaseAPIChannel logs the release of the given API channel and removes
+// it from the connection's channels map.
+func (c *Connection) releaseAPIChannel(ch *Channel) {
+       fields := logger.Fields{"channel": ch.id}
+       log.WithFields(fields).Debug("API channel released")
+
+       // drop the channel from the map while holding the channels lock
+       c.channelsLock.Lock()
+       defer c.channelsLock.Unlock()
+       delete(c.channels, ch.id)
 }
 
 // connectLoop attempts to connect to VPP until it succeeds.
 // Then it continues with healthCheckLoop.
 func (c *Connection) connectLoop(connChan chan ConnectionEvent) {
+       reconnectAttempts := 0
+
        // loop until connected
        for {
-               if err := c.vpp.WaitReady(); err != nil {
+               if err := c.vppClient.WaitReady(); err != nil {
                        log.Warnf("wait ready failed: %v", err)
                }
                if err := c.connectVPP(); err == nil {
                        // signal connected event
                        connChan <- ConnectionEvent{Timestamp: time.Now(), State: Connected}
                        break
+               } else if reconnectAttempts < c.maxAttempts {
+                       reconnectAttempts++
+                       log.Errorf("connecting failed (attempt %d/%d): %v", reconnectAttempts, c.maxAttempts, err)
+                       time.Sleep(c.recInterval)
                } else {
-                       log.Errorf("connecting to VPP failed: %v", err)
-                       time.Sleep(time.Second)
+                       connChan <- ConnectionEvent{Timestamp: time.Now(), State: Failed, Error: err}
+                       return
                }
        }
 
@@ -269,21 +280,23 @@ func (c *Connection) connectLoop(connChan chan ConnectionEvent) {
 // it continues with connectLoop and tries to reconnect.
 func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) {
        // create a separate API channel for health check probes
-       ch, err := conn.newAPIChannelBuffered(1, 1)
+       ch, err := c.newAPIChannel(1, 1)
        if err != nil {
                log.Error("Failed to create health check API channel, health check will be disabled:", err)
                return
        }
 
-       var sinceLastReply time.Duration
-       var failedChecks int
+       var (
+               sinceLastReply time.Duration
+               failedChecks   int
+       )
 
        // send health check probes until an error or timeout occurs
        for {
                // sleep until next health check probe period
-               time.Sleep(healthCheckProbeInterval)
+               time.Sleep(HealthCheckProbeInterval)
 
-               if atomic.LoadUint32(&c.connected) == 0 {
+               if atomic.LoadUint32(&c.vppConnected) == 0 {
                        // Disconnect has been called in the meantime, return the healthcheck - reconnect loop
                        log.Debug("Disconnected on request, exiting health check loop.")
                        return
@@ -297,24 +310,24 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) {
                }
 
                // send the control ping request
-               ch.reqChan <- &api.VppRequest{Message: msgControlPing}
+               ch.reqChan <- &vppRequest{msg: msgControlPing}
 
                for {
                        // expect response within timeout period
                        select {
                        case vppReply := <-ch.replyChan:
-                               err = vppReply.Error
+                               err = vppReply.err
 
-                       case <-time.After(healthCheckReplyTimeout):
+                       case <-time.After(HealthCheckReplyTimeout):
                                err = ErrProbeTimeout
 
                                // check if time since last reply from any other
                                // channel is less than health check reply timeout
-                               conn.lastReplyLock.Lock()
+                               c.lastReplyLock.Lock()
                                sinceLastReply = time.Since(c.lastReply)
-                               conn.lastReplyLock.Unlock()
+                               c.lastReplyLock.Unlock()
 
-                               if sinceLastReply < healthCheckReplyTimeout {
+                               if sinceLastReply < HealthCheckReplyTimeout {
                                        log.Warnf("VPP health check probe timing out, but some request on other channel was received %v ago, continue waiting!", sinceLastReply)
                                        continue
                                }
@@ -324,19 +337,20 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) {
 
                if err == ErrProbeTimeout {
                        failedChecks++
-                       log.Warnf("VPP health check probe timed out after %v (%d. timeout)", healthCheckReplyTimeout, failedChecks)
-                       if failedChecks > healthCheckThreshold {
-                               // in case of exceeded treshold disconnect
-                               log.Errorf("VPP health check exceeded treshold for timeouts (>%d), assuming disconnect", healthCheckThreshold)
+                       log.Warnf("VPP health check probe timed out after %v (%d. timeout)", HealthCheckReplyTimeout, failedChecks)
+                       if failedChecks > HealthCheckThreshold {
+                               // in case of exceeded failed check treshold, assume VPP disconnected
+                               log.Errorf("VPP health check exceeded treshold for timeouts (>%d), assuming disconnect", HealthCheckThreshold)
                                connChan <- ConnectionEvent{Timestamp: time.Now(), State: Disconnected}
                                break
                        }
                } else if err != nil {
-                       // in case of error disconnect
+                       // in case of error, assume VPP disconnected
                        log.Errorf("VPP health check probe failed: %v", err)
-                       connChan <- ConnectionEvent{Timestamp: time.Now(), State: Disconnected}
+                       connChan <- ConnectionEvent{Timestamp: time.Now(), State: Disconnected, Error: err}
                        break
                } else if failedChecks > 0 {
+                       // in case of success after failed checks, clear failed check counter
                        failedChecks = 0
                        log.Infof("VPP health check probe OK")
                }
@@ -350,54 +364,78 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) {
        c.connectLoop(connChan)
 }
 
-func (c *Connection) NewAPIChannel() (api.Channel, error) {
-       return c.newAPIChannelBuffered(requestChannelBufSize, replyChannelBufSize)
+// getMsgNameWithCrc returns the message name and its CRC string joined by
+// an underscore.
+func getMsgNameWithCrc(x api.Message) string {
+       name, crc := x.GetMessageName(), x.GetCrcString()
+       return name + "_" + crc
 }
 
-func (c *Connection) NewAPIChannelBuffered(reqChanBufSize, replyChanBufSize int) (api.Channel, error) {
-       return c.newAPIChannelBuffered(reqChanBufSize, replyChanBufSize)
+// getMsgFactory returns a function that, on every call, allocates a new
+// zero-valued message of the same underlying type as msg.
+// The type is obtained with reflect.TypeOf(msg).Elem(), so msg is expected
+// to be a pointer; reflect's Elem panics for kinds that have no element type.
+func getMsgFactory(msg api.Message) func() api.Message {
+       return func() api.Message {
+               return reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
        }
 }
 
-// NewAPIChannelBuffered returns a new API channel for communication with VPP via govpp core.
-// It allows to specify custom buffer sizes for the request and reply Go channels.
-func (c *Connection) newAPIChannelBuffered(reqChanBufSize, replyChanBufSize int) (*channel, error) {
+// GetMessageID returns message identifier of given API message.
+// IDs already known are served from the msgIDs map (keyed by message name
+// and CRC); otherwise the ID is requested from the adapter and, on success,
+// stored in both msgIDs and msgMap. It returns an error for a nil connection
+// or when the adapter lookup fails.
+// NOTE(review): msgIDs and msgMap are read and written here without holding
+// a lock - confirm that callers cannot run concurrently.
+func (c *Connection) GetMessageID(msg api.Message) (uint16, error) {
        if c == nil {
-               return nil, errors.New("nil connection passed in")
+               return 0, errors.New("nil connection passed in")
        }
 
-       chID := uint16(atomic.AddUint32(&c.maxChannelID, 1) & 0x7fff)
-       ch := &channel{
-               id:           chID,
-               replyTimeout: defaultReplyTimeout,
+       // fast path: the ID has been resolved before
+       if msgID, ok := c.msgIDs[getMsgNameWithCrc(msg)]; ok {
+               return msgID, nil
        }
-       ch.msgDecoder = c.codec
-       ch.msgIdentifier = c
 
-       // create the communication channels
-       ch.reqChan = make(chan *api.VppRequest, reqChanBufSize)
-       ch.replyChan = make(chan *api.VppReply, replyChanBufSize)
-       ch.notifSubsChan = make(chan *api.NotifSubscribeRequest, reqChanBufSize)
-       ch.notifSubsReplyChan = make(chan error, replyChanBufSize)
+       // ask the adapter for the ID
+       msgID, err := c.vppClient.GetMsgID(msg.GetMessageName(), msg.GetCrcString())
+       if err != nil {
+               return 0, err
+       }
 
-       // store API channel within the client
-       c.channelsLock.Lock()
-       c.channels[chID] = ch
-       c.channelsLock.Unlock()
+       // remember the mapping in both directions
+       c.msgIDs[getMsgNameWithCrc(msg)] = msgID
+       c.msgMap[msgID] = msg
 
-       // start watching on the request channel
-       go c.watchRequests(ch)
+       return msgID, nil
+}
+
+// LookupByID returns the message stored in msgMap under the given message ID.
+// It returns an error for a nil connection or when the ID is not in the map;
+// entries are added to the map by GetMessageID.
+func (c *Connection) LookupByID(msgID uint16) (api.Message, error) {
+       if c == nil {
+               return nil, errors.New("nil connection passed in")
+       }
+
+       if msg, ok := c.msgMap[msgID]; ok {
+               return msg, nil
+       }
 
-       return ch, nil
+       return nil, fmt.Errorf("unknown message ID: %d", msgID)
 }
 
-// releaseAPIChannel releases API channel that needs to be closed.
-func (c *Connection) releaseAPIChannel(ch *channel) {
-       log.WithFields(logger.Fields{
-               "ID": ch.id,
-       }).Debug("API channel closed.")
+// retrieveMessageIDs retrieves IDs for all registered messages and stores them
+// in the connection's maps (via GetMessageID).
+//
+// Messages whose ID cannot be retrieved are logged at debug level and skipped.
+// The err declared inside the loop shadows the named result, which is never
+// assigned, so this function always returns nil.
+//
+// While iterating it also records the IDs of the control ping request and
+// reply (matched by message name, first match wins while the stored ID is 0)
+// and replaces the package-level msgControlPing / msgControlPingReply with a
+// new instance of the matching registered type.
+func (c *Connection) retrieveMessageIDs() (err error) {
+       t := time.Now()
 
-       // delete the channel from channels map
-       c.channelsLock.Lock()
-       delete(c.channels, ch.id)
-       c.channelsLock.Unlock()
+       msgs := api.GetRegisteredMessages()
+
+       // n counts the messages whose ID was retrieved successfully
+       var n int
+       for name, msg := range msgs {
+               msgID, err := c.GetMessageID(msg)
+               if err != nil {
+                       log.Debugf("retrieving msgID for %s failed: %v", name, err)
+                       continue
+               }
+               n++
+
+               if c.pingReqID == 0 && msg.GetMessageName() == msgControlPing.GetMessageName() {
+                       c.pingReqID = msgID
+                       msgControlPing = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
+               } else if c.pingReplyID == 0 && msg.GetMessageName() == msgControlPingReply.GetMessageName() {
+                       c.pingReplyID = msgID
+                       msgControlPingReply = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
+               }
+
+               if debugMsgIDs {
+                       log.Debugf("message %q (%s) has ID: %d", name, getMsgNameWithCrc(msg), msgID)
+               }
+       }
+       log.Debugf("retrieved %d/%d msgIDs (took %s)", n, len(msgs), time.Since(t))
+
+       return nil
 }