socketclient: wait for socket to be created
[govpp.git] / core / connection.go
index c77358f..67c7e1d 100644 (file)
@@ -30,40 +30,22 @@ import (
 )
 
 const (
-       requestChannelBufSize      = 100 // default size of the request channel buffer
-       replyChannelBufSize        = 100 // default size of the reply channel buffer
-       notificationChannelBufSize = 100 // default size of the notification channel buffer
-
-       defaultReplyTimeout = time.Second * 1 // default timeout for replies from VPP, can be changed with SetReplyTimeout
+       DefaultReconnectInterval    = time.Second // default interval between reconnect attempts
+       DefaultMaxReconnectAttempts = 3           // default maximum number of reconnect attempts
 )
 
 var (
-       healthCheckInterval     = time.Second * 1        // default health check interval
-       healthCheckReplyTimeout = time.Millisecond * 100 // timeout for reply to a health check
-       healthCheckThreshold    = 1                      // number of failed health checks until the error is reported
+       RequestChanBufSize      = 100 // default size of the request channel buffer
+       ReplyChanBufSize        = 100 // default size of the reply channel buffer
+       NotificationChanBufSize = 100 // default size of the notification channel buffer
 )
 
-// SetHealthCheckProbeInterval sets health check probe interval.
-// Beware: Function is not thread-safe. It is recommended to setup this parameter
-// before connecting to vpp.
-func SetHealthCheckProbeInterval(interval time.Duration) {
-       healthCheckInterval = interval
-}
-
-// SetHealthCheckReplyTimeout sets timeout for reply to a health check probe.
-// If reply arrives after the timeout, check is considered as failed.
-// Beware: Function is not thread-safe. It is recommended to setup this parameter
-// before connecting to vpp.
-func SetHealthCheckReplyTimeout(timeout time.Duration) {
-       healthCheckReplyTimeout = timeout
-}
-
-// SetHealthCheckThreshold sets the number of failed healthProbe checks until the error is reported.
-// Beware: Function is not thread-safe. It is recommended to setup this parameter
-// before connecting to vpp.
-func SetHealthCheckThreshold(threshold int) {
-       healthCheckThreshold = threshold
-}
+var (
+       HealthCheckProbeInterval = time.Second            // default health check probe interval
+       HealthCheckReplyTimeout  = time.Millisecond * 100 // timeout for reply to a health check probe
+       HealthCheckThreshold     = 1                      // number of failed health checks until the error is reported
+       DefaultReplyTimeout      = time.Second            // default timeout for replies from VPP
+)
 
 // ConnectionState represents the current state of the connection to VPP.
 type ConnectionState int
@@ -74,8 +56,24 @@ const (
 
        // Disconnected represents state in which the connection has been dropped.
        Disconnected
+
+       // Failed represents state in which the reconnecting failed after exceeding maximum number of attempts.
+       Failed
 )
 
+func (s ConnectionState) String() string {
+       switch s {
+       case Connected:
+               return "Connected"
+       case Disconnected:
+               return "Disconnected"
+       case Failed:
+               return "Failed"
+       default:
+               return fmt.Sprintf("UnknownState(%d)", s)
+       }
+}
+
 // ConnectionEvent is a notification about change in the VPP connection state.
 type ConnectionEvent struct {
        // Timestamp holds the time when the event has been created.
@@ -88,15 +86,15 @@ type ConnectionEvent struct {
        Error error
 }
 
-var (
-       connLock sync.RWMutex // lock for the global connection
-       conn     *Connection  // global handle to the Connection (used in the message receive callback)
-)
-
 // Connection represents a shared memory connection to VPP via vppAdapter.
 type Connection struct {
-       vpp       adapter.VppAdapter // VPP adapter
-       connected uint32             // non-zero if the adapter is connected to VPP
+       vppClient adapter.VppAPI // VPP binary API client
+       //statsClient adapter.StatsAPI // VPP stats API client
+
+       maxAttempts int           // interval for reconnect attempts
+       recInterval time.Duration // maximum number of reconnect attempts
+
+       vppConnected uint32 // non-zero if the adapter is connected to VPP
 
        codec  *codec.MsgCodec        // message codec
        msgIDs map[string]uint16      // map of message IDs indexed by message name + CRC
@@ -104,10 +102,10 @@ type Connection struct {
 
        maxChannelID uint32              // maximum used channel ID (the real limit is 2^15, 32-bit is used for atomic operations)
        channelsLock sync.RWMutex        // lock for the channels map
-       channels     map[uint16]*channel // map of all API channels indexed by the channel ID
+       channels     map[uint16]*Channel // map of all API channels indexed by the channel ID
 
-       notifSubscriptionsLock sync.RWMutex                        // lock for the subscriptions map
-       notifSubscriptions     map[uint16][]*api.NotifSubscription // map od all notification subscriptions indexed by message ID
+       subscriptionsLock sync.RWMutex                  // lock for the subscriptions map
+       subscriptions     map[uint16][]*subscriptionCtx // map od all notification subscriptions indexed by message ID
 
        pingReqID   uint16 // ID if the ControlPing message
        pingReplyID uint16 // ID of the ControlPingReply message
@@ -116,18 +114,37 @@ type Connection struct {
        lastReply     time.Time  // time of the last received reply from VPP
 }
 
-// Connect connects to VPP using specified VPP adapter and returns the connection handle.
-// This call blocks until VPP is connected, or an error occurs. Only one connection attempt will be performed.
-func Connect(vppAdapter adapter.VppAdapter) (*Connection, error) {
-       // create new connection handle
-       c, err := newConnection(vppAdapter)
-       if err != nil {
-               return nil, err
+func newConnection(binapi adapter.VppAPI, attempts int, interval time.Duration) *Connection {
+       if attempts == 0 {
+               attempts = DefaultMaxReconnectAttempts
+       }
+       if interval == 0 {
+               interval = DefaultReconnectInterval
        }
 
+       c := &Connection{
+               vppClient:     binapi,
+               maxAttempts:   attempts,
+               recInterval:   interval,
+               codec:         &codec.MsgCodec{},
+               msgIDs:        make(map[string]uint16),
+               msgMap:        make(map[uint16]api.Message),
+               channels:      make(map[uint16]*Channel),
+               subscriptions: make(map[uint16][]*subscriptionCtx),
+       }
+       binapi.SetMsgCallback(c.msgCallback)
+       return c
+}
+
+// Connect connects to VPP API using specified adapter and returns a connection handle.
+// This call blocks until it is either connected, or an error occurs.
+// Only one connection attempt will be performed.
+func Connect(binapi adapter.VppAPI) (*Connection, error) {
+       // create new connection handle
+       c := newConnection(binapi, DefaultMaxReconnectAttempts, DefaultReconnectInterval)
+
        // blocking attempt to connect to VPP
-       err = c.connectVPP()
-       if err != nil {
+       if err := c.connectVPP(); err != nil {
                return nil, err
        }
 
@@ -138,184 +155,120 @@ func Connect(vppAdapter adapter.VppAdapter) (*Connection, error) {
 // and ConnectionState channel. This call does not block until connection is established, it
 // returns immediately. The caller is supposed to watch the returned ConnectionState channel for
 // Connected/Disconnected events. In case of disconnect, the library will asynchronously try to reconnect.
-func AsyncConnect(vppAdapter adapter.VppAdapter) (*Connection, chan ConnectionEvent, error) {
+func AsyncConnect(binapi adapter.VppAPI, attempts int, interval time.Duration) (*Connection, chan ConnectionEvent, error) {
        // create new connection handle
-       c, err := newConnection(vppAdapter)
-       if err != nil {
-               return nil, nil, err
-       }
+       c := newConnection(binapi, attempts, interval)
 
        // asynchronously attempt to connect to VPP
-       connChan := make(chan ConnectionEvent, notificationChannelBufSize)
+       connChan := make(chan ConnectionEvent, NotificationChanBufSize)
        go c.connectLoop(connChan)
 
        return c, connChan, nil
 }
 
-// Disconnect disconnects from VPP and releases all connection-related resources.
-func (c *Connection) Disconnect() {
-       if c == nil {
-               return
-       }
-
-       connLock.Lock()
-       defer connLock.Unlock()
-
-       if c.vpp != nil {
-               c.disconnectVPP()
-       }
-       conn = nil
-}
-
-// newConnection returns new connection handle.
-func newConnection(vppAdapter adapter.VppAdapter) (*Connection, error) {
-       connLock.Lock()
-       defer connLock.Unlock()
-
-       if conn != nil {
-               return nil, errors.New("only one connection per process is supported")
-       }
-
-       conn = &Connection{
-               vpp:                vppAdapter,
-               codec:              &codec.MsgCodec{},
-               channels:           make(map[uint16]*channel),
-               msgIDs:             make(map[string]uint16),
-               msgMap:             make(map[uint16]api.Message),
-               notifSubscriptions: make(map[uint16][]*api.NotifSubscription),
-       }
-       conn.vpp.SetMsgCallback(conn.msgCallback)
-
-       return conn, nil
-}
-
 // connectVPP performs blocking attempt to connect to VPP.
 func (c *Connection) connectVPP() error {
        log.Debug("Connecting to VPP..")
 
        // blocking connect
-       if err := c.vpp.Connect(); err != nil {
+       if err := c.vppClient.Connect(); err != nil {
                return err
        }
 
        log.Debugf("Connected to VPP.")
 
        if err := c.retrieveMessageIDs(); err != nil {
-               c.vpp.Disconnect()
+               c.vppClient.Disconnect()
                return fmt.Errorf("VPP is incompatible: %v", err)
        }
 
        // store connected state
-       atomic.StoreUint32(&c.connected, 1)
+       atomic.StoreUint32(&c.vppConnected, 1)
 
        return nil
 }
 
-func getMsgNameWithCrc(x api.Message) string {
-       return x.GetMessageName() + "_" + x.GetCrcString()
-}
-
-// retrieveMessageIDs retrieves IDs for all registered messages and stores them in map
-func (c *Connection) retrieveMessageIDs() (err error) {
-       t := time.Now()
-
-       var addMsg = func(msgID uint16, msg api.Message) {
-               c.msgIDs[getMsgNameWithCrc(msg)] = msgID
-               c.msgMap[msgID] = msg
-       }
-
-       msgs := api.GetAllMessages()
-
-       for name, msg := range msgs {
-               msgID, err := c.vpp.GetMsgID(msg.GetMessageName(), msg.GetCrcString())
-               if err != nil {
-                       return err
-               }
-
-               addMsg(msgID, msg)
-
-               if msg.GetMessageName() == msgControlPing.GetMessageName() {
-                       c.pingReqID = msgID
-                       msgControlPing = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
-               } else if msg.GetMessageName() == msgControlPingReply.GetMessageName() {
-                       c.pingReplyID = msgID
-                       msgControlPingReply = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
-               }
-
-               if debugMsgIDs {
-                       log.Debugf("message %q (%s) has ID: %d", name, getMsgNameWithCrc(msg), msgID)
-               }
+// Disconnect disconnects from VPP API and releases all connection-related resources.
+func (c *Connection) Disconnect() {
+       if c == nil {
+               return
        }
 
-       log.Debugf("retrieving %d message IDs took %s", len(msgs), time.Since(t))
-
-       // fallback for control ping when vpe package is not imported
-       if c.pingReqID == 0 {
-               c.pingReqID, err = c.vpp.GetMsgID(msgControlPing.GetMessageName(), msgControlPing.GetCrcString())
-               if err != nil {
-                       return err
-               }
-               addMsg(c.pingReqID, msgControlPing)
-       }
-       if c.pingReplyID == 0 {
-               c.pingReplyID, err = c.vpp.GetMsgID(msgControlPingReply.GetMessageName(), msgControlPingReply.GetCrcString())
-               if err != nil {
-                       return err
-               }
-               addMsg(c.pingReplyID, msgControlPingReply)
+       if c.vppClient != nil {
+               c.disconnectVPP()
        }
-
-       return nil
 }
 
-// GetMessageID returns message identifier of given API message.
-func (c *Connection) GetMessageID(msg api.Message) (uint16, error) {
-       if c == nil {
-               return 0, errors.New("nil connection passed in")
+// disconnectVPP disconnects from VPP in case it is connected.
+func (c *Connection) disconnectVPP() {
+       if atomic.CompareAndSwapUint32(&c.vppConnected, 1, 0) {
+               c.vppClient.Disconnect()
        }
+}
 
-       if msgID, ok := c.msgIDs[getMsgNameWithCrc(msg)]; ok {
-               return msgID, nil
-       }
+func (c *Connection) NewAPIChannel() (api.Channel, error) {
+       return c.newAPIChannel(RequestChanBufSize, ReplyChanBufSize)
+}
 
-       return 0, fmt.Errorf("unknown message: %s (%s)", msg.GetMessageName(), msg.GetCrcString())
+func (c *Connection) NewAPIChannelBuffered(reqChanBufSize, replyChanBufSize int) (api.Channel, error) {
+       return c.newAPIChannel(reqChanBufSize, replyChanBufSize)
 }
 
-// LookupByID looks up message name and crc by ID.
-func (c *Connection) LookupByID(msgID uint16) (api.Message, error) {
+// NewAPIChannelBuffered returns a new API channel for communication with VPP via govpp core.
+// It allows to specify custom buffer sizes for the request and reply Go channels.
+func (c *Connection) newAPIChannel(reqChanBufSize, replyChanBufSize int) (*Channel, error) {
        if c == nil {
                return nil, errors.New("nil connection passed in")
        }
 
-       if msg, ok := c.msgMap[msgID]; ok {
-               return msg, nil
-       }
+       // create new channel
+       chID := uint16(atomic.AddUint32(&c.maxChannelID, 1) & 0x7fff)
+       channel := newChannel(chID, c, c.codec, c, reqChanBufSize, replyChanBufSize)
 
-       return nil, fmt.Errorf("unknown message ID: %d", msgID)
+       // store API channel within the client
+       c.channelsLock.Lock()
+       c.channels[chID] = channel
+       c.channelsLock.Unlock()
+
+       // start watching on the request channel
+       go c.watchRequests(channel)
+
+       return channel, nil
 }
 
-// disconnectVPP disconnects from VPP in case it is connected.
-func (c *Connection) disconnectVPP() {
-       if atomic.CompareAndSwapUint32(&c.connected, 1, 0) {
-               c.vpp.Disconnect()
-       }
+// releaseAPIChannel releases API channel that needs to be closed.
+func (c *Connection) releaseAPIChannel(ch *Channel) {
+       log.WithFields(logger.Fields{
+               "channel": ch.id,
+       }).Debug("API channel released")
+
+       // delete the channel from channels map
+       c.channelsLock.Lock()
+       delete(c.channels, ch.id)
+       c.channelsLock.Unlock()
 }
 
 // connectLoop attempts to connect to VPP until it succeeds.
 // Then it continues with healthCheckLoop.
 func (c *Connection) connectLoop(connChan chan ConnectionEvent) {
+       reconnectAttempts := 0
+
        // loop until connected
        for {
-               if err := c.vpp.WaitReady(); err != nil {
+               if err := c.vppClient.WaitReady(); err != nil {
                        log.Warnf("wait ready failed: %v", err)
                }
                if err := c.connectVPP(); err == nil {
                        // signal connected event
                        connChan <- ConnectionEvent{Timestamp: time.Now(), State: Connected}
                        break
+               } else if reconnectAttempts < c.maxAttempts {
+                       reconnectAttempts++
+                       log.Errorf("connecting failed (attempt %d/%d): %v", reconnectAttempts, c.maxAttempts, err)
+                       time.Sleep(c.recInterval)
                } else {
-                       log.Errorf("connecting to VPP failed: %v", err)
-                       time.Sleep(time.Second)
+                       connChan <- ConnectionEvent{Timestamp: time.Now(), State: Failed, Error: err}
+                       return
                }
        }
 
@@ -341,9 +294,9 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) {
        // send health check probes until an error or timeout occurs
        for {
                // sleep until next health check probe period
-               time.Sleep(healthCheckInterval)
+               time.Sleep(HealthCheckProbeInterval)
 
-               if atomic.LoadUint32(&c.connected) == 0 {
+               if atomic.LoadUint32(&c.vppConnected) == 0 {
                        // Disconnect has been called in the meantime, return the healthcheck - reconnect loop
                        log.Debug("Disconnected on request, exiting health check loop.")
                        return
@@ -365,7 +318,7 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) {
                        case vppReply := <-ch.replyChan:
                                err = vppReply.err
 
-                       case <-time.After(healthCheckReplyTimeout):
+                       case <-time.After(HealthCheckReplyTimeout):
                                err = ErrProbeTimeout
 
                                // check if time since last reply from any other
@@ -374,7 +327,7 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) {
                                sinceLastReply = time.Since(c.lastReply)
                                c.lastReplyLock.Unlock()
 
-                               if sinceLastReply < healthCheckReplyTimeout {
+                               if sinceLastReply < HealthCheckReplyTimeout {
                                        log.Warnf("VPP health check probe timing out, but some request on other channel was received %v ago, continue waiting!", sinceLastReply)
                                        continue
                                }
@@ -384,10 +337,10 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) {
 
                if err == ErrProbeTimeout {
                        failedChecks++
-                       log.Warnf("VPP health check probe timed out after %v (%d. timeout)", healthCheckReplyTimeout, failedChecks)
-                       if failedChecks > healthCheckThreshold {
+                       log.Warnf("VPP health check probe timed out after %v (%d. timeout)", HealthCheckReplyTimeout, failedChecks)
+                       if failedChecks > HealthCheckThreshold {
                                // in case of exceeded failed check treshold, assume VPP disconnected
-                               log.Errorf("VPP health check exceeded treshold for timeouts (>%d), assuming disconnect", healthCheckThreshold)
+                               log.Errorf("VPP health check exceeded treshold for timeouts (>%d), assuming disconnect", HealthCheckThreshold)
                                connChan <- ConnectionEvent{Timestamp: time.Now(), State: Disconnected}
                                break
                        }
@@ -411,52 +364,78 @@ func (c *Connection) healthCheckLoop(connChan chan ConnectionEvent) {
        c.connectLoop(connChan)
 }
 
-func (c *Connection) NewAPIChannel() (api.Channel, error) {
-       return c.newAPIChannel(requestChannelBufSize, replyChannelBufSize)
+func getMsgNameWithCrc(x api.Message) string {
+       return x.GetMessageName() + "_" + x.GetCrcString()
 }
 
-func (c *Connection) NewAPIChannelBuffered(reqChanBufSize, replyChanBufSize int) (api.Channel, error) {
-       return c.newAPIChannel(reqChanBufSize, replyChanBufSize)
+func getMsgFactory(msg api.Message) func() api.Message {
+       return func() api.Message {
+               return reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
+       }
 }
 
-// NewAPIChannelBuffered returns a new API channel for communication with VPP via govpp core.
-// It allows to specify custom buffer sizes for the request and reply Go channels.
-func (c *Connection) newAPIChannel(reqChanBufSize, replyChanBufSize int) (*channel, error) {
+// GetMessageID returns message identifier of given API message.
+func (c *Connection) GetMessageID(msg api.Message) (uint16, error) {
        if c == nil {
-               return nil, errors.New("nil connection passed in")
+               return 0, errors.New("nil connection passed in")
        }
 
-       chID := uint16(atomic.AddUint32(&c.maxChannelID, 1) & 0x7fff)
-       ch := &channel{
-               id:                 chID,
-               replyTimeout:       defaultReplyTimeout,
-               msgDecoder:         c.codec,
-               msgIdentifier:      c,
-               reqChan:            make(chan *vppRequest, reqChanBufSize),
-               replyChan:          make(chan *vppReply, replyChanBufSize),
-               notifSubsChan:      make(chan *subscriptionRequest, reqChanBufSize),
-               notifSubsReplyChan: make(chan error, replyChanBufSize),
+       if msgID, ok := c.msgIDs[getMsgNameWithCrc(msg)]; ok {
+               return msgID, nil
        }
 
-       // store API channel within the client
-       c.channelsLock.Lock()
-       c.channels[chID] = ch
-       c.channelsLock.Unlock()
+       msgID, err := c.vppClient.GetMsgID(msg.GetMessageName(), msg.GetCrcString())
+       if err != nil {
+               return 0, err
+       }
 
-       // start watching on the request channel
-       go c.watchRequests(ch)
+       c.msgIDs[getMsgNameWithCrc(msg)] = msgID
+       c.msgMap[msgID] = msg
 
-       return ch, nil
+       return msgID, nil
 }
 
-// releaseAPIChannel releases API channel that needs to be closed.
-func (c *Connection) releaseAPIChannel(ch *channel) {
-       log.WithFields(logger.Fields{
-               "channel": ch.id,
-       }).Debug("API channel released")
+// LookupByID looks up message name and crc by ID.
+func (c *Connection) LookupByID(msgID uint16) (api.Message, error) {
+       if c == nil {
+               return nil, errors.New("nil connection passed in")
+       }
 
-       // delete the channel from channels map
-       c.channelsLock.Lock()
-       delete(c.channels, ch.id)
-       c.channelsLock.Unlock()
+       if msg, ok := c.msgMap[msgID]; ok {
+               return msg, nil
+       }
+
+       return nil, fmt.Errorf("unknown message ID: %d", msgID)
+}
+
+// retrieveMessageIDs retrieves IDs for all registered messages and stores them in map
+func (c *Connection) retrieveMessageIDs() (err error) {
+       t := time.Now()
+
+       msgs := api.GetRegisteredMessages()
+
+       var n int
+       for name, msg := range msgs {
+               msgID, err := c.GetMessageID(msg)
+               if err != nil {
+                       log.Debugf("retrieving msgID for %s failed: %v", name, err)
+                       continue
+               }
+               n++
+
+               if c.pingReqID == 0 && msg.GetMessageName() == msgControlPing.GetMessageName() {
+                       c.pingReqID = msgID
+                       msgControlPing = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
+               } else if c.pingReplyID == 0 && msg.GetMessageName() == msgControlPingReply.GetMessageName() {
+                       c.pingReplyID = msgID
+                       msgControlPingReply = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
+               }
+
+               if debugMsgIDs {
+                       log.Debugf("message %q (%s) has ID: %d", name, getMsgNameWithCrc(msg), msgID)
+               }
+       }
+       log.Debugf("retrieved %d/%d msgIDs (took %s)", n, len(msgs), time.Since(t))
+
+       return nil
 }