X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=core%2Fconnection.go;h=f796f3daa55fb5487e9094736e88527357f207ef;hb=a4112fac7b86fe09650d2bb57969fe46404edd7d;hp=0f54f384022f1815e276b8e0576c899f115c1d5e;hpb=0893e8cffa021073c9a75969db4d090c6ec80f62;p=govpp.git diff --git a/core/connection.go b/core/connection.go index 0f54f38..f796f3d 100644 --- a/core/connection.go +++ b/core/connection.go @@ -17,6 +17,7 @@ package core import ( "errors" "fmt" + "path" "reflect" "sync" "sync/atomic" @@ -101,15 +102,16 @@ type Connection struct { vppConnected uint32 // non-zero if the adapter is connected to VPP - connChan chan ConnectionEvent // connection status events are sent to this channel + connChan chan ConnectionEvent // connection status events are sent to this channel + healthCheckDone chan struct{} // used to terminate health check loop - codec MessageCodec // message codec - msgIDs map[string]uint16 // map of message IDs indexed by message name + CRC - msgMap map[uint16]api.Message // map of messages indexed by message ID + codec MessageCodec // message codec + msgIDs map[string]uint16 // map of message IDs indexed by message name + CRC + msgMapByPath map[string]map[uint16]api.Message // map of messages indexed by message ID which are indexed by path - maxChannelID uint32 // maximum used channel ID (the real limit is 2^15, 32-bit is used for atomic operations) - channelsLock sync.RWMutex // lock for the channels map - channels map[uint16]*Channel // map of all API channels indexed by the channel ID + channelsLock sync.RWMutex // lock for the channels map and the channel ID + nextChannelID uint16 // next potential channel ID (the real limit is 2^15) + channels map[uint16]*Channel // map of all API channels indexed by the channel ID subscriptionsLock sync.RWMutex // lock for the subscriptions map subscriptions map[uint16][]*subscriptionCtx // map od all notification subscriptions indexed by message ID @@ -122,6 +124,8 @@ type Connection struct { msgControlPing api.Message msgControlPingReply api.Message + + apiTrace *trace // API tracer (disabled by default) } func newConnection(binapi adapter.VppAPI, attempts int, interval time.Duration) *Connection { @@ -137,13 +141,18 @@ func newConnection(binapi adapter.VppAPI, attempts int, interval time.Duration) maxAttempts: attempts, recInterval: interval, connChan: make(chan ConnectionEvent, NotificationChanBufSize), + healthCheckDone: make(chan struct{}), codec: codec.DefaultCodec, msgIDs: make(map[string]uint16), - msgMap: make(map[uint16]api.Message), + msgMapByPath: make(map[string]map[uint16]api.Message), channels: make(map[uint16]*Channel), subscriptions: make(map[uint16][]*subscriptionCtx), msgControlPing: msgControlPing, msgControlPingReply: msgControlPingReply, + apiTrace: &trace{ + list: make([]*api.Record, 0), + mux: &sync.Mutex{}, + }, } binapi.SetMsgCallback(c.msgCallback) return c @@ -207,13 +216,18 @@ func (c *Connection) Disconnect() { return } if c.vppClient != nil { - c.disconnectVPP() + c.disconnectVPP(true) } } -// disconnectVPP disconnects from VPP in case it is connected. -func (c *Connection) disconnectVPP() { +// disconnectVPP disconnects from VPP in case it is connected. terminate tells +// that disconnectVPP() was called from Close(), so healthCheckLoop() can be +// terminated. +func (c *Connection) disconnectVPP(terminate bool) { if atomic.CompareAndSwapUint32(&c.vppConnected, 1, 0) { + if terminate { + close(c.healthCheckDone) + } log.Debug("Disconnecting from VPP..") if err := c.vppClient.Disconnect(); err != nil { @@ -238,14 +252,10 @@ func (c *Connection) newAPIChannel(reqChanBufSize, replyChanBufSize int) (*Chann return nil, errors.New("nil connection passed in") } - // create new channel - chID := uint16(atomic.AddUint32(&c.maxChannelID, 1) & 0x7fff) - channel := newChannel(chID, c, c.codec, c, reqChanBufSize, replyChanBufSize) - - // store API channel within the client - c.channelsLock.Lock() - c.channels[chID] = channel - c.channelsLock.Unlock() + channel, err := c.newChannel(reqChanBufSize, replyChanBufSize) + if err != nil { + return nil, err + } // start watching on the request channel go c.watchRequests(channel) @@ -302,6 +312,7 @@ func (c *Connection) healthCheckLoop() { log.Error("Failed to create health check API channel, health check will be disabled:", err) return } + defer ch.Close() var ( sinceLastReply time.Duration @@ -309,73 +320,74 @@ func (c *Connection) healthCheckLoop() { ) // send health check probes until an error or timeout occurs - for { - // sleep until next health check probe period - time.Sleep(HealthCheckProbeInterval) + probeInterval := time.NewTicker(HealthCheckProbeInterval) + defer probeInterval.Stop() - if atomic.LoadUint32(&c.vppConnected) == 0 { - // Disconnect has been called in the meantime, return the healthcheck - reconnect loop +HealthCheck: + for { + select { + case <-c.healthCheckDone: + // Terminate the health check loop on connection disconnect log.Debug("Disconnected on request, exiting health check loop.") return - } - - // try draining probe replies from previous request before sending next one - select { - case <-ch.replyChan: - log.Debug("drained old probe reply from reply channel") - default: - } + case <-probeInterval.C: + // try draining probe replies from previous request before sending next one + select { + case <-ch.replyChan: + log.Debug("drained old probe reply from reply channel") + default: + } - // send the control ping request - ch.reqChan <- &vppRequest{msg: c.msgControlPing} + // send the control ping request + ch.reqChan <- &vppRequest{msg: c.msgControlPing} - for { - // expect response within timeout period - select { - case vppReply := <-ch.replyChan: - err = vppReply.err + for { + // expect response within timeout period + select { + case vppReply := <-ch.replyChan: + err = vppReply.err - case <-time.After(HealthCheckReplyTimeout): - err = ErrProbeTimeout + case <-time.After(HealthCheckReplyTimeout): + err = ErrProbeTimeout - // check if time since last reply from any other - // channel is less than health check reply timeout - c.lastReplyLock.Lock() - sinceLastReply = time.Since(c.lastReply) - c.lastReplyLock.Unlock() + // check if time since last reply from any other + // channel is less than health check reply timeout + c.lastReplyLock.Lock() + sinceLastReply = time.Since(c.lastReply) + c.lastReplyLock.Unlock() - if sinceLastReply < HealthCheckReplyTimeout { - log.Warnf("VPP health check probe timing out, but some request on other channel was received %v ago, continue waiting!", sinceLastReply) - continue + if sinceLastReply < HealthCheckReplyTimeout { + log.Warnf("VPP health check probe timing out, but some request on other channel was received %v ago, continue waiting!", sinceLastReply) + continue + } } + break } - break - } - if err == ErrProbeTimeout { - failedChecks++ - log.Warnf("VPP health check probe timed out after %v (%d. timeout)", HealthCheckReplyTimeout, failedChecks) - if failedChecks > HealthCheckThreshold { - // in case of exceeded failed check threshold, assume VPP unresponsive - log.Errorf("VPP does not responding, the health check exceeded threshold for timeouts (>%d)", HealthCheckThreshold) - c.sendConnEvent(ConnectionEvent{Timestamp: time.Now(), State: NotResponding}) - break + if err == ErrProbeTimeout { + failedChecks++ + log.Warnf("VPP health check probe timed out after %v (%d. timeout)", HealthCheckReplyTimeout, failedChecks) + if failedChecks > HealthCheckThreshold { + // in case of exceeded failed check threshold, assume VPP unresponsive + log.Errorf("VPP does not responding, the health check exceeded threshold for timeouts (>%d)", HealthCheckThreshold) + c.sendConnEvent(ConnectionEvent{Timestamp: time.Now(), State: NotResponding}) + break HealthCheck + } + } else if err != nil { + // in case of error, assume VPP disconnected + log.Errorf("VPP health check probe failed: %v", err) + c.sendConnEvent(ConnectionEvent{Timestamp: time.Now(), State: Disconnected, Error: err}) + break HealthCheck + } else if failedChecks > 0 { + // in case of success after failed checks, clear failed check counter + failedChecks = 0 + log.Infof("VPP health check probe OK") } - } else if err != nil { - // in case of error, assume VPP disconnected - log.Errorf("VPP health check probe failed: %v", err) - c.sendConnEvent(ConnectionEvent{Timestamp: time.Now(), State: Disconnected, Error: err}) - break - } else if failedChecks > 0 { - // in case of success after failed checks, clear failed check counter - failedChecks = 0 - log.Infof("VPP health check probe OK") } } // cleanup - ch.Close() - c.disconnectVPP() + c.disconnectVPP(false) // we are now disconnected, start connect loop c.connectLoop() @@ -400,69 +412,74 @@ func (c *Connection) GetMessageID(msg api.Message) (uint16, error) { if c == nil { return 0, errors.New("nil connection passed in") } - - if msgID, ok := c.msgIDs[getMsgNameWithCrc(msg)]; ok { - return msgID, nil - } - + pkgPath := c.GetMessagePath(msg) msgID, err := c.vppClient.GetMsgID(msg.GetMessageName(), msg.GetCrcString()) if err != nil { return 0, err } - + if pathMsgs, pathOk := c.msgMapByPath[pkgPath]; !pathOk { + c.msgMapByPath[pkgPath] = make(map[uint16]api.Message) + c.msgMapByPath[pkgPath][msgID] = msg + } else if _, msgOk := pathMsgs[msgID]; !msgOk { + c.msgMapByPath[pkgPath][msgID] = msg + } + if _, ok := c.msgIDs[getMsgNameWithCrc(msg)]; ok { + return msgID, nil + } c.msgIDs[getMsgNameWithCrc(msg)] = msgID - c.msgMap[msgID] = msg - return msgID, nil } // LookupByID looks up message name and crc by ID. -func (c *Connection) LookupByID(msgID uint16) (api.Message, error) { +func (c *Connection) LookupByID(path string, msgID uint16) (api.Message, error) { if c == nil { return nil, errors.New("nil connection passed in") } - - if msg, ok := c.msgMap[msgID]; ok { + if msg, ok := c.msgMapByPath[path][msgID]; ok { return msg, nil } + return nil, fmt.Errorf("unknown message ID %d for path '%s'", msgID, path) +} - return nil, fmt.Errorf("unknown message ID: %d", msgID) +// GetMessagePath returns path for the given message +func (c *Connection) GetMessagePath(msg api.Message) string { + return path.Dir(reflect.TypeOf(msg).Elem().PkgPath()) } // retrieveMessageIDs retrieves IDs for all registered messages and stores them in map func (c *Connection) retrieveMessageIDs() (err error) { t := time.Now() - msgs := api.GetRegisteredMessages() + msgsByPath := api.GetRegisteredMessages() var n int - for name, msg := range msgs { - typ := reflect.TypeOf(msg).Elem() - path := fmt.Sprintf("%s.%s", typ.PkgPath(), typ.Name()) + for pkgPath, msgs := range msgsByPath { + for _, msg := range msgs { + msgID, err := c.GetMessageID(msg) + if err != nil { + if debugMsgIDs { + log.Debugf("retrieving message ID for %s.%s failed: %v", + pkgPath, msg.GetMessageName(), err) + } + continue + } + n++ + + if c.pingReqID == 0 && msg.GetMessageName() == c.msgControlPing.GetMessageName() { + c.pingReqID = msgID + c.msgControlPing = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message) + } else if c.pingReplyID == 0 && msg.GetMessageName() == c.msgControlPingReply.GetMessageName() { + c.pingReplyID = msgID + c.msgControlPingReply = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message) + } - msgID, err := c.GetMessageID(msg) - if err != nil { if debugMsgIDs { - log.Debugf("retrieving message ID for %s failed: %v", path, err) + log.Debugf("message %q (%s) has ID: %d", msg.GetMessageName(), getMsgNameWithCrc(msg), msgID) } - continue - } - n++ - - if c.pingReqID == 0 && msg.GetMessageName() == c.msgControlPing.GetMessageName() { - c.pingReqID = msgID - c.msgControlPing = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message) - } else if c.pingReplyID == 0 && msg.GetMessageName() == c.msgControlPingReply.GetMessageName() { - c.pingReplyID = msgID - c.msgControlPingReply = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message) - } - - if debugMsgIDs { - log.Debugf("message %q (%s) has ID: %d", name, getMsgNameWithCrc(msg), msgID) } + log.WithField("took", time.Since(t)). + Debugf("retrieved IDs for %d messages (registered %d) from path %s", n, len(msgs), pkgPath) } - log.WithField("took", time.Since(t)). - Debugf("retrieved IDs for %d messages (registered %d)", n, len(msgs)) return nil } @@ -474,3 +491,24 @@ func (c *Connection) sendConnEvent(event ConnectionEvent) { log.Warn("Connection state channel is full, discarding value.") } } + +// Trace gives access to the API trace interface +func (c *Connection) Trace() api.Trace { + return c.apiTrace +} + +// trace records api message +func (c *Connection) trace(msg api.Message, chId uint16, t time.Time, isReceived bool) { + if atomic.LoadInt32(&c.apiTrace.isEnabled) == 0 { + return + } + entry := &api.Record{ + Message: msg, + Timestamp: t, + IsReceived: isReceived, + ChannelID: chId, + } + c.apiTrace.mux.Lock() + c.apiTrace.list = append(c.apiTrace.list, entry) + c.apiTrace.mux.Unlock() +}