aboutsummaryrefslogtreecommitdiffstats
path: root/core/connection.go
diff options
context:
space:
mode:
Diffstat (limited to 'core/connection.go')
-rw-r--r--core/connection.go258
1 files changed, 148 insertions, 110 deletions
diff --git a/core/connection.go b/core/connection.go
index 53a9acf..2c05333 100644
--- a/core/connection.go
+++ b/core/connection.go
@@ -17,6 +17,7 @@ package core
import (
"errors"
"fmt"
+ "path"
"reflect"
"sync"
"sync/atomic"
@@ -24,9 +25,9 @@ import (
logger "github.com/sirupsen/logrus"
- "git.fd.io/govpp.git/adapter"
- "git.fd.io/govpp.git/api"
- "git.fd.io/govpp.git/codec"
+ "go.fd.io/govpp/adapter"
+ "go.fd.io/govpp/api"
+ "go.fd.io/govpp/codec"
)
const (
@@ -42,8 +43,8 @@ var (
var (
HealthCheckProbeInterval = time.Second // default health check probe interval
- HealthCheckReplyTimeout = time.Millisecond * 100 // timeout for reply to a health check probe
- HealthCheckThreshold = 1 // number of failed health checks until the error is reported
+ HealthCheckReplyTimeout = time.Millisecond * 250 // timeout for reply to a health check probe
+ HealthCheckThreshold = 2 // number of failed health checks until the error is reported
DefaultReplyTimeout = time.Second // default timeout for replies from VPP
)
@@ -101,15 +102,16 @@ type Connection struct {
vppConnected uint32 // non-zero if the adapter is connected to VPP
- connChan chan ConnectionEvent // connection status events are sent to this channel
+ connChan chan ConnectionEvent // connection status events are sent to this channel
+ healthCheckDone chan struct{} // used to terminate health check loop
- codec MessageCodec // message codec
- msgIDs map[string]uint16 // map of message IDs indexed by message name + CRC
- msgMap map[uint16]api.Message // map of messages indexed by message ID
+ codec MessageCodec // message codec
+ msgIDs map[string]uint16 // map of message IDs indexed by message name + CRC
+ msgMapByPath map[string]map[uint16]api.Message // map of messages indexed by message ID which are indexed by path
- maxChannelID uint32 // maximum used channel ID (the real limit is 2^15, 32-bit is used for atomic operations)
- channelsLock sync.RWMutex // lock for the channels map
- channels map[uint16]*Channel // map of all API channels indexed by the channel ID
+ channelsLock sync.RWMutex // lock for the channels map and the channel ID
+ nextChannelID uint16 // next potential channel ID (the real limit is 2^15)
+ channels map[uint16]*Channel // map of all API channels indexed by the channel ID
subscriptionsLock sync.RWMutex // lock for the subscriptions map
subscriptions map[uint16][]*subscriptionCtx // map od all notification subscriptions indexed by message ID
@@ -122,6 +124,8 @@ type Connection struct {
msgControlPing api.Message
msgControlPingReply api.Message
+
+ apiTrace *trace // API tracer (disabled by default)
}
func newConnection(binapi adapter.VppAPI, attempts int, interval time.Duration) *Connection {
@@ -137,13 +141,18 @@ func newConnection(binapi adapter.VppAPI, attempts int, interval time.Duration)
maxAttempts: attempts,
recInterval: interval,
connChan: make(chan ConnectionEvent, NotificationChanBufSize),
+ healthCheckDone: make(chan struct{}),
codec: codec.DefaultCodec,
msgIDs: make(map[string]uint16),
- msgMap: make(map[uint16]api.Message),
+ msgMapByPath: make(map[string]map[uint16]api.Message),
channels: make(map[uint16]*Channel),
subscriptions: make(map[uint16][]*subscriptionCtx),
msgControlPing: msgControlPing,
msgControlPingReply: msgControlPingReply,
+ apiTrace: &trace{
+ list: make([]*api.Record, 0),
+ mux: &sync.Mutex{},
+ },
}
binapi.SetMsgCallback(c.msgCallback)
return c
@@ -207,13 +216,18 @@ func (c *Connection) Disconnect() {
return
}
if c.vppClient != nil {
- c.disconnectVPP()
+ c.disconnectVPP(true)
}
}
-// disconnectVPP disconnects from VPP in case it is connected.
-func (c *Connection) disconnectVPP() {
+// disconnectVPP disconnects from VPP in case it is connected. terminate tells
+// that disconnectVPP() was called from Close(), so healthCheckLoop() can be
+// terminated.
+func (c *Connection) disconnectVPP(terminate bool) {
if atomic.CompareAndSwapUint32(&c.vppConnected, 1, 0) {
+ if terminate {
+ close(c.healthCheckDone)
+ }
log.Debug("Disconnecting from VPP..")
if err := c.vppClient.Disconnect(); err != nil {
@@ -238,14 +252,10 @@ func (c *Connection) newAPIChannel(reqChanBufSize, replyChanBufSize int) (*Chann
return nil, errors.New("nil connection passed in")
}
- // create new channel
- chID := uint16(atomic.AddUint32(&c.maxChannelID, 1) & 0x7fff)
- channel := newChannel(chID, c, c.codec, c, reqChanBufSize, replyChanBufSize)
-
- // store API channel within the client
- c.channelsLock.Lock()
- c.channels[chID] = channel
- c.channelsLock.Unlock()
+ channel, err := c.newChannel(reqChanBufSize, replyChanBufSize)
+ if err != nil {
+ return nil, err
+ }
// start watching on the request channel
go c.watchRequests(channel)
@@ -302,6 +312,7 @@ func (c *Connection) healthCheckLoop() {
log.Error("Failed to create health check API channel, health check will be disabled:", err)
return
}
+ defer ch.Close()
var (
sinceLastReply time.Duration
@@ -309,73 +320,74 @@ func (c *Connection) healthCheckLoop() {
)
// send health check probes until an error or timeout occurs
- for {
- // sleep until next health check probe period
- time.Sleep(HealthCheckProbeInterval)
+ probeInterval := time.NewTicker(HealthCheckProbeInterval)
+ defer probeInterval.Stop()
- if atomic.LoadUint32(&c.vppConnected) == 0 {
- // Disconnect has been called in the meantime, return the healthcheck - reconnect loop
+HealthCheck:
+ for {
+ select {
+ case <-c.healthCheckDone:
+ // Terminate the health check loop on connection disconnect
log.Debug("Disconnected on request, exiting health check loop.")
return
- }
-
- // try draining probe replies from previous request before sending next one
- select {
- case <-ch.replyChan:
- log.Debug("drained old probe reply from reply channel")
- default:
- }
+ case <-probeInterval.C:
+ // try draining probe replies from previous request before sending next one
+ select {
+ case <-ch.replyChan:
+ log.Debug("drained old probe reply from reply channel")
+ default:
+ }
- // send the control ping request
- ch.reqChan <- &vppRequest{msg: c.msgControlPing}
+ // send the control ping request
+ ch.reqChan <- &vppRequest{msg: c.msgControlPing}
- for {
- // expect response within timeout period
- select {
- case vppReply := <-ch.replyChan:
- err = vppReply.err
+ for {
+ // expect response within timeout period
+ select {
+ case vppReply := <-ch.replyChan:
+ err = vppReply.err
- case <-time.After(HealthCheckReplyTimeout):
- err = ErrProbeTimeout
+ case <-time.After(HealthCheckReplyTimeout):
+ err = ErrProbeTimeout
- // check if time since last reply from any other
- // channel is less than health check reply timeout
- c.lastReplyLock.Lock()
- sinceLastReply = time.Since(c.lastReply)
- c.lastReplyLock.Unlock()
+ // check if time since last reply from any other
+ // channel is less than health check reply timeout
+ c.lastReplyLock.Lock()
+ sinceLastReply = time.Since(c.lastReply)
+ c.lastReplyLock.Unlock()
- if sinceLastReply < HealthCheckReplyTimeout {
- log.Warnf("VPP health check probe timing out, but some request on other channel was received %v ago, continue waiting!", sinceLastReply)
- continue
+ if sinceLastReply < HealthCheckReplyTimeout {
+ log.Warnf("VPP health check probe timing out, but some request on other channel was received %v ago, continue waiting!", sinceLastReply)
+ continue
+ }
}
+ break
}
- break
- }
- if err == ErrProbeTimeout {
- failedChecks++
- log.Warnf("VPP health check probe timed out after %v (%d. timeout)", HealthCheckReplyTimeout, failedChecks)
- if failedChecks > HealthCheckThreshold {
- // in case of exceeded failed check threshold, assume VPP unresponsive
- log.Errorf("VPP does not responding, the health check exceeded threshold for timeouts (>%d)", HealthCheckThreshold)
- c.sendConnEvent(ConnectionEvent{Timestamp: time.Now(), State: NotResponding})
- break
+ if err == ErrProbeTimeout {
+ failedChecks++
+ log.Warnf("VPP health check probe timed out after %v (%d. timeout)", HealthCheckReplyTimeout, failedChecks)
+ if failedChecks > HealthCheckThreshold {
+ // in case of exceeded failed check threshold, assume VPP unresponsive
+ log.Errorf("VPP does not responding, the health check exceeded threshold for timeouts (>%d)", HealthCheckThreshold)
+ c.sendConnEvent(ConnectionEvent{Timestamp: time.Now(), State: NotResponding})
+ break HealthCheck
+ }
+ } else if err != nil {
+ // in case of error, assume VPP disconnected
+ log.Errorf("VPP health check probe failed: %v", err)
+ c.sendConnEvent(ConnectionEvent{Timestamp: time.Now(), State: Disconnected, Error: err})
+ break HealthCheck
+ } else if failedChecks > 0 {
+ // in case of success after failed checks, clear failed check counter
+ failedChecks = 0
+ log.Infof("VPP health check probe OK")
}
- } else if err != nil {
- // in case of error, assume VPP disconnected
- log.Errorf("VPP health check probe failed: %v", err)
- c.sendConnEvent(ConnectionEvent{Timestamp: time.Now(), State: Disconnected, Error: err})
- break
- } else if failedChecks > 0 {
- // in case of success after failed checks, clear failed check counter
- failedChecks = 0
- log.Infof("VPP health check probe OK")
}
}
// cleanup
- ch.Close()
- c.disconnectVPP()
+ c.disconnectVPP(false)
// we are now disconnected, start connect loop
c.connectLoop()
@@ -400,69 +412,74 @@ func (c *Connection) GetMessageID(msg api.Message) (uint16, error) {
if c == nil {
return 0, errors.New("nil connection passed in")
}
-
- if msgID, ok := c.msgIDs[getMsgNameWithCrc(msg)]; ok {
- return msgID, nil
- }
-
+ pkgPath := c.GetMessagePath(msg)
msgID, err := c.vppClient.GetMsgID(msg.GetMessageName(), msg.GetCrcString())
if err != nil {
return 0, err
}
-
+ if pathMsgs, pathOk := c.msgMapByPath[pkgPath]; !pathOk {
+ c.msgMapByPath[pkgPath] = make(map[uint16]api.Message)
+ c.msgMapByPath[pkgPath][msgID] = msg
+ } else if _, msgOk := pathMsgs[msgID]; !msgOk {
+ c.msgMapByPath[pkgPath][msgID] = msg
+ }
+ if _, ok := c.msgIDs[getMsgNameWithCrc(msg)]; ok {
+ return msgID, nil
+ }
c.msgIDs[getMsgNameWithCrc(msg)] = msgID
- c.msgMap[msgID] = msg
-
return msgID, nil
}
// LookupByID looks up message name and crc by ID.
-func (c *Connection) LookupByID(msgID uint16) (api.Message, error) {
+func (c *Connection) LookupByID(path string, msgID uint16) (api.Message, error) {
if c == nil {
return nil, errors.New("nil connection passed in")
}
-
- if msg, ok := c.msgMap[msgID]; ok {
+ if msg, ok := c.msgMapByPath[path][msgID]; ok {
return msg, nil
}
+ return nil, fmt.Errorf("unknown message ID %d for path '%s'", msgID, path)
+}
- return nil, fmt.Errorf("unknown message ID: %d", msgID)
+// GetMessagePath returns path for the given message
+func (c *Connection) GetMessagePath(msg api.Message) string {
+ return path.Dir(reflect.TypeOf(msg).Elem().PkgPath())
}
// retrieveMessageIDs retrieves IDs for all registered messages and stores them in map
func (c *Connection) retrieveMessageIDs() (err error) {
t := time.Now()
- msgs := api.GetRegisteredMessages()
+ msgsByPath := api.GetRegisteredMessages()
var n int
- for name, msg := range msgs {
- typ := reflect.TypeOf(msg).Elem()
- path := fmt.Sprintf("%s.%s", typ.PkgPath(), typ.Name())
+ for pkgPath, msgs := range msgsByPath {
+ for _, msg := range msgs {
+ msgID, err := c.GetMessageID(msg)
+ if err != nil {
+ if debugMsgIDs {
+ log.Debugf("retrieving message ID for %s.%s failed: %v",
+ pkgPath, msg.GetMessageName(), err)
+ }
+ continue
+ }
+ n++
+
+ if c.pingReqID == 0 && msg.GetMessageName() == c.msgControlPing.GetMessageName() {
+ c.pingReqID = msgID
+ c.msgControlPing = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
+ } else if c.pingReplyID == 0 && msg.GetMessageName() == c.msgControlPingReply.GetMessageName() {
+ c.pingReplyID = msgID
+ c.msgControlPingReply = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
+ }
- msgID, err := c.GetMessageID(msg)
- if err != nil {
if debugMsgIDs {
- log.Debugf("retrieving message ID for %s failed: %v", path, err)
+ log.Debugf("message %q (%s) has ID: %d", msg.GetMessageName(), getMsgNameWithCrc(msg), msgID)
}
- continue
- }
- n++
-
- if c.pingReqID == 0 && msg.GetMessageName() == c.msgControlPing.GetMessageName() {
- c.pingReqID = msgID
- c.msgControlPing = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
- } else if c.pingReplyID == 0 && msg.GetMessageName() == c.msgControlPingReply.GetMessageName() {
- c.pingReplyID = msgID
- c.msgControlPingReply = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
- }
-
- if debugMsgIDs {
- log.Debugf("message %q (%s) has ID: %d", name, getMsgNameWithCrc(msg), msgID)
}
+ log.WithField("took", time.Since(t)).
+ Debugf("retrieved IDs for %d messages (registered %d) from path %s", n, len(msgs), pkgPath)
}
- log.WithField("took", time.Since(t)).
- Debugf("retrieved IDs for %d messages (registered %d)", n, len(msgs))
return nil
}
@@ -474,3 +491,24 @@ func (c *Connection) sendConnEvent(event ConnectionEvent) {
log.Warn("Connection state channel is full, discarding value.")
}
}
+
+// Trace gives access to the API trace interface
+func (c *Connection) Trace() api.Trace {
+ return c.apiTrace
+}
+
+// trace records api message
+func (c *Connection) trace(msg api.Message, chId uint16, t time.Time, isReceived bool) {
+ if atomic.LoadInt32(&c.apiTrace.isEnabled) == 0 {
+ return
+ }
+ entry := &api.Record{
+ Message: msg,
+ Timestamp: t,
+ IsReceived: isReceived,
+ ChannelID: chId,
+ }
+ c.apiTrace.mux.Lock()
+ c.apiTrace.list = append(c.apiTrace.list, entry)
+ c.apiTrace.mux.Unlock()
+}