1 files changed, 148 insertions, 110 deletions
diff --git a/core/connection.go b/core/connection.go
index 53a9acf..2c05333 100644
--- a/core/connection.go
+++ b/core/connection.go
@@ -17,6 +17,7 @@ package core
 import (
 	"errors"
 	"fmt"
+	"path"
 	"reflect"
 	"sync"
 	"sync/atomic"
@@ -24,9 +25,9 @@ import (
 
 	logger "github.com/sirupsen/logrus"
 
-	"git.fd.io/govpp.git/adapter"
-	"git.fd.io/govpp.git/api"
-	"git.fd.io/govpp.git/codec"
+	"go.fd.io/govpp/adapter"
+	"go.fd.io/govpp/api"
+	"go.fd.io/govpp/codec"
 )
 
 const (
@@ -42,8 +43,8 @@ var (
 
 var (
 	HealthCheckProbeInterval = time.Second            // default health check probe interval
-	HealthCheckReplyTimeout  = time.Millisecond * 100 // timeout for reply to a health check probe
-	HealthCheckThreshold     = 1                      // number of failed health checks until the error is reported
+	HealthCheckReplyTimeout  = time.Millisecond * 250 // timeout for reply to a health check probe
+	HealthCheckThreshold     = 2                      // number of failed health checks until the error is reported
 	DefaultReplyTimeout      = time.Second            // default timeout for replies from VPP
 )
 
@@ -101,15 +102,16 @@ type Connection struct {
 
 	vppConnected uint32 // non-zero if the adapter is connected to VPP
 
-	connChan chan ConnectionEvent // connection status events are sent to this channel
+	connChan        chan ConnectionEvent // connection status events are sent to this channel
+	healthCheckDone chan struct{}        // used to terminate health check loop
 
-	codec  MessageCodec           // message codec
-	msgIDs map[string]uint16      // map of message IDs indexed by message name + CRC
-	msgMap map[uint16]api.Message // map of messages indexed by message ID
+	codec        MessageCodec                      // message codec
+	msgIDs       map[string]uint16                 // map of message IDs indexed by message name + CRC
+	msgMapByPath map[string]map[uint16]api.Message // map of messages indexed by message ID which are indexed by path
 
-	maxChannelID uint32              // maximum used channel ID (the real limit is 2^15, 32-bit is used for atomic operations)
-	channelsLock sync.RWMutex        // lock for the channels map
-	channels     map[uint16]*Channel // map of all API channels indexed by the channel ID
+	channelsLock  sync.RWMutex        // lock for the channels map and the channel ID
+	nextChannelID uint16              // next potential channel ID (the real limit is 2^15)
+	channels      map[uint16]*Channel // map of all API channels indexed by the channel ID
 
 	subscriptionsLock sync.RWMutex                  // lock for the subscriptions map
 	subscriptions     map[uint16][]*subscriptionCtx // map od all notification subscriptions indexed by message ID
@@ -122,6 +124,8 @@ type Connection struct {
 
 	msgControlPing      api.Message
 	msgControlPingReply api.Message
+
+	apiTrace *trace // API tracer (disabled by default)
 }
 
 func newConnection(binapi adapter.VppAPI, attempts int, interval time.Duration) *Connection {
@@ -137,13 +141,18 @@ func newConnection(binapi adapter.VppAPI, attempts int, interval time.Duration)
 		maxAttempts:         attempts,
 		recInterval:         interval,
 		connChan:            make(chan ConnectionEvent, NotificationChanBufSize),
+		healthCheckDone:     make(chan struct{}),
 		codec:               codec.DefaultCodec,
 		msgIDs:              make(map[string]uint16),
-		msgMap:              make(map[uint16]api.Message),
+		msgMapByPath:        make(map[string]map[uint16]api.Message),
 		channels:            make(map[uint16]*Channel),
 		subscriptions:       make(map[uint16][]*subscriptionCtx),
 		msgControlPing:      msgControlPing,
 		msgControlPingReply: msgControlPingReply,
+		apiTrace: &trace{
+			list: make([]*api.Record, 0),
+			mux:  &sync.Mutex{},
+		},
 	}
 	binapi.SetMsgCallback(c.msgCallback)
 	return c
@@ -207,13 +216,18 @@ func (c *Connection) Disconnect() {
 		return
 	}
 	if c.vppClient != nil {
-		c.disconnectVPP()
+		c.disconnectVPP(true)
 	}
 }
 
-// disconnectVPP disconnects from VPP in case it is connected.
-func (c *Connection) disconnectVPP() {
+// disconnectVPP disconnects from VPP in case it is connected. terminate tells
+// that disconnectVPP() was called from Close(), so healthCheckLoop() can be
+// terminated.
+func (c *Connection) disconnectVPP(terminate bool) {
 	if atomic.CompareAndSwapUint32(&c.vppConnected, 1, 0) {
+		if terminate {
+			close(c.healthCheckDone)
+		}
 		log.Debug("Disconnecting from VPP..")
 
 		if err := c.vppClient.Disconnect(); err != nil {
@@ -238,14 +252,10 @@ func (c *Connection) newAPIChannel(reqChanBufSize, replyChanBufSize int) (*Chann
 		return nil, errors.New("nil connection passed in")
 	}
 
-	// create new channel
-	chID := uint16(atomic.AddUint32(&c.maxChannelID, 1) & 0x7fff)
-	channel := newChannel(chID, c, c.codec, c, reqChanBufSize, replyChanBufSize)
-
-	// store API channel within the client
-	c.channelsLock.Lock()
-	c.channels[chID] = channel
-	c.channelsLock.Unlock()
+	channel, err := c.newChannel(reqChanBufSize, replyChanBufSize)
+	if err != nil {
+		return nil, err
+	}
 
 	// start watching on the request channel
 	go c.watchRequests(channel)
@@ -302,6 +312,7 @@ func (c *Connection) healthCheckLoop() {
 		log.Error("Failed to create health check API channel, health check will be disabled:", err)
 		return
 	}
+	defer ch.Close()
 
 	var (
 		sinceLastReply time.Duration
@@ -309,73 +320,74 @@ func (c *Connection) healthCheckLoop() {
 	)
 
 	// send health check probes until an error or timeout occurs
-	for {
-		// sleep until next health check probe period
-		time.Sleep(HealthCheckProbeInterval)
+	probeInterval := time.NewTicker(HealthCheckProbeInterval)
+	defer probeInterval.Stop()
 
-		if atomic.LoadUint32(&c.vppConnected) == 0 {
-			// Disconnect has been called in the meantime, return the healthcheck - reconnect loop
+HealthCheck:
+	for {
+		select {
+		case <-c.healthCheckDone:
+			// Terminate the health check loop on connection disconnect
 			log.Debug("Disconnected on request, exiting health check loop.")
 			return
-		}
-
-		// try draining probe replies from previous request before sending next one
-		select {
-		case <-ch.replyChan:
-			log.Debug("drained old probe reply from reply channel")
-		default:
-		}
+		case <-probeInterval.C:
+			// try draining probe replies from previous request before sending next one
+			select {
+			case <-ch.replyChan:
+				log.Debug("drained old probe reply from reply channel")
+			default:
+			}
 
-		// send the control ping request
-		ch.reqChan <- &vppRequest{msg: c.msgControlPing}
+			// send the control ping request
+			ch.reqChan <- &vppRequest{msg: c.msgControlPing}
 
-		for {
-			// expect response within timeout period
-			select {
-			case vppReply := <-ch.replyChan:
-				err = vppReply.err
+			for {
+				// expect response within timeout period
+				select {
+				case vppReply := <-ch.replyChan:
+					err = vppReply.err
 
-			case <-time.After(HealthCheckReplyTimeout):
-				err = ErrProbeTimeout
+				case <-time.After(HealthCheckReplyTimeout):
+					err = ErrProbeTimeout
 
-				// check if time since last reply from any other
-				// channel is less than health check reply timeout
-				c.lastReplyLock.Lock()
-				sinceLastReply = time.Since(c.lastReply)
-				c.lastReplyLock.Unlock()
+					// check if time since last reply from any other
+					// channel is less than health check reply timeout
+					c.lastReplyLock.Lock()
+					sinceLastReply = time.Since(c.lastReply)
+					c.lastReplyLock.Unlock()
 
-				if sinceLastReply < HealthCheckReplyTimeout {
-					log.Warnf("VPP health check probe timing out, but some request on other channel was received %v ago, continue waiting!", sinceLastReply)
-					continue
+					if sinceLastReply < HealthCheckReplyTimeout {
+						log.Warnf("VPP health check probe timing out, but some request on other channel was received %v ago, continue waiting!", sinceLastReply)
+						continue
+					}
 				}
+				break
 			}
-			break
-		}
 
-		if err == ErrProbeTimeout {
-			failedChecks++
-			log.Warnf("VPP health check probe timed out after %v (%d. timeout)", HealthCheckReplyTimeout, failedChecks)
-			if failedChecks > HealthCheckThreshold {
-				// in case of exceeded failed check threshold, assume VPP unresponsive
-				log.Errorf("VPP does not responding, the health check exceeded threshold for timeouts (>%d)", HealthCheckThreshold)
-				c.sendConnEvent(ConnectionEvent{Timestamp: time.Now(), State: NotResponding})
-				break
+			if err == ErrProbeTimeout {
+				failedChecks++
+				log.Warnf("VPP health check probe timed out after %v (%d. timeout)", HealthCheckReplyTimeout, failedChecks)
+				if failedChecks > HealthCheckThreshold {
+					// in case of exceeded failed check threshold, assume VPP unresponsive
+					log.Errorf("VPP does not responding, the health check exceeded threshold for timeouts (>%d)", HealthCheckThreshold)
+					c.sendConnEvent(ConnectionEvent{Timestamp: time.Now(), State: NotResponding})
+					break HealthCheck
+				}
+			} else if err != nil {
+				// in case of error, assume VPP disconnected
+				log.Errorf("VPP health check probe failed: %v", err)
+				c.sendConnEvent(ConnectionEvent{Timestamp: time.Now(), State: Disconnected, Error: err})
+				break HealthCheck
+			} else if failedChecks > 0 {
+				// in case of success after failed checks, clear failed check counter
+				failedChecks = 0
+				log.Infof("VPP health check probe OK")
 			}
-		} else if err != nil {
-			// in case of error, assume VPP disconnected
-			log.Errorf("VPP health check probe failed: %v", err)
-			c.sendConnEvent(ConnectionEvent{Timestamp: time.Now(), State: Disconnected, Error: err})
-			break
-		} else if failedChecks > 0 {
-			// in case of success after failed checks, clear failed check counter
-			failedChecks = 0
-			log.Infof("VPP health check probe OK")
 		}
 	}
 
 	// cleanup
-	ch.Close()
-	c.disconnectVPP()
+	c.disconnectVPP(false)
 
 	// we are now disconnected, start connect loop
 	c.connectLoop()
@@ -400,69 +412,74 @@ func (c *Connection) GetMessageID(msg api.Message) (uint16, error) {
 	if c == nil {
 		return 0, errors.New("nil connection passed in")
 	}
-
-	if msgID, ok := c.msgIDs[getMsgNameWithCrc(msg)]; ok {
-		return msgID, nil
-	}
-
+	pkgPath := c.GetMessagePath(msg)
 	msgID, err := c.vppClient.GetMsgID(msg.GetMessageName(), msg.GetCrcString())
 	if err != nil {
 		return 0, err
 	}
-
+	if pathMsgs, pathOk := c.msgMapByPath[pkgPath]; !pathOk {
+		c.msgMapByPath[pkgPath] = make(map[uint16]api.Message)
+		c.msgMapByPath[pkgPath][msgID] = msg
+	} else if _, msgOk := pathMsgs[msgID]; !msgOk {
+		c.msgMapByPath[pkgPath][msgID] = msg
+	}
+	if _, ok := c.msgIDs[getMsgNameWithCrc(msg)]; ok {
+		return msgID, nil
+	}
 	c.msgIDs[getMsgNameWithCrc(msg)] = msgID
-	c.msgMap[msgID] = msg
-
 	return msgID, nil
 }
 
 // LookupByID looks up message name and crc by ID.
-func (c *Connection) LookupByID(msgID uint16) (api.Message, error) {
+func (c *Connection) LookupByID(path string, msgID uint16) (api.Message, error) {
 	if c == nil {
 		return nil, errors.New("nil connection passed in")
 	}
-
-	if msg, ok := c.msgMap[msgID]; ok {
+	if msg, ok := c.msgMapByPath[path][msgID]; ok {
 		return msg, nil
 	}
+	return nil, fmt.Errorf("unknown message ID %d for path '%s'", msgID, path)
+}
 
-	return nil, fmt.Errorf("unknown message ID: %d", msgID)
+// GetMessagePath returns path for the given message
+func (c *Connection) GetMessagePath(msg api.Message) string {
+	return path.Dir(reflect.TypeOf(msg).Elem().PkgPath())
 }
 
 // retrieveMessageIDs retrieves IDs for all registered messages and stores them in map
 func (c *Connection) retrieveMessageIDs() (err error) {
 	t := time.Now()
 
-	msgs := api.GetRegisteredMessages()
+	msgsByPath := api.GetRegisteredMessages()
 
 	var n int
-	for name, msg := range msgs {
-		typ := reflect.TypeOf(msg).Elem()
-		path := fmt.Sprintf("%s.%s", typ.PkgPath(), typ.Name())
+	for pkgPath, msgs := range msgsByPath {
+		for _, msg := range msgs {
+			msgID, err := c.GetMessageID(msg)
+			if err != nil {
+				if debugMsgIDs {
+					log.Debugf("retrieving message ID for %s.%s failed: %v",
+						pkgPath, msg.GetMessageName(), err)
+				}
+				continue
+			}
+			n++
+
+			if c.pingReqID == 0 && msg.GetMessageName() == c.msgControlPing.GetMessageName() {
+				c.pingReqID = msgID
+				c.msgControlPing = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
+			} else if c.pingReplyID == 0 && msg.GetMessageName() == c.msgControlPingReply.GetMessageName() {
+				c.pingReplyID = msgID
+				c.msgControlPingReply = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
+			}
 
-		msgID, err := c.GetMessageID(msg)
-		if err != nil {
 			if debugMsgIDs {
-				log.Debugf("retrieving message ID for %s failed: %v", path, err)
+				log.Debugf("message %q (%s) has ID: %d", msg.GetMessageName(), getMsgNameWithCrc(msg), msgID)
 			}
-			continue
-		}
-		n++
-
-		if c.pingReqID == 0 && msg.GetMessageName() == c.msgControlPing.GetMessageName() {
-			c.pingReqID = msgID
-			c.msgControlPing = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
-		} else if c.pingReplyID == 0 && msg.GetMessageName() == c.msgControlPingReply.GetMessageName() {
-			c.pingReplyID = msgID
-			c.msgControlPingReply = reflect.New(reflect.TypeOf(msg).Elem()).Interface().(api.Message)
-		}
-
-		if debugMsgIDs {
-			log.Debugf("message %q (%s) has ID: %d", name, getMsgNameWithCrc(msg), msgID)
 		}
+		log.WithField("took", time.Since(t)).
+			Debugf("retrieved IDs for %d messages (registered %d) from path %s", n, len(msgs), pkgPath)
 	}
-	log.WithField("took", time.Since(t)).
-		Debugf("retrieved IDs for %d messages (registered %d)", n, len(msgs))
 
 	return nil
 }
@@ -474,3 +491,24 @@ func (c *Connection) sendConnEvent(event ConnectionEvent) {
 		log.Warn("Connection state channel is full, discarding value.")
 	}
 }
+
+// Trace gives access to the API trace interface
+func (c *Connection) Trace() api.Trace {
+	return c.apiTrace
+}
+
+// trace records api message
+func (c *Connection) trace(msg api.Message, chId uint16, t time.Time, isReceived bool) {
+	if atomic.LoadInt32(&c.apiTrace.isEnabled) == 0 {
+		return
+	}
+	entry := &api.Record{
+		Message:    msg,
+		Timestamp:  t,
+		IsReceived: isReceived,
+		ChannelID:  chId,
+	}
+	c.apiTrace.mux.Lock()
+	c.apiTrace.list = append(c.apiTrace.list, entry)
+	c.apiTrace.mux.Unlock()
+}