From 945b7c7ae69c414ef851f85596be4edeb1d9290e Mon Sep 17 00:00:00 2001 From: Sergey Elantsev Date: Fri, 1 Apr 2022 22:43:40 +0300 Subject: fixed data race in core.Connection.Close(). The problem is triggered by the following events: 1. VPP stops. 2. core.Connection.healthCheckLoop() detects it and calls disconnectVPP(), which does close healthCheckDone channel for the first time. At this point Connection becomes unusable - re-entrance to connectLoop() will return immediately because of a closed healthCheckDone channel. But, the race is that connection may set c.vppConnected = 1 before terminating the connect loop. 3. User calls Connection.Close(), who seed c.vppConnected = 1, and tries to close already closed channel, which leads to a panic. This commit fixes this race by telling disconnectVPP() whether it is a terminal stop triggered via Close(), or a temporary one from connectLoop(). Change-Id: I555149da35ca3dc2b606bad59f2101266c0ef6b9 Signed-off-by: Sergey Elantsev --- core/connection.go | 14 +++++++++----- core/connection_test.go | 23 +++++++++++++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) (limited to 'core') diff --git a/core/connection.go b/core/connection.go index 1bfcae5..f796f3d 100644 --- a/core/connection.go +++ b/core/connection.go @@ -216,14 +216,18 @@ func (c *Connection) Disconnect() { return } if c.vppClient != nil { - c.disconnectVPP() + c.disconnectVPP(true) } } -// disconnectVPP disconnects from VPP in case it is connected. -func (c *Connection) disconnectVPP() { +// disconnectVPP disconnects from VPP in case it is connected. terminate tells +// that disconnectVPP() was called from Close(), so healthCheckLoop() can be +// terminated. +func (c *Connection) disconnectVPP(terminate bool) { if atomic.CompareAndSwapUint32(&c.vppConnected, 1, 0) { - close(c.healthCheckDone) + if terminate { + close(c.healthCheckDone) + } log.Debug("Disconnecting from VPP..") if err := c.vppClient.Disconnect(); err != nil { @@ -383,7 +387,7 @@ HealthCheck: } // cleanup - c.disconnectVPP() + c.disconnectVPP(false) // we are now disconnected, start connect loop c.connectLoop() diff --git a/core/connection_test.go b/core/connection_test.go index 230eea5..81b145c 100644 --- a/core/connection_test.go +++ b/core/connection_test.go @@ -16,6 +16,7 @@ package core_test import ( "testing" + "time" . "github.com/onsi/gomega" @@ -91,6 +92,28 @@ func TestAsyncConnection(t *testing.T) { Expect(ev.State).Should(BeEquivalentTo(core.Connected)) } +func TestAsyncConnectionProcessesVppTimeout(t *testing.T) { + ctx := setupTest(t, false) + defer ctx.teardownTest() + + ctx.conn.Disconnect() + conn, statusChan, err := core.AsyncConnect(ctx.mockVpp, core.DefaultMaxReconnectAttempts, core.DefaultReconnectInterval) + ctx.conn = conn + + Expect(err).ShouldNot(HaveOccurred()) + Expect(conn).ShouldNot(BeNil()) + + ev := <-statusChan + Expect(ev.State).Should(BeEquivalentTo(core.Connected)) + + // make control ping reply fail so that connection.healthCheckLoop() + // initiates reconnection. + ctx.mockVpp.MockReply(&vpe.ControlPingReply{ + Retval: -1, + }) + time.Sleep(time.Duration(1+core.HealthCheckThreshold) * (core.HealthCheckInterval + 2*core.HealthCheckReplyTimeout)) +} + func TestCodec(t *testing.T) { RegisterTestingT(t) -- cgit 1.2.3-korg