summaryrefslogtreecommitdiffstats
path: root/src/vlib
diff options
context:
space:
mode:
authorChristian E. Hopps <chopps@chopps.org>2019-09-28 21:36:36 -0400
committerAndrew Yourtchenko <ayourtch@gmail.com>2019-10-18 14:39:40 +0000
commitc518bd63c3c6823c9f1e45da3da938df560c7877 (patch)
treed097e2790a3c78a146c8936123bae1739334d33b /src/vlib
parentb8f61f4863c29cc59d71183c87a691dbf2b85a92 (diff)
vlib: move thread barrier around mod of global node next data
The old code modified the node next array prior to obtaining the thread barrier. Then it updated the runtime node data, and upon barrier release caused reforking of each worker thread. The reforking clones the main thread nodes and reconstructs the runtime node structure. This cloning is not 100% "deep" in the sense that the node next array is shared (i.e., only the pointer is copied). So prior to the barrier being obtained the node's next array is being changed while workers are actively using it (bad). Treating the node next array as read-only in the workers and sharing it is a decent optimization so instead of trying to fix that just move the barrier a little earlier in the process to protect the node next array as well. This was tripping an assert in next frame ownership change by way of the ip4-arp node. The assert verifies that the node's next array length is equal to the runtime next node count. The race above was lost and the node next array data was updated in the main thread while the arp code was still executing in a worker. This was being hit when many arp requests were being sent from both ends of a tunnel during which the add next node function was called, which often led to an assert b/c the next node array was out of sync with the runtime next node count. - PS#2 update - move barrier sync to just above code that modifies state. Ticket: VPP-1783 Type: fix Signed-off-by: Christian E. Hopps <chopps@chopps.org> Change-Id: I868784e28f994ee0922aaaae11c4894a3f4f1fe7 Signed-off-by: Christian E. Hopps <chopps@chopps.org> (cherry picked from commit d3122ef4ecfa9a515cc39c1632d29e43fa771b2a)
Diffstat (limited to 'src/vlib')
-rw-r--r--src/vlib/node.c11
1 files changed, 5 insertions, 6 deletions
diff --git a/src/vlib/node.c b/src/vlib/node.c
index 0e6038d01fa..b6a44b2e99d 100644
--- a/src/vlib/node.c
+++ b/src/vlib/node.c
@@ -128,10 +128,6 @@ vlib_node_runtime_update (vlib_main_t * vm, u32 node_index, u32 next_index)
vlib_pending_frame_t *pf;
i32 i, j, n_insert;
- ASSERT (vlib_get_thread_index () == 0);
-
- vlib_worker_thread_barrier_sync (vm);
-
node = vec_elt (nm->nodes, node_index);
r = vlib_node_get_runtime (vm, node_index);
@@ -176,8 +172,6 @@ vlib_node_runtime_update (vlib_main_t * vm, u32 node_index, u32 next_index)
nf->node_runtime_index = next_node->runtime_index;
vlib_worker_thread_node_runtime_update ();
-
- vlib_worker_thread_barrier_release (vm);
}
uword
@@ -210,6 +204,8 @@ vlib_node_add_next_with_slot (vlib_main_t * vm,
vlib_node_t *node, *next;
uword *p;
+ ASSERT (vlib_get_thread_index () == 0);
+
node = vec_elt (nm->nodes, node_index);
next = vec_elt (nm->nodes, next_node_index);
@@ -224,6 +220,8 @@ vlib_node_add_next_with_slot (vlib_main_t * vm,
return p[0];
}
+ vlib_worker_thread_barrier_sync (vm);
+
if (slot == ~0)
slot = vec_len (node->next_nodes);
@@ -254,6 +252,7 @@ vlib_node_add_next_with_slot (vlib_main_t * vm,
/* *INDENT-ON* */
}
+ vlib_worker_thread_barrier_release (vm);
return slot;
}