summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeale Ranns <nranns@cisco.com>2016-12-07 15:38:14 +0000
committerDamjan Marion <dmarion.lists@gmail.com>2016-12-07 17:17:30 +0000
commit19c68d2a02770189d32ead3ff964efc073db630a (patch)
tree3a985d73e731c776b76dc72c28c5d38865c9b848
parent9677a944d5188f10188657bf627301b239fe1da7 (diff)
Prevent re-entrant walks on an adjacency.
The re-entrant walks were caused when the walk from an IP adj updated a fib_netry with an MPLS adj which in turn triggers a walk of the IP adj. Re-entrant walks do unnecessary work. also fixed a walk merge issue where the encountered walk should only be checked for equivalence woth the most recent alk, not any in the list. Otherwise an UO,DOWN,UP beceoms (2*)UP,DOWN Change-Id: Ib8b27f055dc6c1366d33740276d1c26cd314220a Signed-off-by: Neale Ranns <nranns@cisco.com>
-rw-r--r--vnet/vnet/adj/adj.c1
-rw-r--r--vnet/vnet/adj/adj_nbr.c53
-rw-r--r--vnet/vnet/ethernet/arp.c15
-rw-r--r--vnet/vnet/fib/fib_walk.c97
-rw-r--r--vnet/vnet/ip/ip6_neighbor.c14
-rw-r--r--vnet/vnet/ip/lookup.h17
6 files changed, 140 insertions, 57 deletions
diff --git a/vnet/vnet/adj/adj.c b/vnet/vnet/adj/adj.c
index 2741c885cee..e740c4cb79b 100644
--- a/vnet/vnet/adj/adj.c
+++ b/vnet/vnet/adj/adj.c
@@ -66,6 +66,7 @@ adj_alloc (fib_protocol_t proto)
fib_node_init(&adj->ia_node,
FIB_NODE_TYPE_ADJ);
adj->ia_nh_proto = proto;
+ adj->ia_flags = 0;
ip4_main.lookup_main.adjacency_heap = adj_pool;
ip6_main.lookup_main.adjacency_heap = adj_pool;
diff --git a/vnet/vnet/adj/adj_nbr.c b/vnet/vnet/adj/adj_nbr.c
index 95d1254a8c4..1344bb67fcc 100644
--- a/vnet/vnet/adj/adj_nbr.c
+++ b/vnet/vnet/adj/adj_nbr.c
@@ -297,9 +297,11 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
u32 next_node,
u8 *rewrite)
{
+ ip_adjacency_t *walk_adj;
adj_index_t walk_ai;
vlib_main_t * vm;
u32 old_next;
+ int do_walk;
vm = vlib_get_main();
old_next = adj->lookup_next_index;
@@ -319,6 +321,39 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
}
/*
+ * Don't call the walk re-entrantly
+ */
+ if (ADJ_INDEX_INVALID != walk_ai)
+ {
+ walk_adj = adj_get(walk_ai);
+ if (IP_ADJ_SYNC_WALK_ACTIVE & walk_adj->ia_flags)
+ {
+ do_walk = 0;
+ }
+ else
+ {
+ /*
+ * Prevent re-entrant walk of the same adj
+ */
+ walk_adj->ia_flags |= IP_ADJ_SYNC_WALK_ACTIVE;
+ do_walk = 1;
+ }
+ }
+ else
+ {
+ do_walk = 0;
+ }
+
+ /*
+ * lock the adjacencies that are affected by updates this walk will provoke.
+ * Since the aim of the walk is to update children to link to a different
+ * DPO, this adj will no longer be in use and its lock count will drop to 0.
+ * We don't want it to be deleted as part of this endevour.
+ */
+ adj_lock(adj_get_index(adj));
+ adj_lock(walk_ai);
+
+ /*
* Updating a rewrite string is not atomic;
* - the rewrite string is too long to write in one instruction
* - when swapping from incomplete to complete, we also need to update
@@ -346,7 +381,8 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
* 1 - mac change
* 2 - adj type change
*/
- if (old_next != adj_next_index &&
+ if (do_walk &&
+ old_next != adj_next_index &&
ADJ_INDEX_INVALID != walk_ai)
{
/*
@@ -407,8 +443,9 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
*/
vlib_worker_thread_barrier_release(vm);
- if (old_next != adj->lookup_next_index &&
- ADJ_INDEX_INVALID != walk_ai)
+ if (do_walk &&
+ (old_next != adj->lookup_next_index) &&
+ (ADJ_INDEX_INVALID != walk_ai))
{
/*
* backwalk to the children so they can stack on the now updated
@@ -420,6 +457,16 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
}
+ /*
+ * Prevent re-entrant walk of the same adj
+ */
+ if (do_walk)
+ {
+ walk_adj->ia_flags &= ~IP_ADJ_SYNC_WALK_ACTIVE;
+ }
+
+ adj_unlock(adj_get_index(adj));
+ adj_unlock(walk_ai);
}
typedef struct adj_db_count_ctx_t_ {
diff --git a/vnet/vnet/ethernet/arp.c b/vnet/vnet/ethernet/arp.c
index ec138586ff4..d020c8a55d8 100644
--- a/vnet/vnet/ethernet/arp.c
+++ b/vnet/vnet/ethernet/arp.c
@@ -374,13 +374,15 @@ arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
}
static void
-arp_mk_incomplete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
+arp_mk_incomplete (adj_index_t ai)
{
+ ip_adjacency_t *adj = adj_get (ai);
+
adj_nbr_update_rewrite
(ai,
ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
ethernet_build_rewrite (vnet_get_main (),
- e->sw_if_index,
+ adj->rewrite_header.sw_if_index,
VNET_LINK_ARP,
VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
}
@@ -417,9 +419,7 @@ arp_mk_complete_walk (adj_index_t ai, void *ctx)
static adj_walk_rc_t
arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
{
- ethernet_arp_ip4_entry_t *e = ctx;
-
- arp_mk_incomplete (ai, e);
+ arp_mk_incomplete (ai);
return (ADJ_WALK_RC_CONTINUE);
}
@@ -1620,9 +1620,10 @@ vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
if (NULL != e)
{
- adj_nbr_walk_nh4 (e->sw_if_index,
- &e->ip4_address, arp_mk_incomplete_walk, e);
arp_entry_free (eai, e);
+
+ adj_nbr_walk_nh4 (e->sw_if_index,
+ &e->ip4_address, arp_mk_incomplete_walk, NULL);
}
return 0;
diff --git a/vnet/vnet/fib/fib_walk.c b/vnet/vnet/fib/fib_walk.c
index e7376bf47a2..938f7b8c1c6 100644
--- a/vnet/vnet/fib/fib_walk.c
+++ b/vnet/vnet/fib/fib_walk.c
@@ -177,6 +177,7 @@ static u64 fib_walk_hist_vists_per_walk[HISTOGRAM_VISITS_PER_WALK_N_BUCKETS];
* @brief History of state for the last 128 walks
*/
#define HISTORY_N_WALKS 128
+#define MAX_HISTORY_REASONS 16
static u32 history_last_walk_pos;
typedef struct fib_walk_history_t_ {
u32 fwh_n_visits;
@@ -184,7 +185,7 @@ typedef struct fib_walk_history_t_ {
f64 fwh_completed;
fib_node_ptr_t fwh_parent;
fib_walk_flags_t fwh_flags;
- fib_node_bw_reason_flag_t fwh_reason;
+ fib_node_bw_reason_flag_t fwh_reason[MAX_HISTORY_REASONS];
} fib_walk_history_t;
static fib_walk_history_t fib_walk_history[HISTORY_N_WALKS];
@@ -241,8 +242,7 @@ fib_walk_queue_get_front (fib_walk_priority_t prio)
static void
fib_walk_destroy (fib_walk_t *fwalk)
{
- fib_node_back_walk_ctx_t *ctx;
- u32 bucket;
+ u32 bucket, ii;
if (FIB_NODE_INDEX_INVALID != fwalk->fw_prio_sibling)
{
@@ -277,16 +277,19 @@ fib_walk_destroy (fib_walk_t *fwalk)
fib_walk_history[history_last_walk_pos].fwh_flags =
fwalk->fw_flags;
- fib_walk_history[history_last_walk_pos].fwh_reason = 0;
- vec_foreach(ctx, fwalk->fw_ctx)
+ vec_foreach_index(ii, fwalk->fw_ctx)
{
- fib_walk_history[history_last_walk_pos].fwh_reason |=
- ctx->fnbw_reason;
+ if (ii < MAX_HISTORY_REASONS)
+ {
+ fib_walk_history[history_last_walk_pos].fwh_reason[ii] =
+ fwalk->fw_ctx[ii].fnbw_reason;
+ }
}
history_last_walk_pos = (history_last_walk_pos + 1) % HISTORY_N_WALKS;
fib_node_deinit(&fwalk->fw_node);
+ vec_free(fwalk->fw_ctx);
pool_put(fib_walk_pool, fwalk);
}
@@ -315,7 +318,7 @@ typedef enum fib_walk_advance_rc_t_
static fib_walk_advance_rc_t
fib_walk_advance (fib_node_index_t fwi)
{
- fib_node_back_walk_ctx_t *ctx;
+ fib_node_back_walk_ctx_t *ctx, *old;
fib_node_back_walk_rc_t wrc;
fib_node_ptr_t sibling;
fib_walk_t *fwalk;
@@ -332,6 +335,8 @@ fib_walk_advance (fib_node_index_t fwi)
if (more_elts)
{
+ old = fwalk->fw_ctx;
+
vec_foreach(ctx, fwalk->fw_ctx)
{
wrc = fib_node_back_walk_one(&sibling, ctx);
@@ -347,6 +352,14 @@ fib_walk_advance (fib_node_index_t fwi)
*/
return (FIB_WALK_ADVANCE_MERGE);
}
+ if (old != fwalk->fw_ctx)
+ {
+ /*
+ * nasty re-entrant addition of a walk has realloc'd the vector
+ * break out
+ */
+ return (FIB_WALK_ADVANCE_MERGE);
+ }
}
/*
* move foward to the next node to visit
@@ -565,7 +578,7 @@ fib_walk_alloc (fib_node_type_t parent_type,
/*
* make a copy of the backwalk context so the depth count remains
* the same for each sibling visitsed. This is important in the case
- * where a parents has a loop via one child, but all the others are not.
+ * where a parent has a loop via one child, but all the others are not.
* if the looped child were visited first, the depth count would exceed, the
* max and the walk would terminate before it reached the other siblings.
*/
@@ -801,42 +814,40 @@ static fib_node_back_walk_rc_t
fib_walk_back_walk_notify (fib_node_t *node,
fib_node_back_walk_ctx_t *ctx)
{
- fib_node_back_walk_ctx_t *old;
+ fib_node_back_walk_ctx_t *last;
fib_walk_t *fwalk;
fwalk = fib_walk_get_from_node(node);
/*
- * check whether the walk context can be merge with another,
- * or whether it needs to be appended.
+ * check whether the walk context can be merged with the most recent.
+ * the most recent was the one last added and is thus at the back of the vector.
+ * we can merge walks if the reason for the walk is the same.
*/
- vec_foreach(old, fwalk->fw_ctx)
+ last = vec_end(fwalk->fw_ctx) - 1;
+
+ if (last->fnbw_reason == ctx->fnbw_reason)
{
- /*
- * we can merge walks if the reason for the walk is the same.
- */
- if (old->fnbw_reason == ctx->fnbw_reason)
- {
- /*
- * copy the largest of the depth values. in the presence of a loop,
- * the same walk will merge with itself. if we take the smaller depth
- * then it will never end.
- */
- old->fnbw_depth = ((old->fnbw_depth >= ctx->fnbw_depth) ?
- old->fnbw_depth :
- ctx->fnbw_depth);
- goto out;
- }
+ /*
+ * copy the largest of the depth values. in the presence of a loop,
+ * the same walk will merge with itself. if we take the smaller depth
+ * then it will never end.
+ */
+ last->fnbw_depth = ((last->fnbw_depth >= ctx->fnbw_depth) ?
+ last->fnbw_depth :
+ ctx->fnbw_depth);
+ }
+ else
+ {
+ /*
+ * walks could not be merged, this means that the walk infront needs to
+ * perform different action to this one that has caught up. the one in
+ * front was scheduled first so append the new walk context to the back
+ * of the list.
+ */
+ vec_add1(fwalk->fw_ctx, *ctx);
}
- /*
- * walks could not be merged, this means that the walk infront needs to
- * perform different action to this one that has caught up. the one in front
- * was scheduled first so append the new walk context to the back of the list.
- */
- vec_add1(fwalk->fw_ctx, *ctx);
-
-out:
return (FIB_NODE_BACK_WALK_MERGE);
}
@@ -979,10 +990,11 @@ fib_walk_show (vlib_main_t * vm,
while (ii != history_last_walk_pos)
{
- if (0 != fib_walk_history[ii].fwh_reason)
+ if (0 != fib_walk_history[ii].fwh_reason[0])
{
fib_node_back_walk_reason_t reason;
u8 *s = NULL;
+ u32 jj;
s = format(s, "[@%d]: %s:%d visits:%d duration:%.2f completed:%.2f ",
ii, fib_node_type_get_name(fib_walk_history[ii].fwh_parent.fnp_type),
@@ -996,10 +1008,15 @@ fib_walk_show (vlib_main_t * vm,
s = format(s, "async, ");
s = format(s, "reason:");
- FOR_EACH_FIB_NODE_BW_REASON(reason) {
- if ((1<<reason) & fib_walk_history[ii].fwh_reason) {
- s = format (s, "%s,", fib_node_bw_reason_names[reason]);
+ jj = 0;
+ while (0 != fib_walk_history[ii].fwh_reason[jj])
+ {
+ FOR_EACH_FIB_NODE_BW_REASON(reason) {
+ if ((1<<reason) & fib_walk_history[ii].fwh_reason[jj]) {
+ s = format (s, "%s,", fib_node_bw_reason_names[reason]);
+ }
}
+ jj++;
}
vlib_cli_output(vm, "%v", s);
}
diff --git a/vnet/vnet/ip/ip6_neighbor.c b/vnet/vnet/ip/ip6_neighbor.c
index 92417a44025..15b3f764622 100644
--- a/vnet/vnet/ip/ip6_neighbor.c
+++ b/vnet/vnet/ip/ip6_neighbor.c
@@ -402,13 +402,15 @@ ip6_nd_mk_complete (adj_index_t ai, ip6_neighbor_t * nbr)
}
static void
-ip6_nd_mk_incomplete (adj_index_t ai, ip6_neighbor_t * nbr)
+ip6_nd_mk_incomplete (adj_index_t ai)
{
+ ip_adjacency_t *adj = adj_get(ai);
+
adj_nbr_update_rewrite (
ai,
ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
ethernet_build_rewrite (vnet_get_main (),
- nbr->key.sw_if_index,
+ adj->rewrite_header.sw_if_index,
adj_get_link_type(ai),
VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
}
@@ -452,9 +454,7 @@ ip6_nd_mk_complete_walk (adj_index_t ai, void *ctx)
static adj_walk_rc_t
ip6_nd_mk_incomplete_walk (adj_index_t ai, void *ctx)
{
- ip6_neighbor_t *nbr = ctx;
-
- ip6_nd_mk_incomplete (ai, nbr);
+ ip6_nd_mk_incomplete (ai);
return (ADJ_WALK_RC_CONTINUE);
}
@@ -683,13 +683,13 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
}
n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+ mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
adj_nbr_walk_nh6 (sw_if_index,
&n->key.ip6_address,
ip6_nd_mk_incomplete_walk,
- n);
+ NULL);
- mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
fib_table_entry_delete_index (n->fib_entry_index, FIB_SOURCE_ADJ);
pool_put (nm->neighbor_pool, n);
diff --git a/vnet/vnet/ip/lookup.h b/vnet/vnet/ip/lookup.h
index a609e2fe7c0..4b6aaa10da9 100644
--- a/vnet/vnet/ip/lookup.h
+++ b/vnet/vnet/ip/lookup.h
@@ -168,6 +168,17 @@ typedef void (*adj_midchain_fixup_t)(vlib_main_t * vm,
struct ip_adjacency_t_ *adj,
vlib_buffer_t * b0);
+/**
+ * @brief Flags on an IP adjacency
+ */
+typedef enum ip_adjacency_flags_t_
+{
+ /**
+ * Currently a sync walk is active. Used to prevent re-entrant walking
+ */
+ IP_ADJ_SYNC_WALK_ACTIVE = (1 << 0),
+} ip_adjacency_flags_t;
+
/** @brief IP unicast adjacency.
@note cache aligned.
*/
@@ -254,6 +265,12 @@ typedef struct ip_adjacency_t_ {
* remaining cachelines
*/
fib_node_t ia_node;
+
+ /**
+ * Flags on the adjacency
+ */
+ ip_adjacency_flags_t ia_flags;
+
} ip_adjacency_t;
STATIC_ASSERT((STRUCT_OFFSET_OF(ip_adjacency_t, cacheline0) == 0),