From 19c68d2a02770189d32ead3ff964efc073db630a Mon Sep 17 00:00:00 2001
From: Neale Ranns
Date: Wed, 7 Dec 2016 15:38:14 +0000
Subject: Prevent re-entrant walks on an adjacency.

The re-entrant walks were caused when the walk from an IP adj updated a
fib_entry with an MPLS adj, which in turn triggered a walk of the IP adj.
Re-entrant walks do unnecessary work.

Also fixed a walk-merge issue where an encountered walk should only be
checked for equivalence with the most recent walk, not any walk in the
list. Otherwise an UP,DOWN,UP sequence becomes (2*)UP,DOWN.

Change-Id: Ib8b27f055dc6c1366d33740276d1c26cd314220a
Signed-off-by: Neale Ranns
---
 vnet/vnet/adj/adj.c         |  1 +
 vnet/vnet/adj/adj_nbr.c     | 53 +++++++++++++++++++++++--
 vnet/vnet/ethernet/arp.c    | 15 +++----
 vnet/vnet/fib/fib_walk.c    | 97 ++++++++++++++++++++++++++-------------------
 vnet/vnet/ip/ip6_neighbor.c | 14 +++----
 vnet/vnet/ip/lookup.h       | 17 ++++++++
 6 files changed, 140 insertions(+), 57 deletions(-)

diff --git a/vnet/vnet/adj/adj.c b/vnet/vnet/adj/adj.c
index 2741c885cee..e740c4cb79b 100644
--- a/vnet/vnet/adj/adj.c
+++ b/vnet/vnet/adj/adj.c
@@ -66,6 +66,7 @@ adj_alloc (fib_protocol_t proto)
     fib_node_init(&adj->ia_node,
                   FIB_NODE_TYPE_ADJ);
     adj->ia_nh_proto = proto;
+    adj->ia_flags = 0;
 
     ip4_main.lookup_main.adjacency_heap = adj_pool;
     ip6_main.lookup_main.adjacency_heap = adj_pool;
diff --git a/vnet/vnet/adj/adj_nbr.c b/vnet/vnet/adj/adj_nbr.c
index 95d1254a8c4..1344bb67fcc 100644
--- a/vnet/vnet/adj/adj_nbr.c
+++ b/vnet/vnet/adj/adj_nbr.c
@@ -297,9 +297,11 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
                                  u32 next_node,
                                  u8 *rewrite)
 {
+    ip_adjacency_t *walk_adj;
     adj_index_t walk_ai;
     vlib_main_t * vm;
     u32 old_next;
+    int do_walk;
 
     vm = vlib_get_main();
     old_next = adj->lookup_next_index;
@@ -318,6 +320,39 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
                                  adj->rewrite_header.sw_if_index);
     }
 
+    /*
+     * Don't call the walk re-entrantly
+     */
+    if (ADJ_INDEX_INVALID != walk_ai)
+    {
+        walk_adj = adj_get(walk_ai);
+        if (IP_ADJ_SYNC_WALK_ACTIVE & walk_adj->ia_flags)
+        {
+            do_walk = 0;
+        }
+        else
+        {
+            /*
+             * Prevent re-entrant walk of the same adj
+             */
+            walk_adj->ia_flags |= IP_ADJ_SYNC_WALK_ACTIVE;
+            do_walk = 1;
+        }
+    }
+    else
+    {
+        do_walk = 0;
+    }
+
+    /*
+     * lock the adjacencies that are affected by updates this walk will provoke.
+     * Since the aim of the walk is to update children to link to a different
+     * DPO, this adj will no longer be in use and its lock count will drop to 0.
+     * We don't want it to be deleted as part of this endeavour.
+     */
+    adj_lock(adj_get_index(adj));
+    adj_lock(walk_ai);
+
     /*
      * Updating a rewrite string is not atomic;
      *  - the rewrite string is too long to write in one instruction
@@ -346,7 +381,8 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
      *   1 - mac change
      *   2 - adj type change
      */
-    if (old_next != adj_next_index &&
+    if (do_walk &&
+        old_next != adj_next_index &&
         ADJ_INDEX_INVALID != walk_ai)
     {
         /*
@@ -407,8 +443,9 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
      */
     vlib_worker_thread_barrier_release(vm);
 
-    if (old_next != adj->lookup_next_index &&
-        ADJ_INDEX_INVALID != walk_ai)
+    if (do_walk &&
+        (old_next != adj->lookup_next_index) &&
+        (ADJ_INDEX_INVALID != walk_ai))
     {
         /*
          * backwalk to the children so they can stack on the now updated
@@ -420,6 +457,16 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
         fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
     }
 
+    /*
+     * Prevent re-entrant walk of the same adj
+     */
+    if (do_walk)
+    {
+        walk_adj->ia_flags &= ~IP_ADJ_SYNC_WALK_ACTIVE;
+    }
+
+    adj_unlock(adj_get_index(adj));
+    adj_unlock(walk_ai);
 }
 
 typedef struct adj_db_count_ctx_t_ {
diff --git a/vnet/vnet/ethernet/arp.c b/vnet/vnet/ethernet/arp.c
index ec138586ff4..d020c8a55d8 100644
--- a/vnet/vnet/ethernet/arp.c
+++ b/vnet/vnet/ethernet/arp.c
@@ -374,13 +374,15 @@ arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
 }
 
 static void
-arp_mk_incomplete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
+arp_mk_incomplete (adj_index_t ai)
 {
+  ip_adjacency_t *adj = adj_get (ai);
+
   adj_nbr_update_rewrite
     (ai,
     ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
     ethernet_build_rewrite (vnet_get_main (),
-                            e->sw_if_index,
+                            adj->rewrite_header.sw_if_index,
                             VNET_LINK_ARP,
                             VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
 }
@@ -417,9 +419,7 @@ arp_mk_complete_walk (adj_index_t ai, void *ctx)
 static adj_walk_rc_t
 arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
 {
-  ethernet_arp_ip4_entry_t *e = ctx;
-
-  arp_mk_incomplete (ai, e);
+  arp_mk_incomplete (ai);
 
   return (ADJ_WALK_RC_CONTINUE);
 }
@@ -1620,9 +1620,10 @@ vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
 
   if (NULL != e)
     {
-      adj_nbr_walk_nh4 (e->sw_if_index,
-                        &e->ip4_address, arp_mk_incomplete_walk, e);
       arp_entry_free (eai, e);
+
+      adj_nbr_walk_nh4 (e->sw_if_index,
+                        &e->ip4_address, arp_mk_incomplete_walk, NULL);
     }
 
   return 0;
diff --git a/vnet/vnet/fib/fib_walk.c b/vnet/vnet/fib/fib_walk.c
index e7376bf47a2..938f7b8c1c6 100644
--- a/vnet/vnet/fib/fib_walk.c
+++ b/vnet/vnet/fib/fib_walk.c
@@ -177,6 +177,7 @@ static u64 fib_walk_hist_vists_per_walk[HISTOGRAM_VISITS_PER_WALK_N_BUCKETS];
  * @brief History of state for the last 128 walks
  */
 #define HISTORY_N_WALKS 128
+#define MAX_HISTORY_REASONS 16
 static u32 history_last_walk_pos;
 typedef struct fib_walk_history_t_ {
     u32 fwh_n_visits;
@@ -184,7 +185,7 @@ typedef struct fib_walk_history_t_ {
     f64 fwh_completed;
     fib_node_ptr_t fwh_parent;
     fib_walk_flags_t fwh_flags;
-    fib_node_bw_reason_flag_t fwh_reason;
+    fib_node_bw_reason_flag_t fwh_reason[MAX_HISTORY_REASONS];
 } fib_walk_history_t;
 static fib_walk_history_t fib_walk_history[HISTORY_N_WALKS];
 
@@ -241,8 +242,7 @@ fib_walk_queue_get_front (fib_walk_priority_t prio)
 static void
 fib_walk_destroy (fib_walk_t *fwalk)
 {
-    fib_node_back_walk_ctx_t *ctx;
-    u32 bucket;
+    u32 bucket, ii;
 
     if (FIB_NODE_INDEX_INVALID != fwalk->fw_prio_sibling)
     {
@@ -277,16 +277,19 @@ fib_walk_destroy (fib_walk_t *fwalk)
     fib_walk_history[history_last_walk_pos].fwh_flags =
         fwalk->fw_flags;
 
-    fib_walk_history[history_last_walk_pos].fwh_reason = 0;
-    vec_foreach(ctx, fwalk->fw_ctx)
+    vec_foreach_index(ii, fwalk->fw_ctx)
     {
-        fib_walk_history[history_last_walk_pos].fwh_reason |=
-            ctx->fnbw_reason;
+        if (ii < MAX_HISTORY_REASONS)
+        {
+            fib_walk_history[history_last_walk_pos].fwh_reason[ii] =
+                fwalk->fw_ctx[ii].fnbw_reason;
+        }
     }
 
     history_last_walk_pos = (history_last_walk_pos + 1) % HISTORY_N_WALKS;
 
     fib_node_deinit(&fwalk->fw_node);
+    vec_free(fwalk->fw_ctx);
     pool_put(fib_walk_pool, fwalk);
 }
@@ -315,7 +318,7 @@ typedef enum fib_walk_advance_rc_t_
 static fib_walk_advance_rc_t
 fib_walk_advance (fib_node_index_t fwi)
 {
-    fib_node_back_walk_ctx_t *ctx;
+    fib_node_back_walk_ctx_t *ctx, *old;
     fib_node_back_walk_rc_t wrc;
     fib_node_ptr_t sibling;
     fib_walk_t *fwalk;
@@ -332,6 +335,8 @@ fib_walk_advance (fib_node_index_t fwi)
 
     if (more_elts)
     {
+        old = fwalk->fw_ctx;
+
         vec_foreach(ctx, fwalk->fw_ctx)
         {
             wrc = fib_node_back_walk_one(&sibling, ctx);
@@ -347,6 +352,14 @@ fib_walk_advance (fib_node_index_t fwi)
                  */
                 return (FIB_WALK_ADVANCE_MERGE);
             }
+            if (old != fwalk->fw_ctx)
+            {
+                /*
+                 * nasty re-entrant addition of a walk has realloc'd the vector
+                 * break out
+                 */
+                return (FIB_WALK_ADVANCE_MERGE);
+            }
         }
         /*
          * move foward to the next node to visit
@@ -565,7 +578,7 @@ fib_walk_alloc (fib_node_type_t parent_type,
     /*
      * make a copy of the backwalk context so the depth count remains
      * the same for each sibling visitsed. This is important in the case
-     * where a parents has a loop via one child, but all the others are not.
+     * where a parent has a loop via one child, but all the others are not.
      * if the looped child were visited first, the depth count would exceed, the
      * max and the walk would terminate before it reached the other siblings.
      */
@@ -801,42 +814,40 @@ static fib_node_back_walk_rc_t
 fib_walk_back_walk_notify (fib_node_t *node,
                            fib_node_back_walk_ctx_t *ctx)
 {
-    fib_node_back_walk_ctx_t *old;
+    fib_node_back_walk_ctx_t *last;
     fib_walk_t *fwalk;
 
     fwalk = fib_walk_get_from_node(node);
 
     /*
-     * check whether the walk context can be merge with another,
-     * or whether it needs to be appended.
+     * check whether the walk context can be merged with the most recent.
+     * the most recent was the one last added and is thus at the back of the vector.
+     * we can merge walks if the reason for the walk is the same.
      */
-    vec_foreach(old, fwalk->fw_ctx)
+    last = vec_end(fwalk->fw_ctx) - 1;
+
+    if (last->fnbw_reason == ctx->fnbw_reason)
     {
-        /*
-         * we can merge walks if the reason for the walk is the same.
-         */
-        if (old->fnbw_reason == ctx->fnbw_reason)
-        {
-            /*
-             * copy the largest of the depth values. in the presence of a loop,
-             * the same walk will merge with itself. if we take the smaller depth
-             * then it will never end.
-             */
-            old->fnbw_depth = ((old->fnbw_depth >= ctx->fnbw_depth) ?
-                               old->fnbw_depth :
-                               ctx->fnbw_depth);
-            goto out;
-        }
+        /*
+         * copy the largest of the depth values. in the presence of a loop,
+         * the same walk will merge with itself. if we take the smaller depth
+         * then it will never end.
+         */
+        last->fnbw_depth = ((last->fnbw_depth >= ctx->fnbw_depth) ?
+                            last->fnbw_depth :
+                            ctx->fnbw_depth);
+    }
+    else
+    {
+        /*
+         * walks could not be merged, this means that the walk infront needs to
+         * perform different action to this one that has caught up. the one in
+         * front was scheduled first so append the new walk context to the back
+         * of the list.
+         */
+        vec_add1(fwalk->fw_ctx, *ctx);
+    }
-    /*
-     * walks could not be merged, this means that the walk infront needs to
-     * perform different action to this one that has caught up. the one in front
-     * was scheduled first so append the new walk context to the back of the list.
-     */
-    vec_add1(fwalk->fw_ctx, *ctx);
-
-out:
     return (FIB_NODE_BACK_WALK_MERGE);
 }
@@ -979,10 +990,11 @@ fib_walk_show (vlib_main_t * vm,
     while (ii != history_last_walk_pos)
     {
-        if (0 != fib_walk_history[ii].fwh_reason)
+        if (0 != fib_walk_history[ii].fwh_reason[0])
         {
             fib_node_back_walk_reason_t reason;
             u8 *s = NULL;
+            u32 jj;
 
             s = format(s, "[@%d]: %s:%d visits:%d duration:%.2f completed:%.2f ",
                        ii,
                        fib_node_type_get_name(fib_walk_history[ii].fwh_parent.fnp_type),
@@ -996,10 +1008,15 @@ fib_walk_show (vlib_main_t * vm,
                 s = format(s, "async, ");
 
             s = format(s, "reason:");
-            FOR_EACH_FIB_NODE_BW_REASON(reason) {
-                if ((1<key.sw_if_index,
+                             adj->rewrite_header.sw_if_index,
                              adj_get_link_type(ai),
                              VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
 }
@@ -452,9 +454,7 @@ ip6_nd_mk_complete_walk (adj_index_t ai, void *ctx)
 static adj_walk_rc_t
 ip6_nd_mk_incomplete_walk (adj_index_t ai, void *ctx)
 {
-  ip6_neighbor_t *nbr = ctx;
-
-  ip6_nd_mk_incomplete (ai, nbr);
+  ip6_nd_mk_incomplete (ai);
 
   return (ADJ_WALK_RC_CONTINUE);
 }
@@ -683,13 +683,13 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
     }
 
   n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+  mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
 
   adj_nbr_walk_nh6 (sw_if_index,
                     &n->key.ip6_address,
                     ip6_nd_mk_incomplete_walk,
-                    n);
+                    NULL);
 
-  mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
   fib_table_entry_delete_index (n->fib_entry_index, FIB_SOURCE_ADJ);
   pool_put (nm->neighbor_pool, n);
 
diff --git a/vnet/vnet/ip/lookup.h b/vnet/vnet/ip/lookup.h
index a609e2fe7c0..4b6aaa10da9 100644
--- a/vnet/vnet/ip/lookup.h
+++ b/vnet/vnet/ip/lookup.h
@@ -168,6 +168,17 @@ typedef void (*adj_midchain_fixup_t)(vlib_main_t * vm,
                                      struct ip_adjacency_t_ *adj,
                                      vlib_buffer_t * b0);
 
+/**
+ * @brief Flags on an IP adjacency
+ */
+typedef enum ip_adjacency_flags_t_
+{
+    /**
+     * Currently a sync walk is active. Used to prevent re-entrant walking
+     */
+    IP_ADJ_SYNC_WALK_ACTIVE = (1 << 0),
+} ip_adjacency_flags_t;
+
 /** @brief IP unicast adjacency.
     @note cache aligned.
 */
@@ -254,6 +265,12 @@ typedef struct ip_adjacency_t_ {
    * remaining cachelines
    */
   fib_node_t ia_node;
+
+  /**
+   * Flags on the adjacency
+   */
+  ip_adjacency_flags_t ia_flags;
+
 } ip_adjacency_t;
 
 STATIC_ASSERT((STRUCT_OFFSET_OF(ip_adjacency_t, cacheline0) == 0),
--
cgit 1.2.3-korg
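
The guard that this patch adds to adj_nbr_update_rewrite_internal() can be reduced to the standalone sketch below. It is only an illustration of the pattern, not VPP code: node_t, NODE_SYNC_WALK_ACTIVE and node_sync_walk() are hypothetical stand-ins for ip_adjacency_t, IP_ADJ_SYNC_WALK_ACTIVE and the fib_walk_sync() call site. The idea is the one described in the commit message: mark the object before starting a synchronous walk from it, skip any walk that arrives back at an already marked object, and clear the mark when the walk completes.

/* Standalone sketch of the re-entrant walk guard (hypothetical names). */
#include <stdio.h>

typedef enum
{
    NODE_SYNC_WALK_ACTIVE = (1 << 0),   /* a sync walk on this node is in progress */
} node_flags_t;

typedef struct
{
    node_flags_t flags;
} node_t;

static void
node_sync_walk (node_t * node, int depth)
{
    /* the guard: a walk that re-enters the node it started from does nothing */
    if (node->flags & NODE_SYNC_WALK_ACTIVE)
    {
        printf ("depth %d: re-entrant walk suppressed\n", depth);
        return;
    }

    node->flags |= NODE_SYNC_WALK_ACTIVE;
    printf ("depth %d: walking children\n", depth);

    if (depth < 1)
    {
        /* a child update triggers a walk back onto the same node, standing in
         * for the IP adj -> fib_entry -> MPLS adj -> IP adj cycle described
         * in the commit message */
        node_sync_walk (node, depth + 1);
    }

    node->flags &= ~NODE_SYNC_WALK_ACTIVE;
}

int
main (void)
{
    node_t node = { .flags = 0 };

    node_sync_walk (&node, 0);
    return (0);
}

Because the flag lives on the object being walked, the second, nested walk returns immediately instead of repeating the child updates, which is the "unnecessary work" the commit message refers to.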