diff options
author | Neale Ranns <nranns@cisco.com> | 2017-04-18 09:09:40 -0700 |
---|---|---|
committer | Dave Barach <openvpp@barachs.net> | 2017-04-24 12:06:48 +0000 |
commit | f12a83f54ff2239d70494d577af3e1bb253692e1 (patch) | |
tree | bd1983c3cd93c5f50f2a8a7ce5da78e059bc16ed /src/vnet | |
parent | a5464817522c7a7dc760af4612f1d6a68ed0afc8 (diff) |
Improve Load-Balance MAPs
- only build them for popular path-lists (where popular means more than 64 children)
the reason to have a map is to improve convergence speed for recursive prefixes - if there are only a few this technique is not needed
- only build them when there is at least one path that has recursive constraints, i.e. a path that can 'fail' in a PIC scenario.
- Use the MAPS in the switch path.
- PIC test cases for functionality (not convergence performance)
Change-Id: I70705444c8469d22b07ae34be82cfb6a01358e10
Signed-off-by: Neale Ranns <nranns@cisco.com>
Diffstat (limited to 'src/vnet')
-rw-r--r-- | src/vnet/dpo/load_balance.c | 3 | ||||
-rw-r--r-- | src/vnet/dpo/load_balance_map.h | 31 | ||||
-rw-r--r-- | src/vnet/fib/fib_entry_src.c | 15 | ||||
-rw-r--r-- | src/vnet/fib/fib_entry_src_rr.c | 2 | ||||
-rw-r--r-- | src/vnet/fib/fib_path.c | 6 | ||||
-rw-r--r-- | src/vnet/fib/fib_path.h | 2 | ||||
-rw-r--r-- | src/vnet/fib/fib_path_list.c | 73 | ||||
-rw-r--r-- | src/vnet/fib/fib_path_list.h | 24 | ||||
-rw-r--r-- | src/vnet/fib/fib_table.c | 12 | ||||
-rw-r--r-- | src/vnet/fib/fib_test.c | 116 | ||||
-rw-r--r-- | src/vnet/fib/fib_walk.c | 122 | ||||
-rw-r--r-- | src/vnet/fib/fib_walk.h | 3 | ||||
-rw-r--r-- | src/vnet/ip/ip4_forward.c | 137 | ||||
-rw-r--r-- | src/vnet/ip/ip6_forward.c | 104 | ||||
-rw-r--r-- | src/vnet/ip/ip6_neighbor.c | 2 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_lookup.c | 118 |
16 files changed, 571 insertions, 199 deletions
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index 6b0eda0e8b1..af054f1c3f4 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -118,7 +118,8 @@ load_balance_format (index_t lbi, buckets = load_balance_get_buckets(lb); s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE); - s = format(s, "[index:%d buckets:%d ", lbi, lb->lb_n_buckets); + s = format(s, "[proto:%U ", format_dpo_proto, lb->lb_proto); + s = format(s, "index:%d buckets:%d ", lbi, lb->lb_n_buckets); s = format(s, "uRPF:%d ", lb->lb_urpf); s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes); if (0 != via.packets) diff --git a/src/vnet/dpo/load_balance_map.h b/src/vnet/dpo/load_balance_map.h index 454bf4b3763..237f24b0138 100644 --- a/src/vnet/dpo/load_balance_map.h +++ b/src/vnet/dpo/load_balance_map.h @@ -73,6 +73,37 @@ load_balance_map_get (index_t lbmi) return (pool_elt_at_index(load_balance_map_pool, lbmi)); } +static inline u16 +load_balance_map_translate (index_t lbmi, + u16 bucket) +{ + load_balance_map_t*lbm; + + lbm = load_balance_map_get(lbmi); + + return (lbm->lbm_buckets[bucket]); +} + +static inline const dpo_id_t * +load_balance_get_fwd_bucket (const load_balance_t *lb, + u16 bucket) +{ + ASSERT(bucket < lb->lb_n_buckets); + + if (INDEX_INVALID != lb->lb_map) + { + bucket = load_balance_map_translate(lb->lb_map, bucket); + } + + if (PREDICT_TRUE(LB_HAS_INLINE_BUCKETS(lb))) + { + return (&lb->lb_buckets_inline[bucket]); + } + else + { + return (&lb->lb_buckets[bucket]); + } +} extern void load_balance_map_module_init(void); diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c index a700282ee9b..fd80497c453 100644 --- a/src/vnet/fib/fib_entry_src.c +++ b/src/vnet/fib/fib_entry_src.c @@ -192,7 +192,7 @@ typedef struct fib_entry_src_collect_forwarding_ctx_t_ const fib_entry_t *fib_entry; const fib_entry_src_t *esrc; fib_forward_chain_type_t fct; - int is_recursive; + int n_recursive_constrained; } fib_entry_src_collect_forwarding_ctx_t; /** @@ -203,10 +203,11 @@ load_balance_flags_t fib_entry_calc_lb_flags (fib_entry_src_collect_forwarding_ctx_t *ctx) { /** - * We'll use a LB map is the path-list has recursive paths. + * We'll use a LB map if the path-list has multiple recursive paths. * recursive paths implies BGP, and hence scale. */ - if (ctx->is_recursive) + if (ctx->n_recursive_constrained > 1 && + fib_path_list_is_popular(ctx->esrc->fes_pl)) { return (LOAD_BALANCE_FLAG_USES_MAP); } @@ -282,9 +283,9 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index, return (!0); } - if (fib_path_is_recursive(path_index)) + if (fib_path_is_recursive_constrained(path_index)) { - ctx->is_recursive = 1; + ctx->n_recursive_constrained += 1; } /* @@ -397,7 +398,7 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry, .esrc = esrc, .fib_entry = fib_entry, .next_hops = NULL, - .is_recursive = 0, + .n_recursive_constrained = 0, .fct = fct, }; @@ -409,7 +410,7 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry, vec_validate(ctx.next_hops, fib_path_list_get_n_paths(esrc->fes_pl)); vec_reset_length(ctx.next_hops); - lb_proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto); + lb_proto = fib_forw_chain_type_to_dpo_proto(fct); fib_path_list_walk(esrc->fes_pl, fib_entry_src_collect_forwarding, diff --git a/src/vnet/fib/fib_entry_src_rr.c b/src/vnet/fib/fib_entry_src_rr.c index ff15c54e281..c145aaa2915 100644 --- a/src/vnet/fib/fib_entry_src_rr.c +++ b/src/vnet/fib/fib_entry_src_rr.c @@ -103,7 +103,7 @@ fib_entry_src_rr_activate (fib_entry_src_t *src, fib_entry_cover_track(cover, fib_entry_get_index(fib_entry)); /* - * if the ocver is attached then install an attached-host path + * if the cover is attached then install an attached-host path * (like an adj-fib). Otherwise inherit the forwarding from the cover */ if (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover)) diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index 70c8790579a..889317fd83c 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -2025,13 +2025,15 @@ fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index, } int -fib_path_is_recursive (fib_node_index_t path_index) +fib_path_is_recursive_constrained (fib_node_index_t path_index) { fib_path_t *path; path = fib_path_get(path_index); - return (FIB_PATH_TYPE_RECURSIVE == path->fp_type); + return ((FIB_PATH_TYPE_RECURSIVE == path->fp_type) && + ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED) || + (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST))); } int diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h index 334be6f56e6..b6bf1e4fa83 100644 --- a/src/vnet/fib/fib_path.h +++ b/src/vnet/fib/fib_path.h @@ -144,7 +144,7 @@ extern fib_node_index_t fib_path_copy(fib_node_index_t path_index, fib_node_index_t path_list_index); extern int fib_path_resolve(fib_node_index_t path_index); extern int fib_path_is_resolved(fib_node_index_t path_index); -extern int fib_path_is_recursive(fib_node_index_t path_index); +extern int fib_path_is_recursive_constrained(fib_node_index_t path_index); extern int fib_path_is_exclusive(fib_node_index_t path_index); extern int fib_path_is_deag(fib_node_index_t path_index); extern int fib_path_is_looped(fib_node_index_t path_index); diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c index ea6565dd19b..64917f95107 100644 --- a/src/vnet/fib/fib_path_list.c +++ b/src/vnet/fib/fib_path_list.c @@ -26,6 +26,16 @@ #include <vnet/fib/fib_urpf_list.h> /** + * The magic number of child entries that make a path-list popular. + * There's a trade-off here between convergnece and forwarding speed. + * Popular path-lists generate load-balance maps for the entires that + * use them. If the map is present there is a switch path cost to indirect + * through the map - this indirection provides the fast convergence - so + * without the map convergence is slower. + */ +#define FIB_PATH_LIST_POPULAR 64 + +/** * FIB path-list * A representation of the list/set of path trough which a prefix is reachable */ @@ -454,14 +464,7 @@ fib_path_list_back_walk (fib_node_index_t path_list_index, /* * propagate the backwalk further */ - if (32 >= fib_node_list_get_size(path_list->fpl_node.fn_children)) - { - /* - * only a few children. continue the walk synchronously - */ - fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, ctx); - } - else + if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_POPULAR) { /* * many children. schedule a async walk @@ -471,6 +474,13 @@ fib_path_list_back_walk (fib_node_index_t path_list_index, FIB_WALK_PRIORITY_LOW, ctx); } + else + { + /* + * only a few children. continue the walk synchronously + */ + fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, ctx); + } } /* @@ -625,6 +635,16 @@ fib_path_list_is_looped (fib_node_index_t path_list_index) return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_LOOPED); } +int +fib_path_list_is_popular (fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_POPULAR); +} + static fib_path_list_flags_t fib_path_list_flags_fixup (fib_path_list_flags_t flags) { @@ -807,6 +827,7 @@ fib_path_list_path_add (fib_node_index_t path_list_index, */ if (0 == fib_path_cmp(new_path_index, *orig_path_index)) { + fib_path_destroy(new_path_index); return (*orig_path_index); } } @@ -1173,10 +1194,38 @@ fib_path_list_child_add (fib_node_index_t path_list_index, fib_node_type_t child_type, fib_node_index_t child_index) { - return (fib_node_child_add(FIB_NODE_TYPE_PATH_LIST, - path_list_index, - child_type, - child_index)); + u32 sibling; + + sibling = fib_node_child_add(FIB_NODE_TYPE_PATH_LIST, + path_list_index, + child_type, + child_index); + + if (FIB_PATH_LIST_POPULAR == fib_node_get_n_children(FIB_NODE_TYPE_PATH_LIST, + path_list_index)) + { + /* + * Set the popular flag on the path-list once we pass the magic + * threshold. then walk children to update. + * We don't undo this action. The rational being that the number + * of entries using this prefix is large enough such that it is a + * non-trival amount of effort to converge them. If we get into the + * situation where we are adding and removing entries such that we + * flip-flop over the threshold, then this non-trivial work is added + * to each of those routes adds/deletes - not a situation we want. + */ + fib_node_back_walk_ctx_t ctx = { + .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE, + }; + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + path_list->fpl_flags |= FIB_PATH_LIST_FLAG_POPULAR; + + fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, &ctx); + } + + return (sibling); } void diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h index 9d2462119fb..376cb726dd6 100644 --- a/src/vnet/fib/fib_path_list.h +++ b/src/vnet/fib/fib_path_list.h @@ -39,11 +39,6 @@ typedef enum fib_path_list_attribute_t_ { */ FIB_PATH_LIST_ATTRIBUTE_SHARED = FIB_PATH_LIST_ATTRIBUTE_FIRST, /** - * Indexed means the path-list keeps a hash table of all paths for - * fast lookup. The lookup result is the fib_node_index of the path. - */ - FIB_PATH_LIST_ATTRIBUTE_INDEXED, - /** * explicit drop path-list. Used when the entry source needs to * force a drop, despite the fact the path info is present. */ @@ -66,36 +61,42 @@ typedef enum fib_path_list_attribute_t_ { */ FIB_PATH_LIST_ATTRIBUTE_LOOPED, /** + * a popular path-ist is one that is shared amongst many entries. + * Path list become popular as they gain more children, but they + * don't become unpopular as they lose them. + */ + FIB_PATH_LIST_ATTRIBUTE_POPULAR, + /** * no uRPF - do not generate unicast RPF list for this path-list */ FIB_PATH_LIST_ATTRIBUTE_NO_URPF, /** * Marher. Add new flags before this one, and then update it. */ - FIB_PATH_LIST_ATTRIBUTE_LAST = FIB_PATH_LIST_ATTRIBUTE_LOOPED, + FIB_PATH_LIST_ATTRIBUTE_LAST = FIB_PATH_LIST_ATTRIBUTE_NO_URPF, } fib_path_list_attribute_t; typedef enum fib_path_list_flags_t_ { FIB_PATH_LIST_FLAG_NONE = 0, FIB_PATH_LIST_FLAG_SHARED = (1 << FIB_PATH_LIST_ATTRIBUTE_SHARED), - FIB_PATH_LIST_FLAG_INDEXED = (1 << FIB_PATH_LIST_ATTRIBUTE_INDEXED), FIB_PATH_LIST_FLAG_DROP = (1 << FIB_PATH_LIST_ATTRIBUTE_DROP), FIB_PATH_LIST_FLAG_LOCAL = (1 << FIB_PATH_LIST_ATTRIBUTE_LOCAL), FIB_PATH_LIST_FLAG_EXCLUSIVE = (1 << FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE), FIB_PATH_LIST_FLAG_RESOLVED = (1 << FIB_PATH_LIST_ATTRIBUTE_RESOLVED), FIB_PATH_LIST_FLAG_LOOPED = (1 << FIB_PATH_LIST_ATTRIBUTE_LOOPED), + FIB_PATH_LIST_FLAG_POPULAR = (1 << FIB_PATH_LIST_ATTRIBUTE_POPULAR), FIB_PATH_LIST_FLAG_NO_URPF = (1 << FIB_PATH_LIST_ATTRIBUTE_NO_URPF), } fib_path_list_flags_t; #define FIB_PATH_LIST_ATTRIBUTES { \ [FIB_PATH_LIST_ATTRIBUTE_SHARED] = "shared", \ - [FIB_PATH_LIST_ATTRIBUTE_INDEXED] = "indexed", \ [FIB_PATH_LIST_ATTRIBUTE_RESOLVED] = "resolved", \ [FIB_PATH_LIST_ATTRIBUTE_DROP] = "drop", \ [FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ - [FIB_PATH_LIST_ATTRIBUTE_LOCAL] = "local", \ - [FIB_PATH_LIST_ATTRIBUTE_LOOPED] = "looped", \ - [FIB_PATH_LIST_ATTRIBUTE_NO_URPF] = "no-uRPF", \ + [FIB_PATH_LIST_ATTRIBUTE_LOCAL] = "local", \ + [FIB_PATH_LIST_ATTRIBUTE_LOOPED] = "looped", \ + [FIB_PATH_LIST_ATTRIBUTE_POPULAR] = "popular", \ + [FIB_PATH_LIST_ATTRIBUTE_NO_URPF] = "no-uRPF", \ } #define FOR_EACH_PATH_LIST_ATTRIBUTE(_item) \ @@ -148,6 +149,7 @@ extern int fib_path_list_recursive_loop_detect(fib_node_index_t path_list_index, fib_node_index_t **entry_indicies); extern u32 fib_path_list_get_resolving_interface(fib_node_index_t path_list_index); extern int fib_path_list_is_looped(fib_node_index_t path_list_index); +extern int fib_path_list_is_popular(fib_node_index_t path_list_index); extern fib_protocol_t fib_path_list_get_proto(fib_node_index_t path_list_index); extern u8 * fib_path_list_format(fib_node_index_t pl_index, u8 * s); diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 0938ce9bd2f..ff428049b66 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -608,11 +608,19 @@ fib_table_entry_path_remove2 (u32 fib_index, fib_entry_src_flag_t src_flag; int was_sourced; - /* + /* + * if it's not sourced, then there's nowt to remove + */ + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + if (!was_sourced) + { + return; + } + + /* * don't nobody go nowhere */ fib_entry_lock(fib_entry_index); - was_sourced = fib_entry_is_sourced(fib_entry_index, source); for (ii = 0; ii < vec_len(rpath); ii++) { diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index cbb5640ae36..d3bdfa35c06 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -729,6 +729,9 @@ fib_test_v4 (void) .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02), }; + FIB_TEST((0 == pool_elts(load_balance_map_pool)), "LB-map pool size is %d", + pool_elts(load_balance_map_pool)); + tm = &test_main; /* record the nubmer of load-balances in use before we start */ @@ -3090,6 +3093,43 @@ fib_test_v4 (void) NULL, FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + /* + * add a bunch load more entries using this path combo so that we get + * an LB-map created. + */ +#define N_P 128 + fib_prefix_t bgp_78s[N_P]; + for (ii = 0; ii < N_P; ii++) + { + bgp_78s[ii].fp_len = 32; + bgp_78s[ii].fp_proto = FIB_PROTOCOL_IP4; + bgp_78s[ii].fp_addr.ip4.as_u32 = clib_host_to_net_u32(0x4e000000+ii); + + + fib_table_entry_path_add(fib_index, + &bgp_78s[ii], + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_3_s_32.fp_addr, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + fib_table_entry_path_add(fib_index, + &bgp_78s[ii], + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_1_1_1_1, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + } + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); dpo = fib_entry_contribute_ip_forwarding(fei); @@ -3138,6 +3178,9 @@ fib_test_v4 (void) 1, FIB_ROUTE_PATH_FLAG_NONE); + /* suspend so the update walk kicks int */ + vlib_process_suspend(vlib_get_main(), 1e-5); + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); FIB_TEST(!dpo_cmp(dpo, fib_entry_contribute_ip_forwarding(fei)), "post PIC 200.200.200.200/32 was inplace modified"); @@ -3175,6 +3218,9 @@ fib_test_v4 (void) NULL, FIB_ROUTE_PATH_FLAG_NONE); + /* suspend so the update walk kicks in */ + vlib_process_suspend(vlib_get_main(), 1e-5); + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket_i(lb, 0)), "post PIC recovery adj for 200.200.200.200/32 is recursive " "via adj for 1.1.1.1"); @@ -3201,6 +3247,20 @@ fib_test_v4 (void) 1, NULL, FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + for (ii = 0; ii < N_P; ii++) + { + fib_table_entry_path_add(fib_index, + &bgp_78s[ii], + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + } fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); dpo = fib_entry_contribute_ip_forwarding(fei); @@ -3233,6 +3293,8 @@ fib_test_v4 (void) ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); + /* suspend so the update walk kicks int */ + vlib_process_suspend(vlib_get_main(), 1e-5); fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); dpo = fib_entry_contribute_ip_forwarding(fei); @@ -3270,6 +3332,16 @@ fib_test_v4 (void) NULL, FIB_ROUTE_PATH_FLAG_NONE); + for (ii = 0; ii < N_P; ii++) + { + fib_table_entry_delete(fib_index, + &bgp_78s[ii], + FIB_SOURCE_API); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &bgp_78s[ii])), + "%U removed", + format_fib_prefix, &bgp_78s[ii]); + } fib_table_entry_path_remove(fib_index, &bgp_200_pfx, FIB_SOURCE_API, @@ -3303,6 +3375,8 @@ fib_test_v4 (void) fib_table_entry_delete(fib_index, &pfx_1_1_1_0_s_28, FIB_SOURCE_API); + /* suspend so the update walk kicks int */ + vlib_process_suspend(vlib_get_main(), 1e-5); FIB_TEST((FIB_NODE_INDEX_INVALID == fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28)), "1.1.1.1/28 removed"); @@ -3821,7 +3895,7 @@ fib_test_v4 (void) /* * -2 entries and -2 non-shared path-list */ - FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); FIB_TEST((PNBR == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); @@ -3855,7 +3929,7 @@ fib_test_v4 (void) FIB_TEST((ENBR-5 == pool_elts(fib_urpf_list_pool)), "uRPF pool size is %d", pool_elts(fib_urpf_list_pool)); FIB_TEST((0 == pool_elts(load_balance_map_pool)), "LB-map pool size is %d", - pool_elts(load_balance_map_pool)); + pool_elts(load_balance_map_pool)); FIB_TEST((lb_count == pool_elts(load_balance_pool)), "LB pool size is %d", pool_elts(load_balance_pool)); @@ -5900,6 +5974,12 @@ fib_test_label (void) .adj = DPO_PROTO_IP4, }, }; + fib_test_lb_bucket_t mpls_bucket_drop = { + .type = FT_LB_SPECIAL, + .special = { + .adj = DPO_PROTO_MPLS, + }, + }; fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, @@ -5932,9 +6012,9 @@ fib_test_label (void) &pfx_24001_neos); FIB_TEST(fib_test_validate_entry(fei, FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, - 1, - &bucket_drop), - "24001/eos LB 1 buckets via: DROP"); + 1, + &mpls_bucket_drop), + "24001/neos LB 1 buckets via: DROP"); /* * add back the path with the valid label @@ -7709,6 +7789,12 @@ lfib_test (void) fib_test_lb_bucket_t bucket_drop = { .type = FT_LB_SPECIAL, .special = { + .adj = DPO_PROTO_IP4, + }, + }; + fib_test_lb_bucket_t mpls_bucket_drop = { + .type = FT_LB_SPECIAL, + .special = { .adj = DPO_PROTO_MPLS, }, }; @@ -7735,7 +7821,12 @@ lfib_test (void) FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 1, &bucket_drop), - "2.2.2.4/32 LB 1 buckets via: ip4-DROP"); + "1200/neos LB 1 buckets via: ip4-DROP"); + FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 1, + &mpls_bucket_drop), + "1200/neos LB 1 buckets via: mpls-DROP"); fib_table_entry_delete(fib_index, &pfx_2_2_2_4_s_32, FIB_SOURCE_API); @@ -7940,18 +8031,19 @@ fib_test (vlib_main_t * vm, } else { - /* - * These walk UT aren't run as part of the full suite, since the - * fib-walk process must be disabled in order for the tests to work - * - * fib_test_walk(); - */ res += fib_test_v4(); res += fib_test_v6(); res += fib_test_ae(); res += fib_test_bfd(); res += fib_test_label(); res += lfib_test(); + + /* + * fib-walk process must be disabled in order for the walk tests to work + */ + fib_walk_process_disable(); + res += fib_test_walk(); + fib_walk_process_enable(); } if (res) diff --git a/src/vnet/fib/fib_walk.c b/src/vnet/fib/fib_walk.c index 938f7b8c1c6..c570476d130 100644 --- a/src/vnet/fib/fib_walk.c +++ b/src/vnet/fib/fib_walk.c @@ -96,11 +96,6 @@ typedef struct fib_walk_t_ static fib_walk_t *fib_walk_pool; /** - * @brief There's only one event type sent to the walk process - */ -#define FIB_WALK_EVENT 0 - -/** * Statistics maintained per-walk queue */ typedef enum fib_walk_queue_stats_t_ @@ -240,10 +235,13 @@ fib_walk_queue_get_front (fib_walk_priority_t prio) } static void -fib_walk_destroy (fib_walk_t *fwalk) +fib_walk_destroy (index_t fwi) { + fib_walk_t *fwalk; u32 bucket, ii; + fwalk = fib_walk_get(fwi); + if (FIB_NODE_INDEX_INVALID != fwalk->fw_prio_sibling) { fib_node_list_elt_remove(fwalk->fw_prio_sibling); @@ -253,6 +251,12 @@ fib_walk_destroy (fib_walk_t *fwalk) fwalk->fw_dep_sibling); /* + * refetch the walk object. More walks could have been spawned as a result + * of releasing the lock on the parent. + */ + fwalk = fib_walk_get(fwi); + + /* * add the stats to the continuous histogram collection. */ bucket = (fwalk->fw_n_visits / HISTOGRAM_VISITS_PER_WALK_INCR); @@ -466,8 +470,7 @@ fib_walk_process_queues (vlib_main_t * vm, */ if (FIB_WALK_ADVANCE_MORE != rc) { - fwalk = fib_walk_get(fwi); - fib_walk_destroy(fwalk); + fib_walk_destroy(fwi); fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_COMPLETED]++; } else @@ -511,6 +514,16 @@ that_will_do_for_now: } /** + * Events sent to the FIB walk process + */ +typedef enum fib_walk_process_event_t_ +{ + FIB_WALK_PROCESS_EVENT_DATA, + FIB_WALK_PROCESS_EVENT_ENABLE, + FIB_WALK_PROCESS_EVENT_DISABLE, +} fib_walk_process_event; + +/** * @brief The 'fib-walk' process's main loop. */ static uword @@ -518,22 +531,47 @@ fib_walk_process (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) { + uword event_type, *event_data = 0; f64 sleep_time; + int enabled; + enabled = 1; sleep_time = fib_walk_sleep_duration[FIB_WALK_SHORT_SLEEP]; while (1) { - vlib_process_wait_for_event_or_clock(vm, sleep_time); + /* + * the feature to disable/enable this walk process is only + * for testing purposes + */ + if (enabled) + { + vlib_process_wait_for_event_or_clock(vm, sleep_time); + } + else + { + vlib_process_wait_for_event(vm); + } - /* - * there may be lots of event queued between the processes, - * but the walks we want to schedule are in the priority queues, - * so we ignore the process events. - */ - vlib_process_get_events(vm, NULL); + event_type = vlib_process_get_events(vm, &event_data); + vec_reset_length(event_data); + + switch (event_type) + { + case FIB_WALK_PROCESS_EVENT_ENABLE: + enabled = 1; + break; + case FIB_WALK_PROCESS_EVENT_DISABLE: + enabled = 0; + break; + default: + break; + } - sleep_time = fib_walk_process_queues(vm, quota); + if (enabled) + { + sleep_time = fib_walk_process_queues(vm, quota); + } } /* @@ -610,8 +648,8 @@ fib_walk_prio_queue_enquue (fib_walk_priority_t prio, */ vlib_process_signal_event(vlib_get_main(), fib_walk_process_node.index, - FIB_WALK_EVENT, - FIB_WALK_EVENT); + FIB_WALK_PROCESS_EVENT_DATA, + 0); return (sibling); } @@ -742,7 +780,7 @@ fib_walk_sync (fib_node_type_t parent_type, ASSERT(FIB_NODE_INDEX_INVALID != merged_walk.fnp_index); ASSERT(FIB_NODE_TYPE_WALK == merged_walk.fnp_type); - fib_walk_destroy(fwalk); + fib_walk_destroy(fwi); fwi = merged_walk.fnp_index; fwalk = fib_walk_get(fwi); @@ -774,7 +812,7 @@ fib_walk_sync (fib_node_type_t parent_type, if (NULL != fwalk) { - fib_walk_destroy(fwalk); + fib_walk_destroy(fwi); } } @@ -1106,3 +1144,47 @@ VLIB_CLI_COMMAND (fib_walk_clear_command, static) = { .short_help = "clear fib walk", .function = fib_walk_clear, }; + +void +fib_walk_process_enable (void) +{ + vlib_process_signal_event(vlib_get_main(), + fib_walk_process_node.index, + FIB_WALK_PROCESS_EVENT_ENABLE, + 0); +} + +void +fib_walk_process_disable (void) +{ + vlib_process_signal_event(vlib_get_main(), + fib_walk_process_node.index, + FIB_WALK_PROCESS_EVENT_DISABLE, + 0); +} + +static clib_error_t * +fib_walk_process_enable_disable (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + if (unformat (input, "enable")) + { + fib_walk_process_enable(); + } + else if (unformat (input, "disable")) + { + fib_walk_process_disable(); + } + else + { + return clib_error_return(0, "choose enable or disable"); + } + return (NULL); +} + +VLIB_CLI_COMMAND (fib_walk_process_command, static) = { + .path = "test fib-walk-process", + .short_help = "test fib-walk-process [enable|disable]", + .function = fib_walk_process_enable_disable, +}; diff --git a/src/vnet/fib/fib_walk.h b/src/vnet/fib/fib_walk.h index 7413d8a2c78..fdf2f10c196 100644 --- a/src/vnet/fib/fib_walk.h +++ b/src/vnet/fib/fib_walk.h @@ -54,5 +54,8 @@ extern void fib_walk_sync(fib_node_type_t parent_type, extern u8* format_fib_walk_priority(u8 *s, va_list ap); +extern void fib_walk_process_enable(void); +extern void fib_walk_process_disable(void); + #endif diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 0f562037759..697d2169b10 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -49,6 +49,7 @@ #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */ #include <vnet/fib/ip4_fib.h> #include <vnet/dpo/load_balance.h> +#include <vnet/dpo/load_balance_map.h> #include <vnet/dpo/classify_dpo.h> #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */ @@ -89,7 +90,6 @@ ip4_lookup_inline (vlib_main_t * vm, { vlib_buffer_t *p0, *p1, *p2, *p3; ip4_header_t *ip0, *ip1, *ip2, *ip3; - __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3; ip_lookup_next_t next0, next1, next2, next3; const load_balance_t *lb0, *lb1, *lb2, *lb3; ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3; @@ -188,11 +188,6 @@ ip4_lookup_inline (vlib_main_t * vm, leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3); } - tcp0 = (void *) (ip0 + 1); - tcp1 = (void *) (ip1 + 1); - tcp2 = (void *) (ip2 + 1); - tcp3 = (void *) (ip3 + 1); - if (!lookup_for_responses_to_locally_received_packets) { leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); @@ -230,6 +225,15 @@ ip4_lookup_inline (vlib_main_t * vm, lb2 = load_balance_get (lb_index2); lb3 = load_balance_get (lb_index3); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb1->lb_n_buckets)); + ASSERT (lb2->lb_n_buckets > 0); + ASSERT (is_pow2 (lb2->lb_n_buckets)); + ASSERT (lb3->lb_n_buckets > 0); + ASSERT (is_pow2 (lb3->lb_n_buckets)); + /* Use flow hash to compute multipath adjacency. */ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0; @@ -240,47 +244,57 @@ ip4_lookup_inline (vlib_main_t * vm, flow_hash_config0 = lb0->lb_hash_config; hash_c0 = vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash (ip0, flow_hash_config0); + dpo0 = + load_balance_get_fwd_bucket (lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); } if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) { flow_hash_config1 = lb1->lb_hash_config; hash_c1 = vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash (ip1, flow_hash_config1); + dpo1 = + load_balance_get_fwd_bucket (lb1, + (hash_c1 & + (lb1->lb_n_buckets_minus_1))); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); } if (PREDICT_FALSE (lb2->lb_n_buckets > 1)) { flow_hash_config2 = lb2->lb_hash_config; hash_c2 = vnet_buffer (p2)->ip.flow_hash = ip4_compute_flow_hash (ip2, flow_hash_config2); + dpo2 = + load_balance_get_fwd_bucket (lb2, + (hash_c2 & + (lb2->lb_n_buckets_minus_1))); + } + else + { + dpo2 = load_balance_get_bucket_i (lb2, 0); } if (PREDICT_FALSE (lb3->lb_n_buckets > 1)) { flow_hash_config3 = lb3->lb_hash_config; hash_c3 = vnet_buffer (p3)->ip.flow_hash = ip4_compute_flow_hash (ip3, flow_hash_config3); + dpo3 = + load_balance_get_fwd_bucket (lb3, + (hash_c3 & + (lb3->lb_n_buckets_minus_1))); + } + else + { + dpo3 = load_balance_get_bucket_i (lb3, 0); } - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - ASSERT (lb1->lb_n_buckets > 0); - ASSERT (is_pow2 (lb1->lb_n_buckets)); - ASSERT (lb2->lb_n_buckets > 0); - ASSERT (is_pow2 (lb2->lb_n_buckets)); - ASSERT (lb3->lb_n_buckets > 0); - ASSERT (is_pow2 (lb3->lb_n_buckets)); - - dpo0 = load_balance_get_bucket_i (lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); - dpo1 = load_balance_get_bucket_i (lb1, - (hash_c1 & - (lb1->lb_n_buckets_minus_1))); - dpo2 = load_balance_get_bucket_i (lb2, - (hash_c2 & - (lb2->lb_n_buckets_minus_1))); - dpo3 = load_balance_get_bucket_i (lb3, - (hash_c3 & - (lb3->lb_n_buckets_minus_1))); next0 = dpo0->dpoi_next_node; vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; @@ -293,20 +307,16 @@ ip4_lookup_inline (vlib_main_t * vm, vlib_increment_combined_counter (cm, thread_index, lb_index0, 1, - vlib_buffer_length_in_chain (vm, p0) - + sizeof (ethernet_header_t)); + vlib_buffer_length_in_chain (vm, p0)); vlib_increment_combined_counter (cm, thread_index, lb_index1, 1, - vlib_buffer_length_in_chain (vm, p1) - + sizeof (ethernet_header_t)); + vlib_buffer_length_in_chain (vm, p1)); vlib_increment_combined_counter (cm, thread_index, lb_index2, 1, - vlib_buffer_length_in_chain (vm, p2) - + sizeof (ethernet_header_t)); + vlib_buffer_length_in_chain (vm, p2)); vlib_increment_combined_counter (cm, thread_index, lb_index3, 1, - vlib_buffer_length_in_chain (vm, p3) - + sizeof (ethernet_header_t)); + vlib_buffer_length_in_chain (vm, p3)); vlib_validate_buffer_enqueue_x4 (vm, node, next, to_next, n_left_to_next, @@ -318,7 +328,6 @@ ip4_lookup_inline (vlib_main_t * vm, { vlib_buffer_t *p0; ip4_header_t *ip0; - __attribute__ ((unused)) tcp_header_t *tcp0; ip_lookup_next_t next0; const load_balance_t *lb0; ip4_fib_mtrie_t *mtrie0; @@ -352,8 +361,6 @@ ip4_lookup_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); } - tcp0 = (void *) (ip0 + 1); - if (!lookup_for_responses_to_locally_received_packets) leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); @@ -371,6 +378,9 @@ ip4_lookup_inline (vlib_main_t * vm, ASSERT (lbi0); lb0 = load_balance_get (lbi0); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + /* Use flow hash to compute multipath adjacency. */ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) @@ -379,20 +389,22 @@ ip4_lookup_inline (vlib_main_t * vm, hash_c0 = vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash (ip0, flow_hash_config0); + dpo0 = + load_balance_get_fwd_bucket (lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); } - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - - dpo0 = load_balance_get_bucket_i (lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); next0 = dpo0->dpoi_next_node; vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - vlib_increment_combined_counter - (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + vlib_increment_combined_counter (cm, thread_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, + p0)); from += 1; to_next += 1; @@ -555,6 +567,12 @@ ip4_load_balance (vlib_main_t * vm, hc0 = vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash (ip0, lb0->lb_hash_config); } + dpo0 = load_balance_get_fwd_bucket + (lb0, (hc0 & (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); } if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) { @@ -568,14 +586,13 @@ ip4_load_balance (vlib_main_t * vm, hc1 = vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash (ip1, lb1->lb_hash_config); } + dpo1 = load_balance_get_fwd_bucket + (lb1, (hc1 & (lb1->lb_n_buckets_minus_1))); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); } - - dpo0 = - load_balance_get_bucket_i (lb0, - hc0 & (lb0->lb_n_buckets_minus_1)); - dpo1 = - load_balance_get_bucket_i (lb1, - hc1 & (lb1->lb_n_buckets_minus_1)); next0 = dpo0->dpoi_next_node; next1 = dpo1->dpoi_next_node; @@ -629,11 +646,13 @@ ip4_load_balance (vlib_main_t * vm, hc0 = vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash (ip0, lb0->lb_hash_config); } + dpo0 = load_balance_get_fwd_bucket + (lb0, (hc0 & (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); } - - dpo0 = - load_balance_get_bucket_i (lb0, - hc0 & (lb0->lb_n_buckets_minus_1)); next0 = dpo0->dpoi_next_node; vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 98bfd4d1713..3bc07d0e679 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -45,7 +45,7 @@ #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */ #include <vnet/fib/ip6_fib.h> #include <vnet/mfib/ip6_mfib.h> -#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/load_balance_map.h> #include <vnet/dpo/classify_dpo.h> #include <vppinfra/bihash_template.c> @@ -138,6 +138,10 @@ ip6_lookup_inline (vlib_main_t * vm, lb0 = load_balance_get (lbi0); lb1 = load_balance_get (lbi1); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + ASSERT (is_pow2 (lb1->lb_n_buckets)); vnet_buffer (p0)->ip.flow_hash = vnet_buffer (p1)->ip.flow_hash = 0; @@ -146,25 +150,29 @@ ip6_lookup_inline (vlib_main_t * vm, flow_hash_config0 = lb0->lb_hash_config; vnet_buffer (p0)->ip.flow_hash = ip6_compute_flow_hash (ip0, flow_hash_config0); + dpo0 = + load_balance_get_fwd_bucket (lb0, + (vnet_buffer (p0)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); } if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) { flow_hash_config1 = lb1->lb_hash_config; vnet_buffer (p1)->ip.flow_hash = ip6_compute_flow_hash (ip1, flow_hash_config1); + dpo1 = + load_balance_get_fwd_bucket (lb1, + (vnet_buffer (p1)->ip.flow_hash & + (lb1->lb_n_buckets_minus_1))); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); } - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (lb1->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - ASSERT (is_pow2 (lb1->lb_n_buckets)); - dpo0 = load_balance_get_bucket_i (lb0, - (vnet_buffer (p0)->ip.flow_hash & - lb0->lb_n_buckets_minus_1)); - dpo1 = load_balance_get_bucket_i (lb1, - (vnet_buffer (p1)->ip.flow_hash & - lb1->lb_n_buckets_minus_1)); - next0 = dpo0->dpoi_next_node; next1 = dpo1->dpoi_next_node; @@ -266,16 +274,24 @@ ip6_lookup_inline (vlib_main_t * vm, lb0 = load_balance_get (lbi0); vnet_buffer (p0)->ip.flow_hash = 0; + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) { flow_hash_config0 = lb0->lb_hash_config; vnet_buffer (p0)->ip.flow_hash = ip6_compute_flow_hash (ip0, flow_hash_config0); + dpo0 = + load_balance_get_fwd_bucket (lb0, + (vnet_buffer (p0)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); } - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); dpo0 = load_balance_get_bucket_i (lb0, (vnet_buffer (p0)->ip.flow_hash & lb0->lb_n_buckets_minus_1)); @@ -337,10 +353,18 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, { fib_node_index_t fei; - fei = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), FIB_PROTOCOL_IP6, NULL, /* No next-hop address */ - sw_if_index, ~0, // invalid FIB index - 1, NULL, // no label stack - FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_entry_update_one_path (fib_index, + &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP6, + /* No next-hop address */ + NULL, sw_if_index, + /* invalid FIB index */ + ~0, 1, + /* no label stack */ + NULL, FIB_ROUTE_PATH_FLAG_NONE); a->neighbor_probe_adj_index = fib_entry_get_adj (fei); } @@ -366,7 +390,13 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, } } - fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), FIB_PROTOCOL_IP6, &pfx.fp_addr, sw_if_index, ~0, // invalid FIB index + fib_table_entry_update_one_path (fib_index, &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP6, + &pfx.fp_addr, + sw_if_index, ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); } @@ -780,6 +810,14 @@ ip6_load_balance (vlib_main_t * vm, hc0 = vnet_buffer (p0)->ip.flow_hash = ip6_compute_flow_hash (ip0, lb0->lb_hash_config); } + dpo0 = + load_balance_get_fwd_bucket (lb0, + (hc0 & + lb0->lb_n_buckets_minus_1)); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); } if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) { @@ -793,14 +831,15 @@ ip6_load_balance (vlib_main_t * vm, hc1 = vnet_buffer (p1)->ip.flow_hash = ip6_compute_flow_hash (ip1, lb1->lb_hash_config); } + dpo1 = + load_balance_get_fwd_bucket (lb1, + (hc1 & + lb1->lb_n_buckets_minus_1)); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); } - - dpo0 = - load_balance_get_bucket_i (lb0, - hc0 & (lb0->lb_n_buckets_minus_1)); - dpo1 = - load_balance_get_bucket_i (lb1, - hc1 & (lb1->lb_n_buckets_minus_1)); next0 = dpo0->dpoi_next_node; next1 = dpo1->dpoi_next_node; @@ -869,10 +908,15 @@ ip6_load_balance (vlib_main_t * vm, hc0 = vnet_buffer (p0)->ip.flow_hash = ip6_compute_flow_hash (ip0, lb0->lb_hash_config); } + dpo0 = + load_balance_get_fwd_bucket (lb0, + (hc0 & + lb0->lb_n_buckets_minus_1)); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); } - dpo0 = - load_balance_get_bucket_i (lb0, - hc0 & (lb0->lb_n_buckets_minus_1)); next0 = dpo0->dpoi_next_node; vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 31182770261..ee80ee3d4b4 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -630,7 +630,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, n->fib_entry_index = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_ADJ, - FIB_ENTRY_FLAG_NONE, + FIB_ENTRY_FLAG_ATTACHED, FIB_PROTOCOL_IP6, &pfx.fp_addr, n->key.sw_if_index, ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c index 3c6be7e85ec..4b8a3eef13b 100644 --- a/src/vnet/mpls/mpls_lookup.c +++ b/src/vnet/mpls/mpls_lookup.c @@ -19,7 +19,7 @@ #include <vnet/pg/pg.h> #include <vnet/mpls/mpls.h> #include <vnet/fib/mpls_fib.h> -#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/load_balance_map.h> #include <vnet/dpo/replicate_dpo.h> /** @@ -47,7 +47,7 @@ format_mpls_lookup_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *); - s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %d" + s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %#x " "label %d eos %d", t->next_index, t->lfib_index, t->lb_index, t->hash, vnet_mpls_uc_get_label( @@ -64,8 +64,15 @@ always_inline u32 mpls_compute_flow_hash (const mpls_unicast_header_t * hdr, flow_hash_config_t flow_hash_config) { - // FIXME - return (vnet_mpls_uc_get_label(hdr->label_exp_s_ttl)); + /* + * improve this to include: + * - all labels in the stack. + * - recognise entropy labels. + * + * We need to byte swap so we use the numerical value. i.e. an odd label + * leads to an odd bucket. ass opposed to a label above and below value X. + */ + return (vnet_mpls_uc_get_label(clib_net_to_host_u32(hdr->label_exp_s_ttl))); } static inline uword @@ -179,17 +186,21 @@ mpls_lookup (vlib_main_t * vm, else { lb0 = load_balance_get(lbi0); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { hash_c0 = vnet_buffer (b0)->ip.flow_hash = mpls_compute_flow_hash(h0, lb0->lb_hash_config); + dpo0 = load_balance_get_fwd_bucket + (lb0, + (hash_c0 & (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); } - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - dpo0 = load_balance_get_bucket_i(lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); next0 = dpo0->dpoi_next_node; vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; @@ -207,17 +218,21 @@ mpls_lookup (vlib_main_t * vm, else { lb1 = load_balance_get(lbi1); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb1->lb_n_buckets)); if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) { hash_c1 = vnet_buffer (b1)->ip.flow_hash = mpls_compute_flow_hash(h1, lb1->lb_hash_config); + dpo1 = load_balance_get_fwd_bucket + (lb1, + (hash_c1 & (lb1->lb_n_buckets_minus_1))); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); } - ASSERT (lb1->lb_n_buckets > 0); - ASSERT (is_pow2 (lb1->lb_n_buckets)); - dpo1 = load_balance_get_bucket_i(lb1, - (hash_c1 & - (lb1->lb_n_buckets_minus_1))); next1 = dpo1->dpoi_next_node; vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; @@ -235,17 +250,21 @@ mpls_lookup (vlib_main_t * vm, else { lb2 = load_balance_get(lbi2); + ASSERT (lb2->lb_n_buckets > 0); + ASSERT (is_pow2 (lb2->lb_n_buckets)); if (PREDICT_FALSE(lb2->lb_n_buckets > 1)) { hash_c2 = vnet_buffer (b2)->ip.flow_hash = mpls_compute_flow_hash(h2, lb2->lb_hash_config); + dpo2 = load_balance_get_fwd_bucket + (lb2, + (hash_c2 & (lb2->lb_n_buckets_minus_1))); + } + else + { + dpo2 = load_balance_get_bucket_i (lb2, 0); } - ASSERT (lb2->lb_n_buckets > 0); - ASSERT (is_pow2 (lb2->lb_n_buckets)); - dpo2 = load_balance_get_bucket_i(lb2, - (hash_c2 & - (lb2->lb_n_buckets_minus_1))); next2 = dpo2->dpoi_next_node; vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; @@ -263,17 +282,21 @@ mpls_lookup (vlib_main_t * vm, else { lb3 = load_balance_get(lbi3); + ASSERT (lb3->lb_n_buckets > 0); + ASSERT (is_pow2 (lb3->lb_n_buckets)); if (PREDICT_FALSE(lb3->lb_n_buckets > 1)) { hash_c3 = vnet_buffer (b3)->ip.flow_hash = mpls_compute_flow_hash(h3, lb3->lb_hash_config); + dpo3 = load_balance_get_fwd_bucket + (lb3, + (hash_c3 & (lb3->lb_n_buckets_minus_1))); + } + else + { + dpo3 = load_balance_get_bucket_i (lb3, 0); } - ASSERT (lb3->lb_n_buckets > 0); - ASSERT (is_pow2 (lb3->lb_n_buckets)); - dpo3 = load_balance_get_bucket_i(lb3, - (hash_c3 & - (lb3->lb_n_buckets_minus_1))); next3 = dpo3->dpoi_next_node; vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; @@ -393,20 +416,21 @@ mpls_lookup (vlib_main_t * vm, else { lb0 = load_balance_get(lbi0); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { hash_c0 = vnet_buffer (b0)->ip.flow_hash = mpls_compute_flow_hash(h0, lb0->lb_hash_config); + dpo0 = load_balance_get_fwd_bucket + (lb0, + (hash_c0 & (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); } - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - - dpo0 = load_balance_get_bucket_i(lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); - next0 = dpo0->dpoi_next_node; vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; @@ -467,7 +491,7 @@ VLIB_REGISTER_NODE (mpls_lookup_node, static) = { .n_errors = MPLS_N_ERROR, .error_strings = mpls_error_strings, - .sibling_of = "ip4-lookup", + .sibling_of = "mpls-load-balance", .format_buffer = format_mpls_header, .format_trace = format_mpls_lookup_trace, @@ -574,6 +598,11 @@ mpls_load_balance (vlib_main_t * vm, { hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0); } + dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1)); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); } if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) { @@ -585,10 +614,12 @@ mpls_load_balance (vlib_main_t * vm, { hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1); } + dpo1 = load_balance_get_fwd_bucket(lb1, (hc1 & lb1->lb_n_buckets_minus_1)); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); } - - dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1)); - dpo1 = load_balance_get_bucket_i(lb1, hc1 & (lb1->lb_n_buckets_minus_1)); next0 = dpo0->dpoi_next_node; next1 = dpo1->dpoi_next_node; @@ -650,9 +681,12 @@ mpls_load_balance (vlib_main_t * vm, { hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0); } + dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1)); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); } - - dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1)); next0 = dpo0->dpoi_next_node; vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; @@ -676,9 +710,13 @@ VLIB_REGISTER_NODE (mpls_load_balance_node) = { .function = mpls_load_balance, .name = "mpls-load-balance", .vector_size = sizeof (u32), - .sibling_of = "mpls-lookup", - .format_trace = format_mpls_load_balance_trace, + .n_next_nodes = 1, + .next_nodes = + { + [0] = "mpls-drop", + }, + }; VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance) |