diff options
-rw-r--r-- | src/vnet/cop/ip4_whitelist.c | 9 | ||||
-rw-r--r-- | src/vnet/dpo/load_balance.c | 6 | ||||
-rw-r--r-- | src/vnet/dpo/lookup_dpo.c | 4 | ||||
-rw-r--r-- | src/vnet/fib/fib.c | 2 | ||||
-rw-r--r-- | src/vnet/fib/fib_entry.c | 4 | ||||
-rw-r--r-- | src/vnet/fib/fib_path.c | 1 | ||||
-rw-r--r-- | src/vnet/fib/fib_table.c | 24 | ||||
-rw-r--r-- | src/vnet/fib/fib_table.h | 12 | ||||
-rw-r--r-- | src/vnet/fib/fib_test.c | 6 | ||||
-rw-r--r-- | src/vnet/fib/ip4_fib.c | 60 | ||||
-rw-r--r-- | src/vnet/fib/ip4_fib.h | 35 | ||||
-rw-r--r-- | src/vnet/fib/ip6_fib.c | 18 | ||||
-rw-r--r-- | src/vnet/fib/ip6_fib.h | 2 | ||||
-rw-r--r-- | src/vnet/fib/mpls_fib.c | 21 | ||||
-rw-r--r-- | src/vnet/fib/mpls_fib.h | 28 | ||||
-rw-r--r-- | src/vnet/ip/ip4.h | 29 | ||||
-rw-r--r-- | src/vnet/ip/ip4_forward.c | 21 | ||||
-rw-r--r-- | src/vnet/ip/ip4_mtrie.c | 611 | ||||
-rw-r--r-- | src/vnet/ip/ip4_mtrie.h | 106 | ||||
-rw-r--r-- | src/vnet/ip/ip4_packet.h | 1 | ||||
-rw-r--r-- | src/vnet/ip/ip4_source_check.c | 8 | ||||
-rw-r--r-- | src/vnet/ip/ip6.h | 3 | ||||
-rw-r--r-- | src/vnet/ip/ip_api.c | 43 | ||||
-rw-r--r-- | src/vnet/mpls/interface.c | 1 | ||||
-rw-r--r-- | src/vnet/mpls/mpls.h | 26 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_api.c | 38 | ||||
-rw-r--r-- | src/vpp/api/api.c | 6 | ||||
-rw-r--r-- | src/vpp/stats/stats.c | 8 |
28 files changed, 720 insertions, 413 deletions
diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c index ccb9dc03..6ef3d7d7 100644 --- a/src/vnet/cop/ip4_whitelist.c +++ b/src/vnet/cop/ip4_whitelist.c @@ -128,9 +128,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 1); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, @@ -167,9 +164,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 1); - - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, @@ -264,9 +258,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 1); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index d5e98e4e..6b0eda0e 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -827,14 +827,18 @@ const static char* const * const load_balance_nodes[DPO_PROTO_NUM] = void load_balance_module_init (void) { + index_t lbi; + dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes); /* * Special LB with index zero. we need to define this since the v4 mtrie * assumes an index of 0 implies the ply is empty. therefore all 'real' * adjs need a non-zero index. + * This should never be used, but just in case, stack it on a drop. */ - load_balance_create(0, DPO_PROTO_IP4, 0); + lbi = load_balance_create(1, DPO_PROTO_IP4, 0); + load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4)); load_balance_map_module_init(); } diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index 3726c8fe..e94e871c 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -211,7 +211,6 @@ ip4_src_fib_lookup_one (u32 src_fib_index0, mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie; leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); @@ -235,9 +234,6 @@ ip4_src_fib_lookup_two (u32 src_fib_index0, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0); leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, addr1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2); diff --git a/src/vnet/fib/fib.c b/src/vnet/fib/fib.c index 413f93e8..b430e113 100644 --- a/src/vnet/fib/fib.c +++ b/src/vnet/fib/fib.c @@ -28,6 +28,8 @@ fib_module_init (vlib_main_t * vm) return (error); if ((error = vlib_call_init_function (vm, adj_module_init))) return (error); + if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init))) + return (error); fib_entry_module_init(); fib_entry_src_module_init(); diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index 25005e11..6ac5461d 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -924,10 +924,10 @@ fib_entry_path_remove (fib_node_index_t fib_entry_index, /* * no more sources left. this entry is toast. */ - fib_entry_src_action_uninstall(fib_entry); fib_entry = fib_entry_post_flag_update_actions(fib_entry, source, bflags); + fib_entry_src_action_uninstall(fib_entry); return (FIB_ENTRY_SRC_FLAG_NONE); } @@ -1014,10 +1014,10 @@ fib_entry_special_remove (fib_node_index_t fib_entry_index, /* * no more sources left. this entry is toast. */ - fib_entry_src_action_uninstall(fib_entry); fib_entry = fib_entry_post_flag_update_actions(fib_entry, source, bflags); + fib_entry_src_action_uninstall(fib_entry); return (FIB_ENTRY_SRC_FLAG_NONE); } diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index 3ed309f3..928a9d43 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -32,6 +32,7 @@ #include <vnet/fib/fib_path_list.h> #include <vnet/fib/fib_internal.h> #include <vnet/fib/fib_urpf_list.h> +#include <vnet/fib/mpls_fib.h> /** * Enurmeration of path types diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 7818d02e..6c3162e7 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -47,7 +47,7 @@ fib_table_lookup_i (fib_table_t *fib_table, switch (prefix->fp_proto) { case FIB_PROTOCOL_IP4: - return (ip4_fib_table_lookup(&fib_table->v4, + return (ip4_fib_table_lookup(ip4_fib_get(fib_table->ft_index), &prefix->fp_addr.ip4, prefix->fp_len)); case FIB_PROTOCOL_IP6: @@ -55,7 +55,7 @@ fib_table_lookup_i (fib_table_t *fib_table, &prefix->fp_addr.ip6, prefix->fp_len)); case FIB_PROTOCOL_MPLS: - return (mpls_fib_table_lookup(&fib_table->mpls, + return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index), prefix->fp_label, prefix->fp_eos)); } @@ -76,7 +76,7 @@ fib_table_lookup_exact_match_i (const fib_table_t *fib_table, switch (prefix->fp_proto) { case FIB_PROTOCOL_IP4: - return (ip4_fib_table_lookup_exact_match(&fib_table->v4, + return (ip4_fib_table_lookup_exact_match(ip4_fib_get(fib_table->ft_index), &prefix->fp_addr.ip4, prefix->fp_len)); case FIB_PROTOCOL_IP6: @@ -84,7 +84,7 @@ fib_table_lookup_exact_match_i (const fib_table_t *fib_table, &prefix->fp_addr.ip6, prefix->fp_len)); case FIB_PROTOCOL_MPLS: - return (mpls_fib_table_lookup(&fib_table->mpls, + return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index), prefix->fp_label, prefix->fp_eos)); } @@ -148,7 +148,7 @@ fib_table_entry_remove (fib_table_t *fib_table, switch (prefix->fp_proto) { case FIB_PROTOCOL_IP4: - ip4_fib_table_entry_remove(&fib_table->v4, + ip4_fib_table_entry_remove(ip4_fib_get(fib_table->ft_index), &prefix->fp_addr.ip4, prefix->fp_len); break; @@ -158,7 +158,7 @@ fib_table_entry_remove (fib_table_t *fib_table, prefix->fp_len); break; case FIB_PROTOCOL_MPLS: - mpls_fib_table_entry_remove(&fib_table->mpls, + mpls_fib_table_entry_remove(mpls_fib_get(fib_table->ft_index), prefix->fp_label, prefix->fp_eos); break; @@ -208,7 +208,7 @@ fib_table_entry_insert (fib_table_t *fib_table, switch (prefix->fp_proto) { case FIB_PROTOCOL_IP4: - ip4_fib_table_entry_insert(&fib_table->v4, + ip4_fib_table_entry_insert(ip4_fib_get(fib_table->ft_index), &prefix->fp_addr.ip4, prefix->fp_len, fib_entry_index); @@ -220,7 +220,7 @@ fib_table_entry_insert (fib_table_t *fib_table, fib_entry_index); break; case FIB_PROTOCOL_MPLS: - mpls_fib_table_entry_insert(&fib_table->mpls, + mpls_fib_table_entry_insert(mpls_fib_get(fib_table->ft_index), prefix->fp_label, prefix->fp_eos, fib_entry_index); @@ -270,7 +270,9 @@ fib_table_fwding_dpo_remove (u32 fib_index, return (ip4_fib_table_fwding_dpo_remove(ip4_fib_get(fib_index), &prefix->fp_addr.ip4, prefix->fp_len, - dpo)); + dpo, + fib_table_get_less_specific(fib_index, + prefix))); case FIB_PROTOCOL_IP6: return (ip6_fib_table_fwding_dpo_remove(fib_index, &prefix->fp_addr.ip6, @@ -1034,13 +1036,13 @@ fib_table_destroy (fib_table_t *fib_table) switch (fib_table->ft_proto) { case FIB_PROTOCOL_IP4: - ip4_fib_table_destroy(&fib_table->v4); + ip4_fib_table_destroy(fib_table->ft_index); break; case FIB_PROTOCOL_IP6: ip6_fib_table_destroy(fib_table->ft_index); break; case FIB_PROTOCOL_MPLS: - mpls_fib_table_destroy(&fib_table->mpls); + mpls_fib_table_destroy(fib_table->ft_index); break; } } diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h index e7e66acb..b310aea6 100644 --- a/src/vnet/fib/fib_table.h +++ b/src/vnet/fib/fib_table.h @@ -29,18 +29,6 @@ typedef struct fib_table_t_ { /** - * A union of the protocol specific FIBs that provide the - * underlying LPM mechanism. - * This element is first in the struct so that it is in the - * first cache line. - */ - union { - ip4_fib_t v4; - ip6_fib_t v6; - mpls_fib_t mpls; - }; - - /** * Which protocol this table serves. Used to switch on the union above. */ fib_protocol_t ft_proto; diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index 1a9cce24..92141ddf 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -40,8 +40,6 @@ fformat(stderr, "FAIL:%d: " _comment "\n", \ __LINE__, ##_args); \ } else { \ - fformat(stderr, "PASS:%d: " _comment "\n", \ - __LINE__, ##_args); \ } \ _evald; \ }) @@ -5727,7 +5725,7 @@ fib_test_label (void) &a_o_10_10_11_1, &adj_o_10_10_11_2), "1.1.1.1/32 LB 2 buckets via: " - "adj over 10.10.11.1", + "adj over 10.10.11.1, " "adj-v4 over 10.10.11.2"); fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, @@ -5738,7 +5736,7 @@ fib_test_label (void) &a_o_10_10_11_1, &adj_o_10_10_11_2), "24001/eos LB 2 buckets via: " - "adj over 10.10.11.1", + "adj over 10.10.11.1, " "adj-v4 over 10.10.11.2"); fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index a7915620..98d4e52f 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -104,29 +104,35 @@ static u32 ip4_create_fib_with_table_id (u32 table_id) { fib_table_t *fib_table; + ip4_fib_t *v4_fib; pool_get_aligned(ip4_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); memset(fib_table, 0, sizeof(*fib_table)); + pool_get_aligned(ip4_main.v4_fibs, v4_fib, CLIB_CACHE_LINE_BYTES); + + ASSERT((fib_table - ip4_main.fibs) == + (v4_fib - ip4_main.v4_fibs)); + fib_table->ft_proto = FIB_PROTOCOL_IP4; fib_table->ft_index = - fib_table->v4.index = + v4_fib->index = (fib_table - ip4_main.fibs); hash_set (ip4_main.fib_index_by_table_id, table_id, fib_table->ft_index); fib_table->ft_table_id = - fib_table->v4.table_id = + v4_fib->table_id = table_id; fib_table->ft_flow_hash_config = - fib_table->v4.flow_hash_config = + v4_fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; - fib_table->v4.fwd_classify_table_index = ~0; - fib_table->v4.rev_classify_table_index = ~0; + v4_fib->fwd_classify_table_index = ~0; + v4_fib->rev_classify_table_index = ~0; fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4); - ip4_mtrie_init(&fib_table->v4.mtrie); + ip4_mtrie_init(&v4_fib->mtrie); /* * add the special entries into the new FIB @@ -151,9 +157,10 @@ ip4_create_fib_with_table_id (u32 table_id) } void -ip4_fib_table_destroy (ip4_fib_t *fib) +ip4_fib_table_destroy (u32 fib_index) { - fib_table_t *fib_table = (fib_table_t*)fib; + fib_table_t *fib_table = pool_elt_at_index(ip4_main.fibs, fib_index); + ip4_fib_t *v4_fib = pool_elt_at_index(ip4_main.v4_fibs, fib_index); int ii; /* @@ -185,6 +192,10 @@ ip4_fib_table_destroy (ip4_fib_t *fib) { hash_unset (ip4_main.fib_index_by_table_id, fib_table->ft_table_id); } + + ip4_mtrie_free(&v4_fib->mtrie); + + pool_put(ip4_main.v4_fibs, v4_fib); pool_put(ip4_main.fibs, fib_table); } @@ -367,16 +378,33 @@ ip4_fib_table_fwding_dpo_update (ip4_fib_t *fib, u32 len, const dpo_id_t *dpo) { - ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 0); // ADD + ip4_fib_mtrie_route_add(&fib->mtrie, addr, len, dpo->dpoi_index); } void ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib, const ip4_address_t *addr, u32 len, - const dpo_id_t *dpo) + const dpo_id_t *dpo, + u32 cover_index) { - ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 1); // DELETE + fib_prefix_t cover_prefix = { + .fp_len = 0, + }; + const dpo_id_t *cover_dpo; + + /* + * We need to pass the MTRIE the LB index and address length of the + * covering prefix, so it can fill the plys with the correct replacement + * for the entry being removed + */ + fib_entry_get_prefix(cover_index, &cover_prefix); + cover_dpo = fib_entry_contribute_ip_forwarding(cover_index); + + ip4_fib_mtrie_route_del(&fib->mtrie, + addr, len, dpo->dpoi_index, + cover_prefix.fp_len, + cover_dpo->dpoi_index); } void @@ -498,7 +526,7 @@ ip4_show_fib (vlib_main_t * vm, pool_foreach (fib_table, im4->fibs, ({ - ip4_fib_t *fib = &fib_table->v4; + ip4_fib_t *fib = pool_elt_at_index(im4->v4_fibs, fib_table->ft_index); if (table_id >= 0 && table_id != (int)fib->table_id) continue; @@ -523,6 +551,11 @@ ip4_show_fib (vlib_main_t * vm, } continue; } + if (mtrie) + { + vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie); + continue; + } if (!matching) { @@ -532,9 +565,6 @@ ip4_show_fib (vlib_main_t * vm, { ip4_fib_table_show_one(fib, vm, &matching_address, matching_mask); } - - if (mtrie) - vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie); })); return 0; diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h index 243fd77f..4cf9e58a 100644 --- a/src/vnet/fib/ip4_fib.h +++ b/src/vnet/fib/ip4_fib.h @@ -34,6 +34,33 @@ #include <vnet/ip/ip.h> #include <vnet/fib/fib_entry.h> #include <vnet/fib/fib_table.h> +#include <vnet/ip/ip4_mtrie.h> + +typedef struct ip4_fib_t_ +{ + /** + * Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. + * First member so it's in the first cacheline. + */ + ip4_fib_mtrie_t mtrie; + + /* Hash table for each prefix length mapping. */ + uword *fib_entry_by_dst_address[33]; + + /* Table ID (hash key) for this FIB. */ + u32 table_id; + + /* Index into FIB vector. */ + u32 index; + + /* flow hash configuration */ + flow_hash_config_t flow_hash_config; + + /* N-tuple classifier indices */ + u32 fwd_classify_table_index; + u32 rev_classify_table_index; + +} ip4_fib_t; extern fib_node_index_t ip4_fib_table_lookup(const ip4_fib_t *fib, const ip4_address_t *addr, @@ -50,7 +77,7 @@ extern void ip4_fib_table_entry_insert(ip4_fib_t *fib, const ip4_address_t *addr, u32 len, fib_node_index_t fib_entry_index); -extern void ip4_fib_table_destroy(ip4_fib_t *fib); +extern void ip4_fib_table_destroy(u32 fib_index); extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib, const ip4_address_t *addr, @@ -60,7 +87,8 @@ extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib, extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib, const ip4_address_t *addr, u32 len, - const dpo_id_t *dpo); + const dpo_id_t *dpo, + fib_node_index_t cover_index); extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib, const ip4_address_t * dst); @@ -79,7 +107,7 @@ extern void ip4_fib_table_walk(ip4_fib_t *fib, static inline ip4_fib_t * ip4_fib_get (u32 index) { - return (&(pool_elt_at_index(ip4_main.fibs, index)->v4)); + return (pool_elt_at_index(ip4_main.v4_fibs, index)); } always_inline u32 @@ -134,7 +162,6 @@ ip4_fib_forwarding_lookup (u32 fib_index, mtrie = &ip4_fib_get(fib_index)->mtrie; leaf = ip4_fib_mtrie_lookup_step_one (mtrie, addr); - leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1); leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2); leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3); diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index 343ff55e..0ee029d3 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -55,22 +55,29 @@ static u32 create_fib_with_table_id (u32 table_id) { fib_table_t *fib_table; + ip6_fib_t *v6_fib; pool_get_aligned(ip6_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); + pool_get_aligned(ip6_main.v6_fibs, v6_fib, CLIB_CACHE_LINE_BYTES); + memset(fib_table, 0, sizeof(*fib_table)); + memset(v6_fib, 0, sizeof(*v6_fib)); + ASSERT((fib_table - ip6_main.fibs) == + (v6_fib - ip6_main.v6_fibs)); + fib_table->ft_proto = FIB_PROTOCOL_IP6; fib_table->ft_index = - fib_table->v6.index = - (fib_table - ip6_main.fibs); + v6_fib->index = + (fib_table - ip6_main.fibs); hash_set(ip6_main.fib_index_by_table_id, table_id, fib_table->ft_index); fib_table->ft_table_id = - fib_table->v6.table_id = + v6_fib->table_id = table_id; fib_table->ft_flow_hash_config = - fib_table->v6.flow_hash_config = + v6_fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; vnet_ip6_fib_init(fib_table->ft_index); @@ -188,6 +195,7 @@ ip6_fib_table_destroy (u32 fib_index) { hash_unset (ip6_main.fib_index_by_table_id, fib_table->ft_table_id); } + pool_put_index(ip6_main.v6_fibs, fib_table->ft_index); pool_put(ip6_main.fibs, fib_table); } @@ -620,7 +628,7 @@ ip6_show_fib (vlib_main_t * vm, pool_foreach (fib_table, im6->fibs, ({ - fib = &(fib_table->v6); + fib = pool_elt_at_index(im6->v6_fibs, fib_table->ft_index); if (table_id >= 0 && table_id != (int)fib->table_id) continue; if (fib_index != ~0 && fib_index != (int)fib->index) diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h index af864a75..e2f28452 100644 --- a/src/vnet/fib/ip6_fib.h +++ b/src/vnet/fib/ip6_fib.h @@ -115,7 +115,7 @@ static inline ip6_fib_t * ip6_fib_get (fib_node_index_t index) { ASSERT(!pool_is_free_index(ip6_main.fibs, index)); - return (&pool_elt_at_index (ip6_main.fibs, index)->v6); + return (pool_elt_at_index (ip6_main.v6_fibs, index)); } static inline diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c index 5cd0fd23..4b2b76ea 100644 --- a/src/vnet/fib/mpls_fib.c +++ b/src/vnet/fib/mpls_fib.c @@ -97,11 +97,15 @@ mpls_fib_create_with_table_id (u32 table_id) int i; pool_get_aligned(mpls_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); + pool_get_aligned(mpls_main.mpls_fibs, mf, CLIB_CACHE_LINE_BYTES); + + ASSERT((fib_table - mpls_main.fibs) == + (mf - mpls_main.mpls_fibs)); + memset(fib_table, 0, sizeof(*fib_table)); fib_table->ft_proto = FIB_PROTOCOL_MPLS; - fib_table->ft_index = - (fib_table - mpls_main.fibs); + fib_table->ft_index = (fib_table - mpls_main.fibs); hash_set (mpls_main.fib_index_by_table_id, table_id, fib_table->ft_index); @@ -109,8 +113,6 @@ mpls_fib_create_with_table_id (u32 table_id) table_id; fib_table->ft_flow_hash_config = MPLS_FLOW_HASH_DEFAULT; - fib_table->v4.fwd_classify_table_index = ~0; - fib_table->v4.rev_classify_table_index = ~0; fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS); @@ -122,7 +124,6 @@ mpls_fib_create_with_table_id (u32 table_id) drop_dpo_get(DPO_PROTO_MPLS)); } - mf = &fib_table->mpls; mf->mf_entries = hash_create(0, sizeof(fib_node_index_t)); for (i = 0; i < MPLS_FIB_DB_SIZE; i++) { @@ -241,9 +242,10 @@ mpls_fib_table_create_and_lock (void) } void -mpls_fib_table_destroy (mpls_fib_t *mf) +mpls_fib_table_destroy (u32 fib_index) { - fib_table_t *fib_table = (fib_table_t*)mf; + fib_table_t *fib_table = pool_elt_at_index(mpls_main.fibs, fib_index); + mpls_fib_t *mf = pool_elt_at_index(mpls_main.mpls_fibs, fib_index); fib_prefix_t prefix = { .fp_proto = FIB_PROTOCOL_MPLS, }; @@ -274,6 +276,7 @@ mpls_fib_table_destroy (mpls_fib_t *mf) } hash_free(mf->mf_entries); + pool_put(mpls_main.mpls_fibs, mf); pool_put(mpls_main.fibs, fib_table); } @@ -436,11 +439,11 @@ mpls_fib_show (vlib_main_t * vm, if (MPLS_LABEL_INVALID == label) { - mpls_fib_table_show_all(&(fib_table->mpls), vm); + mpls_fib_table_show_all(mpls_fib_get(fib_table->ft_index), vm); } else { - mpls_fib_table_show_one(&(fib_table->mpls), label, vm); + mpls_fib_table_show_one(mpls_fib_get(fib_table->ft_index), label, vm); } })); diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h index 779decaa..78a61a14 100644 --- a/src/vnet/fib/mpls_fib.h +++ b/src/vnet/fib/mpls_fib.h @@ -25,10 +25,33 @@ #include <vnet/mpls/mpls.h> #include <vnet/fib/fib_table.h> +#define MPLS_FIB_DEFAULT_TABLE_ID 0 + +/** + * Type exposure is to allow the DP fast/inlined access + */ +#define MPLS_FIB_KEY_SIZE 21 +#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1)) + +typedef struct mpls_fib_t_ +{ + /** + * A hash table of entries. 21 bit key + * Hash table for reduced memory footprint + */ + uword * mf_entries; + + /** + * The load-balance indices keyed by 21 bit label+eos bit. + * A flat array for maximum lookup performace. + */ + index_t mf_lbs[MPLS_FIB_DB_SIZE]; +} mpls_fib_t; + static inline mpls_fib_t* mpls_fib_get (fib_node_index_t index) { - return (&(pool_elt_at_index(mpls_main.fibs, index)->mpls)); + return (pool_elt_at_index(mpls_main.mpls_fibs, index)); } extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id); @@ -56,8 +79,7 @@ extern void mpls_fib_table_entry_insert(mpls_fib_t *mf, mpls_label_t label, mpls_eos_bit_t eos, fib_node_index_t fei); -extern void mpls_fib_table_destroy(mpls_fib_t *mf); - +extern void mpls_fib_table_destroy(u32 fib_index); extern void mpls_fib_forwarding_table_update(mpls_fib_t *mf, diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index 4e075d0f..71640def 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -40,34 +40,10 @@ #ifndef included_ip_ip4_h #define included_ip_ip4_h -#include <vnet/ip/ip4_mtrie.h> #include <vnet/ip/ip4_packet.h> #include <vnet/ip/lookup.h> #include <vnet/feature/feature.h> -typedef struct ip4_fib_t -{ - /* Hash table for each prefix length mapping. */ - uword *fib_entry_by_dst_address[33]; - - /* Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. */ - ip4_fib_mtrie_t mtrie; - - /* Table ID (hash key) for this FIB. */ - u32 table_id; - - /* Index into FIB vector. */ - u32 index; - - /* flow hash configuration */ - flow_hash_config_t flow_hash_config; - - /* N-tuple classifier indices */ - u32 fwd_classify_table_index; - u32 rev_classify_table_index; - -} ip4_fib_t; - typedef struct ip4_mfib_t { /* Hash table for each prefix length mapping. */ @@ -111,6 +87,9 @@ typedef struct ip4_main_t /** Vector of FIBs. */ struct fib_table_t_ *fibs; + /** Vector of MTries. */ + struct ip4_fib_t_ *v4_fibs; + /** Vector of MFIBs. */ struct mfib_table_t_ *mfibs; @@ -284,8 +263,6 @@ serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main; int vnet_set_ip4_flow_hash (u32 table_id, flow_hash_config_t flow_hash_config); -void ip4_mtrie_init (ip4_fib_mtrie_t * m); - int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, u32 table_index); diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index ef6dded5..ee1703e7 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -182,7 +182,6 @@ ip4_lookup_inline (vlib_main_t * vm, mtrie2 = &ip4_fib_get (fib_index2)->mtrie; mtrie3 = &ip4_fib_get (fib_index3)->mtrie; - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1); leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2); @@ -196,14 +195,6 @@ ip4_lookup_inline (vlib_main_t * vm, if (!lookup_for_responses_to_locally_received_packets) { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1); - leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1); - leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1); - } - - if (!lookup_for_responses_to_locally_received_packets) - { leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2); leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2); @@ -364,9 +355,6 @@ ip4_lookup_inline (vlib_main_t * vm, tcp0 = (void *) (ip0 + 1); if (!lookup_for_responses_to_locally_received_packets) - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1); - - if (!lookup_for_responses_to_locally_received_packets) leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); if (!lookup_for_responses_to_locally_received_packets) @@ -1622,11 +1610,6 @@ ip4_local_inline (vlib_main_t * vm, good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; good_tcp_udp1 |= is_udp1 && udp1->checksum == 0; - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); - leaf1 = - ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1); - /* Verify UDP length. */ ip_len0 = clib_net_to_host_u16 (ip0->length); ip_len1 = clib_net_to_host_u16 (ip1->length); @@ -1812,9 +1795,6 @@ ip4_local_inline (vlib_main_t * vm, /* Don't verify UDP checksum for packets with explicit zero checksum. */ good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); - /* Verify UDP length. */ ip_len0 = clib_net_to_host_u16 (ip0->length); udp_len0 = clib_net_to_host_u16 (udp0->length); @@ -2913,7 +2893,6 @@ ip4_lookup_validate (ip4_address_t * a, u32 fib_index0) mtrie0 = &ip4_fib_get (fib_index0)->mtrie; leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3); diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c index 317d8f10..adc95125 100644 --- a/src/vnet/ip/ip4_mtrie.c +++ b/src/vnet/ip/ip4_mtrie.c @@ -38,10 +38,17 @@ */ #include <vnet/ip/ip.h> -#include <vnet/fib/fib_entry.h> +#include <vnet/ip/ip4_mtrie.h> +#include <vnet/fib/ip4_fib.h> + + +/** + * Global pool of IPv4 8bit PLYs + */ +ip4_fib_mtrie_8_ply_t *ip4_ply_pool; always_inline u32 -ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_ply_t * p, u8 dst_byte) +ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte) { /* * It's 'non-empty' if the length of the leaf stored is greater than the @@ -84,61 +91,83 @@ ip4_fib_mtrie_leaf_set_next_ply_index (u32 i) return l; } -static void -ply_init (ip4_fib_mtrie_ply_t * p, - ip4_fib_mtrie_leaf_t init, u32 prefix_len, u32 ply_base_len) -{ - /* - * A leaf is 'empty' if it represents a leaf from the covering PLY - * i.e. if the prefix length of the leaf is less than or equal to - * the prefix length of the PLY - */ - p->n_non_empty_leafs = (prefix_len > ply_base_len ? - ARRAY_LEN (p->leaves) : 0); - memset (p->dst_address_bits_of_leaves, prefix_len, - sizeof (p->dst_address_bits_of_leaves)); - p->dst_address_bits_base = ply_base_len; - - /* Initialize leaves. */ -#ifdef CLIB_HAVE_VEC128 - { - u32x4 *l, init_x4; - #ifndef __ALTIVEC__ - init_x4 = u32x4_splat (init); +#define PLY_X4_SPLAT_INIT(init_x4, init) \ + init_x4 = u32x4_splat (init); #else - { - u32x4_union_t y; - y.as_u32[0] = init; - y.as_u32[1] = init; - y.as_u32[2] = init; - y.as_u32[3] = init; - init_x4 = y.as_u32x4; - } +#define PLY_X4_SPLAT_INIT(init_x4, init) \ +{ \ + u32x4_union_t y; \ + y.as_u32[0] = init; \ + y.as_u32[1] = init; \ + y.as_u32[2] = init; \ + y.as_u32[3] = init; \ + init_x4 = y.as_u32x4; \ +} #endif - for (l = p->leaves_as_u32x4; - l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4) - { - l[0] = init_x4; - l[1] = init_x4; - l[2] = init_x4; - l[3] = init_x4; - } - } +#ifdef CLIB_HAVE_VEC128 +#define PLY_INIT_LEAVES(p) \ +{ \ + u32x4 *l, init_x4; \ + \ + PLY_X4_SPLAT_INIT(init_x4, init); \ + for (l = p->leaves_as_u32x4; \ + l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); \ + l += 4) \ + { \ + l[0] = init_x4; \ + l[1] = init_x4; \ + l[2] = init_x4; \ + l[3] = init_x4; \ + } \ +} #else - { - u32 *l; - - for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) - { - l[0] = init; - l[1] = init; - l[2] = init; - l[3] = init; - } - } +#define PLY_INIT_LEAVES(p) \ +{ \ + u32 *l; \ + \ + for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) \ + { \ + l[0] = init; \ + l[1] = init; \ + l[2] = init; \ + l[3] = init; \ + } \ +} #endif + +#define PLY_INIT(p, init, prefix_len, ply_base_len) \ +{ \ + /* \ + * A leaf is 'empty' if it represents a leaf from the covering PLY \ + * i.e. if the prefix length of the leaf is less than or equal to \ + * the prefix length of the PLY \ + */ \ + p->n_non_empty_leafs = (prefix_len > ply_base_len ? \ + ARRAY_LEN (p->leaves) : 0); \ + memset (p->dst_address_bits_of_leaves, prefix_len, \ + sizeof (p->dst_address_bits_of_leaves)); \ + p->dst_address_bits_base = ply_base_len; \ + \ + /* Initialize leaves. */ \ + PLY_INIT_LEAVES(p); \ +} + +static void +ply_8_init (ip4_fib_mtrie_8_ply_t * p, + ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len) +{ + PLY_INIT (p, init, prefix_len, ply_base_len); +} + +static void +ply_16_init (ip4_fib_mtrie_16_ply_t * p, + ip4_fib_mtrie_leaf_t init, uword prefix_len) +{ + memset (p->dst_address_bits_of_leaves, prefix_len, + sizeof (p->dst_address_bits_of_leaves)); + PLY_INIT_LEAVES (p); } static ip4_fib_mtrie_leaf_t @@ -146,49 +175,43 @@ ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf, u32 leaf_prefix_len, u32 ply_base_len) { - ip4_fib_mtrie_ply_t *p; + ip4_fib_mtrie_8_ply_t *p; /* Get cache aligned ply. */ - pool_get_aligned (m->ply_pool, p, sizeof (p[0])); + pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES); - ply_init (p, init_leaf, leaf_prefix_len, ply_base_len); - return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool); + ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len); + return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool); } -always_inline ip4_fib_mtrie_ply_t * +always_inline ip4_fib_mtrie_8_ply_t * get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l) { uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l); - /* It better not be the root ply. */ - ASSERT (n != 0); - return pool_elt_at_index (m->ply_pool, n); + + return pool_elt_at_index (ip4_ply_pool, n); } -static void -ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) +void +ip4_mtrie_free (ip4_fib_mtrie_t * m) { - uword i, is_root; - - is_root = p - m->ply_pool == 0; - - for (i = 0; i < ARRAY_LEN (p->leaves); i++) + /* the root ply is embedded so the is nothing to do, + * the assumption being that the IP4 FIB table has emptied the trie + * before deletion. + */ +#if CLIB_DEBUG > 0 + int i; + for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++) { - ip4_fib_mtrie_leaf_t l = p->leaves[i]; - if (ip4_fib_mtrie_leaf_is_next_ply (l)) - ply_free (m, get_next_ply_for_leaf (m, l)); + ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i])); } - - if (is_root) - ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0, 0); - else - pool_put (m->ply_pool, p); +#endif } void -ip4_fib_free (ip4_fib_mtrie_t * m) +ip4_mtrie_init (ip4_fib_mtrie_t * m) { - ip4_fib_mtrie_ply_t *root_ply = pool_elt_at_index (m->ply_pool, 0); - ply_free (m, root_ply); + ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0); } typedef struct @@ -202,7 +225,7 @@ typedef struct static void set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, - ip4_fib_mtrie_ply_t * ply, + ip4_fib_mtrie_8_ply_t * ply, ip4_fib_mtrie_leaf_t new_leaf, uword new_leaf_dst_address_bits) { @@ -218,7 +241,8 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, /* Recurse into sub plies. */ if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf)) { - ip4_fib_mtrie_ply_t *sub_ply = get_next_ply_for_leaf (m, old_leaf); + ip4_fib_mtrie_8_ply_t *sub_ply = + get_next_ply_for_leaf (m, old_leaf); set_ply_with_more_specific_leaf (m, sub_ply, new_leaf, new_leaf_dst_address_bits); } @@ -237,16 +261,20 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, static void set_leaf (ip4_fib_mtrie_t * m, - ip4_fib_mtrie_set_unset_leaf_args_t * a, + const ip4_fib_mtrie_set_unset_leaf_args_t * a, u32 old_ply_index, u32 dst_address_byte_index) { ip4_fib_mtrie_leaf_t old_leaf, new_leaf; i32 n_dst_bits_next_plies; u8 dst_byte; + ip4_fib_mtrie_8_ply_t *old_ply; + + old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index); ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); + /* how many bits of the destination address are in the next PLY */ n_dst_bits_next_plies = a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1); @@ -255,30 +283,36 @@ set_leaf (ip4_fib_mtrie_t * m, /* Number of bits next plies <= 0 => insert leaves this ply. */ if (n_dst_bits_next_plies <= 0) { + /* The mask length of the address to insert maps to this ply */ uword i, n_dst_bits_this_ply, old_leaf_is_terminal; + /* The number of bits, and hence slots/buckets, we will fill */ n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies); ASSERT ((a->dst_address.as_u8[dst_address_byte_index] & pow2_mask (n_dst_bits_this_ply)) == 0); + /* Starting at the value of the byte at this section of the v4 address + * fill the buckets/slots of the ply */ for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++) { - ip4_fib_mtrie_ply_t *old_ply, *new_ply; - - old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + ip4_fib_mtrie_8_ply_t *new_ply; old_leaf = old_ply->leaves[i]; old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); - /* Is leaf to be inserted more specific? */ if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i]) { + /* The new leaf is more or equally specific than the one currently + * occupying the slot */ new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); if (old_leaf_is_terminal) { + /* The current leaf is terminal, we can replace it with + * the new one */ old_ply->n_non_empty_leafs -= ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); + old_ply->dst_address_bits_of_leaves[i] = a->dst_address_length; __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf, @@ -292,32 +326,42 @@ set_leaf (ip4_fib_mtrie_t * m, } else { - /* Existing leaf points to another ply. We need to place new_leaf into all - more specific slots. */ + /* Existing leaf points to another ply. We need to place + * new_leaf into all more specific slots. */ new_ply = get_next_ply_for_leaf (m, old_leaf); set_ply_with_more_specific_leaf (m, new_ply, new_leaf, a->dst_address_length); } } - else if (!old_leaf_is_terminal) { + /* The current leaf is less specific and not termial (i.e. a ply), + * recurse on down the trie */ new_ply = get_next_ply_for_leaf (m, old_leaf); - set_leaf (m, a, new_ply - m->ply_pool, + set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1); } + /* + * else + * the route we are adding is less specific than the leaf currently + * occupying this slot. leave it there + */ } } else { - ip4_fib_mtrie_ply_t *old_ply, *new_ply; + /* The address to insert requires us to move down at a lower level of + * the trie - recurse on down */ + ip4_fib_mtrie_8_ply_t *new_ply; u8 ply_base_len; ply_base_len = 8 * (dst_address_byte_index + 1); - old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + old_leaf = old_ply->leaves[dst_byte]; + if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) { + /* There is a leaf occupying the slot. Replace it with a new ply */ old_ply->n_non_empty_leafs -= ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte); @@ -328,28 +372,143 @@ set_leaf (ip4_fib_mtrie_t * m, new_ply = get_next_ply_for_leaf (m, new_leaf); /* Refetch since ply_create may move pool. */ - old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index); __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, new_leaf); ASSERT (old_ply->leaves[dst_byte] == new_leaf); old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len; - /* Account for the ply we just created. */ - old_ply->n_non_empty_leafs += 1; + old_ply->n_non_empty_leafs += + ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte); ASSERT (old_ply->n_non_empty_leafs >= 0); } else new_ply = get_next_ply_for_leaf (m, old_leaf); - set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1); + set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1); + } +} + +static void +set_root_leaf (ip4_fib_mtrie_t * m, + const ip4_fib_mtrie_set_unset_leaf_args_t * a) +{ + ip4_fib_mtrie_leaf_t old_leaf, new_leaf; + ip4_fib_mtrie_16_ply_t *old_ply; + i32 n_dst_bits_next_plies; + u16 dst_byte; + + old_ply = &m->root_ply; + + ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); + + /* how many bits of the destination address are in the next PLY */ + n_dst_bits_next_plies = a->dst_address_length - BITS (u16); + + dst_byte = a->dst_address.as_u16[0]; + + /* Number of bits next plies <= 0 => insert leaves this ply. */ + if (n_dst_bits_next_plies <= 0) + { + /* The mask length of the address to insert maps to this ply */ + uword i, n_dst_bits_this_ply, old_leaf_is_terminal; + + /* The number of bits, and hence slots/buckets, we will fill */ + n_dst_bits_this_ply = 16 - a->dst_address_length; + ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) & + pow2_mask (n_dst_bits_this_ply)) == 0); + + /* Starting at the value of the byte at this section of the v4 address + * fill the buckets/slots of the ply */ + for (i = 0; i < (1 << n_dst_bits_this_ply); i++) + { + ip4_fib_mtrie_8_ply_t *new_ply; + u16 slot; + + slot = clib_net_to_host_u16 (dst_byte); + slot += i; + slot = clib_host_to_net_u16 (slot); + + old_leaf = old_ply->leaves[slot]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + if (a->dst_address_length >= + old_ply->dst_address_bits_of_leaves[slot]) + { + /* The new leaf is more or equally specific than the one currently + * occupying the slot */ + new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + if (old_leaf_is_terminal) + { + /* The current leaf is terminal, we can replace it with + * the new one */ + old_ply->dst_address_bits_of_leaves[slot] = + a->dst_address_length; + __sync_val_compare_and_swap (&old_ply->leaves[slot], + old_leaf, new_leaf); + ASSERT (old_ply->leaves[slot] == new_leaf); + } + else + { + /* Existing leaf points to another ply. We need to place + * new_leaf into all more specific slots. */ + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_ply_with_more_specific_leaf (m, new_ply, new_leaf, + a->dst_address_length); + } + } + else if (!old_leaf_is_terminal) + { + /* The current leaf is less specific and not termial (i.e. a ply), + * recurse on down the trie */ + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_leaf (m, a, new_ply - ip4_ply_pool, 2); + } + /* + * else + * the route we are adding is less specific than the leaf currently + * occupying this slot. leave it there + */ + } + } + else + { + /* The address to insert requires us to move down at a lower level of + * the trie - recurse on down */ + ip4_fib_mtrie_8_ply_t *new_ply; + u8 ply_base_len; + + ply_base_len = 16; + + old_leaf = old_ply->leaves[dst_byte]; + + if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) + { + /* There is a leaf occupying the slot. Replace it with a new ply */ + new_leaf = ply_create (m, old_leaf, + clib_max (old_ply->dst_address_bits_of_leaves + [dst_byte], ply_base_len), + ply_base_len); + new_ply = get_next_ply_for_leaf (m, new_leaf); + + __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, + new_leaf); + ASSERT (old_ply->leaves[dst_byte] == new_leaf); + old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len; + } + else + new_ply = get_next_ply_for_leaf (m, old_leaf); + + set_leaf (m, a, new_ply - ip4_ply_pool, 2); } } static uword unset_leaf (ip4_fib_mtrie_t * m, - ip4_fib_mtrie_set_unset_leaf_args_t * a, - ip4_fib_mtrie_ply_t * old_ply, u32 dst_address_byte_index) + const ip4_fib_mtrie_set_unset_leaf_args_t * a, + ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index) { ip4_fib_mtrie_leaf_t old_leaf, del_leaf; i32 n_dst_bits_next_plies; @@ -397,7 +556,7 @@ unset_leaf (ip4_fib_mtrie_t * m, ASSERT (old_ply->n_non_empty_leafs >= 0); if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0) { - pool_put (m->ply_pool, old_ply); + pool_put (ip4_ply_pool, old_ply); /* Old ply was deleted. */ return 1; } @@ -419,106 +578,120 @@ unset_leaf (ip4_fib_mtrie_t * m, return 0; } -void -ip4_mtrie_init (ip4_fib_mtrie_t * m) +static void +unset_root_leaf (ip4_fib_mtrie_t * m, + const ip4_fib_mtrie_set_unset_leaf_args_t * a) { - ip4_fib_mtrie_leaf_t root; - memset (m, 0, sizeof (m[0])); - root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, 0, 0); - ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0); -} + ip4_fib_mtrie_leaf_t old_leaf, del_leaf; + i32 n_dst_bits_next_plies; + i32 i, n_dst_bits_this_ply, old_leaf_is_terminal; + u16 dst_byte; + ip4_fib_mtrie_16_ply_t *old_ply; -void -ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, - ip4_address_t dst_address, - u32 dst_address_length, - u32 adj_index, u32 is_del) -{ - ip4_fib_mtrie_t *m = &fib->mtrie; - ip4_fib_mtrie_ply_t *root_ply; - ip4_fib_mtrie_set_unset_leaf_args_t a; - ip4_main_t *im = &ip4_main; + ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); - ASSERT (m->ply_pool != 0); + old_ply = &m->root_ply; + n_dst_bits_next_plies = a->dst_address_length - BITS (u16); - root_ply = pool_elt_at_index (m->ply_pool, 0); + dst_byte = a->dst_address.as_u16[0]; - /* Honor dst_address_length. Fib masks are in network byte order */ - dst_address.as_u32 &= im->fib_masks[dst_address_length]; - a.dst_address = dst_address; - a.dst_address_length = dst_address_length; - a.adj_index = adj_index; + n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ? + (16 - a->dst_address_length) : 0); - if (!is_del) - { - set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0); - } - else + del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + /* Starting at the value of the byte at this section of the v4 address + * fill the buckets/slots of the ply */ + for (i = 0; i < (1 << n_dst_bits_this_ply); i++) { - ip4_main_t *im = &ip4_main; + u16 slot; + + slot = clib_net_to_host_u16 (dst_byte); + slot += i; + slot = clib_host_to_net_u16 (slot); - if (dst_address_length) + old_leaf = old_ply->leaves[slot]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + if (old_leaf == del_leaf + || (!old_leaf_is_terminal + && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2))) { - word i; + old_ply->leaves[slot] = + ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index); + old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length; + } + } +} - /* If the ply was not deleted, then we need to fill the - * bucket just reset will the leaf from the less specfic - * cover. - * Find next less specific route and insert into mtrie. */ - for (i = dst_address_length - 1; i >= 0; i--) - { - uword *p; - index_t lbi; - ip4_address_t key; +void +ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, u32 adj_index) +{ + ip4_fib_mtrie_set_unset_leaf_args_t a; + ip4_main_t *im = &ip4_main; - if (!fib->fib_entry_by_dst_address[i]) - continue; + /* Honor dst_address_length. Fib masks are in network byte order */ + a.dst_address.as_u32 = (dst_address->as_u32 & + im->fib_masks[dst_address_length]); + a.dst_address_length = dst_address_length; + a.adj_index = adj_index; - key.as_u32 = dst_address.as_u32 & im->fib_masks[i]; - p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32); - if (p) - { - lbi = fib_entry_contribute_ip_forwarding (p[0])->dpoi_index; - if (INDEX_INVALID == lbi) - continue; + set_root_leaf (m, &a); +} - a.cover_adj_index = lbi; - a.cover_address_length = i; +void +ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, + u32 adj_index, + u32 cover_address_length, u32 cover_adj_index) +{ + ip4_fib_mtrie_set_unset_leaf_args_t a; + ip4_main_t *im = &ip4_main; - break; - } - } - } - else - { - a.cover_adj_index = 0; - a.cover_address_length = 0; - } + /* Honor dst_address_length. Fib masks are in network byte order */ + a.dst_address.as_u32 = (dst_address->as_u32 & + im->fib_masks[dst_address_length]); + a.dst_address_length = dst_address_length; + a.adj_index = adj_index; + a.cover_adj_index = cover_adj_index; + a.cover_address_length = cover_address_length; - /* the top level ply is never removed, so we can ignore the return code */ - unset_leaf (m, &a, root_ply, 0); - } + /* the top level ply is never removed */ + unset_root_leaf (m, &a); } /* Returns number of bytes of memory used by mtrie. */ static uword -mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) +mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p) { uword bytes, i; - if (!p) - { - if (pool_is_free_index (m->ply_pool, 0)) - return 0; - p = pool_elt_at_index (m->ply_pool, 0); - } - bytes = sizeof (p[0]); for (i = 0; i < ARRAY_LEN (p->leaves); i++) { ip4_fib_mtrie_leaf_t l = p->leaves[i]; if (ip4_fib_mtrie_leaf_is_next_ply (l)) - bytes += mtrie_memory_usage (m, get_next_ply_for_leaf (m, l)); + bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l)); + } + + return bytes; +} + +/* Returns number of bytes of memory used by mtrie. */ +static uword +mtrie_memory_usage (ip4_fib_mtrie_t * m) +{ + uword bytes, i; + + bytes = sizeof (*m); + for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++) + { + ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i]; + if (ip4_fib_mtrie_leaf_is_next_ply (l)) + bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l)); } return bytes; @@ -536,47 +709,49 @@ format_ip4_fib_mtrie_leaf (u8 * s, va_list * va) return s; } +#define FORMAT_PLY(s, _p, _i, _base_address, _ply_max_len, _indent) \ +({ \ + u32 a, ia_length; \ + ip4_address_t ia; \ + ip4_fib_mtrie_leaf_t _l = p->leaves[(_i)]; \ + \ + a = (_base_address) + ((_i) << (32 - (_ply_max_len))); \ + ia.as_u32 = clib_host_to_net_u32 (a); \ + ia_length = (_p)->dst_address_bits_of_leaves[(_i)]; \ + s = format (s, "\n%U%20U %U", \ + format_white_space, (_indent) + 2, \ + format_ip4_address_and_length, &ia, ia_length, \ + format_ip4_fib_mtrie_leaf, _l); \ + \ + if (ip4_fib_mtrie_leaf_is_next_ply (_l)) \ + s = format (s, "\n%U%U", \ + format_white_space, (_indent) + 2, \ + format_ip4_fib_mtrie_ply, m, a, \ + ip4_fib_mtrie_leaf_get_next_ply_index (_l)); \ + s; \ +}) + static u8 * format_ip4_fib_mtrie_ply (u8 * s, va_list * va) { ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *); u32 base_address = va_arg (*va, u32); u32 ply_index = va_arg (*va, u32); - u32 dst_address_byte_index = va_arg (*va, u32); - ip4_fib_mtrie_ply_t *p; - uword i, indent; + ip4_fib_mtrie_8_ply_t *p; + uword indent; + int i; - p = pool_elt_at_index (m->ply_pool, ply_index); + p = pool_elt_at_index (ip4_ply_pool, ply_index); indent = format_get_indent (s); - s = - format (s, "ply index %d, %d non-empty leaves", ply_index, - p->n_non_empty_leafs); + s = format (s, "ply index %d, %d non-empty leaves", ply_index, + p->n_non_empty_leafs); + for (i = 0; i < ARRAY_LEN (p->leaves); i++) { - ip4_fib_mtrie_leaf_t l = p->leaves[i]; - if (ip4_fib_mtrie_leaf_is_non_empty (p, i)) { - u32 a, ia_length; - ip4_address_t ia; - - a = base_address + (i << (24 - 8 * dst_address_byte_index)); - ia.as_u32 = clib_host_to_net_u32 (a); - if (ip4_fib_mtrie_leaf_is_terminal (l)) - ia_length = p->dst_address_bits_of_leaves[i]; - else - ia_length = 8 * (1 + dst_address_byte_index); - s = format (s, "\n%U%20U %U", - format_white_space, indent + 2, - format_ip4_address_and_length, &ia, ia_length, - format_ip4_fib_mtrie_leaf, l); - - if (ip4_fib_mtrie_leaf_is_next_ply (l)) - s = format (s, "\n%U%U", - format_white_space, indent + 2, - format_ip4_fib_mtrie_ply, m, a, - ip4_fib_mtrie_leaf_get_next_ply_index (l), - dst_address_byte_index + 1); + FORMAT_PLY (s, p, i, base_address, + p->dst_address_bits_base + 8, indent); } } @@ -587,22 +762,44 @@ u8 * format_ip4_fib_mtrie (u8 * s, va_list * va) { ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *); + ip4_fib_mtrie_16_ply_t *p; + u32 base_address = 0; + int i; - s = format (s, "%d plies, memory usage %U", - pool_elts (m->ply_pool), - format_memory_size, mtrie_memory_usage (m, 0)); + s = format (s, "%d plies, memory usage %U\n", + pool_elts (ip4_ply_pool), + format_memory_size, mtrie_memory_usage (m)); + s = format (s, "root-ply"); + p = &m->root_ply; - if (pool_elts (m->ply_pool) > 0) + for (i = 0; i < ARRAY_LEN (p->leaves); i++) { - ip4_address_t base_address; - base_address.as_u32 = 0; - s = - format (s, "\n %U", format_ip4_fib_mtrie_ply, m, base_address, 0, 0); + u16 slot; + + slot = clib_host_to_net_u16 (i); + + if (p->dst_address_bits_of_leaves[slot] > 0) + { + FORMAT_PLY (s, p, slot, base_address, 16, 2); + } } return s; } +static clib_error_t * +ip4_mtrie_module_init (vlib_main_t * vm) +{ + /* Burn one ply so index 0 is taken */ + CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p); + + pool_get (ip4_ply_pool, p); + + return (NULL); +} + +VLIB_INIT_FUNCTION (ip4_mtrie_module_init); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h index 128195d3..be262c2c 100644 --- a/src/vnet/ip/ip4_mtrie.h +++ b/src/vnet/ip/ip4_mtrie.h @@ -47,16 +47,43 @@ /* ip4 fib leafs: 4 ply 8-8-8-8 mtrie. 1 + 2*adj_index for terminal leaves. - 0 + 2*next_ply_index for non-terminals. + 0 + 2*next_ply_index for non-terminals, i.e. PLYs 1 => empty (adjacency index of zero is special miss adjacency). */ typedef u32 ip4_fib_mtrie_leaf_t; #define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0) /** + * @brief the 16 way stride that is the top PLY of the mtrie + * We do not maintain the count of 'real' leaves in this PLY, since + * it is never removed. The FIB will destroy the mtrie and the ply once + * the FIB is destroyed. + */ +#define PLY_16_SIZE (1<<16) +typedef struct ip4_fib_mtrie_16_ply_t_ +{ + /** + * The leaves/slots/buckets to be filed with leafs + */ + union + { + ip4_fib_mtrie_leaf_t leaves[PLY_16_SIZE]; + +#ifdef CLIB_HAVE_VEC128 + u32x4 leaves_as_u32x4[PLY_16_SIZE / 4]; +#endif + }; + + /** + * Prefix length for terminal leaves. + */ + u8 dst_address_bits_of_leaves[PLY_16_SIZE]; +} ip4_fib_mtrie_16_ply_t; + +/** * @brief One ply of the 4 ply mtrie fib. */ -typedef struct +typedef struct ip4_fib_mtrie_8_ply_t_ { /** * The leaves/slots/buckets to be filed with leafs @@ -90,34 +117,72 @@ typedef struct /* Pad to cache line boundary. */ u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)]; } -ip4_fib_mtrie_ply_t; +ip4_fib_mtrie_8_ply_t; -STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_ply_t) % CLIB_CACHE_LINE_BYTES, +STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES, "IP4 Mtrie ply cache line"); +/** + * @brief The mutiway-TRIE. + * There is no data associated with the mtrie apart from the top PLY + */ typedef struct { - /* Pool of plies. Index zero is root ply. */ - ip4_fib_mtrie_ply_t *ply_pool; + /** + * Embed the PLY with the mtrie struct. This means that the Data-plane + * 'get me the mtrie' returns the first ply, and not an indirect 'pointer' + * to it. therefore no cachline misses in the data-path. + */ + ip4_fib_mtrie_16_ply_t root_ply; } ip4_fib_mtrie_t; -void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m); +/** + * @brief Initialise an mtrie + */ +void ip4_mtrie_init (ip4_fib_mtrie_t * m); -struct ip4_fib_t; +/** + * @brief Free an mtrie, It must be emty when free'd + */ +void ip4_mtrie_free (ip4_fib_mtrie_t * m); -void ip4_fib_mtrie_add_del_route (struct ip4_fib_t *f, - ip4_address_t dst_address, - u32 dst_address_length, - u32 adj_index, u32 is_del); +/** + * @brief Add a route/rntry to the mtrie + */ +void ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, u32 adj_index); +/** + * @brief remove a route/rntry to the mtrie + */ +void ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, + u32 adj_index, + u32 cover_address_length, u32 cover_adj_index); +/** + * @brief Format/display the contents of the mtrie + */ format_function_t format_ip4_fib_mtrie; +/** + * @brief A global pool of 8bit stride plys + */ +extern ip4_fib_mtrie_8_ply_t *ip4_ply_pool; + +/** + * Is the leaf terminal (i.e. an LB index) or non-terminak (i.e. a PLY index) + */ always_inline u32 ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n) { return n & 1; } +/** + * From the stored slot value extract the LB index value + */ always_inline u32 ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n) { @@ -125,35 +190,38 @@ ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n) return n >> 1; } -/* Lookup step. Processes 1 byte of 4 byte ip4 address. */ +/** + * @brief Lookup step. Processes 1 byte of 4 byte ip4 address. + */ always_inline ip4_fib_mtrie_leaf_t ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t current_leaf, const ip4_address_t * dst_address, u32 dst_address_byte_index) { - ip4_fib_mtrie_ply_t *ply; + ip4_fib_mtrie_8_ply_t *ply; + uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf); if (!current_is_terminal) { - ply = m->ply_pool + (current_leaf >> 1); + ply = ip4_ply_pool + (current_leaf >> 1); return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]); } return current_leaf; } -/* Lookup step. Processes 1 byte of 4 byte ip4 address. */ +/** + * @brief Lookup step number 1. Processes 2 bytes of 4 byte ip4 address. + */ always_inline ip4_fib_mtrie_leaf_t ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m, const ip4_address_t * dst_address) { ip4_fib_mtrie_leaf_t next_leaf; - ip4_fib_mtrie_ply_t *ply; - ply = m->ply_pool; - next_leaf = ply->leaves[dst_address->as_u8[0]]; + next_leaf = m->root_ply.leaves[dst_address->as_u16[0]]; return next_leaf; } diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h index b2c1fcd4..1ff9fbdb 100644 --- a/src/vnet/ip/ip4_packet.h +++ b/src/vnet/ip/ip4_packet.h @@ -52,6 +52,7 @@ typedef union u32 data_u32; /* Aliases. */ u8 as_u8[4]; + u16 as_u16[2]; u32 as_u32; } ip4_address_t; diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c index 7c2b7be8..6831066e 100644 --- a/src/vnet/ip/ip4_source_check.c +++ b/src/vnet/ip/ip4_source_check.c @@ -166,11 +166,6 @@ ip4_source_check_inline (vlib_main_t * vm, leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); - leaf1 = - ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2); @@ -249,9 +244,6 @@ ip4_source_check_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); leaf0 = diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index 8fa9a479..bf7ec7d5 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -153,6 +153,9 @@ typedef struct ip6_main_t /* Pool of FIBs. */ struct fib_table_t_ *fibs; + /* Pool of V6 FIBs. */ + ip6_fib_t *v6_fibs; + /** Vector of MFIBs. */ struct mfib_table_t_ *mfibs; diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index e3a1fee8..b9f1782b 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -240,6 +240,21 @@ send_ip_fib_details (vpe_api_main_t * am, vl_msg_api_send_shmem (q, (u8 *) & mp); } +typedef struct vl_api_ip_fib_dump_walk_ctx_t_ +{ + fib_node_index_t *feis; +} vl_api_ip_fib_dump_walk_ctx_t; + +static int +vl_api_ip_fib_dump_walk (fib_node_index_t fei, void *arg) +{ + vl_api_ip_fib_dump_walk_ctx_t *ctx = arg; + + vec_add1 (ctx->feis, fei); + + return (1); +} + static void vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) { @@ -247,12 +262,13 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) unix_shared_memory_queue_t *q; ip4_main_t *im = &ip4_main; fib_table_t *fib_table; - fib_node_index_t lfei, *lfeip, *lfeis = NULL; - mpls_label_t key; + fib_node_index_t *lfeip; fib_prefix_t pfx; u32 fib_index; fib_route_path_encode_t *api_rpaths; - int i; + vl_api_ip_fib_dump_walk_ctx_t ctx = { + .feis = NULL, + }; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) @@ -261,19 +277,16 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) /* *INDENT-OFF* */ pool_foreach (fib_table, im->fibs, ({ - for (i = 0; i < ARRAY_LEN (fib_table->v4.fib_entry_by_dst_address); i++) - { - hash_foreach(key, lfei, fib_table->v4.fib_entry_by_dst_address[i], - ({ - vec_add1(lfeis, lfei); - })); - } + fib_table_walk(fib_table->ft_index, + FIB_PROTOCOL_IP4, + vl_api_ip_fib_dump_walk, + &ctx); })); /* *INDENT-ON* */ - vec_sort_with_function (lfeis, fib_entry_cmp_for_sort); + vec_sort_with_function (ctx.feis, fib_entry_cmp_for_sort); - vec_foreach (lfeip, lfeis) + vec_foreach (lfeip, ctx.feis) { fib_entry_get_prefix (*lfeip, &pfx); fib_index = fib_entry_get_fib_index (*lfeip); @@ -286,7 +299,7 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) vec_free (api_rpaths); } - vec_free (lfeis); + vec_free (ctx.feis); } static void @@ -377,10 +390,10 @@ api_ip6_fib_table_get_all (unix_shared_memory_queue_t * q, { vpe_api_main_t *am = &vpe_api_main; ip6_main_t *im6 = &ip6_main; - ip6_fib_t *fib = &fib_table->v6; fib_node_index_t *fib_entry_index; api_ip6_fib_show_ctx_t ctx = { - .fib_index = fib->index,.entries = NULL, + .fib_index = fib_table->ft_index, + .entries = NULL, }; fib_route_path_encode_t *api_rpaths; fib_prefix_t pfx; diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c index f631dc76..a085aaa2 100644 --- a/src/vnet/mpls/interface.c +++ b/src/vnet/mpls/interface.c @@ -18,6 +18,7 @@ #include <vnet/vnet.h> #include <vnet/pg/pg.h> #include <vnet/mpls/mpls.h> +#include <vnet/fib/mpls_fib.h> #include <vnet/fib/ip4_fib.h> #include <vnet/adj/adj_midchain.h> #include <vnet/dpo/classify_dpo.h> diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h index 300f2cfd..b0125e60 100644 --- a/src/vnet/mpls/mpls.h +++ b/src/vnet/mpls/mpls.h @@ -30,29 +30,6 @@ typedef enum { MPLS_N_ERROR, } mpls_error_t; -#define MPLS_FIB_DEFAULT_TABLE_ID 0 - -/** - * Type exposure is to allow the DP fast/inlined access - */ -#define MPLS_FIB_KEY_SIZE 21 -#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1)) - -typedef struct mpls_fib_t_ -{ - /** - * A hash table of entries. 21 bit key - * Hash table for reduced memory footprint - */ - uword * mf_entries; - - /** - * The load-balance indeices keyed by 21 bit label+eos bit. - * A flat array for maximum lookup performace. - */ - index_t mf_lbs[MPLS_FIB_DB_SIZE]; -} mpls_fib_t; - /** * @brief Definition of a callback for receiving MPLS interface state change * notifications @@ -67,6 +44,9 @@ typedef struct { /** A pool of all the MPLS FIBs */ struct fib_table_t_ *fibs; + /** A pool of all the MPLS FIBs */ + struct mpls_fib_t_ *mpls_fibs; + /** A hash table to lookup the mpls_fib by table ID */ uword *fib_index_by_table_id; diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index a36a5046..f1aef6c9 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -26,6 +26,7 @@ #include <vnet/mpls/mpls_tunnel.h> #include <vnet/fib/fib_table.h> #include <vnet/fib/fib_api.h> +#include <vnet/fib/mpls_fib.h> #include <vnet/vnet_msg_enum.h> @@ -369,6 +370,21 @@ send_mpls_fib_details (vpe_api_main_t * am, vl_msg_api_send_shmem (q, (u8 *) & mp); } +typedef struct vl_api_mpls_fib_dump_table_walk_ctx_t_ +{ + fib_node_index_t *lfeis; +} vl_api_mpls_fib_dump_table_walk_ctx_t; + +static int +vl_api_mpls_fib_dump_table_walk (fib_node_index_t fei, void *arg) +{ + vl_api_mpls_fib_dump_table_walk_ctx_t *ctx = arg; + + vec_add1 (ctx->lfeis, fei); + + return (1); +} + static void vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) { @@ -376,28 +392,30 @@ vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) unix_shared_memory_queue_t *q; mpls_main_t *mm = &mpls_main; fib_table_t *fib_table; - fib_node_index_t lfei, *lfeip, *lfeis = NULL; - mpls_label_t key; + mpls_fib_t *mpls_fib; + fib_node_index_t *lfeip = NULL; fib_prefix_t pfx; u32 fib_index; fib_route_path_encode_t *api_rpaths; + vl_api_mpls_fib_dump_table_walk_ctx_t ctx = { + .lfeis = NULL, + }; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) return; /* *INDENT-OFF* */ - pool_foreach (fib_table, mm->fibs, + pool_foreach (mpls_fib, mm->mpls_fibs, ({ - hash_foreach(key, lfei, fib_table->mpls.mf_entries, - ({ - vec_add1(lfeis, lfei); - })); + mpls_fib_table_walk (mpls_fib, + vl_api_mpls_fib_dump_table_walk, + &ctx); })); /* *INDENT-ON* */ - vec_sort_with_function (lfeis, fib_entry_cmp_for_sort); + vec_sort_with_function (ctx.lfeis, fib_entry_cmp_for_sort); - vec_foreach (lfeip, lfeis) + vec_foreach (lfeip, ctx.lfeis) { fib_entry_get_prefix (*lfeip, &pfx); fib_index = fib_entry_get_fib_index (*lfeip); @@ -410,7 +428,7 @@ vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) vec_free (api_rpaths); } - vec_free (lfeis); + vec_free (ctx.lfeis); } /* diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 14ccd864..09ae8b8f 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -896,9 +896,10 @@ ip4_reset_fib_t_handler (vl_api_reset_fib_t * mp) /* *INDENT-OFF* */ pool_foreach (fib_table, im4->fibs, ({ - fib = &fib_table->v4; vnet_sw_interface_t * si; + fib = pool_elt_at_index (im4->v4_fibs, fib_table->ft_index); + if (fib->table_id != target_fib_id) continue; @@ -964,7 +965,8 @@ ip6_reset_fib_t_handler (vl_api_reset_fib_t * mp) pool_foreach (fib_table, im6->fibs, ({ vnet_sw_interface_t * si; - fib = &(fib_table->v6); + + fib = pool_elt_at_index (im6->v6_fibs, fib_table->ft_index); if (fib->table_id != target_fib_id) continue; diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c index 1927da0b..042d02e2 100644 --- a/src/vpp/stats/stats.c +++ b/src/vpp/stats/stats.c @@ -17,6 +17,7 @@ #include <vlib/threads.h> #include <vnet/fib/fib_entry.h> #include <vnet/fib/fib_table.h> +#include <vnet/fib/ip4_fib.h> #include <vnet/dpo/load_balance.h> #define STATS_DEBUG 0 @@ -576,6 +577,7 @@ do_ip4_fibs (stats_main_t * sm) static ip4_route_t *routes; ip4_route_t *r; fib_table_t *fib; + ip4_fib_t *v4_fib; ip_lookup_main_t *lm = &im4->lookup_main; static uword *results; vl_api_vnet_ip4_fib_counters_t *mp = 0; @@ -592,6 +594,8 @@ again: while ((fib - im4->fibs) < start_at_fib_index) continue; + v4_fib = pool_elt_at_index (im4->v4_fibs, fib->ft_index); + if (mp == 0) { items_this_message = IP4_FIB_COUNTER_BATCH_SIZE; @@ -615,9 +619,9 @@ again: vec_reset_length (routes); vec_reset_length (results); - for (i = 0; i < ARRAY_LEN (fib->v4.fib_entry_by_dst_address); i++) + for (i = 0; i < ARRAY_LEN (v4_fib->fib_entry_by_dst_address); i++) { - uword *hash = fib->v4.fib_entry_by_dst_address[i]; + uword *hash = v4_fib->fib_entry_by_dst_address[i]; hash_pair_t *p; ip4_route_t x; |