From 04a75e3230ab71248fc29a56b9f64bdaee0c17ac Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Thu, 23 Mar 2017 06:46:01 -0700 Subject: Mtrie optimisations 1 - make the default route non-special, i.e. like any other less specific route. Consequently, all buckets have a valid valid index of either a leaf or a ply. Checks for special indeices in the data-path can thus be removed. 2 - since all leaves are now 'real' i.e. they represent a real load-balance object, to tell if a ply slot is 'empty' requeirs chekcing that the prefix length of the leaf occupying the slot is slot than the minium value for that ply. 3 - when removing a leaf find the cover first, then recurse down the ply and replace the old leaf with the cover. This saves us a ply walk. Change-Id: Idd523019e8bb1b6ef527b1f5279a5e24bcf18332 Signed-off-by: Neale Ranns --- src/vnet/adj/adj.c | 12 --- src/vnet/adj/adj_l2.c | 3 - src/vnet/cop/ip4_whitelist.c | 15 +-- src/vnet/dpo/load_balance.c | 7 ++ src/vnet/dpo/lookup_dpo.c | 16 +--- src/vnet/fib/ip4_fib.c | 3 +- src/vnet/fib/ip4_fib.h | 6 +- src/vnet/ip/ip4_forward.c | 65 +++---------- src/vnet/ip/ip4_mtrie.c | 204 +++++++++++++++++++++++++---------------- src/vnet/ip/ip4_mtrie.h | 125 +++++++++++-------------- src/vnet/ip/ip4_source_check.c | 13 +-- src/vnet/ip/ip6_forward.c | 6 -- src/vnet/mpls/mpls_output.c | 8 -- 13 files changed, 208 insertions(+), 275 deletions(-) diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index 9a01e89d..c1d036a0 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -20,13 +20,6 @@ #include #include -/* - * Special Adj with index zero. we need to define this since the v4 mtrie - * assumes an index of 0 implies the ply is empty. therefore all 'real' - * adjs need a non-zero index. - */ -static ip_adjacency_t *special_v4_miss_adj_with_index_zero; - /* Adjacency packet/byte counters indexed by adjacency index. */ vlib_combined_counter_main_t adjacency_counters; @@ -426,11 +419,6 @@ adj_module_init (vlib_main_t * vm) adj_midchain_module_init(); adj_mcast_module_init(); - /* - * one special adj to reserve index 0 - */ - special_v4_miss_adj_with_index_zero = adj_alloc(FIB_PROTOCOL_IP4); - return (NULL); } diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c index fb64e505..f68e54e0 100644 --- a/src/vnet/adj/adj_l2.c +++ b/src/vnet/adj/adj_l2.c @@ -81,9 +81,6 @@ adj_l2_rewrite_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT(adj_index0); - adj0 = adj_get (adj_index0); /* Guess we are only writing on simple Ethernet header. */ diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c index d5121e72..ccb9dc03 100644 --- a/src/vnet/cop/ip4_whitelist.c +++ b/src/vnet/cop/ip4_whitelist.c @@ -125,10 +125,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); @@ -167,10 +164,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, sizeof (c1[0])); mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie; - leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 0); + leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1); @@ -267,10 +261,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index e9fb5d9d..d5e98e4e 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -829,6 +829,13 @@ load_balance_module_init (void) { dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes); + /* + * Special LB with index zero. we need to define this since the v4 mtrie + * assumes an index of 0 implies the ply is empty. therefore all 'real' + * adjs need a non-zero index. + */ + load_balance_create(0, DPO_PROTO_IP4, 0); + load_balance_map_module_init(); } diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index 96fedd27..3726c8fe 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -205,19 +205,16 @@ ip4_src_fib_lookup_one (u32 src_fib_index0, const ip4_address_t * addr0, u32 * src_adj_index0) { - ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip4_fib_mtrie_leaf_t leaf0; ip4_fib_mtrie_t * mtrie0; mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie; - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); - /* Handle default route. */ - leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); } @@ -235,10 +232,8 @@ ip4_src_fib_lookup_two (u32 src_fib_index0, mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie; mtrie1 = &ip4_fib_get (src_fib_index1)->mtrie; - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0); + leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, addr1); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1); @@ -249,9 +244,6 @@ ip4_src_fib_lookup_two (u32 src_fib_index0, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3); - /* Handle default route. */ - leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1); src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1); } diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index e8211c80..a7915620 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -158,8 +158,9 @@ ip4_fib_table_destroy (ip4_fib_t *fib) /* * remove all the specials we added when the table was created. + * In reverse order so the default route is last. */ - for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++) + for (ii = ARRAY_LEN(ip4_specials) - 1; ii >= 0; ii--) { fib_prefix_t prefix = ip4_specials[ii].ift_prefix; diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h index a8dc68b5..243fd77f 100644 --- a/src/vnet/fib/ip4_fib.h +++ b/src/vnet/fib/ip4_fib.h @@ -133,15 +133,11 @@ ip4_fib_forwarding_lookup (u32 fib_index, mtrie = &ip4_fib_get(fib_index)->mtrie; - leaf = IP4_FIB_MTRIE_LEAF_ROOT; - leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 0); + leaf = ip4_fib_mtrie_lookup_step_one (mtrie, addr); leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1); leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2); leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3); - /* Handle default route. */ - leaf = (leaf == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie->default_leaf : leaf); - return (ip4_fib_mtrie_leaf_get_adj_index(leaf)); } diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index bbba4b70..60e15d41 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -186,12 +186,11 @@ ip4_lookup_inline (vlib_main_t * vm, mtrie2 = &ip4_fib_get (fib_index2)->mtrie; mtrie3 = &ip4_fib_get (fib_index3)->mtrie; - leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0); - leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0); - leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); + leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1); + leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2); + leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3); } tcp0 = (void *) (ip0 + 1); @@ -241,25 +240,13 @@ ip4_lookup_inline (vlib_main_t * vm, } else { - /* Handle default route. */ - leaf0 = - (leaf0 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - leaf1 = - (leaf1 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1); - leaf2 = - (leaf2 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2); - leaf3 = - (leaf3 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3); lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2); lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3); } + ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3); lb0 = load_balance_get (lb_index0); lb1 = load_balance_get (lb_index1); lb2 = load_balance_get (lb_index2); @@ -384,9 +371,7 @@ ip4_lookup_inline (vlib_main_t * vm, { mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); } tcp0 = (void *) (ip0 + 1); @@ -408,12 +393,10 @@ ip4_lookup_inline (vlib_main_t * vm, else { /* Handle default route. */ - leaf0 = - (leaf0 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); } + ASSERT (lbi0); lb0 = load_balance_get (lbi0); /* Use flow hash to compute multipath adjacency. */ @@ -1623,12 +1606,8 @@ ip4_local_inline (vlib_main_t * vm, mtrie0 = &ip4_fib_get (fib_index0)->mtrie; mtrie1 = &ip4_fib_get (fib_index1)->mtrie; - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); - leaf1 = - ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); + leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); /* Treat IP frag packets as "experimental" protocol for now until support of IP frag reassembly is implemented */ @@ -1722,12 +1701,6 @@ ip4_local_inline (vlib_main_t * vm, ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3); - leaf0 = - (leaf0 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - leaf1 = - (leaf1 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1); vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); @@ -1831,10 +1804,7 @@ ip4_local_inline (vlib_main_t * vm, mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); /* Treat IP frag packets as "experimental" protocol for now until support of IP frag reassembly is implemented */ @@ -1897,9 +1867,6 @@ ip4_local_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); - leaf0 = - (leaf0 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0; @@ -2453,9 +2420,6 @@ ip4_rewrite_inline (vlib_main_t * vm, cpu_index, adj_index1); } - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT (adj_index0 && adj_index1); - ip0 = vlib_buffer_get_current (p0); ip1 = vlib_buffer_get_current (p1); @@ -2643,9 +2607,6 @@ ip4_rewrite_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT (adj_index0); - adj0 = ip_get_adjacency (lm, adj_index0); ip0 = vlib_buffer_get_current (p0); @@ -2967,15 +2928,11 @@ ip4_lookup_validate (ip4_address_t * a, u32 fib_index0) mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3); - /* Handle default route. */ - leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a); diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c index 6e3d0e80..317d8f10 100644 --- a/src/vnet/ip/ip4_mtrie.c +++ b/src/vnet/ip/ip4_mtrie.c @@ -40,14 +40,64 @@ #include #include +always_inline u32 +ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_ply_t * p, u8 dst_byte) +{ + /* + * It's 'non-empty' if the length of the leaf stored is greater than the + * length of a leaf in the covering ply. i.e. the leaf is more specific + * than it's would be cover in the covering ply + */ + if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base) + return (1); + return (0); +} + +always_inline ip4_fib_mtrie_leaf_t +ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index) +{ + ip4_fib_mtrie_leaf_t l; + l = 1 + 2 * adj_index; + ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index); + return l; +} + +always_inline u32 +ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n) +{ + return (n & 1) == 0; +} + +always_inline u32 +ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n) +{ + ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n)); + return n >> 1; +} + +always_inline ip4_fib_mtrie_leaf_t +ip4_fib_mtrie_leaf_set_next_ply_index (u32 i) +{ + ip4_fib_mtrie_leaf_t l; + l = 0 + 2 * i; + ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i); + return l; +} + static void -ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init, - uword prefix_len) +ply_init (ip4_fib_mtrie_ply_t * p, + ip4_fib_mtrie_leaf_t init, u32 prefix_len, u32 ply_base_len) { - p->n_non_empty_leafs = - ip4_fib_mtrie_leaf_is_empty (init) ? 0 : ARRAY_LEN (p->leaves); + /* + * A leaf is 'empty' if it represents a leaf from the covering PLY + * i.e. if the prefix length of the leaf is less than or equal to + * the prefix length of the PLY + */ + p->n_non_empty_leafs = (prefix_len > ply_base_len ? + ARRAY_LEN (p->leaves) : 0); memset (p->dst_address_bits_of_leaves, prefix_len, sizeof (p->dst_address_bits_of_leaves)); + p->dst_address_bits_base = ply_base_len; /* Initialize leaves. */ #ifdef CLIB_HAVE_VEC128 @@ -92,15 +142,16 @@ ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init, } static ip4_fib_mtrie_leaf_t -ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf, - uword prefix_len) +ply_create (ip4_fib_mtrie_t * m, + ip4_fib_mtrie_leaf_t init_leaf, + u32 leaf_prefix_len, u32 ply_base_len) { ip4_fib_mtrie_ply_t *p; /* Get cache aligned ply. */ pool_get_aligned (m->ply_pool, p, sizeof (p[0])); - ply_init (p, init_leaf, prefix_len); + ply_init (p, init_leaf, leaf_prefix_len, ply_base_len); return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool); } @@ -128,7 +179,7 @@ ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) } if (is_root) - ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0); + ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0, 0); else pool_put (m->ply_pool, p); } @@ -140,38 +191,13 @@ ip4_fib_free (ip4_fib_mtrie_t * m) ply_free (m, root_ply); } -u32 -ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst) -{ - ip4_fib_mtrie_ply_t *p = pool_elt_at_index (m->ply_pool, 0); - ip4_fib_mtrie_leaf_t l; - - l = p->leaves[dst.as_u8[0]]; - if (ip4_fib_mtrie_leaf_is_terminal (l)) - return ip4_fib_mtrie_leaf_get_adj_index (l); - - p = get_next_ply_for_leaf (m, l); - l = p->leaves[dst.as_u8[1]]; - if (ip4_fib_mtrie_leaf_is_terminal (l)) - return ip4_fib_mtrie_leaf_get_adj_index (l); - - p = get_next_ply_for_leaf (m, l); - l = p->leaves[dst.as_u8[2]]; - if (ip4_fib_mtrie_leaf_is_terminal (l)) - return ip4_fib_mtrie_leaf_get_adj_index (l); - - p = get_next_ply_for_leaf (m, l); - l = p->leaves[dst.as_u8[3]]; - - ASSERT (ip4_fib_mtrie_leaf_is_terminal (l)); - return ip4_fib_mtrie_leaf_get_adj_index (l); -} - typedef struct { ip4_address_t dst_address; u32 dst_address_length; u32 adj_index; + u32 cover_address_length; + u32 cover_adj_index; } ip4_fib_mtrie_set_unset_leaf_args_t; static void @@ -184,7 +210,6 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, uword i; ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf)); - ASSERT (!ip4_fib_mtrie_leaf_is_empty (new_leaf)); for (i = 0; i < ARRAY_LEN (ply->leaves); i++) { @@ -205,7 +230,7 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf); ASSERT (ply->leaves[i] == new_leaf); ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits; - ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf); + ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i); } } } @@ -219,7 +244,7 @@ set_leaf (ip4_fib_mtrie_t * m, i32 n_dst_bits_next_plies; u8 dst_byte; - ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32); + ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); n_dst_bits_next_plies = @@ -232,7 +257,7 @@ set_leaf (ip4_fib_mtrie_t * m, { uword i, n_dst_bits_this_ply, old_leaf_is_terminal; - n_dst_bits_this_ply = -n_dst_bits_next_plies; + n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies); ASSERT ((a->dst_address.as_u8[dst_address_byte_index] & pow2_mask (n_dst_bits_this_ply)) == 0); @@ -252,13 +277,16 @@ set_leaf (ip4_fib_mtrie_t * m, if (old_leaf_is_terminal) { + old_ply->n_non_empty_leafs -= + ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); old_ply->dst_address_bits_of_leaves[i] = a->dst_address_length; __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf, new_leaf); ASSERT (old_ply->leaves[i] == new_leaf); + old_ply->n_non_empty_leafs += - ip4_fib_mtrie_leaf_is_empty (old_leaf); + ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); ASSERT (old_ply->n_non_empty_leafs <= ARRAY_LEN (old_ply->leaves)); } @@ -283,14 +311,20 @@ set_leaf (ip4_fib_mtrie_t * m, else { ip4_fib_mtrie_ply_t *old_ply, *new_ply; + u8 ply_base_len; + ply_base_len = 8 * (dst_address_byte_index + 1); old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); old_leaf = old_ply->leaves[dst_byte]; if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) { - new_leaf = - ply_create (m, old_leaf, - old_ply->dst_address_bits_of_leaves[dst_byte]); + old_ply->n_non_empty_leafs -= + ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte); + + new_leaf = ply_create (m, old_leaf, + clib_max (old_ply->dst_address_bits_of_leaves + [dst_byte], ply_base_len), + ply_base_len); new_ply = get_next_ply_for_leaf (m, new_leaf); /* Refetch since ply_create may move pool. */ @@ -299,14 +333,11 @@ set_leaf (ip4_fib_mtrie_t * m, __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, new_leaf); ASSERT (old_ply->leaves[dst_byte] == new_leaf); - old_ply->dst_address_bits_of_leaves[dst_byte] = 0; - - old_ply->n_non_empty_leafs -= - ip4_fib_mtrie_leaf_is_non_empty (old_leaf); - ASSERT (old_ply->n_non_empty_leafs >= 0); + old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len; /* Account for the ply we just created. */ old_ply->n_non_empty_leafs += 1; + ASSERT (old_ply->n_non_empty_leafs >= 0); } else new_ply = get_next_ply_for_leaf (m, old_leaf); @@ -325,7 +356,7 @@ unset_leaf (ip4_fib_mtrie_t * m, i32 i, n_dst_bits_this_ply, old_leaf_is_terminal; u8 dst_byte; - ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32); + ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); n_dst_bits_next_plies = @@ -351,12 +382,17 @@ unset_leaf (ip4_fib_mtrie_t * m, && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), dst_address_byte_index + 1))) { - old_ply->leaves[i] = IP4_FIB_MTRIE_LEAF_EMPTY; - old_ply->dst_address_bits_of_leaves[i] = 0; + old_ply->n_non_empty_leafs -= + ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); + + old_ply->leaves[i] = + ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index); + old_ply->dst_address_bits_of_leaves[i] = + clib_max (old_ply->dst_address_bits_base, + a->cover_address_length); - /* No matter what we just deleted a non-empty leaf. */ - ASSERT (!ip4_fib_mtrie_leaf_is_empty (old_leaf)); - old_ply->n_non_empty_leafs -= 1; + old_ply->n_non_empty_leafs += + ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); ASSERT (old_ply->n_non_empty_leafs >= 0); if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0) @@ -365,6 +401,17 @@ unset_leaf (ip4_fib_mtrie_t * m, /* Old ply was deleted. */ return 1; } +#if CLIB_DEBUG > 0 + else if (dst_address_byte_index) + { + int ii, count = 0; + for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++) + { + count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii); + } + ASSERT (count); + } +#endif } } @@ -377,9 +424,7 @@ ip4_mtrie_init (ip4_fib_mtrie_t * m) { ip4_fib_mtrie_leaf_t root; memset (m, 0, sizeof (m[0])); - m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY; - root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, /* dst_address_bits_of_leaves */ - 0); + root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, 0, 0); ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0); } @@ -406,25 +451,21 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, if (!is_del) { - if (dst_address_length == 0) - m->default_leaf = ip4_fib_mtrie_leaf_set_adj_index (adj_index); - else - set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0); + set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0); } else { - if (dst_address_length == 0) - m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY; + ip4_main_t *im = &ip4_main; - else + if (dst_address_length) { - ip4_main_t *im = &ip4_main; - uword i; + word i; - unset_leaf (m, &a, root_ply, 0); - - /* Find next less specific route and insert into mtrie. */ - for (i = dst_address_length - 1; i >= 1; i--) + /* If the ply was not deleted, then we need to fill the + * bucket just reset will the leaf from the less specfic + * cover. + * Find next less specific route and insert into mtrie. */ + for (i = dst_address_length - 1; i >= 0; i--) { uword *p; index_t lbi; @@ -441,16 +482,21 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, if (INDEX_INVALID == lbi) continue; - a.dst_address = key; - a.adj_index = lbi; - a.dst_address_length = i; + a.cover_adj_index = lbi; + a.cover_address_length = i; - set_leaf (m, &a, /* ply_index */ 0, - /* dst_address_byte_index */ 0); break; } } } + else + { + a.cover_adj_index = 0; + a.cover_address_length = 0; + } + + /* the top level ply is never removed, so we can ignore the return code */ + unset_leaf (m, &a, root_ply, 0); } } @@ -483,10 +529,8 @@ format_ip4_fib_mtrie_leaf (u8 * s, va_list * va) { ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t); - if (ip4_fib_mtrie_leaf_is_empty (l)) - s = format (s, "miss"); - else if (ip4_fib_mtrie_leaf_is_terminal (l)) - s = format (s, "adj %d", ip4_fib_mtrie_leaf_get_adj_index (l)); + if (ip4_fib_mtrie_leaf_is_terminal (l)) + s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l)); else s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l)); return s; @@ -511,7 +555,7 @@ format_ip4_fib_mtrie_ply (u8 * s, va_list * va) { ip4_fib_mtrie_leaf_t l = p->leaves[i]; - if (!ip4_fib_mtrie_leaf_is_empty (l)) + if (ip4_fib_mtrie_leaf_is_non_empty (p, i)) { u32 a, ia_length; ip4_address_t ia; diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h index c0afc2cf..128195d3 100644 --- a/src/vnet/ip/ip4_mtrie.h +++ b/src/vnet/ip/ip4_mtrie.h @@ -52,67 +52,15 @@ typedef u32 ip4_fib_mtrie_leaf_t; #define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0) -#define IP4_FIB_MTRIE_LEAF_ROOT (0 + 2*0) -always_inline u32 -ip4_fib_mtrie_leaf_is_empty (ip4_fib_mtrie_leaf_t n) -{ - return n == IP4_FIB_MTRIE_LEAF_EMPTY; -} - -always_inline u32 -ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_leaf_t n) -{ - return n != IP4_FIB_MTRIE_LEAF_EMPTY; -} - -always_inline u32 -ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n) -{ - return n & 1; -} - -always_inline u32 -ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n) -{ - ASSERT (ip4_fib_mtrie_leaf_is_terminal (n)); - return n >> 1; -} - -always_inline ip4_fib_mtrie_leaf_t -ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index) -{ - ip4_fib_mtrie_leaf_t l; - l = 1 + 2 * adj_index; - ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index); - return l; -} - -always_inline u32 -ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n) -{ - return (n & 1) == 0; -} - -always_inline u32 -ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n) -{ - ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n)); - return n >> 1; -} - -always_inline ip4_fib_mtrie_leaf_t -ip4_fib_mtrie_leaf_set_next_ply_index (u32 i) -{ - ip4_fib_mtrie_leaf_t l; - l = 0 + 2 * i; - ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i); - return l; -} - -/* One ply of the 4 ply mtrie fib. */ +/** + * @brief One ply of the 4 ply mtrie fib. + */ typedef struct { + /** + * The leaves/slots/buckets to be filed with leafs + */ union { ip4_fib_mtrie_leaf_t leaves[256]; @@ -122,14 +70,25 @@ typedef struct #endif }; - /* Prefix length for terminal leaves. */ + /** + * Prefix length for leaves/ply. + */ u8 dst_address_bits_of_leaves[256]; - /* Number of non-empty leafs (whether terminal or not). */ + /** + * Number of non-empty leafs (whether terminal or not). + */ i32 n_non_empty_leafs; + /** + * The length of the ply's coviering prefix. Also a measure of its depth + * If a leaf in a slot has a mask length longer than this then it is + * 'non-empty'. Otherwise it is the value of the cover. + */ + i32 dst_address_bits_base; + /* Pad to cache line boundary. */ - u8 pad[CLIB_CACHE_LINE_BYTES - 1 * sizeof (i32)]; + u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)]; } ip4_fib_mtrie_ply_t; @@ -140,9 +99,6 @@ typedef struct { /* Pool of plies. Index zero is root ply. */ ip4_fib_mtrie_ply_t *ply_pool; - - /* Special case leaf for default route 0.0.0.0/0. */ - ip4_fib_mtrie_leaf_t default_leaf; } ip4_fib_mtrie_t; void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m); @@ -154,25 +110,50 @@ void ip4_fib_mtrie_add_del_route (struct ip4_fib_t *f, u32 dst_address_length, u32 adj_index, u32 is_del); -/* Returns adjacency index. */ -u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst); - format_function_t format_ip4_fib_mtrie; +always_inline u32 +ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n) +{ + return n & 1; +} + +always_inline u32 +ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n) +{ + ASSERT (ip4_fib_mtrie_leaf_is_terminal (n)); + return n >> 1; +} + /* Lookup step. Processes 1 byte of 4 byte ip4 address. */ always_inline ip4_fib_mtrie_leaf_t -ip4_fib_mtrie_lookup_step (ip4_fib_mtrie_t * m, +ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t current_leaf, const ip4_address_t * dst_address, u32 dst_address_byte_index) { - ip4_fib_mtrie_leaf_t next_leaf; ip4_fib_mtrie_ply_t *ply; uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf); - ply = m->ply_pool + (current_is_terminal ? 0 : (current_leaf >> 1)); - next_leaf = ply->leaves[dst_address->as_u8[dst_address_byte_index]]; - next_leaf = current_is_terminal ? current_leaf : next_leaf; + if (!current_is_terminal) + { + ply = m->ply_pool + (current_leaf >> 1); + return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]); + } + + return current_leaf; +} + +/* Lookup step. Processes 1 byte of 4 byte ip4 address. */ +always_inline ip4_fib_mtrie_leaf_t +ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address) +{ + ip4_fib_mtrie_leaf_t next_leaf; + ip4_fib_mtrie_ply_t *ply; + + ply = m->ply_pool; + next_leaf = ply->leaves[dst_address->as_u8[0]]; return next_leaf; } diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c index 3af32f2e..7c2b7be8 100644 --- a/src/vnet/ip/ip4_source_check.c +++ b/src/vnet/ip/ip4_source_check.c @@ -162,12 +162,8 @@ ip4_source_check_inline (vlib_main_t * vm, mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie; - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); - leaf1 = - ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); + leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); @@ -250,10 +246,7 @@ ip4_source_check_inline (vlib_main_t * vm, mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index ecc3bd2c..c120f12c 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -1943,9 +1943,6 @@ ip6_rewrite_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT (adj_index0 && adj_index1); - ip0 = vlib_buffer_get_current (p0); ip1 = vlib_buffer_get_current (p1); @@ -2111,9 +2108,6 @@ ip6_rewrite_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT (adj_index0); - adj0 = ip_get_adjacency (lm, adj_index0); ip0 = vlib_buffer_get_current (p0); diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c index 2d8bd0c9..08018fd1 100644 --- a/src/vnet/mpls/mpls_output.c +++ b/src/vnet/mpls/mpls_output.c @@ -121,10 +121,6 @@ mpls_output_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT(adj_index0); - ASSERT(adj_index1); - adj0 = adj_get(adj_index0); adj1 = adj_get(adj_index1); hdr0 = vlib_buffer_get_current (p0); @@ -237,9 +233,6 @@ mpls_output_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT(adj_index0); - adj0 = adj_get(adj_index0); hdr0 = vlib_buffer_get_current (p0); @@ -431,7 +424,6 @@ mpls_adj_incomplete (vlib_main_t * vm, n_left_to_next -= 1; adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - ASSERT(adj_index0); adj0 = adj_get(adj_index0); -- cgit 1.2.3-korg