aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeale Ranns <nranns@cisco.com>2017-03-23 06:46:01 -0700
committerDamjan Marion <dmarion.lists@gmail.com>2017-03-29 13:00:52 +0000
commit04a75e3230ab71248fc29a56b9f64bdaee0c17ac (patch)
tree85977ecfdf1c4002451cbbcf075694ae1ba1173c
parent9a69a6095f67b8979a02f128f44e449889454273 (diff)
Mtrie optimisations
1 - make the default route non-special, i.e. like any other less specific route. Consequently, all buckets have a valid valid index of either a leaf or a ply. Checks for special indeices in the data-path can thus be removed. 2 - since all leaves are now 'real' i.e. they represent a real load-balance object, to tell if a ply slot is 'empty' requeirs chekcing that the prefix length of the leaf occupying the slot is slot than the minium value for that ply. 3 - when removing a leaf find the cover first, then recurse down the ply and replace the old leaf with the cover. This saves us a ply walk. Change-Id: Idd523019e8bb1b6ef527b1f5279a5e24bcf18332 Signed-off-by: Neale Ranns <nranns@cisco.com>
-rw-r--r--src/vnet/adj/adj.c12
-rw-r--r--src/vnet/adj/adj_l2.c3
-rw-r--r--src/vnet/cop/ip4_whitelist.c15
-rw-r--r--src/vnet/dpo/load_balance.c7
-rw-r--r--src/vnet/dpo/lookup_dpo.c16
-rw-r--r--src/vnet/fib/ip4_fib.c3
-rw-r--r--src/vnet/fib/ip4_fib.h6
-rw-r--r--src/vnet/ip/ip4_forward.c65
-rw-r--r--src/vnet/ip/ip4_mtrie.c204
-rw-r--r--src/vnet/ip/ip4_mtrie.h125
-rw-r--r--src/vnet/ip/ip4_source_check.c13
-rw-r--r--src/vnet/ip/ip6_forward.c6
-rw-r--r--src/vnet/mpls/mpls_output.c8
13 files changed, 208 insertions, 275 deletions
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
index 9a01e89d..c1d036a0 100644
--- a/src/vnet/adj/adj.c
+++ b/src/vnet/adj/adj.c
@@ -20,13 +20,6 @@
#include <vnet/adj/adj_mcast.h>
#include <vnet/fib/fib_node_list.h>
-/*
- * Special Adj with index zero. we need to define this since the v4 mtrie
- * assumes an index of 0 implies the ply is empty. therefore all 'real'
- * adjs need a non-zero index.
- */
-static ip_adjacency_t *special_v4_miss_adj_with_index_zero;
-
/* Adjacency packet/byte counters indexed by adjacency index. */
vlib_combined_counter_main_t adjacency_counters;
@@ -426,11 +419,6 @@ adj_module_init (vlib_main_t * vm)
adj_midchain_module_init();
adj_mcast_module_init();
- /*
- * one special adj to reserve index 0
- */
- special_v4_miss_adj_with_index_zero = adj_alloc(FIB_PROTOCOL_IP4);
-
return (NULL);
}
diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c
index fb64e505..f68e54e0 100644
--- a/src/vnet/adj/adj_l2.c
+++ b/src/vnet/adj/adj_l2.c
@@ -81,9 +81,6 @@ adj_l2_rewrite_inline (vlib_main_t * vm,
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
- /* We should never rewrite a pkt using the MISS adjacency */
- ASSERT(adj_index0);
-
adj0 = adj_get (adj_index0);
/* Guess we are only writing on simple Ethernet header. */
diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c
index d5121e72..ccb9dc03 100644
--- a/src/vnet/cop/ip4_whitelist.c
+++ b/src/vnet/cop/ip4_whitelist.c
@@ -125,10 +125,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
- leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
&ip0->src_address, 1);
@@ -167,10 +164,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
sizeof (c1[0]));
mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;
- leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
- &ip1->src_address, 0);
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
&ip1->src_address, 1);
@@ -267,10 +261,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
- leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
&ip0->src_address, 1);
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index e9fb5d9d..d5e98e4e 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -829,6 +829,13 @@ load_balance_module_init (void)
{
dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes);
+ /*
+ * Special LB with index zero. we need to define this since the v4 mtrie
+ * assumes an index of 0 implies the ply is empty. therefore all 'real'
+ * adjs need a non-zero index.
+ */
+ load_balance_create(0, DPO_PROTO_IP4, 0);
+
load_balance_map_module_init();
}
diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
index 96fedd27..3726c8fe 100644
--- a/src/vnet/dpo/lookup_dpo.c
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -205,19 +205,16 @@ ip4_src_fib_lookup_one (u32 src_fib_index0,
const ip4_address_t * addr0,
u32 * src_adj_index0)
{
- ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ ip4_fib_mtrie_leaf_t leaf0;
ip4_fib_mtrie_t * mtrie0;
mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
- leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
- /* Handle default route. */
- leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
}
@@ -235,10 +232,8 @@ ip4_src_fib_lookup_two (u32 src_fib_index0,
mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
mtrie1 = &ip4_fib_get (src_fib_index1)->mtrie;
- leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0);
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, addr1);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1);
@@ -249,9 +244,6 @@ ip4_src_fib_lookup_two (u32 src_fib_index0,
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3);
- /* Handle default route. */
- leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
- leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
}
diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c
index e8211c80..a7915620 100644
--- a/src/vnet/fib/ip4_fib.c
+++ b/src/vnet/fib/ip4_fib.c
@@ -158,8 +158,9 @@ ip4_fib_table_destroy (ip4_fib_t *fib)
/*
* remove all the specials we added when the table was created.
+ * In reverse order so the default route is last.
*/
- for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
+ for (ii = ARRAY_LEN(ip4_specials) - 1; ii >= 0; ii--)
{
fib_prefix_t prefix = ip4_specials[ii].ift_prefix;
diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h
index a8dc68b5..243fd77f 100644
--- a/src/vnet/fib/ip4_fib.h
+++ b/src/vnet/fib/ip4_fib.h
@@ -133,15 +133,11 @@ ip4_fib_forwarding_lookup (u32 fib_index,
mtrie = &ip4_fib_get(fib_index)->mtrie;
- leaf = IP4_FIB_MTRIE_LEAF_ROOT;
- leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 0);
+ leaf = ip4_fib_mtrie_lookup_step_one (mtrie, addr);
leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1);
leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2);
leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3);
- /* Handle default route. */
- leaf = (leaf == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie->default_leaf : leaf);
-
return (ip4_fib_mtrie_leaf_get_adj_index(leaf));
}
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index bbba4b70..60e15d41 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -186,12 +186,11 @@ ip4_lookup_inline (vlib_main_t * vm,
mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
- leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
- leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
- leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
+ leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
+ leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
}
tcp0 = (void *) (ip0 + 1);
@@ -241,25 +240,13 @@ ip4_lookup_inline (vlib_main_t * vm,
}
else
{
- /* Handle default route. */
- leaf0 =
- (leaf0 ==
- IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
- leaf1 =
- (leaf1 ==
- IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
- leaf2 =
- (leaf2 ==
- IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
- leaf3 =
- (leaf3 ==
- IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
}
+ ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
lb0 = load_balance_get (lb_index0);
lb1 = load_balance_get (lb_index1);
lb2 = load_balance_get (lb_index2);
@@ -384,9 +371,7 @@ ip4_lookup_inline (vlib_main_t * vm,
{
mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
- leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
}
tcp0 = (void *) (ip0 + 1);
@@ -408,12 +393,10 @@ ip4_lookup_inline (vlib_main_t * vm,
else
{
/* Handle default route. */
- leaf0 =
- (leaf0 ==
- IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
}
+ ASSERT (lbi0);
lb0 = load_balance_get (lbi0);
/* Use flow hash to compute multipath adjacency. */
@@ -1623,12 +1606,8 @@ ip4_local_inline (vlib_main_t * vm,
mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
- leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf0 =
- ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
- leaf1 =
- ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
/* Treat IP frag packets as "experimental" protocol for now
until support of IP frag reassembly is implemented */
@@ -1722,12 +1701,6 @@ ip4_local_inline (vlib_main_t * vm,
ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
leaf1 =
ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
- leaf0 =
- (leaf0 ==
- IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
- leaf1 =
- (leaf1 ==
- IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
ip4_fib_mtrie_leaf_get_adj_index (leaf0);
@@ -1831,10 +1804,7 @@ ip4_local_inline (vlib_main_t * vm,
mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
- leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf0 =
- ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
/* Treat IP frag packets as "experimental" protocol for now
until support of IP frag reassembly is implemented */
@@ -1897,9 +1867,6 @@ ip4_local_inline (vlib_main_t * vm,
leaf0 =
ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
- leaf0 =
- (leaf0 ==
- IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
@@ -2453,9 +2420,6 @@ ip4_rewrite_inline (vlib_main_t * vm,
cpu_index, adj_index1);
}
- /* We should never rewrite a pkt using the MISS adjacency */
- ASSERT (adj_index0 && adj_index1);
-
ip0 = vlib_buffer_get_current (p0);
ip1 = vlib_buffer_get_current (p1);
@@ -2643,9 +2607,6 @@ ip4_rewrite_inline (vlib_main_t * vm,
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
- /* We should never rewrite a pkt using the MISS adjacency */
- ASSERT (adj_index0);
-
adj0 = ip_get_adjacency (lm, adj_index0);
ip0 = vlib_buffer_get_current (p0);
@@ -2967,15 +2928,11 @@ ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
- leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
- /* Handle default route. */
- leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
-
lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
index 6e3d0e80..317d8f10 100644
--- a/src/vnet/ip/ip4_mtrie.c
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -40,14 +40,64 @@
#include <vnet/ip/ip.h>
#include <vnet/fib/fib_entry.h>
+always_inline u32
+ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_ply_t * p, u8 dst_byte)
+{
+ /*
+ * It's 'non-empty' if the length of the leaf stored is greater than the
+ * length of a leaf in the covering ply. i.e. the leaf is more specific
+ * than it's would be cover in the covering ply
+ */
+ if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base)
+ return (1);
+ return (0);
+}
+
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
+{
+ ip4_fib_mtrie_leaf_t l;
+ l = 1 + 2 * adj_index;
+ ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
+ return l;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
+{
+ return (n & 1) == 0;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
+{
+ ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
+ return n >> 1;
+}
+
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
+{
+ ip4_fib_mtrie_leaf_t l;
+ l = 0 + 2 * i;
+ ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
+ return l;
+}
+
static void
-ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init,
- uword prefix_len)
+ply_init (ip4_fib_mtrie_ply_t * p,
+ ip4_fib_mtrie_leaf_t init, u32 prefix_len, u32 ply_base_len)
{
- p->n_non_empty_leafs =
- ip4_fib_mtrie_leaf_is_empty (init) ? 0 : ARRAY_LEN (p->leaves);
+ /*
+ * A leaf is 'empty' if it represents a leaf from the covering PLY
+ * i.e. if the prefix length of the leaf is less than or equal to
+ * the prefix length of the PLY
+ */
+ p->n_non_empty_leafs = (prefix_len > ply_base_len ?
+ ARRAY_LEN (p->leaves) : 0);
memset (p->dst_address_bits_of_leaves, prefix_len,
sizeof (p->dst_address_bits_of_leaves));
+ p->dst_address_bits_base = ply_base_len;
/* Initialize leaves. */
#ifdef CLIB_HAVE_VEC128
@@ -92,15 +142,16 @@ ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init,
}
static ip4_fib_mtrie_leaf_t
-ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf,
- uword prefix_len)
+ply_create (ip4_fib_mtrie_t * m,
+ ip4_fib_mtrie_leaf_t init_leaf,
+ u32 leaf_prefix_len, u32 ply_base_len)
{
ip4_fib_mtrie_ply_t *p;
/* Get cache aligned ply. */
pool_get_aligned (m->ply_pool, p, sizeof (p[0]));
- ply_init (p, init_leaf, prefix_len);
+ ply_init (p, init_leaf, leaf_prefix_len, ply_base_len);
return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool);
}
@@ -128,7 +179,7 @@ ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
}
if (is_root)
- ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0);
+ ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0, 0);
else
pool_put (m->ply_pool, p);
}
@@ -140,38 +191,13 @@ ip4_fib_free (ip4_fib_mtrie_t * m)
ply_free (m, root_ply);
}
-u32
-ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst)
-{
- ip4_fib_mtrie_ply_t *p = pool_elt_at_index (m->ply_pool, 0);
- ip4_fib_mtrie_leaf_t l;
-
- l = p->leaves[dst.as_u8[0]];
- if (ip4_fib_mtrie_leaf_is_terminal (l))
- return ip4_fib_mtrie_leaf_get_adj_index (l);
-
- p = get_next_ply_for_leaf (m, l);
- l = p->leaves[dst.as_u8[1]];
- if (ip4_fib_mtrie_leaf_is_terminal (l))
- return ip4_fib_mtrie_leaf_get_adj_index (l);
-
- p = get_next_ply_for_leaf (m, l);
- l = p->leaves[dst.as_u8[2]];
- if (ip4_fib_mtrie_leaf_is_terminal (l))
- return ip4_fib_mtrie_leaf_get_adj_index (l);
-
- p = get_next_ply_for_leaf (m, l);
- l = p->leaves[dst.as_u8[3]];
-
- ASSERT (ip4_fib_mtrie_leaf_is_terminal (l));
- return ip4_fib_mtrie_leaf_get_adj_index (l);
-}
-
typedef struct
{
ip4_address_t dst_address;
u32 dst_address_length;
u32 adj_index;
+ u32 cover_address_length;
+ u32 cover_adj_index;
} ip4_fib_mtrie_set_unset_leaf_args_t;
static void
@@ -184,7 +210,6 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
uword i;
ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
- ASSERT (!ip4_fib_mtrie_leaf_is_empty (new_leaf));
for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
{
@@ -205,7 +230,7 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
__sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf);
ASSERT (ply->leaves[i] == new_leaf);
ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
- ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf);
+ ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i);
}
}
}
@@ -219,7 +244,7 @@ set_leaf (ip4_fib_mtrie_t * m,
i32 n_dst_bits_next_plies;
u8 dst_byte;
- ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
+ ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
n_dst_bits_next_plies =
@@ -232,7 +257,7 @@ set_leaf (ip4_fib_mtrie_t * m,
{
uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
- n_dst_bits_this_ply = -n_dst_bits_next_plies;
+ n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
pow2_mask (n_dst_bits_this_ply)) == 0);
@@ -252,13 +277,16 @@ set_leaf (ip4_fib_mtrie_t * m,
if (old_leaf_is_terminal)
{
+ old_ply->n_non_empty_leafs -=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
old_ply->dst_address_bits_of_leaves[i] =
a->dst_address_length;
__sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
new_leaf);
ASSERT (old_ply->leaves[i] == new_leaf);
+
old_ply->n_non_empty_leafs +=
- ip4_fib_mtrie_leaf_is_empty (old_leaf);
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
ASSERT (old_ply->n_non_empty_leafs <=
ARRAY_LEN (old_ply->leaves));
}
@@ -283,14 +311,20 @@ set_leaf (ip4_fib_mtrie_t * m,
else
{
ip4_fib_mtrie_ply_t *old_ply, *new_ply;
+ u8 ply_base_len;
+ ply_base_len = 8 * (dst_address_byte_index + 1);
old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
old_leaf = old_ply->leaves[dst_byte];
if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
{
- new_leaf =
- ply_create (m, old_leaf,
- old_ply->dst_address_bits_of_leaves[dst_byte]);
+ old_ply->n_non_empty_leafs -=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
+
+ new_leaf = ply_create (m, old_leaf,
+ clib_max (old_ply->dst_address_bits_of_leaves
+ [dst_byte], ply_base_len),
+ ply_base_len);
new_ply = get_next_ply_for_leaf (m, new_leaf);
/* Refetch since ply_create may move pool. */
@@ -299,14 +333,11 @@ set_leaf (ip4_fib_mtrie_t * m,
__sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
new_leaf);
ASSERT (old_ply->leaves[dst_byte] == new_leaf);
- old_ply->dst_address_bits_of_leaves[dst_byte] = 0;
-
- old_ply->n_non_empty_leafs -=
- ip4_fib_mtrie_leaf_is_non_empty (old_leaf);
- ASSERT (old_ply->n_non_empty_leafs >= 0);
+ old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
/* Account for the ply we just created. */
old_ply->n_non_empty_leafs += 1;
+ ASSERT (old_ply->n_non_empty_leafs >= 0);
}
else
new_ply = get_next_ply_for_leaf (m, old_leaf);
@@ -325,7 +356,7 @@ unset_leaf (ip4_fib_mtrie_t * m,
i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
u8 dst_byte;
- ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
+ ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
n_dst_bits_next_plies =
@@ -351,12 +382,17 @@ unset_leaf (ip4_fib_mtrie_t * m,
&& unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
dst_address_byte_index + 1)))
{
- old_ply->leaves[i] = IP4_FIB_MTRIE_LEAF_EMPTY;
- old_ply->dst_address_bits_of_leaves[i] = 0;
+ old_ply->n_non_empty_leafs -=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+
+ old_ply->leaves[i] =
+ ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
+ old_ply->dst_address_bits_of_leaves[i] =
+ clib_max (old_ply->dst_address_bits_base,
+ a->cover_address_length);
- /* No matter what we just deleted a non-empty leaf. */
- ASSERT (!ip4_fib_mtrie_leaf_is_empty (old_leaf));
- old_ply->n_non_empty_leafs -= 1;
+ old_ply->n_non_empty_leafs +=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
ASSERT (old_ply->n_non_empty_leafs >= 0);
if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
@@ -365,6 +401,17 @@ unset_leaf (ip4_fib_mtrie_t * m,
/* Old ply was deleted. */
return 1;
}
+#if CLIB_DEBUG > 0
+ else if (dst_address_byte_index)
+ {
+ int ii, count = 0;
+ for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++)
+ {
+ count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii);
+ }
+ ASSERT (count);
+ }
+#endif
}
}
@@ -377,9 +424,7 @@ ip4_mtrie_init (ip4_fib_mtrie_t * m)
{
ip4_fib_mtrie_leaf_t root;
memset (m, 0, sizeof (m[0]));
- m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
- root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, /* dst_address_bits_of_leaves */
- 0);
+ root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, 0, 0);
ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0);
}
@@ -406,25 +451,21 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
if (!is_del)
{
- if (dst_address_length == 0)
- m->default_leaf = ip4_fib_mtrie_leaf_set_adj_index (adj_index);
- else
- set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
+ set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
}
else
{
- if (dst_address_length == 0)
- m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
+ ip4_main_t *im = &ip4_main;
- else
+ if (dst_address_length)
{
- ip4_main_t *im = &ip4_main;
- uword i;
+ word i;
- unset_leaf (m, &a, root_ply, 0);
-
- /* Find next less specific route and insert into mtrie. */
- for (i = dst_address_length - 1; i >= 1; i--)
+ /* If the ply was not deleted, then we need to fill the
+ * bucket just reset will the leaf from the less specfic
+ * cover.
+ * Find next less specific route and insert into mtrie. */
+ for (i = dst_address_length - 1; i >= 0; i--)
{
uword *p;
index_t lbi;
@@ -441,16 +482,21 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
if (INDEX_INVALID == lbi)
continue;
- a.dst_address = key;
- a.adj_index = lbi;
- a.dst_address_length = i;
+ a.cover_adj_index = lbi;
+ a.cover_address_length = i;
- set_leaf (m, &a, /* ply_index */ 0,
- /* dst_address_byte_index */ 0);
break;
}
}
}
+ else
+ {
+ a.cover_adj_index = 0;
+ a.cover_address_length = 0;
+ }
+
+ /* the top level ply is never removed, so we can ignore the return code */
+ unset_leaf (m, &a, root_ply, 0);
}
}
@@ -483,10 +529,8 @@ format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
{
ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
- if (ip4_fib_mtrie_leaf_is_empty (l))
- s = format (s, "miss");
- else if (ip4_fib_mtrie_leaf_is_terminal (l))
- s = format (s, "adj %d", ip4_fib_mtrie_leaf_get_adj_index (l));
+ if (ip4_fib_mtrie_leaf_is_terminal (l))
+ s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l));
else
s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
return s;
@@ -511,7 +555,7 @@ format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
{
ip4_fib_mtrie_leaf_t l = p->leaves[i];
- if (!ip4_fib_mtrie_leaf_is_empty (l))
+ if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
{
u32 a, ia_length;
ip4_address_t ia;
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
index c0afc2cf..128195d3 100644
--- a/src/vnet/ip/ip4_mtrie.h
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -52,67 +52,15 @@
typedef u32 ip4_fib_mtrie_leaf_t;
#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0)
-#define IP4_FIB_MTRIE_LEAF_ROOT (0 + 2*0)
-always_inline u32
-ip4_fib_mtrie_leaf_is_empty (ip4_fib_mtrie_leaf_t n)
-{
- return n == IP4_FIB_MTRIE_LEAF_EMPTY;
-}
-
-always_inline u32
-ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_leaf_t n)
-{
- return n != IP4_FIB_MTRIE_LEAF_EMPTY;
-}
-
-always_inline u32
-ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
-{
- return n & 1;
-}
-
-always_inline u32
-ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
-{
- ASSERT (ip4_fib_mtrie_leaf_is_terminal (n));
- return n >> 1;
-}
-
-always_inline ip4_fib_mtrie_leaf_t
-ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
-{
- ip4_fib_mtrie_leaf_t l;
- l = 1 + 2 * adj_index;
- ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
- return l;
-}
-
-always_inline u32
-ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
-{
- return (n & 1) == 0;
-}
-
-always_inline u32
-ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
-{
- ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
- return n >> 1;
-}
-
-always_inline ip4_fib_mtrie_leaf_t
-ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
-{
- ip4_fib_mtrie_leaf_t l;
- l = 0 + 2 * i;
- ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
- return l;
-}
-
-/* One ply of the 4 ply mtrie fib. */
+/**
+ * @brief One ply of the 4 ply mtrie fib.
+ */
typedef struct
{
+ /**
+ * The leaves/slots/buckets to be filed with leafs
+ */
union
{
ip4_fib_mtrie_leaf_t leaves[256];
@@ -122,14 +70,25 @@ typedef struct
#endif
};
- /* Prefix length for terminal leaves. */
+ /**
+ * Prefix length for leaves/ply.
+ */
u8 dst_address_bits_of_leaves[256];
- /* Number of non-empty leafs (whether terminal or not). */
+ /**
+ * Number of non-empty leafs (whether terminal or not).
+ */
i32 n_non_empty_leafs;
+ /**
+ * The length of the ply's coviering prefix. Also a measure of its depth
+ * If a leaf in a slot has a mask length longer than this then it is
+ * 'non-empty'. Otherwise it is the value of the cover.
+ */
+ i32 dst_address_bits_base;
+
/* Pad to cache line boundary. */
- u8 pad[CLIB_CACHE_LINE_BYTES - 1 * sizeof (i32)];
+ u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)];
}
ip4_fib_mtrie_ply_t;
@@ -140,9 +99,6 @@ typedef struct
{
/* Pool of plies. Index zero is root ply. */
ip4_fib_mtrie_ply_t *ply_pool;
-
- /* Special case leaf for default route 0.0.0.0/0. */
- ip4_fib_mtrie_leaf_t default_leaf;
} ip4_fib_mtrie_t;
void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m);
@@ -154,25 +110,50 @@ void ip4_fib_mtrie_add_del_route (struct ip4_fib_t *f,
u32 dst_address_length,
u32 adj_index, u32 is_del);
-/* Returns adjacency index. */
-u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst);
-
format_function_t format_ip4_fib_mtrie;
+always_inline u32
+ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
+{
+ return n & 1;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
+{
+ ASSERT (ip4_fib_mtrie_leaf_is_terminal (n));
+ return n >> 1;
+}
+
/* Lookup step. Processes 1 byte of 4 byte ip4 address. */
always_inline ip4_fib_mtrie_leaf_t
-ip4_fib_mtrie_lookup_step (ip4_fib_mtrie_t * m,
+ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m,
ip4_fib_mtrie_leaf_t current_leaf,
const ip4_address_t * dst_address,
u32 dst_address_byte_index)
{
- ip4_fib_mtrie_leaf_t next_leaf;
ip4_fib_mtrie_ply_t *ply;
uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf);
- ply = m->ply_pool + (current_is_terminal ? 0 : (current_leaf >> 1));
- next_leaf = ply->leaves[dst_address->as_u8[dst_address_byte_index]];
- next_leaf = current_is_terminal ? current_leaf : next_leaf;
+ if (!current_is_terminal)
+ {
+ ply = m->ply_pool + (current_leaf >> 1);
+ return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]);
+ }
+
+ return current_leaf;
+}
+
+/* Lookup step. Processes 1 byte of 4 byte ip4 address. */
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m,
+ const ip4_address_t * dst_address)
+{
+ ip4_fib_mtrie_leaf_t next_leaf;
+ ip4_fib_mtrie_ply_t *ply;
+
+ ply = m->ply_pool;
+ next_leaf = ply->leaves[dst_address->as_u8[0]];
return next_leaf;
}
diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c
index 3af32f2e..7c2b7be8 100644
--- a/src/vnet/ip/ip4_source_check.c
+++ b/src/vnet/ip/ip4_source_check.c
@@ -162,12 +162,8 @@ ip4_source_check_inline (vlib_main_t * vm,
mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;
- leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf0 =
- ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
- leaf1 =
- ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
leaf0 =
ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
@@ -250,10 +246,7 @@ ip4_source_check_inline (vlib_main_t * vm,
mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
- leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-
- leaf0 =
- ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
leaf0 =
ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index ecc3bd2c..c120f12c 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -1943,9 +1943,6 @@ ip6_rewrite_inline (vlib_main_t * vm,
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
- /* We should never rewrite a pkt using the MISS adjacency */
- ASSERT (adj_index0 && adj_index1);
-
ip0 = vlib_buffer_get_current (p0);
ip1 = vlib_buffer_get_current (p1);
@@ -2111,9 +2108,6 @@ ip6_rewrite_inline (vlib_main_t * vm,
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
- /* We should never rewrite a pkt using the MISS adjacency */
- ASSERT (adj_index0);
-
adj0 = ip_get_adjacency (lm, adj_index0);
ip0 = vlib_buffer_get_current (p0);
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index 2d8bd0c9..08018fd1 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -121,10 +121,6 @@ mpls_output_inline (vlib_main_t * vm,
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
- /* We should never rewrite a pkt using the MISS adjacency */
- ASSERT(adj_index0);
- ASSERT(adj_index1);
-
adj0 = adj_get(adj_index0);
adj1 = adj_get(adj_index1);
hdr0 = vlib_buffer_get_current (p0);
@@ -237,9 +233,6 @@ mpls_output_inline (vlib_main_t * vm,
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
- /* We should never rewrite a pkt using the MISS adjacency */
- ASSERT(adj_index0);
-
adj0 = adj_get(adj_index0);
hdr0 = vlib_buffer_get_current (p0);
@@ -431,7 +424,6 @@ mpls_adj_incomplete (vlib_main_t * vm,
n_left_to_next -= 1;
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
- ASSERT(adj_index0);
adj0 = adj_get(adj_index0);