aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/ip
diff options
context:
space:
mode:
authorNeale Ranns <nranns@cisco.com>2017-03-28 03:49:52 -0700
committerDamjan Marion <dmarion.lists@gmail.com>2017-04-01 16:48:31 +0000
commita3af337e06a79f7d1dacf42a319f241c907122fc (patch)
tree7f236558a16cf37298d57556ed8fa905a19b934b /src/vnet/ip
parent8db1de83ec540e01bb0577b726770bbb2338edcb (diff)
MTRIE Optimisations 2
1) 16-8-8 stride. Reduce the trie walk depth, traded against increased memory in the top PLY. 2) Separate the vector of protocol-independent (PI) fib_table_t from the vector of protocol-dependent (PD) FIBs. PD FIBs are large structures; we don't want to burn the memory for each PD type. 3) Go straight to the PD FIB in the data-path, thus avoiding an indirection through, e.g., a PLY pool. Change-Id: I800d1ed0b2049040d5da95213f3ed6b12bdd78b7 Signed-off-by: Neale Ranns <nranns@cisco.com>
Diffstat (limited to 'src/vnet/ip')
-rw-r--r--src/vnet/ip/ip4.h29
-rw-r--r--src/vnet/ip/ip4_forward.c21
-rw-r--r--src/vnet/ip/ip4_mtrie.c611
-rw-r--r--src/vnet/ip/ip4_mtrie.h106
-rw-r--r--src/vnet/ip/ip4_packet.h1
-rw-r--r--src/vnet/ip/ip4_source_check.c8
-rw-r--r--src/vnet/ip/ip6.h3
-rw-r--r--src/vnet/ip/ip_api.c43
8 files changed, 526 insertions, 296 deletions
diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
index 4e075d0f..71640def 100644
--- a/src/vnet/ip/ip4.h
+++ b/src/vnet/ip/ip4.h
@@ -40,34 +40,10 @@
#ifndef included_ip_ip4_h
#define included_ip_ip4_h
-#include <vnet/ip/ip4_mtrie.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/lookup.h>
#include <vnet/feature/feature.h>
-typedef struct ip4_fib_t
-{
- /* Hash table for each prefix length mapping. */
- uword *fib_entry_by_dst_address[33];
-
- /* Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. */
- ip4_fib_mtrie_t mtrie;
-
- /* Table ID (hash key) for this FIB. */
- u32 table_id;
-
- /* Index into FIB vector. */
- u32 index;
-
- /* flow hash configuration */
- flow_hash_config_t flow_hash_config;
-
- /* N-tuple classifier indices */
- u32 fwd_classify_table_index;
- u32 rev_classify_table_index;
-
-} ip4_fib_t;
-
typedef struct ip4_mfib_t
{
/* Hash table for each prefix length mapping. */
@@ -111,6 +87,9 @@ typedef struct ip4_main_t
/** Vector of FIBs. */
struct fib_table_t_ *fibs;
+ /** Vector of MTries. */
+ struct ip4_fib_t_ *v4_fibs;
+
/** Vector of MFIBs. */
struct mfib_table_t_ *mfibs;
@@ -284,8 +263,6 @@ serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main;
int vnet_set_ip4_flow_hash (u32 table_id,
flow_hash_config_t flow_hash_config);
-void ip4_mtrie_init (ip4_fib_mtrie_t * m);
-
int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
u32 table_index);
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index ef6dded5..ee1703e7 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -182,7 +182,6 @@ ip4_lookup_inline (vlib_main_t * vm,
mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
-
leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
@@ -196,14 +195,6 @@ ip4_lookup_inline (vlib_main_t * vm,
if (!lookup_for_responses_to_locally_received_packets)
{
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
- leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
- leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
- }
-
- if (!lookup_for_responses_to_locally_received_packets)
- {
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
@@ -364,9 +355,6 @@ ip4_lookup_inline (vlib_main_t * vm,
tcp0 = (void *) (ip0 + 1);
if (!lookup_for_responses_to_locally_received_packets)
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
-
- if (!lookup_for_responses_to_locally_received_packets)
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
if (!lookup_for_responses_to_locally_received_packets)
@@ -1622,11 +1610,6 @@ ip4_local_inline (vlib_main_t * vm,
good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
- leaf0 =
- ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
- leaf1 =
- ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
-
/* Verify UDP length. */
ip_len0 = clib_net_to_host_u16 (ip0->length);
ip_len1 = clib_net_to_host_u16 (ip1->length);
@@ -1812,9 +1795,6 @@ ip4_local_inline (vlib_main_t * vm,
/* Don't verify UDP checksum for packets with explicit zero checksum. */
good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
- leaf0 =
- ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
-
/* Verify UDP length. */
ip_len0 = clib_net_to_host_u16 (ip0->length);
udp_len0 = clib_net_to_host_u16 (udp0->length);
@@ -2913,7 +2893,6 @@ ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
index 317d8f10..adc95125 100644
--- a/src/vnet/ip/ip4_mtrie.c
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -38,10 +38,17 @@
*/
#include <vnet/ip/ip.h>
-#include <vnet/fib/fib_entry.h>
+#include <vnet/ip/ip4_mtrie.h>
+#include <vnet/fib/ip4_fib.h>
+
+
+/**
+ * Global pool of IPv4 8bit PLYs
+ */
+ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
always_inline u32
-ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_ply_t * p, u8 dst_byte)
+ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte)
{
/*
* It's 'non-empty' if the length of the leaf stored is greater than the
@@ -84,61 +91,83 @@ ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
return l;
}
-static void
-ply_init (ip4_fib_mtrie_ply_t * p,
- ip4_fib_mtrie_leaf_t init, u32 prefix_len, u32 ply_base_len)
-{
- /*
- * A leaf is 'empty' if it represents a leaf from the covering PLY
- * i.e. if the prefix length of the leaf is less than or equal to
- * the prefix length of the PLY
- */
- p->n_non_empty_leafs = (prefix_len > ply_base_len ?
- ARRAY_LEN (p->leaves) : 0);
- memset (p->dst_address_bits_of_leaves, prefix_len,
- sizeof (p->dst_address_bits_of_leaves));
- p->dst_address_bits_base = ply_base_len;
-
- /* Initialize leaves. */
-#ifdef CLIB_HAVE_VEC128
- {
- u32x4 *l, init_x4;
-
#ifndef __ALTIVEC__
- init_x4 = u32x4_splat (init);
+#define PLY_X4_SPLAT_INIT(init_x4, init) \
+ init_x4 = u32x4_splat (init);
#else
- {
- u32x4_union_t y;
- y.as_u32[0] = init;
- y.as_u32[1] = init;
- y.as_u32[2] = init;
- y.as_u32[3] = init;
- init_x4 = y.as_u32x4;
- }
+#define PLY_X4_SPLAT_INIT(init_x4, init) \
+{ \
+ u32x4_union_t y; \
+ y.as_u32[0] = init; \
+ y.as_u32[1] = init; \
+ y.as_u32[2] = init; \
+ y.as_u32[3] = init; \
+ init_x4 = y.as_u32x4; \
+}
#endif
- for (l = p->leaves_as_u32x4;
- l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4)
- {
- l[0] = init_x4;
- l[1] = init_x4;
- l[2] = init_x4;
- l[3] = init_x4;
- }
- }
+#ifdef CLIB_HAVE_VEC128
+#define PLY_INIT_LEAVES(p) \
+{ \
+ u32x4 *l, init_x4; \
+ \
+ PLY_X4_SPLAT_INIT(init_x4, init); \
+ for (l = p->leaves_as_u32x4; \
+ l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); \
+ l += 4) \
+ { \
+ l[0] = init_x4; \
+ l[1] = init_x4; \
+ l[2] = init_x4; \
+ l[3] = init_x4; \
+ } \
+}
#else
- {
- u32 *l;
-
- for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4)
- {
- l[0] = init;
- l[1] = init;
- l[2] = init;
- l[3] = init;
- }
- }
+#define PLY_INIT_LEAVES(p) \
+{ \
+ u32 *l; \
+ \
+ for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) \
+ { \
+ l[0] = init; \
+ l[1] = init; \
+ l[2] = init; \
+ l[3] = init; \
+ } \
+}
#endif
+
+#define PLY_INIT(p, init, prefix_len, ply_base_len) \
+{ \
+ /* \
+ * A leaf is 'empty' if it represents a leaf from the covering PLY \
+ * i.e. if the prefix length of the leaf is less than or equal to \
+ * the prefix length of the PLY \
+ */ \
+ p->n_non_empty_leafs = (prefix_len > ply_base_len ? \
+ ARRAY_LEN (p->leaves) : 0); \
+ memset (p->dst_address_bits_of_leaves, prefix_len, \
+ sizeof (p->dst_address_bits_of_leaves)); \
+ p->dst_address_bits_base = ply_base_len; \
+ \
+ /* Initialize leaves. */ \
+ PLY_INIT_LEAVES(p); \
+}
+
+static void
+ply_8_init (ip4_fib_mtrie_8_ply_t * p,
+ ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len)
+{
+ PLY_INIT (p, init, prefix_len, ply_base_len);
+}
+
+static void
+ply_16_init (ip4_fib_mtrie_16_ply_t * p,
+ ip4_fib_mtrie_leaf_t init, uword prefix_len)
+{
+ memset (p->dst_address_bits_of_leaves, prefix_len,
+ sizeof (p->dst_address_bits_of_leaves));
+ PLY_INIT_LEAVES (p);
}
static ip4_fib_mtrie_leaf_t
@@ -146,49 +175,43 @@ ply_create (ip4_fib_mtrie_t * m,
ip4_fib_mtrie_leaf_t init_leaf,
u32 leaf_prefix_len, u32 ply_base_len)
{
- ip4_fib_mtrie_ply_t *p;
+ ip4_fib_mtrie_8_ply_t *p;
/* Get cache aligned ply. */
- pool_get_aligned (m->ply_pool, p, sizeof (p[0]));
+ pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
- ply_init (p, init_leaf, leaf_prefix_len, ply_base_len);
- return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool);
+ ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
+ return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
}
-always_inline ip4_fib_mtrie_ply_t *
+always_inline ip4_fib_mtrie_8_ply_t *
get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
{
uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
- /* It better not be the root ply. */
- ASSERT (n != 0);
- return pool_elt_at_index (m->ply_pool, n);
+
+ return pool_elt_at_index (ip4_ply_pool, n);
}
-static void
-ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
+void
+ip4_mtrie_free (ip4_fib_mtrie_t * m)
{
- uword i, is_root;
-
- is_root = p - m->ply_pool == 0;
-
- for (i = 0; i < ARRAY_LEN (p->leaves); i++)
+ /* the root ply is embedded so there is nothing to do,
+ * the assumption being that the IP4 FIB table has emptied the trie
+ * before deletion.
+ */
+#if CLIB_DEBUG > 0
+ int i;
+ for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
{
- ip4_fib_mtrie_leaf_t l = p->leaves[i];
- if (ip4_fib_mtrie_leaf_is_next_ply (l))
- ply_free (m, get_next_ply_for_leaf (m, l));
+ ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
}
-
- if (is_root)
- ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0, 0);
- else
- pool_put (m->ply_pool, p);
+#endif
}
void
-ip4_fib_free (ip4_fib_mtrie_t * m)
+ip4_mtrie_init (ip4_fib_mtrie_t * m)
{
- ip4_fib_mtrie_ply_t *root_ply = pool_elt_at_index (m->ply_pool, 0);
- ply_free (m, root_ply);
+ ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0);
}
typedef struct
@@ -202,7 +225,7 @@ typedef struct
static void
set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
- ip4_fib_mtrie_ply_t * ply,
+ ip4_fib_mtrie_8_ply_t * ply,
ip4_fib_mtrie_leaf_t new_leaf,
uword new_leaf_dst_address_bits)
{
@@ -218,7 +241,8 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
/* Recurse into sub plies. */
if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
{
- ip4_fib_mtrie_ply_t *sub_ply = get_next_ply_for_leaf (m, old_leaf);
+ ip4_fib_mtrie_8_ply_t *sub_ply =
+ get_next_ply_for_leaf (m, old_leaf);
set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
new_leaf_dst_address_bits);
}
@@ -237,16 +261,20 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
static void
set_leaf (ip4_fib_mtrie_t * m,
- ip4_fib_mtrie_set_unset_leaf_args_t * a,
+ const ip4_fib_mtrie_set_unset_leaf_args_t * a,
u32 old_ply_index, u32 dst_address_byte_index)
{
ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
i32 n_dst_bits_next_plies;
u8 dst_byte;
+ ip4_fib_mtrie_8_ply_t *old_ply;
+
+ old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
+ /* how many bits of the destination address are in the next PLY */
n_dst_bits_next_plies =
a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
@@ -255,30 +283,36 @@ set_leaf (ip4_fib_mtrie_t * m,
/* Number of bits next plies <= 0 => insert leaves this ply. */
if (n_dst_bits_next_plies <= 0)
{
+ /* The mask length of the address to insert maps to this ply */
uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
+ /* The number of bits, and hence slots/buckets, we will fill */
n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
pow2_mask (n_dst_bits_this_ply)) == 0);
+ /* Starting at the value of the byte at this section of the v4 address
+ * fill the buckets/slots of the ply */
for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
{
- ip4_fib_mtrie_ply_t *old_ply, *new_ply;
-
- old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+ ip4_fib_mtrie_8_ply_t *new_ply;
old_leaf = old_ply->leaves[i];
old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
- /* Is leaf to be inserted more specific? */
if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
{
+ /* The new leaf is more or equally specific than the one currently
+ * occupying the slot */
new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
if (old_leaf_is_terminal)
{
+ /* The current leaf is terminal, we can replace it with
+ * the new one */
old_ply->n_non_empty_leafs -=
ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+
old_ply->dst_address_bits_of_leaves[i] =
a->dst_address_length;
__sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
@@ -292,32 +326,42 @@ set_leaf (ip4_fib_mtrie_t * m,
}
else
{
- /* Existing leaf points to another ply. We need to place new_leaf into all
- more specific slots. */
+ /* Existing leaf points to another ply. We need to place
+ * new_leaf into all more specific slots. */
new_ply = get_next_ply_for_leaf (m, old_leaf);
set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
a->dst_address_length);
}
}
-
else if (!old_leaf_is_terminal)
{
+ /* The current leaf is less specific and not terminal (i.e. a ply),
+ * recurse on down the trie */
new_ply = get_next_ply_for_leaf (m, old_leaf);
- set_leaf (m, a, new_ply - m->ply_pool,
+ set_leaf (m, a, new_ply - ip4_ply_pool,
dst_address_byte_index + 1);
}
+ /*
+ * else
+ * the route we are adding is less specific than the leaf currently
+ * occupying this slot. leave it there
+ */
}
}
else
{
- ip4_fib_mtrie_ply_t *old_ply, *new_ply;
+ /* The address to insert requires us to move down at a lower level of
+ * the trie - recurse on down */
+ ip4_fib_mtrie_8_ply_t *new_ply;
u8 ply_base_len;
ply_base_len = 8 * (dst_address_byte_index + 1);
- old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+
old_leaf = old_ply->leaves[dst_byte];
+
if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
{
+ /* There is a leaf occupying the slot. Replace it with a new ply */
old_ply->n_non_empty_leafs -=
ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
@@ -328,28 +372,143 @@ set_leaf (ip4_fib_mtrie_t * m,
new_ply = get_next_ply_for_leaf (m, new_leaf);
/* Refetch since ply_create may move pool. */
- old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+ old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
__sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
new_leaf);
ASSERT (old_ply->leaves[dst_byte] == new_leaf);
old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
- /* Account for the ply we just created. */
- old_ply->n_non_empty_leafs += 1;
+ old_ply->n_non_empty_leafs +=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
ASSERT (old_ply->n_non_empty_leafs >= 0);
}
else
new_ply = get_next_ply_for_leaf (m, old_leaf);
- set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1);
+ set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
+ }
+}
+
+static void
+set_root_leaf (ip4_fib_mtrie_t * m,
+ const ip4_fib_mtrie_set_unset_leaf_args_t * a)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
+ ip4_fib_mtrie_16_ply_t *old_ply;
+ i32 n_dst_bits_next_plies;
+ u16 dst_byte;
+
+ old_ply = &m->root_ply;
+
+ ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
+
+ /* how many bits of the destination address are in the next PLY */
+ n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
+
+ dst_byte = a->dst_address.as_u16[0];
+
+ /* Number of bits next plies <= 0 => insert leaves this ply. */
+ if (n_dst_bits_next_plies <= 0)
+ {
+ /* The mask length of the address to insert maps to this ply */
+ uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
+
+ /* The number of bits, and hence slots/buckets, we will fill */
+ n_dst_bits_this_ply = 16 - a->dst_address_length;
+ ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) &
+ pow2_mask (n_dst_bits_this_ply)) == 0);
+
+ /* Starting at the value of the byte at this section of the v4 address
+ * fill the buckets/slots of the ply */
+ for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
+ {
+ ip4_fib_mtrie_8_ply_t *new_ply;
+ u16 slot;
+
+ slot = clib_net_to_host_u16 (dst_byte);
+ slot += i;
+ slot = clib_host_to_net_u16 (slot);
+
+ old_leaf = old_ply->leaves[slot];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ if (a->dst_address_length >=
+ old_ply->dst_address_bits_of_leaves[slot])
+ {
+ /* The new leaf is more or equally specific than the one currently
+ * occupying the slot */
+ new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ if (old_leaf_is_terminal)
+ {
+ /* The current leaf is terminal, we can replace it with
+ * the new one */
+ old_ply->dst_address_bits_of_leaves[slot] =
+ a->dst_address_length;
+ __sync_val_compare_and_swap (&old_ply->leaves[slot],
+ old_leaf, new_leaf);
+ ASSERT (old_ply->leaves[slot] == new_leaf);
+ }
+ else
+ {
+ /* Existing leaf points to another ply. We need to place
+ * new_leaf into all more specific slots. */
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+ a->dst_address_length);
+ }
+ }
+ else if (!old_leaf_is_terminal)
+ {
+ /* The current leaf is less specific and not terminal (i.e. a ply),
+ * recurse on down the trie */
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_leaf (m, a, new_ply - ip4_ply_pool, 2);
+ }
+ /*
+ * else
+ * the route we are adding is less specific than the leaf currently
+ * occupying this slot. leave it there
+ */
+ }
+ }
+ else
+ {
+ /* The address to insert requires us to move down at a lower level of
+ * the trie - recurse on down */
+ ip4_fib_mtrie_8_ply_t *new_ply;
+ u8 ply_base_len;
+
+ ply_base_len = 16;
+
+ old_leaf = old_ply->leaves[dst_byte];
+
+ if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ {
+ /* There is a leaf occupying the slot. Replace it with a new ply */
+ new_leaf = ply_create (m, old_leaf,
+ clib_max (old_ply->dst_address_bits_of_leaves
+ [dst_byte], ply_base_len),
+ ply_base_len);
+ new_ply = get_next_ply_for_leaf (m, new_leaf);
+
+ __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
+ new_leaf);
+ ASSERT (old_ply->leaves[dst_byte] == new_leaf);
+ old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
+ }
+ else
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+
+ set_leaf (m, a, new_ply - ip4_ply_pool, 2);
}
}
static uword
unset_leaf (ip4_fib_mtrie_t * m,
- ip4_fib_mtrie_set_unset_leaf_args_t * a,
- ip4_fib_mtrie_ply_t * old_ply, u32 dst_address_byte_index)
+ const ip4_fib_mtrie_set_unset_leaf_args_t * a,
+ ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index)
{
ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
i32 n_dst_bits_next_plies;
@@ -397,7 +556,7 @@ unset_leaf (ip4_fib_mtrie_t * m,
ASSERT (old_ply->n_non_empty_leafs >= 0);
if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
{
- pool_put (m->ply_pool, old_ply);
+ pool_put (ip4_ply_pool, old_ply);
/* Old ply was deleted. */
return 1;
}
@@ -419,106 +578,120 @@ unset_leaf (ip4_fib_mtrie_t * m,
return 0;
}
-void
-ip4_mtrie_init (ip4_fib_mtrie_t * m)
+static void
+unset_root_leaf (ip4_fib_mtrie_t * m,
+ const ip4_fib_mtrie_set_unset_leaf_args_t * a)
{
- ip4_fib_mtrie_leaf_t root;
- memset (m, 0, sizeof (m[0]));
- root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, 0, 0);
- ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0);
-}
+ ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
+ i32 n_dst_bits_next_plies;
+ i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
+ u16 dst_byte;
+ ip4_fib_mtrie_16_ply_t *old_ply;
-void
-ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
- ip4_address_t dst_address,
- u32 dst_address_length,
- u32 adj_index, u32 is_del)
-{
- ip4_fib_mtrie_t *m = &fib->mtrie;
- ip4_fib_mtrie_ply_t *root_ply;
- ip4_fib_mtrie_set_unset_leaf_args_t a;
- ip4_main_t *im = &ip4_main;
+ ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
- ASSERT (m->ply_pool != 0);
+ old_ply = &m->root_ply;
+ n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
- root_ply = pool_elt_at_index (m->ply_pool, 0);
+ dst_byte = a->dst_address.as_u16[0];
- /* Honor dst_address_length. Fib masks are in network byte order */
- dst_address.as_u32 &= im->fib_masks[dst_address_length];
- a.dst_address = dst_address;
- a.dst_address_length = dst_address_length;
- a.adj_index = adj_index;
+ n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ?
+ (16 - a->dst_address_length) : 0);
- if (!is_del)
- {
- set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
- }
- else
+ del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ /* Starting at the value of the byte at this section of the v4 address
+ * fill the buckets/slots of the ply */
+ for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
{
- ip4_main_t *im = &ip4_main;
+ u16 slot;
+
+ slot = clib_net_to_host_u16 (dst_byte);
+ slot += i;
+ slot = clib_host_to_net_u16 (slot);
- if (dst_address_length)
+ old_leaf = old_ply->leaves[slot];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ if (old_leaf == del_leaf
+ || (!old_leaf_is_terminal
+ && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2)))
{
- word i;
+ old_ply->leaves[slot] =
+ ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
+ old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length;
+ }
+ }
+}
- /* If the ply was not deleted, then we need to fill the
- * bucket just reset will the leaf from the less specfic
- * cover.
- * Find next less specific route and insert into mtrie. */
- for (i = dst_address_length - 1; i >= 0; i--)
- {
- uword *p;
- index_t lbi;
- ip4_address_t key;
+void
+ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
+ const ip4_address_t * dst_address,
+ u32 dst_address_length, u32 adj_index)
+{
+ ip4_fib_mtrie_set_unset_leaf_args_t a;
+ ip4_main_t *im = &ip4_main;
- if (!fib->fib_entry_by_dst_address[i])
- continue;
+ /* Honor dst_address_length. Fib masks are in network byte order */
+ a.dst_address.as_u32 = (dst_address->as_u32 &
+ im->fib_masks[dst_address_length]);
+ a.dst_address_length = dst_address_length;
+ a.adj_index = adj_index;
- key.as_u32 = dst_address.as_u32 & im->fib_masks[i];
- p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32);
- if (p)
- {
- lbi = fib_entry_contribute_ip_forwarding (p[0])->dpoi_index;
- if (INDEX_INVALID == lbi)
- continue;
+ set_root_leaf (m, &a);
+}
- a.cover_adj_index = lbi;
- a.cover_address_length = i;
+void
+ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
+ const ip4_address_t * dst_address,
+ u32 dst_address_length,
+ u32 adj_index,
+ u32 cover_address_length, u32 cover_adj_index)
+{
+ ip4_fib_mtrie_set_unset_leaf_args_t a;
+ ip4_main_t *im = &ip4_main;
- break;
- }
- }
- }
- else
- {
- a.cover_adj_index = 0;
- a.cover_address_length = 0;
- }
+ /* Honor dst_address_length. Fib masks are in network byte order */
+ a.dst_address.as_u32 = (dst_address->as_u32 &
+ im->fib_masks[dst_address_length]);
+ a.dst_address_length = dst_address_length;
+ a.adj_index = adj_index;
+ a.cover_adj_index = cover_adj_index;
+ a.cover_address_length = cover_address_length;
- /* the top level ply is never removed, so we can ignore the return code */
- unset_leaf (m, &a, root_ply, 0);
- }
+ /* the top level ply is never removed */
+ unset_root_leaf (m, &a);
}
/* Returns number of bytes of memory used by mtrie. */
static uword
-mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
+mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p)
{
uword bytes, i;
- if (!p)
- {
- if (pool_is_free_index (m->ply_pool, 0))
- return 0;
- p = pool_elt_at_index (m->ply_pool, 0);
- }
-
bytes = sizeof (p[0]);
for (i = 0; i < ARRAY_LEN (p->leaves); i++)
{
ip4_fib_mtrie_leaf_t l = p->leaves[i];
if (ip4_fib_mtrie_leaf_is_next_ply (l))
- bytes += mtrie_memory_usage (m, get_next_ply_for_leaf (m, l));
+ bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
+ }
+
+ return bytes;
+}
+
+/* Returns number of bytes of memory used by mtrie. */
+static uword
+mtrie_memory_usage (ip4_fib_mtrie_t * m)
+{
+ uword bytes, i;
+
+ bytes = sizeof (*m);
+ for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
+ {
+ ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i];
+ if (ip4_fib_mtrie_leaf_is_next_ply (l))
+ bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
}
return bytes;
@@ -536,47 +709,49 @@ format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
return s;
}
+#define FORMAT_PLY(s, _p, _i, _base_address, _ply_max_len, _indent) \
+({ \
+ u32 a, ia_length; \
+ ip4_address_t ia; \
+ ip4_fib_mtrie_leaf_t _l = p->leaves[(_i)]; \
+ \
+ a = (_base_address) + ((_i) << (32 - (_ply_max_len))); \
+ ia.as_u32 = clib_host_to_net_u32 (a); \
+ ia_length = (_p)->dst_address_bits_of_leaves[(_i)]; \
+ s = format (s, "\n%U%20U %U", \
+ format_white_space, (_indent) + 2, \
+ format_ip4_address_and_length, &ia, ia_length, \
+ format_ip4_fib_mtrie_leaf, _l); \
+ \
+ if (ip4_fib_mtrie_leaf_is_next_ply (_l)) \
+ s = format (s, "\n%U%U", \
+ format_white_space, (_indent) + 2, \
+ format_ip4_fib_mtrie_ply, m, a, \
+ ip4_fib_mtrie_leaf_get_next_ply_index (_l)); \
+ s; \
+})
+
static u8 *
format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
{
ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
u32 base_address = va_arg (*va, u32);
u32 ply_index = va_arg (*va, u32);
- u32 dst_address_byte_index = va_arg (*va, u32);
- ip4_fib_mtrie_ply_t *p;
- uword i, indent;
+ ip4_fib_mtrie_8_ply_t *p;
+ uword indent;
+ int i;
- p = pool_elt_at_index (m->ply_pool, ply_index);
+ p = pool_elt_at_index (ip4_ply_pool, ply_index);
indent = format_get_indent (s);
- s =
- format (s, "ply index %d, %d non-empty leaves", ply_index,
- p->n_non_empty_leafs);
+ s = format (s, "ply index %d, %d non-empty leaves", ply_index,
+ p->n_non_empty_leafs);
+
for (i = 0; i < ARRAY_LEN (p->leaves); i++)
{
- ip4_fib_mtrie_leaf_t l = p->leaves[i];
-
if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
{
- u32 a, ia_length;
- ip4_address_t ia;
-
- a = base_address + (i << (24 - 8 * dst_address_byte_index));
- ia.as_u32 = clib_host_to_net_u32 (a);
- if (ip4_fib_mtrie_leaf_is_terminal (l))
- ia_length = p->dst_address_bits_of_leaves[i];
- else
- ia_length = 8 * (1 + dst_address_byte_index);
- s = format (s, "\n%U%20U %U",
- format_white_space, indent + 2,
- format_ip4_address_and_length, &ia, ia_length,
- format_ip4_fib_mtrie_leaf, l);
-
- if (ip4_fib_mtrie_leaf_is_next_ply (l))
- s = format (s, "\n%U%U",
- format_white_space, indent + 2,
- format_ip4_fib_mtrie_ply, m, a,
- ip4_fib_mtrie_leaf_get_next_ply_index (l),
- dst_address_byte_index + 1);
+ FORMAT_PLY (s, p, i, base_address,
+ p->dst_address_bits_base + 8, indent);
}
}
@@ -587,22 +762,44 @@ u8 *
format_ip4_fib_mtrie (u8 * s, va_list * va)
{
ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+ ip4_fib_mtrie_16_ply_t *p;
+ u32 base_address = 0;
+ int i;
- s = format (s, "%d plies, memory usage %U",
- pool_elts (m->ply_pool),
- format_memory_size, mtrie_memory_usage (m, 0));
+ s = format (s, "%d plies, memory usage %U\n",
+ pool_elts (ip4_ply_pool),
+ format_memory_size, mtrie_memory_usage (m));
+ s = format (s, "root-ply");
+ p = &m->root_ply;
- if (pool_elts (m->ply_pool) > 0)
+ for (i = 0; i < ARRAY_LEN (p->leaves); i++)
{
- ip4_address_t base_address;
- base_address.as_u32 = 0;
- s =
- format (s, "\n %U", format_ip4_fib_mtrie_ply, m, base_address, 0, 0);
+ u16 slot;
+
+ slot = clib_host_to_net_u16 (i);
+
+ if (p->dst_address_bits_of_leaves[slot] > 0)
+ {
+ FORMAT_PLY (s, p, slot, base_address, 16, 2);
+ }
}
return s;
}
+static clib_error_t *
+ip4_mtrie_module_init (vlib_main_t * vm)
+{
+ /* Burn one ply so index 0 is taken */
+ CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p);
+
+ pool_get (ip4_ply_pool, p);
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
index 128195d3..be262c2c 100644
--- a/src/vnet/ip/ip4_mtrie.h
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -47,16 +47,43 @@
/* ip4 fib leafs: 4 ply 8-8-8-8 mtrie.
1 + 2*adj_index for terminal leaves.
- 0 + 2*next_ply_index for non-terminals.
+ 0 + 2*next_ply_index for non-terminals, i.e. PLYs
1 => empty (adjacency index of zero is special miss adjacency). */
typedef u32 ip4_fib_mtrie_leaf_t;
#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0)
/**
+ * @brief the 16 way stride that is the top PLY of the mtrie
+ * We do not maintain the count of 'real' leaves in this PLY, since
+ * it is never removed. The FIB will destroy the mtrie and the ply once
+ * the FIB is destroyed.
+ */
+#define PLY_16_SIZE (1<<16)
+typedef struct ip4_fib_mtrie_16_ply_t_
+{
+ /**
+ * The leaves/slots/buckets to be filled with leafs
+ */
+ union
+ {
+ ip4_fib_mtrie_leaf_t leaves[PLY_16_SIZE];
+
+#ifdef CLIB_HAVE_VEC128
+ u32x4 leaves_as_u32x4[PLY_16_SIZE / 4];
+#endif
+ };
+
+ /**
+ * Prefix length for terminal leaves.
+ */
+ u8 dst_address_bits_of_leaves[PLY_16_SIZE];
+} ip4_fib_mtrie_16_ply_t;
+
+/**
* @brief One ply of the 4 ply mtrie fib.
*/
-typedef struct
+typedef struct ip4_fib_mtrie_8_ply_t_
{
/**
* The leaves/slots/buckets to be filed with leafs
@@ -90,34 +117,72 @@ typedef struct
/* Pad to cache line boundary. */
u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)];
}
-ip4_fib_mtrie_ply_t;
+ip4_fib_mtrie_8_ply_t;
-STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_ply_t) % CLIB_CACHE_LINE_BYTES,
+STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES,
"IP4 Mtrie ply cache line");
+/**
+ * @brief The multiway-TRIE.
+ * There is no data associated with the mtrie apart from the top PLY
+ */
typedef struct
{
- /* Pool of plies. Index zero is root ply. */
- ip4_fib_mtrie_ply_t *ply_pool;
+ /**
+ * Embed the PLY with the mtrie struct. This means that the Data-plane
+ * 'get me the mtrie' returns the first ply, and not an indirect 'pointer'
+ * to it. Therefore no cache-line misses in the data-path.
+ */
+ ip4_fib_mtrie_16_ply_t root_ply;
} ip4_fib_mtrie_t;
-void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m);
+/**
+ * @brief Initialise an mtrie
+ */
+void ip4_mtrie_init (ip4_fib_mtrie_t * m);
-struct ip4_fib_t;
+/**
+ * @brief Free an mtrie. It must be empty when freed.
+ */
+void ip4_mtrie_free (ip4_fib_mtrie_t * m);
-void ip4_fib_mtrie_add_del_route (struct ip4_fib_t *f,
- ip4_address_t dst_address,
- u32 dst_address_length,
- u32 adj_index, u32 is_del);
+/**
+ * @brief Add a route/entry to the mtrie
+ */
+void ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
+ const ip4_address_t * dst_address,
+ u32 dst_address_length, u32 adj_index);
+/**
+ * @brief Remove a route/entry from the mtrie
+ */
+void ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
+ const ip4_address_t * dst_address,
+ u32 dst_address_length,
+ u32 adj_index,
+ u32 cover_address_length, u32 cover_adj_index);
+/**
+ * @brief Format/display the contents of the mtrie
+ */
format_function_t format_ip4_fib_mtrie;
+/**
+ * @brief A global pool of 8bit stride plys
+ */
+extern ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
+
+/**
+ * Is the leaf terminal (i.e. an LB index) or non-terminal (i.e. a PLY index)
+ */
always_inline u32
ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
{
return n & 1;
}
+/**
+ * From the stored slot value extract the LB index value
+ */
always_inline u32
ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
{
@@ -125,35 +190,38 @@ ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
return n >> 1;
}
-/* Lookup step. Processes 1 byte of 4 byte ip4 address. */
+/**
+ * @brief Lookup step. Processes 1 byte of 4 byte ip4 address.
+ */
always_inline ip4_fib_mtrie_leaf_t
ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m,
ip4_fib_mtrie_leaf_t current_leaf,
const ip4_address_t * dst_address,
u32 dst_address_byte_index)
{
- ip4_fib_mtrie_ply_t *ply;
+ ip4_fib_mtrie_8_ply_t *ply;
+
uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf);
if (!current_is_terminal)
{
- ply = m->ply_pool + (current_leaf >> 1);
+ ply = ip4_ply_pool + (current_leaf >> 1);
return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]);
}
return current_leaf;
}
-/* Lookup step. Processes 1 byte of 4 byte ip4 address. */
+/**
+ * @brief Lookup step number 1. Processes 2 bytes of 4 byte ip4 address.
+ */
always_inline ip4_fib_mtrie_leaf_t
ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m,
const ip4_address_t * dst_address)
{
ip4_fib_mtrie_leaf_t next_leaf;
- ip4_fib_mtrie_ply_t *ply;
- ply = m->ply_pool;
- next_leaf = ply->leaves[dst_address->as_u8[0]];
+ next_leaf = m->root_ply.leaves[dst_address->as_u16[0]];
return next_leaf;
}
diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h
index b2c1fcd4..1ff9fbdb 100644
--- a/src/vnet/ip/ip4_packet.h
+++ b/src/vnet/ip/ip4_packet.h
@@ -52,6 +52,7 @@ typedef union
u32 data_u32;
/* Aliases. */
u8 as_u8[4];
+ u16 as_u16[2];
u32 as_u32;
} ip4_address_t;
diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c
index 7c2b7be8..6831066e 100644
--- a/src/vnet/ip/ip4_source_check.c
+++ b/src/vnet/ip/ip4_source_check.c
@@ -166,11 +166,6 @@ ip4_source_check_inline (vlib_main_t * vm,
leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
leaf0 =
- ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
- leaf1 =
- ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
-
- leaf0 =
ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
leaf1 =
ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
@@ -249,9 +244,6 @@ ip4_source_check_inline (vlib_main_t * vm,
leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
leaf0 =
- ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
-
- leaf0 =
ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
leaf0 =
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
index 8fa9a479..bf7ec7d5 100644
--- a/src/vnet/ip/ip6.h
+++ b/src/vnet/ip/ip6.h
@@ -153,6 +153,9 @@ typedef struct ip6_main_t
/* Pool of FIBs. */
struct fib_table_t_ *fibs;
+ /* Pool of V6 FIBs. */
+ ip6_fib_t *v6_fibs;
+
/** Vector of MFIBs. */
struct mfib_table_t_ *mfibs;
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index e3a1fee8..b9f1782b 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -240,6 +240,21 @@ send_ip_fib_details (vpe_api_main_t * am,
vl_msg_api_send_shmem (q, (u8 *) & mp);
}
+typedef struct vl_api_ip_fib_dump_walk_ctx_t_
+{
+ fib_node_index_t *feis;
+} vl_api_ip_fib_dump_walk_ctx_t;
+
+static int
+vl_api_ip_fib_dump_walk (fib_node_index_t fei, void *arg)
+{
+ vl_api_ip_fib_dump_walk_ctx_t *ctx = arg;
+
+ vec_add1 (ctx->feis, fei);
+
+ return (1);
+}
+
static void
vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
{
@@ -247,12 +262,13 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
unix_shared_memory_queue_t *q;
ip4_main_t *im = &ip4_main;
fib_table_t *fib_table;
- fib_node_index_t lfei, *lfeip, *lfeis = NULL;
- mpls_label_t key;
+ fib_node_index_t *lfeip;
fib_prefix_t pfx;
u32 fib_index;
fib_route_path_encode_t *api_rpaths;
- int i;
+ vl_api_ip_fib_dump_walk_ctx_t ctx = {
+ .feis = NULL,
+ };
q = vl_api_client_index_to_input_queue (mp->client_index);
if (q == 0)
@@ -261,19 +277,16 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
/* *INDENT-OFF* */
pool_foreach (fib_table, im->fibs,
({
- for (i = 0; i < ARRAY_LEN (fib_table->v4.fib_entry_by_dst_address); i++)
- {
- hash_foreach(key, lfei, fib_table->v4.fib_entry_by_dst_address[i],
- ({
- vec_add1(lfeis, lfei);
- }));
- }
+ fib_table_walk(fib_table->ft_index,
+ FIB_PROTOCOL_IP4,
+ vl_api_ip_fib_dump_walk,
+ &ctx);
}));
/* *INDENT-ON* */
- vec_sort_with_function (lfeis, fib_entry_cmp_for_sort);
+ vec_sort_with_function (ctx.feis, fib_entry_cmp_for_sort);
- vec_foreach (lfeip, lfeis)
+ vec_foreach (lfeip, ctx.feis)
{
fib_entry_get_prefix (*lfeip, &pfx);
fib_index = fib_entry_get_fib_index (*lfeip);
@@ -286,7 +299,7 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
vec_free (api_rpaths);
}
- vec_free (lfeis);
+ vec_free (ctx.feis);
}
static void
@@ -377,10 +390,10 @@ api_ip6_fib_table_get_all (unix_shared_memory_queue_t * q,
{
vpe_api_main_t *am = &vpe_api_main;
ip6_main_t *im6 = &ip6_main;
- ip6_fib_t *fib = &fib_table->v6;
fib_node_index_t *fib_entry_index;
api_ip6_fib_show_ctx_t ctx = {
- .fib_index = fib->index,.entries = NULL,
+ .fib_index = fib_table->ft_index,
+ .entries = NULL,
};
fib_route_path_encode_t *api_rpaths;
fib_prefix_t pfx;