From 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 19 Dec 2016 23:05:39 +0100 Subject: Reorganize source tree to use single autotools instance Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion --- src/vnet/ip/ip4_mtrie.c | 568 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 568 insertions(+) create mode 100644 src/vnet/ip/ip4_mtrie.c (limited to 'src/vnet/ip/ip4_mtrie.c') diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c new file mode 100644 index 00000000..6e3d0e80 --- /dev/null +++ b/src/vnet/ip/ip4_mtrie.c @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_fib.h: ip4 mtrie fib + * + * Copyright (c) 2012 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +static void +ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init, + uword prefix_len) +{ + p->n_non_empty_leafs = + ip4_fib_mtrie_leaf_is_empty (init) ? 0 : ARRAY_LEN (p->leaves); + memset (p->dst_address_bits_of_leaves, prefix_len, + sizeof (p->dst_address_bits_of_leaves)); + + /* Initialize leaves. 
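+     Where CLIB_HAVE_VEC128 is available the 256-entry leaf array is
+     filled with u32x4 vector stores, four vectors per loop iteration;
+     the scalar fallback below writes four leaves per iteration.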
*/ +#ifdef CLIB_HAVE_VEC128 + { + u32x4 *l, init_x4; + +#ifndef __ALTIVEC__ + init_x4 = u32x4_splat (init); +#else + { + u32x4_union_t y; + y.as_u32[0] = init; + y.as_u32[1] = init; + y.as_u32[2] = init; + y.as_u32[3] = init; + init_x4 = y.as_u32x4; + } +#endif + + for (l = p->leaves_as_u32x4; + l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4) + { + l[0] = init_x4; + l[1] = init_x4; + l[2] = init_x4; + l[3] = init_x4; + } + } +#else + { + u32 *l; + + for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) + { + l[0] = init; + l[1] = init; + l[2] = init; + l[3] = init; + } + } +#endif +} + +static ip4_fib_mtrie_leaf_t +ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf, + uword prefix_len) +{ + ip4_fib_mtrie_ply_t *p; + + /* Get cache aligned ply. */ + pool_get_aligned (m->ply_pool, p, sizeof (p[0])); + + ply_init (p, init_leaf, prefix_len); + return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool); +} + +always_inline ip4_fib_mtrie_ply_t * +get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l) +{ + uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l); + /* It better not be the root ply. */ + ASSERT (n != 0); + return pool_elt_at_index (m->ply_pool, n); +} + +static void +ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) +{ + uword i, is_root; + + is_root = p - m->ply_pool == 0; + + for (i = 0; i < ARRAY_LEN (p->leaves); i++) + { + ip4_fib_mtrie_leaf_t l = p->leaves[i]; + if (ip4_fib_mtrie_leaf_is_next_ply (l)) + ply_free (m, get_next_ply_for_leaf (m, l)); + } + + if (is_root) + ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0); + else + pool_put (m->ply_pool, p); +} + +void +ip4_fib_free (ip4_fib_mtrie_t * m) +{ + ip4_fib_mtrie_ply_t *root_ply = pool_elt_at_index (m->ply_pool, 0); + ply_free (m, root_ply); +} + +u32 +ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst) +{ + ip4_fib_mtrie_ply_t *p = pool_elt_at_index (m->ply_pool, 0); + ip4_fib_mtrie_leaf_t l; + + l = p->leaves[dst.as_u8[0]]; + if (ip4_fib_mtrie_leaf_is_terminal (l)) + return ip4_fib_mtrie_leaf_get_adj_index (l); + + p = get_next_ply_for_leaf (m, l); + l = p->leaves[dst.as_u8[1]]; + if (ip4_fib_mtrie_leaf_is_terminal (l)) + return ip4_fib_mtrie_leaf_get_adj_index (l); + + p = get_next_ply_for_leaf (m, l); + l = p->leaves[dst.as_u8[2]]; + if (ip4_fib_mtrie_leaf_is_terminal (l)) + return ip4_fib_mtrie_leaf_get_adj_index (l); + + p = get_next_ply_for_leaf (m, l); + l = p->leaves[dst.as_u8[3]]; + + ASSERT (ip4_fib_mtrie_leaf_is_terminal (l)); + return ip4_fib_mtrie_leaf_get_adj_index (l); +} + +typedef struct +{ + ip4_address_t dst_address; + u32 dst_address_length; + u32 adj_index; +} ip4_fib_mtrie_set_unset_leaf_args_t; + +static void +set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, + ip4_fib_mtrie_ply_t * ply, + ip4_fib_mtrie_leaf_t new_leaf, + uword new_leaf_dst_address_bits) +{ + ip4_fib_mtrie_leaf_t old_leaf; + uword i; + + ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf)); + ASSERT (!ip4_fib_mtrie_leaf_is_empty (new_leaf)); + + for (i = 0; i < ARRAY_LEN (ply->leaves); i++) + { + old_leaf = ply->leaves[i]; + + /* Recurse into sub plies. */ + if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf)) + { + ip4_fib_mtrie_ply_t *sub_ply = get_next_ply_for_leaf (m, old_leaf); + set_ply_with_more_specific_leaf (m, sub_ply, new_leaf, + new_leaf_dst_address_bits); + } + + /* Replace less specific terminal leaves with new leaf. 
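+         A slot is only overwritten when the prefix being inserted is at
+         least as long as the prefix currently recorded for that slot; the
+         compare-and-swap below makes the overwrite a single atomic store
+         as seen by concurrent lookups.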
*/ + else if (new_leaf_dst_address_bits >= + ply->dst_address_bits_of_leaves[i]) + { + __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf); + ASSERT (ply->leaves[i] == new_leaf); + ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits; + ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf); + } + } +} + +static void +set_leaf (ip4_fib_mtrie_t * m, + ip4_fib_mtrie_set_unset_leaf_args_t * a, + u32 old_ply_index, u32 dst_address_byte_index) +{ + ip4_fib_mtrie_leaf_t old_leaf, new_leaf; + i32 n_dst_bits_next_plies; + u8 dst_byte; + + ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32); + ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); + + n_dst_bits_next_plies = + a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1); + + dst_byte = a->dst_address.as_u8[dst_address_byte_index]; + + /* Number of bits next plies <= 0 => insert leaves this ply. */ + if (n_dst_bits_next_plies <= 0) + { + uword i, n_dst_bits_this_ply, old_leaf_is_terminal; + + n_dst_bits_this_ply = -n_dst_bits_next_plies; + ASSERT ((a->dst_address.as_u8[dst_address_byte_index] & + pow2_mask (n_dst_bits_this_ply)) == 0); + + for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++) + { + ip4_fib_mtrie_ply_t *old_ply, *new_ply; + + old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + + old_leaf = old_ply->leaves[i]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + /* Is leaf to be inserted more specific? */ + if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i]) + { + new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + if (old_leaf_is_terminal) + { + old_ply->dst_address_bits_of_leaves[i] = + a->dst_address_length; + __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf, + new_leaf); + ASSERT (old_ply->leaves[i] == new_leaf); + old_ply->n_non_empty_leafs += + ip4_fib_mtrie_leaf_is_empty (old_leaf); + ASSERT (old_ply->n_non_empty_leafs <= + ARRAY_LEN (old_ply->leaves)); + } + else + { + /* Existing leaf points to another ply. We need to place new_leaf into all + more specific slots. */ + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_ply_with_more_specific_leaf (m, new_ply, new_leaf, + a->dst_address_length); + } + } + + else if (!old_leaf_is_terminal) + { + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_leaf (m, a, new_ply - m->ply_pool, + dst_address_byte_index + 1); + } + } + } + else + { + ip4_fib_mtrie_ply_t *old_ply, *new_ply; + + old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + old_leaf = old_ply->leaves[dst_byte]; + if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) + { + new_leaf = + ply_create (m, old_leaf, + old_ply->dst_address_bits_of_leaves[dst_byte]); + new_ply = get_next_ply_for_leaf (m, new_leaf); + + /* Refetch since ply_create may move pool. */ + old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + + __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, + new_leaf); + ASSERT (old_ply->leaves[dst_byte] == new_leaf); + old_ply->dst_address_bits_of_leaves[dst_byte] = 0; + + old_ply->n_non_empty_leafs -= + ip4_fib_mtrie_leaf_is_non_empty (old_leaf); + ASSERT (old_ply->n_non_empty_leafs >= 0); + + /* Account for the ply we just created. 
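+             The slot now refers to a child ply rather than a terminal
+             leaf, and a ply reference always counts as non-empty.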
*/ + old_ply->n_non_empty_leafs += 1; + } + else + new_ply = get_next_ply_for_leaf (m, old_leaf); + + set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1); + } +} + +static uword +unset_leaf (ip4_fib_mtrie_t * m, + ip4_fib_mtrie_set_unset_leaf_args_t * a, + ip4_fib_mtrie_ply_t * old_ply, u32 dst_address_byte_index) +{ + ip4_fib_mtrie_leaf_t old_leaf, del_leaf; + i32 n_dst_bits_next_plies; + i32 i, n_dst_bits_this_ply, old_leaf_is_terminal; + u8 dst_byte; + + ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32); + ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); + + n_dst_bits_next_plies = + a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1); + + dst_byte = a->dst_address.as_u8[dst_address_byte_index]; + if (n_dst_bits_next_plies < 0) + dst_byte &= ~pow2_mask (-n_dst_bits_next_plies); + + n_dst_bits_this_ply = + n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0; + n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply); + + del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++) + { + old_leaf = old_ply->leaves[i]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + if (old_leaf == del_leaf + || (!old_leaf_is_terminal + && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), + dst_address_byte_index + 1))) + { + old_ply->leaves[i] = IP4_FIB_MTRIE_LEAF_EMPTY; + old_ply->dst_address_bits_of_leaves[i] = 0; + + /* No matter what we just deleted a non-empty leaf. */ + ASSERT (!ip4_fib_mtrie_leaf_is_empty (old_leaf)); + old_ply->n_non_empty_leafs -= 1; + + ASSERT (old_ply->n_non_empty_leafs >= 0); + if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0) + { + pool_put (m->ply_pool, old_ply); + /* Old ply was deleted. */ + return 1; + } + } + } + + /* Old ply was not deleted. */ + return 0; +} + +void +ip4_mtrie_init (ip4_fib_mtrie_t * m) +{ + ip4_fib_mtrie_leaf_t root; + memset (m, 0, sizeof (m[0])); + m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY; + root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, /* dst_address_bits_of_leaves */ + 0); + ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0); +} + +void +ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, + ip4_address_t dst_address, + u32 dst_address_length, + u32 adj_index, u32 is_del) +{ + ip4_fib_mtrie_t *m = &fib->mtrie; + ip4_fib_mtrie_ply_t *root_ply; + ip4_fib_mtrie_set_unset_leaf_args_t a; + ip4_main_t *im = &ip4_main; + + ASSERT (m->ply_pool != 0); + + root_ply = pool_elt_at_index (m->ply_pool, 0); + + /* Honor dst_address_length. Fib masks are in network byte order */ + dst_address.as_u32 &= im->fib_masks[dst_address_length]; + a.dst_address = dst_address; + a.dst_address_length = dst_address_length; + a.adj_index = adj_index; + + if (!is_del) + { + if (dst_address_length == 0) + m->default_leaf = ip4_fib_mtrie_leaf_set_adj_index (adj_index); + else + set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0); + } + else + { + if (dst_address_length == 0) + m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY; + + else + { + ip4_main_t *im = &ip4_main; + uword i; + + unset_leaf (m, &a, root_ply, 0); + + /* Find next less specific route and insert into mtrie. 
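+             Scan the per-prefix-length hash tables from the longest
+             remaining mask downwards; the first match is the covering
+             route, whose load-balance index is re-installed over the
+             slots just emptied.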
*/ + for (i = dst_address_length - 1; i >= 1; i--) + { + uword *p; + index_t lbi; + ip4_address_t key; + + if (!fib->fib_entry_by_dst_address[i]) + continue; + + key.as_u32 = dst_address.as_u32 & im->fib_masks[i]; + p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32); + if (p) + { + lbi = fib_entry_contribute_ip_forwarding (p[0])->dpoi_index; + if (INDEX_INVALID == lbi) + continue; + + a.dst_address = key; + a.adj_index = lbi; + a.dst_address_length = i; + + set_leaf (m, &a, /* ply_index */ 0, + /* dst_address_byte_index */ 0); + break; + } + } + } + } +} + +/* Returns number of bytes of memory used by mtrie. */ +static uword +mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) +{ + uword bytes, i; + + if (!p) + { + if (pool_is_free_index (m->ply_pool, 0)) + return 0; + p = pool_elt_at_index (m->ply_pool, 0); + } + + bytes = sizeof (p[0]); + for (i = 0; i < ARRAY_LEN (p->leaves); i++) + { + ip4_fib_mtrie_leaf_t l = p->leaves[i]; + if (ip4_fib_mtrie_leaf_is_next_ply (l)) + bytes += mtrie_memory_usage (m, get_next_ply_for_leaf (m, l)); + } + + return bytes; +} + +static u8 * +format_ip4_fib_mtrie_leaf (u8 * s, va_list * va) +{ + ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t); + + if (ip4_fib_mtrie_leaf_is_empty (l)) + s = format (s, "miss"); + else if (ip4_fib_mtrie_leaf_is_terminal (l)) + s = format (s, "adj %d", ip4_fib_mtrie_leaf_get_adj_index (l)); + else + s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l)); + return s; +} + +static u8 * +format_ip4_fib_mtrie_ply (u8 * s, va_list * va) +{ + ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *); + u32 base_address = va_arg (*va, u32); + u32 ply_index = va_arg (*va, u32); + u32 dst_address_byte_index = va_arg (*va, u32); + ip4_fib_mtrie_ply_t *p; + uword i, indent; + + p = pool_elt_at_index (m->ply_pool, ply_index); + indent = format_get_indent (s); + s = + format (s, "ply index %d, %d non-empty leaves", ply_index, + p->n_non_empty_leafs); + for (i = 0; i < ARRAY_LEN (p->leaves); i++) + { + ip4_fib_mtrie_leaf_t l = p->leaves[i]; + + if (!ip4_fib_mtrie_leaf_is_empty (l)) + { + u32 a, ia_length; + ip4_address_t ia; + + a = base_address + (i << (24 - 8 * dst_address_byte_index)); + ia.as_u32 = clib_host_to_net_u32 (a); + if (ip4_fib_mtrie_leaf_is_terminal (l)) + ia_length = p->dst_address_bits_of_leaves[i]; + else + ia_length = 8 * (1 + dst_address_byte_index); + s = format (s, "\n%U%20U %U", + format_white_space, indent + 2, + format_ip4_address_and_length, &ia, ia_length, + format_ip4_fib_mtrie_leaf, l); + + if (ip4_fib_mtrie_leaf_is_next_ply (l)) + s = format (s, "\n%U%U", + format_white_space, indent + 2, + format_ip4_fib_mtrie_ply, m, a, + ip4_fib_mtrie_leaf_get_next_ply_index (l), + dst_address_byte_index + 1); + } + } + + return s; +} + +u8 * +format_ip4_fib_mtrie (u8 * s, va_list * va) +{ + ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *); + + s = format (s, "%d plies, memory usage %U", + pool_elts (m->ply_pool), + format_memory_size, mtrie_memory_usage (m, 0)); + + if (pool_elts (m->ply_pool) > 0) + { + ip4_address_t base_address; + base_address.as_u32 = 0; + s = + format (s, "\n %U", format_ip4_fib_mtrie_ply, m, base_address, 0, 0); + } + + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 04a75e3230ab71248fc29a56b9f64bdaee0c17ac Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Thu, 23 Mar 2017 06:46:01 -0700 Subject: Mtrie optimisations 1 - make the default route 
non-special, i.e. like any other less specific route. Consequently, all buckets have a valid valid index of either a leaf or a ply. Checks for special indeices in the data-path can thus be removed. 2 - since all leaves are now 'real' i.e. they represent a real load-balance object, to tell if a ply slot is 'empty' requeirs chekcing that the prefix length of the leaf occupying the slot is slot than the minium value for that ply. 3 - when removing a leaf find the cover first, then recurse down the ply and replace the old leaf with the cover. This saves us a ply walk. Change-Id: Idd523019e8bb1b6ef527b1f5279a5e24bcf18332 Signed-off-by: Neale Ranns --- src/vnet/adj/adj.c | 12 --- src/vnet/adj/adj_l2.c | 3 - src/vnet/cop/ip4_whitelist.c | 15 +-- src/vnet/dpo/load_balance.c | 7 ++ src/vnet/dpo/lookup_dpo.c | 16 +--- src/vnet/fib/ip4_fib.c | 3 +- src/vnet/fib/ip4_fib.h | 6 +- src/vnet/ip/ip4_forward.c | 65 +++---------- src/vnet/ip/ip4_mtrie.c | 204 +++++++++++++++++++++++++---------------- src/vnet/ip/ip4_mtrie.h | 125 +++++++++++-------------- src/vnet/ip/ip4_source_check.c | 13 +-- src/vnet/ip/ip6_forward.c | 6 -- src/vnet/mpls/mpls_output.c | 8 -- 13 files changed, 208 insertions(+), 275 deletions(-) (limited to 'src/vnet/ip/ip4_mtrie.c') diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index 9a01e89d..c1d036a0 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -20,13 +20,6 @@ #include #include -/* - * Special Adj with index zero. we need to define this since the v4 mtrie - * assumes an index of 0 implies the ply is empty. therefore all 'real' - * adjs need a non-zero index. - */ -static ip_adjacency_t *special_v4_miss_adj_with_index_zero; - /* Adjacency packet/byte counters indexed by adjacency index. */ vlib_combined_counter_main_t adjacency_counters; @@ -426,11 +419,6 @@ adj_module_init (vlib_main_t * vm) adj_midchain_module_init(); adj_mcast_module_init(); - /* - * one special adj to reserve index 0 - */ - special_v4_miss_adj_with_index_zero = adj_alloc(FIB_PROTOCOL_IP4); - return (NULL); } diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c index fb64e505..f68e54e0 100644 --- a/src/vnet/adj/adj_l2.c +++ b/src/vnet/adj/adj_l2.c @@ -81,9 +81,6 @@ adj_l2_rewrite_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT(adj_index0); - adj0 = adj_get (adj_index0); /* Guess we are only writing on simple Ethernet header. 
*/ diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c index d5121e72..ccb9dc03 100644 --- a/src/vnet/cop/ip4_whitelist.c +++ b/src/vnet/cop/ip4_whitelist.c @@ -125,10 +125,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); @@ -167,10 +164,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, sizeof (c1[0])); mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie; - leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 0); + leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1); @@ -267,10 +261,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index e9fb5d9d..d5e98e4e 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -829,6 +829,13 @@ load_balance_module_init (void) { dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes); + /* + * Special LB with index zero. we need to define this since the v4 mtrie + * assumes an index of 0 implies the ply is empty. therefore all 'real' + * adjs need a non-zero index. + */ + load_balance_create(0, DPO_PROTO_IP4, 0); + load_balance_map_module_init(); } diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index 96fedd27..3726c8fe 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -205,19 +205,16 @@ ip4_src_fib_lookup_one (u32 src_fib_index0, const ip4_address_t * addr0, u32 * src_adj_index0) { - ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip4_fib_mtrie_leaf_t leaf0; ip4_fib_mtrie_t * mtrie0; mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie; - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); - /* Handle default route. */ - leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie0->default_leaf : leaf0); src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); } @@ -235,10 +232,8 @@ ip4_src_fib_lookup_two (u32 src_fib_index0, mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie; mtrie1 = &ip4_fib_get (src_fib_index1)->mtrie; - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0); + leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, addr1); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1); @@ -249,9 +244,6 @@ ip4_src_fib_lookup_two (u32 src_fib_index0, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3); - /* Handle default route. */ - leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1); src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1); } diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index e8211c80..a7915620 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -158,8 +158,9 @@ ip4_fib_table_destroy (ip4_fib_t *fib) /* * remove all the specials we added when the table was created. + * In reverse order so the default route is last. */ - for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++) + for (ii = ARRAY_LEN(ip4_specials) - 1; ii >= 0; ii--) { fib_prefix_t prefix = ip4_specials[ii].ift_prefix; diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h index a8dc68b5..243fd77f 100644 --- a/src/vnet/fib/ip4_fib.h +++ b/src/vnet/fib/ip4_fib.h @@ -133,15 +133,11 @@ ip4_fib_forwarding_lookup (u32 fib_index, mtrie = &ip4_fib_get(fib_index)->mtrie; - leaf = IP4_FIB_MTRIE_LEAF_ROOT; - leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 0); + leaf = ip4_fib_mtrie_lookup_step_one (mtrie, addr); leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1); leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2); leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3); - /* Handle default route. */ - leaf = (leaf == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie->default_leaf : leaf); - return (ip4_fib_mtrie_leaf_get_adj_index(leaf)); } diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index bbba4b70..60e15d41 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -186,12 +186,11 @@ ip4_lookup_inline (vlib_main_t * vm, mtrie2 = &ip4_fib_get (fib_index2)->mtrie; mtrie3 = &ip4_fib_get (fib_index3)->mtrie; - leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0); - leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0); - leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); + leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1); + leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2); + leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3); } tcp0 = (void *) (ip0 + 1); @@ -241,25 +240,13 @@ ip4_lookup_inline (vlib_main_t * vm, } else { - /* Handle default route. */ - leaf0 = - (leaf0 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - leaf1 = - (leaf1 == - IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie1->default_leaf : leaf1); - leaf2 = - (leaf2 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2); - leaf3 = - (leaf3 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3); lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2); lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3); } + ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3); lb0 = load_balance_get (lb_index0); lb1 = load_balance_get (lb_index1); lb2 = load_balance_get (lb_index2); @@ -384,9 +371,7 @@ ip4_lookup_inline (vlib_main_t * vm, { mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); } tcp0 = (void *) (ip0 + 1); @@ -408,12 +393,10 @@ ip4_lookup_inline (vlib_main_t * vm, else { /* Handle default route. */ - leaf0 = - (leaf0 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); } + ASSERT (lbi0); lb0 = load_balance_get (lbi0); /* Use flow hash to compute multipath adjacency. */ @@ -1623,12 +1606,8 @@ ip4_local_inline (vlib_main_t * vm, mtrie0 = &ip4_fib_get (fib_index0)->mtrie; mtrie1 = &ip4_fib_get (fib_index1)->mtrie; - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); - leaf1 = - ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); + leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); /* Treat IP frag packets as "experimental" protocol for now until support of IP frag reassembly is implemented */ @@ -1722,12 +1701,6 @@ ip4_local_inline (vlib_main_t * vm, ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3); - leaf0 = - (leaf0 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - leaf1 = - (leaf1 == - IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1); vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); @@ -1831,10 +1804,7 @@ ip4_local_inline (vlib_main_t * vm, mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); /* Treat IP frag packets as "experimental" protocol for now until support of IP frag reassembly is implemented */ @@ -1897,9 +1867,6 @@ ip4_local_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); - leaf0 = - (leaf0 == - IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie0->default_leaf : leaf0); lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0; @@ -2453,9 +2420,6 @@ ip4_rewrite_inline (vlib_main_t * vm, cpu_index, adj_index1); } - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT (adj_index0 && adj_index1); - ip0 = vlib_buffer_get_current (p0); ip1 = vlib_buffer_get_current (p1); @@ -2643,9 +2607,6 @@ ip4_rewrite_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT (adj_index0); - adj0 = ip_get_adjacency (lm, adj_index0); ip0 = vlib_buffer_get_current (p0); @@ -2967,15 +2928,11 @@ ip4_lookup_validate (ip4_address_t * a, u32 fib_index0) mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3); - /* Handle default route. */ - leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a); diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c index 6e3d0e80..317d8f10 100644 --- a/src/vnet/ip/ip4_mtrie.c +++ b/src/vnet/ip/ip4_mtrie.c @@ -40,14 +40,64 @@ #include #include +always_inline u32 +ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_ply_t * p, u8 dst_byte) +{ + /* + * It's 'non-empty' if the length of the leaf stored is greater than the + * length of a leaf in the covering ply. i.e. the leaf is more specific + * than it's would be cover in the covering ply + */ + if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base) + return (1); + return (0); +} + +always_inline ip4_fib_mtrie_leaf_t +ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index) +{ + ip4_fib_mtrie_leaf_t l; + l = 1 + 2 * adj_index; + ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index); + return l; +} + +always_inline u32 +ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n) +{ + return (n & 1) == 0; +} + +always_inline u32 +ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n) +{ + ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n)); + return n >> 1; +} + +always_inline ip4_fib_mtrie_leaf_t +ip4_fib_mtrie_leaf_set_next_ply_index (u32 i) +{ + ip4_fib_mtrie_leaf_t l; + l = 0 + 2 * i; + ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i); + return l; +} + static void -ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init, - uword prefix_len) +ply_init (ip4_fib_mtrie_ply_t * p, + ip4_fib_mtrie_leaf_t init, u32 prefix_len, u32 ply_base_len) { - p->n_non_empty_leafs = - ip4_fib_mtrie_leaf_is_empty (init) ? 0 : ARRAY_LEN (p->leaves); + /* + * A leaf is 'empty' if it represents a leaf from the covering PLY + * i.e. if the prefix length of the leaf is less than or equal to + * the prefix length of the PLY + */ + p->n_non_empty_leafs = (prefix_len > ply_base_len ? + ARRAY_LEN (p->leaves) : 0); memset (p->dst_address_bits_of_leaves, prefix_len, sizeof (p->dst_address_bits_of_leaves)); + p->dst_address_bits_base = ply_base_len; /* Initialize leaves. 
*/ #ifdef CLIB_HAVE_VEC128 @@ -92,15 +142,16 @@ ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init, } static ip4_fib_mtrie_leaf_t -ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf, - uword prefix_len) +ply_create (ip4_fib_mtrie_t * m, + ip4_fib_mtrie_leaf_t init_leaf, + u32 leaf_prefix_len, u32 ply_base_len) { ip4_fib_mtrie_ply_t *p; /* Get cache aligned ply. */ pool_get_aligned (m->ply_pool, p, sizeof (p[0])); - ply_init (p, init_leaf, prefix_len); + ply_init (p, init_leaf, leaf_prefix_len, ply_base_len); return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool); } @@ -128,7 +179,7 @@ ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) } if (is_root) - ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0); + ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0, 0); else pool_put (m->ply_pool, p); } @@ -140,38 +191,13 @@ ip4_fib_free (ip4_fib_mtrie_t * m) ply_free (m, root_ply); } -u32 -ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst) -{ - ip4_fib_mtrie_ply_t *p = pool_elt_at_index (m->ply_pool, 0); - ip4_fib_mtrie_leaf_t l; - - l = p->leaves[dst.as_u8[0]]; - if (ip4_fib_mtrie_leaf_is_terminal (l)) - return ip4_fib_mtrie_leaf_get_adj_index (l); - - p = get_next_ply_for_leaf (m, l); - l = p->leaves[dst.as_u8[1]]; - if (ip4_fib_mtrie_leaf_is_terminal (l)) - return ip4_fib_mtrie_leaf_get_adj_index (l); - - p = get_next_ply_for_leaf (m, l); - l = p->leaves[dst.as_u8[2]]; - if (ip4_fib_mtrie_leaf_is_terminal (l)) - return ip4_fib_mtrie_leaf_get_adj_index (l); - - p = get_next_ply_for_leaf (m, l); - l = p->leaves[dst.as_u8[3]]; - - ASSERT (ip4_fib_mtrie_leaf_is_terminal (l)); - return ip4_fib_mtrie_leaf_get_adj_index (l); -} - typedef struct { ip4_address_t dst_address; u32 dst_address_length; u32 adj_index; + u32 cover_address_length; + u32 cover_adj_index; } ip4_fib_mtrie_set_unset_leaf_args_t; static void @@ -184,7 +210,6 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, uword i; ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf)); - ASSERT (!ip4_fib_mtrie_leaf_is_empty (new_leaf)); for (i = 0; i < ARRAY_LEN (ply->leaves); i++) { @@ -205,7 +230,7 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf); ASSERT (ply->leaves[i] == new_leaf); ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits; - ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf); + ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i); } } } @@ -219,7 +244,7 @@ set_leaf (ip4_fib_mtrie_t * m, i32 n_dst_bits_next_plies; u8 dst_byte; - ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32); + ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); n_dst_bits_next_plies = @@ -232,7 +257,7 @@ set_leaf (ip4_fib_mtrie_t * m, { uword i, n_dst_bits_this_ply, old_leaf_is_terminal; - n_dst_bits_this_ply = -n_dst_bits_next_plies; + n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies); ASSERT ((a->dst_address.as_u8[dst_address_byte_index] & pow2_mask (n_dst_bits_this_ply)) == 0); @@ -252,13 +277,16 @@ set_leaf (ip4_fib_mtrie_t * m, if (old_leaf_is_terminal) { + old_ply->n_non_empty_leafs -= + ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); old_ply->dst_address_bits_of_leaves[i] = a->dst_address_length; __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf, new_leaf); ASSERT (old_ply->leaves[i] == new_leaf); + old_ply->n_non_empty_leafs += - ip4_fib_mtrie_leaf_is_empty 
(old_leaf); + ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); ASSERT (old_ply->n_non_empty_leafs <= ARRAY_LEN (old_ply->leaves)); } @@ -283,14 +311,20 @@ set_leaf (ip4_fib_mtrie_t * m, else { ip4_fib_mtrie_ply_t *old_ply, *new_ply; + u8 ply_base_len; + ply_base_len = 8 * (dst_address_byte_index + 1); old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); old_leaf = old_ply->leaves[dst_byte]; if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) { - new_leaf = - ply_create (m, old_leaf, - old_ply->dst_address_bits_of_leaves[dst_byte]); + old_ply->n_non_empty_leafs -= + ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte); + + new_leaf = ply_create (m, old_leaf, + clib_max (old_ply->dst_address_bits_of_leaves + [dst_byte], ply_base_len), + ply_base_len); new_ply = get_next_ply_for_leaf (m, new_leaf); /* Refetch since ply_create may move pool. */ @@ -299,14 +333,11 @@ set_leaf (ip4_fib_mtrie_t * m, __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, new_leaf); ASSERT (old_ply->leaves[dst_byte] == new_leaf); - old_ply->dst_address_bits_of_leaves[dst_byte] = 0; - - old_ply->n_non_empty_leafs -= - ip4_fib_mtrie_leaf_is_non_empty (old_leaf); - ASSERT (old_ply->n_non_empty_leafs >= 0); + old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len; /* Account for the ply we just created. */ old_ply->n_non_empty_leafs += 1; + ASSERT (old_ply->n_non_empty_leafs >= 0); } else new_ply = get_next_ply_for_leaf (m, old_leaf); @@ -325,7 +356,7 @@ unset_leaf (ip4_fib_mtrie_t * m, i32 i, n_dst_bits_this_ply, old_leaf_is_terminal; u8 dst_byte; - ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32); + ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); n_dst_bits_next_plies = @@ -351,12 +382,17 @@ unset_leaf (ip4_fib_mtrie_t * m, && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), dst_address_byte_index + 1))) { - old_ply->leaves[i] = IP4_FIB_MTRIE_LEAF_EMPTY; - old_ply->dst_address_bits_of_leaves[i] = 0; + old_ply->n_non_empty_leafs -= + ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); + + old_ply->leaves[i] = + ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index); + old_ply->dst_address_bits_of_leaves[i] = + clib_max (old_ply->dst_address_bits_base, + a->cover_address_length); - /* No matter what we just deleted a non-empty leaf. */ - ASSERT (!ip4_fib_mtrie_leaf_is_empty (old_leaf)); - old_ply->n_non_empty_leafs -= 1; + old_ply->n_non_empty_leafs += + ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); ASSERT (old_ply->n_non_empty_leafs >= 0); if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0) @@ -365,6 +401,17 @@ unset_leaf (ip4_fib_mtrie_t * m, /* Old ply was deleted. 
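             The caller sees the non-zero return, treats the slot as
             matched and overwrites the leaf that referenced the freed
             ply with the covering leaf.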
*/ return 1; } +#if CLIB_DEBUG > 0 + else if (dst_address_byte_index) + { + int ii, count = 0; + for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++) + { + count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii); + } + ASSERT (count); + } +#endif } } @@ -377,9 +424,7 @@ ip4_mtrie_init (ip4_fib_mtrie_t * m) { ip4_fib_mtrie_leaf_t root; memset (m, 0, sizeof (m[0])); - m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY; - root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, /* dst_address_bits_of_leaves */ - 0); + root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, 0, 0); ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0); } @@ -406,25 +451,21 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, if (!is_del) { - if (dst_address_length == 0) - m->default_leaf = ip4_fib_mtrie_leaf_set_adj_index (adj_index); - else - set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0); + set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0); } else { - if (dst_address_length == 0) - m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY; + ip4_main_t *im = &ip4_main; - else + if (dst_address_length) { - ip4_main_t *im = &ip4_main; - uword i; + word i; - unset_leaf (m, &a, root_ply, 0); - - /* Find next less specific route and insert into mtrie. */ - for (i = dst_address_length - 1; i >= 1; i--) + /* If the ply was not deleted, then we need to fill the + * bucket just reset will the leaf from the less specfic + * cover. + * Find next less specific route and insert into mtrie. */ + for (i = dst_address_length - 1; i >= 0; i--) { uword *p; index_t lbi; @@ -441,16 +482,21 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, if (INDEX_INVALID == lbi) continue; - a.dst_address = key; - a.adj_index = lbi; - a.dst_address_length = i; + a.cover_adj_index = lbi; + a.cover_address_length = i; - set_leaf (m, &a, /* ply_index */ 0, - /* dst_address_byte_index */ 0); break; } } } + else + { + a.cover_adj_index = 0; + a.cover_address_length = 0; + } + + /* the top level ply is never removed, so we can ignore the return code */ + unset_leaf (m, &a, root_ply, 0); } } @@ -483,10 +529,8 @@ format_ip4_fib_mtrie_leaf (u8 * s, va_list * va) { ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t); - if (ip4_fib_mtrie_leaf_is_empty (l)) - s = format (s, "miss"); - else if (ip4_fib_mtrie_leaf_is_terminal (l)) - s = format (s, "adj %d", ip4_fib_mtrie_leaf_get_adj_index (l)); + if (ip4_fib_mtrie_leaf_is_terminal (l)) + s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l)); else s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l)); return s; @@ -511,7 +555,7 @@ format_ip4_fib_mtrie_ply (u8 * s, va_list * va) { ip4_fib_mtrie_leaf_t l = p->leaves[i]; - if (!ip4_fib_mtrie_leaf_is_empty (l)) + if (ip4_fib_mtrie_leaf_is_non_empty (p, i)) { u32 a, ia_length; ip4_address_t ia; diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h index c0afc2cf..128195d3 100644 --- a/src/vnet/ip/ip4_mtrie.h +++ b/src/vnet/ip/ip4_mtrie.h @@ -52,67 +52,15 @@ typedef u32 ip4_fib_mtrie_leaf_t; #define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0) -#define IP4_FIB_MTRIE_LEAF_ROOT (0 + 2*0) -always_inline u32 -ip4_fib_mtrie_leaf_is_empty (ip4_fib_mtrie_leaf_t n) -{ - return n == IP4_FIB_MTRIE_LEAF_EMPTY; -} - -always_inline u32 -ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_leaf_t n) -{ - return n != IP4_FIB_MTRIE_LEAF_EMPTY; -} - -always_inline u32 -ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n) -{ - return n & 1; -} - -always_inline u32 -ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n) -{ - ASSERT 
(ip4_fib_mtrie_leaf_is_terminal (n)); - return n >> 1; -} - -always_inline ip4_fib_mtrie_leaf_t -ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index) -{ - ip4_fib_mtrie_leaf_t l; - l = 1 + 2 * adj_index; - ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index); - return l; -} - -always_inline u32 -ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n) -{ - return (n & 1) == 0; -} - -always_inline u32 -ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n) -{ - ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n)); - return n >> 1; -} - -always_inline ip4_fib_mtrie_leaf_t -ip4_fib_mtrie_leaf_set_next_ply_index (u32 i) -{ - ip4_fib_mtrie_leaf_t l; - l = 0 + 2 * i; - ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i); - return l; -} - -/* One ply of the 4 ply mtrie fib. */ +/** + * @brief One ply of the 4 ply mtrie fib. + */ typedef struct { + /** + * The leaves/slots/buckets to be filed with leafs + */ union { ip4_fib_mtrie_leaf_t leaves[256]; @@ -122,14 +70,25 @@ typedef struct #endif }; - /* Prefix length for terminal leaves. */ + /** + * Prefix length for leaves/ply. + */ u8 dst_address_bits_of_leaves[256]; - /* Number of non-empty leafs (whether terminal or not). */ + /** + * Number of non-empty leafs (whether terminal or not). + */ i32 n_non_empty_leafs; + /** + * The length of the ply's coviering prefix. Also a measure of its depth + * If a leaf in a slot has a mask length longer than this then it is + * 'non-empty'. Otherwise it is the value of the cover. + */ + i32 dst_address_bits_base; + /* Pad to cache line boundary. */ - u8 pad[CLIB_CACHE_LINE_BYTES - 1 * sizeof (i32)]; + u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)]; } ip4_fib_mtrie_ply_t; @@ -140,9 +99,6 @@ typedef struct { /* Pool of plies. Index zero is root ply. */ ip4_fib_mtrie_ply_t *ply_pool; - - /* Special case leaf for default route 0.0.0.0/0. */ - ip4_fib_mtrie_leaf_t default_leaf; } ip4_fib_mtrie_t; void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m); @@ -154,25 +110,50 @@ void ip4_fib_mtrie_add_del_route (struct ip4_fib_t *f, u32 dst_address_length, u32 adj_index, u32 is_del); -/* Returns adjacency index. */ -u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst); - format_function_t format_ip4_fib_mtrie; +always_inline u32 +ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n) +{ + return n & 1; +} + +always_inline u32 +ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n) +{ + ASSERT (ip4_fib_mtrie_leaf_is_terminal (n)); + return n >> 1; +} + /* Lookup step. Processes 1 byte of 4 byte ip4 address. */ always_inline ip4_fib_mtrie_leaf_t -ip4_fib_mtrie_lookup_step (ip4_fib_mtrie_t * m, +ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t current_leaf, const ip4_address_t * dst_address, u32 dst_address_byte_index) { - ip4_fib_mtrie_leaf_t next_leaf; ip4_fib_mtrie_ply_t *ply; uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf); - ply = m->ply_pool + (current_is_terminal ? 0 : (current_leaf >> 1)); - next_leaf = ply->leaves[dst_address->as_u8[dst_address_byte_index]]; - next_leaf = current_is_terminal ? current_leaf : next_leaf; + if (!current_is_terminal) + { + ply = m->ply_pool + (current_leaf >> 1); + return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]); + } + + return current_leaf; +} + +/* Lookup step. Processes 1 byte of 4 byte ip4 address. 
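+   This variant starts the walk at the root ply and consumes address
+   byte 0, replacing the old pattern of seeding a lookup with
+   IP4_FIB_MTRIE_LEAF_ROOT before the first ip4_fib_mtrie_lookup_step.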
*/ +always_inline ip4_fib_mtrie_leaf_t +ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address) +{ + ip4_fib_mtrie_leaf_t next_leaf; + ip4_fib_mtrie_ply_t *ply; + + ply = m->ply_pool; + next_leaf = ply->leaves[dst_address->as_u8[0]]; return next_leaf; } diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c index 3af32f2e..7c2b7be8 100644 --- a/src/vnet/ip/ip4_source_check.c +++ b/src/vnet/ip/ip4_source_check.c @@ -162,12 +162,8 @@ ip4_source_check_inline (vlib_main_t * vm, mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie; - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); - leaf1 = - ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); + leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); @@ -250,10 +246,7 @@ ip4_source_check_inline (vlib_main_t * vm, mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index ecc3bd2c..c120f12c 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -1943,9 +1943,6 @@ ip6_rewrite_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT (adj_index0 && adj_index1); - ip0 = vlib_buffer_get_current (p0); ip1 = vlib_buffer_get_current (p1); @@ -2111,9 +2108,6 @@ ip6_rewrite_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT (adj_index0); - adj0 = ip_get_adjacency (lm, adj_index0); ip0 = vlib_buffer_get_current (p0); diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c index 2d8bd0c9..08018fd1 100644 --- a/src/vnet/mpls/mpls_output.c +++ b/src/vnet/mpls/mpls_output.c @@ -121,10 +121,6 @@ mpls_output_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT(adj_index0); - ASSERT(adj_index1); - adj0 = adj_get(adj_index0); adj1 = adj_get(adj_index1); hdr0 = vlib_buffer_get_current (p0); @@ -237,9 +233,6 @@ mpls_output_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - /* We should never rewrite a pkt using the MISS adjacency */ - ASSERT(adj_index0); - adj0 = adj_get(adj_index0); hdr0 = vlib_buffer_get_current (p0); @@ -431,7 +424,6 @@ mpls_adj_incomplete (vlib_main_t * vm, n_left_to_next -= 1; adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - ASSERT(adj_index0); adj0 = adj_get(adj_index0); -- cgit 1.2.3-korg From a3af337e06a79f7d1dacf42a319f241c907122fc Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 28 Mar 2017 03:49:52 -0700 Subject: MTRIE Optimisations 2 1) 16-8-8 stride. Reduce trie depth walk traded with increased memory in the top PLY. 
2) separate the vector of protocol-independent (PI) fib_table_t with the vector of protocol dependent (PD) FIBs. PD FIBs are large structures, we don't want to burn the memory for ech PD type 3) Go straight to the PD FIB in the data-path thus avoiding an indirection through, e.g., a PLY pool. Change-Id: I800d1ed0b2049040d5da95213f3ed6b12bdd78b7 Signed-off-by: Neale Ranns --- src/vnet/cop/ip4_whitelist.c | 9 - src/vnet/dpo/load_balance.c | 6 +- src/vnet/dpo/lookup_dpo.c | 4 - src/vnet/fib/fib.c | 2 + src/vnet/fib/fib_entry.c | 4 +- src/vnet/fib/fib_path.c | 1 + src/vnet/fib/fib_table.c | 24 +- src/vnet/fib/fib_table.h | 12 - src/vnet/fib/fib_test.c | 6 +- src/vnet/fib/ip4_fib.c | 60 +++- src/vnet/fib/ip4_fib.h | 35 ++- src/vnet/fib/ip6_fib.c | 18 +- src/vnet/fib/ip6_fib.h | 2 +- src/vnet/fib/mpls_fib.c | 21 +- src/vnet/fib/mpls_fib.h | 28 +- src/vnet/ip/ip4.h | 29 +- src/vnet/ip/ip4_forward.c | 21 -- src/vnet/ip/ip4_mtrie.c | 611 +++++++++++++++++++++++++++-------------- src/vnet/ip/ip4_mtrie.h | 106 +++++-- src/vnet/ip/ip4_packet.h | 1 + src/vnet/ip/ip4_source_check.c | 8 - src/vnet/ip/ip6.h | 3 + src/vnet/ip/ip_api.c | 43 ++- src/vnet/mpls/interface.c | 1 + src/vnet/mpls/mpls.h | 26 +- src/vnet/mpls/mpls_api.c | 38 ++- src/vpp/api/api.c | 6 +- src/vpp/stats/stats.c | 8 +- 28 files changed, 720 insertions(+), 413 deletions(-) (limited to 'src/vnet/ip/ip4_mtrie.c') diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c index ccb9dc03..6ef3d7d7 100644 --- a/src/vnet/cop/ip4_whitelist.c +++ b/src/vnet/cop/ip4_whitelist.c @@ -127,9 +127,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); @@ -166,9 +163,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2); @@ -263,9 +257,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index d5e98e4e..6b0eda0e 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -827,14 +827,18 @@ const static char* const * const load_balance_nodes[DPO_PROTO_NUM] = void load_balance_module_init (void) { + index_t lbi; + dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes); /* * Special LB with index zero. we need to define this since the v4 mtrie * assumes an index of 0 implies the ply is empty. therefore all 'real' * adjs need a non-zero index. + * This should never be used, but just in case, stack it on a drop. 
*/ - load_balance_create(0, DPO_PROTO_IP4, 0); + lbi = load_balance_create(1, DPO_PROTO_IP4, 0); + load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4)); load_balance_map_module_init(); } diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index 3726c8fe..e94e871c 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -211,7 +211,6 @@ ip4_src_fib_lookup_one (u32 src_fib_index0, mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie; leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); @@ -235,9 +234,6 @@ ip4_src_fib_lookup_two (u32 src_fib_index0, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0); leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, addr1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2); diff --git a/src/vnet/fib/fib.c b/src/vnet/fib/fib.c index 413f93e8..b430e113 100644 --- a/src/vnet/fib/fib.c +++ b/src/vnet/fib/fib.c @@ -28,6 +28,8 @@ fib_module_init (vlib_main_t * vm) return (error); if ((error = vlib_call_init_function (vm, adj_module_init))) return (error); + if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init))) + return (error); fib_entry_module_init(); fib_entry_src_module_init(); diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index 25005e11..6ac5461d 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -924,10 +924,10 @@ fib_entry_path_remove (fib_node_index_t fib_entry_index, /* * no more sources left. this entry is toast. */ - fib_entry_src_action_uninstall(fib_entry); fib_entry = fib_entry_post_flag_update_actions(fib_entry, source, bflags); + fib_entry_src_action_uninstall(fib_entry); return (FIB_ENTRY_SRC_FLAG_NONE); } @@ -1014,10 +1014,10 @@ fib_entry_special_remove (fib_node_index_t fib_entry_index, /* * no more sources left. this entry is toast. 
*/ - fib_entry_src_action_uninstall(fib_entry); fib_entry = fib_entry_post_flag_update_actions(fib_entry, source, bflags); + fib_entry_src_action_uninstall(fib_entry); return (FIB_ENTRY_SRC_FLAG_NONE); } diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index 3ed309f3..928a9d43 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -32,6 +32,7 @@ #include #include #include +#include /** * Enurmeration of path types diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 7818d02e..6c3162e7 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -47,7 +47,7 @@ fib_table_lookup_i (fib_table_t *fib_table, switch (prefix->fp_proto) { case FIB_PROTOCOL_IP4: - return (ip4_fib_table_lookup(&fib_table->v4, + return (ip4_fib_table_lookup(ip4_fib_get(fib_table->ft_index), &prefix->fp_addr.ip4, prefix->fp_len)); case FIB_PROTOCOL_IP6: @@ -55,7 +55,7 @@ fib_table_lookup_i (fib_table_t *fib_table, &prefix->fp_addr.ip6, prefix->fp_len)); case FIB_PROTOCOL_MPLS: - return (mpls_fib_table_lookup(&fib_table->mpls, + return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index), prefix->fp_label, prefix->fp_eos)); } @@ -76,7 +76,7 @@ fib_table_lookup_exact_match_i (const fib_table_t *fib_table, switch (prefix->fp_proto) { case FIB_PROTOCOL_IP4: - return (ip4_fib_table_lookup_exact_match(&fib_table->v4, + return (ip4_fib_table_lookup_exact_match(ip4_fib_get(fib_table->ft_index), &prefix->fp_addr.ip4, prefix->fp_len)); case FIB_PROTOCOL_IP6: @@ -84,7 +84,7 @@ fib_table_lookup_exact_match_i (const fib_table_t *fib_table, &prefix->fp_addr.ip6, prefix->fp_len)); case FIB_PROTOCOL_MPLS: - return (mpls_fib_table_lookup(&fib_table->mpls, + return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index), prefix->fp_label, prefix->fp_eos)); } @@ -148,7 +148,7 @@ fib_table_entry_remove (fib_table_t *fib_table, switch (prefix->fp_proto) { case FIB_PROTOCOL_IP4: - ip4_fib_table_entry_remove(&fib_table->v4, + ip4_fib_table_entry_remove(ip4_fib_get(fib_table->ft_index), &prefix->fp_addr.ip4, prefix->fp_len); break; @@ -158,7 +158,7 @@ fib_table_entry_remove (fib_table_t *fib_table, prefix->fp_len); break; case FIB_PROTOCOL_MPLS: - mpls_fib_table_entry_remove(&fib_table->mpls, + mpls_fib_table_entry_remove(mpls_fib_get(fib_table->ft_index), prefix->fp_label, prefix->fp_eos); break; @@ -208,7 +208,7 @@ fib_table_entry_insert (fib_table_t *fib_table, switch (prefix->fp_proto) { case FIB_PROTOCOL_IP4: - ip4_fib_table_entry_insert(&fib_table->v4, + ip4_fib_table_entry_insert(ip4_fib_get(fib_table->ft_index), &prefix->fp_addr.ip4, prefix->fp_len, fib_entry_index); @@ -220,7 +220,7 @@ fib_table_entry_insert (fib_table_t *fib_table, fib_entry_index); break; case FIB_PROTOCOL_MPLS: - mpls_fib_table_entry_insert(&fib_table->mpls, + mpls_fib_table_entry_insert(mpls_fib_get(fib_table->ft_index), prefix->fp_label, prefix->fp_eos, fib_entry_index); @@ -270,7 +270,9 @@ fib_table_fwding_dpo_remove (u32 fib_index, return (ip4_fib_table_fwding_dpo_remove(ip4_fib_get(fib_index), &prefix->fp_addr.ip4, prefix->fp_len, - dpo)); + dpo, + fib_table_get_less_specific(fib_index, + prefix))); case FIB_PROTOCOL_IP6: return (ip6_fib_table_fwding_dpo_remove(fib_index, &prefix->fp_addr.ip6, @@ -1034,13 +1036,13 @@ fib_table_destroy (fib_table_t *fib_table) switch (fib_table->ft_proto) { case FIB_PROTOCOL_IP4: - ip4_fib_table_destroy(&fib_table->v4); + ip4_fib_table_destroy(fib_table->ft_index); break; case FIB_PROTOCOL_IP6: ip6_fib_table_destroy(fib_table->ft_index); break; case 
FIB_PROTOCOL_MPLS: - mpls_fib_table_destroy(&fib_table->mpls); + mpls_fib_table_destroy(fib_table->ft_index); break; } } diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h index e7e66acb..b310aea6 100644 --- a/src/vnet/fib/fib_table.h +++ b/src/vnet/fib/fib_table.h @@ -28,18 +28,6 @@ */ typedef struct fib_table_t_ { - /** - * A union of the protocol specific FIBs that provide the - * underlying LPM mechanism. - * This element is first in the struct so that it is in the - * first cache line. - */ - union { - ip4_fib_t v4; - ip6_fib_t v6; - mpls_fib_t mpls; - }; - /** * Which protocol this table serves. Used to switch on the union above. */ diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index 1a9cce24..92141ddf 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -40,8 +40,6 @@ fformat(stderr, "FAIL:%d: " _comment "\n", \ __LINE__, ##_args); \ } else { \ - fformat(stderr, "PASS:%d: " _comment "\n", \ - __LINE__, ##_args); \ } \ _evald; \ }) @@ -5727,7 +5725,7 @@ fib_test_label (void) &a_o_10_10_11_1, &adj_o_10_10_11_2), "1.1.1.1/32 LB 2 buckets via: " - "adj over 10.10.11.1", + "adj over 10.10.11.1, " "adj-v4 over 10.10.11.2"); fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, @@ -5738,7 +5736,7 @@ fib_test_label (void) &a_o_10_10_11_1, &adj_o_10_10_11_2), "24001/eos LB 2 buckets via: " - "adj over 10.10.11.1", + "adj over 10.10.11.1, " "adj-v4 over 10.10.11.2"); fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index a7915620..98d4e52f 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -104,29 +104,35 @@ static u32 ip4_create_fib_with_table_id (u32 table_id) { fib_table_t *fib_table; + ip4_fib_t *v4_fib; pool_get_aligned(ip4_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); memset(fib_table, 0, sizeof(*fib_table)); + pool_get_aligned(ip4_main.v4_fibs, v4_fib, CLIB_CACHE_LINE_BYTES); + + ASSERT((fib_table - ip4_main.fibs) == + (v4_fib - ip4_main.v4_fibs)); + fib_table->ft_proto = FIB_PROTOCOL_IP4; fib_table->ft_index = - fib_table->v4.index = + v4_fib->index = (fib_table - ip4_main.fibs); hash_set (ip4_main.fib_index_by_table_id, table_id, fib_table->ft_index); fib_table->ft_table_id = - fib_table->v4.table_id = + v4_fib->table_id = table_id; fib_table->ft_flow_hash_config = - fib_table->v4.flow_hash_config = + v4_fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; - fib_table->v4.fwd_classify_table_index = ~0; - fib_table->v4.rev_classify_table_index = ~0; + v4_fib->fwd_classify_table_index = ~0; + v4_fib->rev_classify_table_index = ~0; fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4); - ip4_mtrie_init(&fib_table->v4.mtrie); + ip4_mtrie_init(&v4_fib->mtrie); /* * add the special entries into the new FIB @@ -151,9 +157,10 @@ ip4_create_fib_with_table_id (u32 table_id) } void -ip4_fib_table_destroy (ip4_fib_t *fib) +ip4_fib_table_destroy (u32 fib_index) { - fib_table_t *fib_table = (fib_table_t*)fib; + fib_table_t *fib_table = pool_elt_at_index(ip4_main.fibs, fib_index); + ip4_fib_t *v4_fib = pool_elt_at_index(ip4_main.v4_fibs, fib_index); int ii; /* @@ -185,6 +192,10 @@ ip4_fib_table_destroy (ip4_fib_t *fib) { hash_unset (ip4_main.fib_index_by_table_id, fib_table->ft_table_id); } + + ip4_mtrie_free(&v4_fib->mtrie); + + pool_put(ip4_main.v4_fibs, v4_fib); pool_put(ip4_main.fibs, fib_table); } @@ -367,16 +378,33 @@ ip4_fib_table_fwding_dpo_update (ip4_fib_t *fib, u32 len, const dpo_id_t *dpo) { - ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 0); // ADD + 
ip4_fib_mtrie_route_add(&fib->mtrie, addr, len, dpo->dpoi_index); } void ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib, const ip4_address_t *addr, u32 len, - const dpo_id_t *dpo) + const dpo_id_t *dpo, + u32 cover_index) { - ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 1); // DELETE + fib_prefix_t cover_prefix = { + .fp_len = 0, + }; + const dpo_id_t *cover_dpo; + + /* + * We need to pass the MTRIE the LB index and address length of the + * covering prefix, so it can fill the plys with the correct replacement + * for the entry being removed + */ + fib_entry_get_prefix(cover_index, &cover_prefix); + cover_dpo = fib_entry_contribute_ip_forwarding(cover_index); + + ip4_fib_mtrie_route_del(&fib->mtrie, + addr, len, dpo->dpoi_index, + cover_prefix.fp_len, + cover_dpo->dpoi_index); } void @@ -498,7 +526,7 @@ ip4_show_fib (vlib_main_t * vm, pool_foreach (fib_table, im4->fibs, ({ - ip4_fib_t *fib = &fib_table->v4; + ip4_fib_t *fib = pool_elt_at_index(im4->v4_fibs, fib_table->ft_index); if (table_id >= 0 && table_id != (int)fib->table_id) continue; @@ -523,6 +551,11 @@ ip4_show_fib (vlib_main_t * vm, } continue; } + if (mtrie) + { + vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie); + continue; + } if (!matching) { @@ -532,9 +565,6 @@ ip4_show_fib (vlib_main_t * vm, { ip4_fib_table_show_one(fib, vm, &matching_address, matching_mask); } - - if (mtrie) - vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie); })); return 0; diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h index 243fd77f..4cf9e58a 100644 --- a/src/vnet/fib/ip4_fib.h +++ b/src/vnet/fib/ip4_fib.h @@ -34,6 +34,33 @@ #include #include #include +#include + +typedef struct ip4_fib_t_ +{ + /** + * Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. + * First member so it's in the first cacheline. + */ + ip4_fib_mtrie_t mtrie; + + /* Hash table for each prefix length mapping. */ + uword *fib_entry_by_dst_address[33]; + + /* Table ID (hash key) for this FIB. */ + u32 table_id; + + /* Index into FIB vector. 
*/ + u32 index; + + /* flow hash configuration */ + flow_hash_config_t flow_hash_config; + + /* N-tuple classifier indices */ + u32 fwd_classify_table_index; + u32 rev_classify_table_index; + +} ip4_fib_t; extern fib_node_index_t ip4_fib_table_lookup(const ip4_fib_t *fib, const ip4_address_t *addr, @@ -50,7 +77,7 @@ extern void ip4_fib_table_entry_insert(ip4_fib_t *fib, const ip4_address_t *addr, u32 len, fib_node_index_t fib_entry_index); -extern void ip4_fib_table_destroy(ip4_fib_t *fib); +extern void ip4_fib_table_destroy(u32 fib_index); extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib, const ip4_address_t *addr, @@ -60,7 +87,8 @@ extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib, extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib, const ip4_address_t *addr, u32 len, - const dpo_id_t *dpo); + const dpo_id_t *dpo, + fib_node_index_t cover_index); extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib, const ip4_address_t * dst); @@ -79,7 +107,7 @@ extern void ip4_fib_table_walk(ip4_fib_t *fib, static inline ip4_fib_t * ip4_fib_get (u32 index) { - return (&(pool_elt_at_index(ip4_main.fibs, index)->v4)); + return (pool_elt_at_index(ip4_main.v4_fibs, index)); } always_inline u32 @@ -134,7 +162,6 @@ ip4_fib_forwarding_lookup (u32 fib_index, mtrie = &ip4_fib_get(fib_index)->mtrie; leaf = ip4_fib_mtrie_lookup_step_one (mtrie, addr); - leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1); leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2); leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3); diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index 343ff55e..0ee029d3 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -55,22 +55,29 @@ static u32 create_fib_with_table_id (u32 table_id) { fib_table_t *fib_table; + ip6_fib_t *v6_fib; pool_get_aligned(ip6_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); + pool_get_aligned(ip6_main.v6_fibs, v6_fib, CLIB_CACHE_LINE_BYTES); + memset(fib_table, 0, sizeof(*fib_table)); + memset(v6_fib, 0, sizeof(*v6_fib)); + ASSERT((fib_table - ip6_main.fibs) == + (v6_fib - ip6_main.v6_fibs)); + fib_table->ft_proto = FIB_PROTOCOL_IP6; fib_table->ft_index = - fib_table->v6.index = - (fib_table - ip6_main.fibs); + v6_fib->index = + (fib_table - ip6_main.fibs); hash_set(ip6_main.fib_index_by_table_id, table_id, fib_table->ft_index); fib_table->ft_table_id = - fib_table->v6.table_id = + v6_fib->table_id = table_id; fib_table->ft_flow_hash_config = - fib_table->v6.flow_hash_config = + v6_fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; vnet_ip6_fib_init(fib_table->ft_index); @@ -188,6 +195,7 @@ ip6_fib_table_destroy (u32 fib_index) { hash_unset (ip6_main.fib_index_by_table_id, fib_table->ft_table_id); } + pool_put_index(ip6_main.v6_fibs, fib_table->ft_index); pool_put(ip6_main.fibs, fib_table); } @@ -620,7 +628,7 @@ ip6_show_fib (vlib_main_t * vm, pool_foreach (fib_table, im6->fibs, ({ - fib = &(fib_table->v6); + fib = pool_elt_at_index(im6->v6_fibs, fib_table->ft_index); if (table_id >= 0 && table_id != (int)fib->table_id) continue; if (fib_index != ~0 && fib_index != (int)fib->index) diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h index af864a75..e2f28452 100644 --- a/src/vnet/fib/ip6_fib.h +++ b/src/vnet/fib/ip6_fib.h @@ -115,7 +115,7 @@ static inline ip6_fib_t * ip6_fib_get (fib_node_index_t index) { ASSERT(!pool_is_free_index(ip6_main.fibs, index)); - return (&pool_elt_at_index (ip6_main.fibs, index)->v6); + return (pool_elt_at_index (ip6_main.v6_fibs, index)); } static inline diff --git 
a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c index 5cd0fd23..4b2b76ea 100644 --- a/src/vnet/fib/mpls_fib.c +++ b/src/vnet/fib/mpls_fib.c @@ -97,11 +97,15 @@ mpls_fib_create_with_table_id (u32 table_id) int i; pool_get_aligned(mpls_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); + pool_get_aligned(mpls_main.mpls_fibs, mf, CLIB_CACHE_LINE_BYTES); + + ASSERT((fib_table - mpls_main.fibs) == + (mf - mpls_main.mpls_fibs)); + memset(fib_table, 0, sizeof(*fib_table)); fib_table->ft_proto = FIB_PROTOCOL_MPLS; - fib_table->ft_index = - (fib_table - mpls_main.fibs); + fib_table->ft_index = (fib_table - mpls_main.fibs); hash_set (mpls_main.fib_index_by_table_id, table_id, fib_table->ft_index); @@ -109,8 +113,6 @@ mpls_fib_create_with_table_id (u32 table_id) table_id; fib_table->ft_flow_hash_config = MPLS_FLOW_HASH_DEFAULT; - fib_table->v4.fwd_classify_table_index = ~0; - fib_table->v4.rev_classify_table_index = ~0; fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS); @@ -122,7 +124,6 @@ mpls_fib_create_with_table_id (u32 table_id) drop_dpo_get(DPO_PROTO_MPLS)); } - mf = &fib_table->mpls; mf->mf_entries = hash_create(0, sizeof(fib_node_index_t)); for (i = 0; i < MPLS_FIB_DB_SIZE; i++) { @@ -241,9 +242,10 @@ mpls_fib_table_create_and_lock (void) } void -mpls_fib_table_destroy (mpls_fib_t *mf) +mpls_fib_table_destroy (u32 fib_index) { - fib_table_t *fib_table = (fib_table_t*)mf; + fib_table_t *fib_table = pool_elt_at_index(mpls_main.fibs, fib_index); + mpls_fib_t *mf = pool_elt_at_index(mpls_main.mpls_fibs, fib_index); fib_prefix_t prefix = { .fp_proto = FIB_PROTOCOL_MPLS, }; @@ -274,6 +276,7 @@ mpls_fib_table_destroy (mpls_fib_t *mf) } hash_free(mf->mf_entries); + pool_put(mpls_main.mpls_fibs, mf); pool_put(mpls_main.fibs, fib_table); } @@ -436,11 +439,11 @@ mpls_fib_show (vlib_main_t * vm, if (MPLS_LABEL_INVALID == label) { - mpls_fib_table_show_all(&(fib_table->mpls), vm); + mpls_fib_table_show_all(mpls_fib_get(fib_table->ft_index), vm); } else { - mpls_fib_table_show_one(&(fib_table->mpls), label, vm); + mpls_fib_table_show_one(mpls_fib_get(fib_table->ft_index), label, vm); } })); diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h index 779decaa..78a61a14 100644 --- a/src/vnet/fib/mpls_fib.h +++ b/src/vnet/fib/mpls_fib.h @@ -25,10 +25,33 @@ #include #include +#define MPLS_FIB_DEFAULT_TABLE_ID 0 + +/** + * Type exposure is to allow the DP fast/inlined access + */ +#define MPLS_FIB_KEY_SIZE 21 +#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1)) + +typedef struct mpls_fib_t_ +{ + /** + * A hash table of entries. 21 bit key + * Hash table for reduced memory footprint + */ + uword * mf_entries; + + /** + * The load-balance indices keyed by 21 bit label+eos bit. + * A flat array for maximum lookup performace. 
+ */ + index_t mf_lbs[MPLS_FIB_DB_SIZE]; +} mpls_fib_t; + static inline mpls_fib_t* mpls_fib_get (fib_node_index_t index) { - return (&(pool_elt_at_index(mpls_main.fibs, index)->mpls)); + return (pool_elt_at_index(mpls_main.mpls_fibs, index)); } extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id); @@ -56,8 +79,7 @@ extern void mpls_fib_table_entry_insert(mpls_fib_t *mf, mpls_label_t label, mpls_eos_bit_t eos, fib_node_index_t fei); -extern void mpls_fib_table_destroy(mpls_fib_t *mf); - +extern void mpls_fib_table_destroy(u32 fib_index); extern void mpls_fib_forwarding_table_update(mpls_fib_t *mf, diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index 4e075d0f..71640def 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -40,34 +40,10 @@ #ifndef included_ip_ip4_h #define included_ip_ip4_h -#include #include #include #include -typedef struct ip4_fib_t -{ - /* Hash table for each prefix length mapping. */ - uword *fib_entry_by_dst_address[33]; - - /* Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. */ - ip4_fib_mtrie_t mtrie; - - /* Table ID (hash key) for this FIB. */ - u32 table_id; - - /* Index into FIB vector. */ - u32 index; - - /* flow hash configuration */ - flow_hash_config_t flow_hash_config; - - /* N-tuple classifier indices */ - u32 fwd_classify_table_index; - u32 rev_classify_table_index; - -} ip4_fib_t; - typedef struct ip4_mfib_t { /* Hash table for each prefix length mapping. */ @@ -111,6 +87,9 @@ typedef struct ip4_main_t /** Vector of FIBs. */ struct fib_table_t_ *fibs; + /** Vector of MTries. */ + struct ip4_fib_t_ *v4_fibs; + /** Vector of MFIBs. */ struct mfib_table_t_ *mfibs; @@ -284,8 +263,6 @@ serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main; int vnet_set_ip4_flow_hash (u32 table_id, flow_hash_config_t flow_hash_config); -void ip4_mtrie_init (ip4_fib_mtrie_t * m); - int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, u32 table_index); diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index ef6dded5..ee1703e7 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -182,7 +182,6 @@ ip4_lookup_inline (vlib_main_t * vm, mtrie2 = &ip4_fib_get (fib_index2)->mtrie; mtrie3 = &ip4_fib_get (fib_index3)->mtrie; - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1); leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2); @@ -194,14 +193,6 @@ ip4_lookup_inline (vlib_main_t * vm, tcp2 = (void *) (ip2 + 1); tcp3 = (void *) (ip3 + 1); - if (!lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1); - leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1); - leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1); - } - if (!lookup_for_responses_to_locally_received_packets) { leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); @@ -363,9 +354,6 @@ ip4_lookup_inline (vlib_main_t * vm, tcp0 = (void *) (ip0 + 1); - if (!lookup_for_responses_to_locally_received_packets) - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1); - if (!lookup_for_responses_to_locally_received_packets) leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); @@ -1622,11 +1610,6 @@ ip4_local_inline (vlib_main_t * vm, good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; good_tcp_udp1 |= is_udp1 && udp1->checksum == 0; - leaf0 = - ip4_fib_mtrie_lookup_step 
(mtrie0, leaf0, &ip0->src_address, 1); - leaf1 = - ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1); - /* Verify UDP length. */ ip_len0 = clib_net_to_host_u16 (ip0->length); ip_len1 = clib_net_to_host_u16 (ip1->length); @@ -1812,9 +1795,6 @@ ip4_local_inline (vlib_main_t * vm, /* Don't verify UDP checksum for packets with explicit zero checksum. */ good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); - /* Verify UDP length. */ ip_len0 = clib_net_to_host_u16 (ip0->length); udp_len0 = clib_net_to_host_u16 (udp0->length); @@ -2913,7 +2893,6 @@ ip4_lookup_validate (ip4_address_t * a, u32 fib_index0) mtrie0 = &ip4_fib_get (fib_index0)->mtrie; leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3); diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c index 317d8f10..adc95125 100644 --- a/src/vnet/ip/ip4_mtrie.c +++ b/src/vnet/ip/ip4_mtrie.c @@ -38,10 +38,17 @@ */ #include -#include +#include +#include + + +/** + * Global pool of IPv4 8bit PLYs + */ +ip4_fib_mtrie_8_ply_t *ip4_ply_pool; always_inline u32 -ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_ply_t * p, u8 dst_byte) +ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte) { /* * It's 'non-empty' if the length of the leaf stored is greater than the @@ -84,61 +91,83 @@ ip4_fib_mtrie_leaf_set_next_ply_index (u32 i) return l; } -static void -ply_init (ip4_fib_mtrie_ply_t * p, - ip4_fib_mtrie_leaf_t init, u32 prefix_len, u32 ply_base_len) -{ - /* - * A leaf is 'empty' if it represents a leaf from the covering PLY - * i.e. if the prefix length of the leaf is less than or equal to - * the prefix length of the PLY - */ - p->n_non_empty_leafs = (prefix_len > ply_base_len ? - ARRAY_LEN (p->leaves) : 0); - memset (p->dst_address_bits_of_leaves, prefix_len, - sizeof (p->dst_address_bits_of_leaves)); - p->dst_address_bits_base = ply_base_len; - - /* Initialize leaves. 
*/ -#ifdef CLIB_HAVE_VEC128 - { - u32x4 *l, init_x4; - #ifndef __ALTIVEC__ - init_x4 = u32x4_splat (init); +#define PLY_X4_SPLAT_INIT(init_x4, init) \ + init_x4 = u32x4_splat (init); #else - { - u32x4_union_t y; - y.as_u32[0] = init; - y.as_u32[1] = init; - y.as_u32[2] = init; - y.as_u32[3] = init; - init_x4 = y.as_u32x4; - } +#define PLY_X4_SPLAT_INIT(init_x4, init) \ +{ \ + u32x4_union_t y; \ + y.as_u32[0] = init; \ + y.as_u32[1] = init; \ + y.as_u32[2] = init; \ + y.as_u32[3] = init; \ + init_x4 = y.as_u32x4; \ +} #endif - for (l = p->leaves_as_u32x4; - l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4) - { - l[0] = init_x4; - l[1] = init_x4; - l[2] = init_x4; - l[3] = init_x4; - } - } +#ifdef CLIB_HAVE_VEC128 +#define PLY_INIT_LEAVES(p) \ +{ \ + u32x4 *l, init_x4; \ + \ + PLY_X4_SPLAT_INIT(init_x4, init); \ + for (l = p->leaves_as_u32x4; \ + l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); \ + l += 4) \ + { \ + l[0] = init_x4; \ + l[1] = init_x4; \ + l[2] = init_x4; \ + l[3] = init_x4; \ + } \ +} #else - { - u32 *l; - - for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) - { - l[0] = init; - l[1] = init; - l[2] = init; - l[3] = init; - } - } +#define PLY_INIT_LEAVES(p) \ +{ \ + u32 *l; \ + \ + for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) \ + { \ + l[0] = init; \ + l[1] = init; \ + l[2] = init; \ + l[3] = init; \ + } \ +} #endif + +#define PLY_INIT(p, init, prefix_len, ply_base_len) \ +{ \ + /* \ + * A leaf is 'empty' if it represents a leaf from the covering PLY \ + * i.e. if the prefix length of the leaf is less than or equal to \ + * the prefix length of the PLY \ + */ \ + p->n_non_empty_leafs = (prefix_len > ply_base_len ? \ + ARRAY_LEN (p->leaves) : 0); \ + memset (p->dst_address_bits_of_leaves, prefix_len, \ + sizeof (p->dst_address_bits_of_leaves)); \ + p->dst_address_bits_base = ply_base_len; \ + \ + /* Initialize leaves. */ \ + PLY_INIT_LEAVES(p); \ +} + +static void +ply_8_init (ip4_fib_mtrie_8_ply_t * p, + ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len) +{ + PLY_INIT (p, init, prefix_len, ply_base_len); +} + +static void +ply_16_init (ip4_fib_mtrie_16_ply_t * p, + ip4_fib_mtrie_leaf_t init, uword prefix_len) +{ + memset (p->dst_address_bits_of_leaves, prefix_len, + sizeof (p->dst_address_bits_of_leaves)); + PLY_INIT_LEAVES (p); } static ip4_fib_mtrie_leaf_t @@ -146,49 +175,43 @@ ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf, u32 leaf_prefix_len, u32 ply_base_len) { - ip4_fib_mtrie_ply_t *p; + ip4_fib_mtrie_8_ply_t *p; /* Get cache aligned ply. */ - pool_get_aligned (m->ply_pool, p, sizeof (p[0])); + pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES); - ply_init (p, init_leaf, leaf_prefix_len, ply_base_len); - return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool); + ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len); + return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool); } -always_inline ip4_fib_mtrie_ply_t * +always_inline ip4_fib_mtrie_8_ply_t * get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l) { uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l); - /* It better not be the root ply. 
*/ - ASSERT (n != 0); - return pool_elt_at_index (m->ply_pool, n); + + return pool_elt_at_index (ip4_ply_pool, n); } -static void -ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) +void +ip4_mtrie_free (ip4_fib_mtrie_t * m) { - uword i, is_root; - - is_root = p - m->ply_pool == 0; - - for (i = 0; i < ARRAY_LEN (p->leaves); i++) + /* the root ply is embedded so the is nothing to do, + * the assumption being that the IP4 FIB table has emptied the trie + * before deletion. + */ +#if CLIB_DEBUG > 0 + int i; + for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++) { - ip4_fib_mtrie_leaf_t l = p->leaves[i]; - if (ip4_fib_mtrie_leaf_is_next_ply (l)) - ply_free (m, get_next_ply_for_leaf (m, l)); + ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i])); } - - if (is_root) - ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0, 0); - else - pool_put (m->ply_pool, p); +#endif } void -ip4_fib_free (ip4_fib_mtrie_t * m) +ip4_mtrie_init (ip4_fib_mtrie_t * m) { - ip4_fib_mtrie_ply_t *root_ply = pool_elt_at_index (m->ply_pool, 0); - ply_free (m, root_ply); + ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0); } typedef struct @@ -202,7 +225,7 @@ typedef struct static void set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, - ip4_fib_mtrie_ply_t * ply, + ip4_fib_mtrie_8_ply_t * ply, ip4_fib_mtrie_leaf_t new_leaf, uword new_leaf_dst_address_bits) { @@ -218,7 +241,8 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, /* Recurse into sub plies. */ if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf)) { - ip4_fib_mtrie_ply_t *sub_ply = get_next_ply_for_leaf (m, old_leaf); + ip4_fib_mtrie_8_ply_t *sub_ply = + get_next_ply_for_leaf (m, old_leaf); set_ply_with_more_specific_leaf (m, sub_ply, new_leaf, new_leaf_dst_address_bits); } @@ -237,16 +261,20 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, static void set_leaf (ip4_fib_mtrie_t * m, - ip4_fib_mtrie_set_unset_leaf_args_t * a, + const ip4_fib_mtrie_set_unset_leaf_args_t * a, u32 old_ply_index, u32 dst_address_byte_index) { ip4_fib_mtrie_leaf_t old_leaf, new_leaf; i32 n_dst_bits_next_plies; u8 dst_byte; + ip4_fib_mtrie_8_ply_t *old_ply; + + old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index); ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); + /* how many bits of the destination address are in the next PLY */ n_dst_bits_next_plies = a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1); @@ -255,30 +283,36 @@ set_leaf (ip4_fib_mtrie_t * m, /* Number of bits next plies <= 0 => insert leaves this ply. */ if (n_dst_bits_next_plies <= 0) { + /* The mask length of the address to insert maps to this ply */ uword i, n_dst_bits_this_ply, old_leaf_is_terminal; + /* The number of bits, and hence slots/buckets, we will fill */ n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies); ASSERT ((a->dst_address.as_u8[dst_address_byte_index] & pow2_mask (n_dst_bits_this_ply)) == 0); + /* Starting at the value of the byte at this section of the v4 address + * fill the buckets/slots of the ply */ for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++) { - ip4_fib_mtrie_ply_t *old_ply, *new_ply; - - old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + ip4_fib_mtrie_8_ply_t *new_ply; old_leaf = old_ply->leaves[i]; old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); - /* Is leaf to be inserted more specific? 
*/ if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i]) { + /* The new leaf is more or equally specific than the one currently + * occupying the slot */ new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); if (old_leaf_is_terminal) { + /* The current leaf is terminal, we can replace it with + * the new one */ old_ply->n_non_empty_leafs -= ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); + old_ply->dst_address_bits_of_leaves[i] = a->dst_address_length; __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf, @@ -292,32 +326,42 @@ set_leaf (ip4_fib_mtrie_t * m, } else { - /* Existing leaf points to another ply. We need to place new_leaf into all - more specific slots. */ + /* Existing leaf points to another ply. We need to place + * new_leaf into all more specific slots. */ new_ply = get_next_ply_for_leaf (m, old_leaf); set_ply_with_more_specific_leaf (m, new_ply, new_leaf, a->dst_address_length); } } - else if (!old_leaf_is_terminal) { + /* The current leaf is less specific and not termial (i.e. a ply), + * recurse on down the trie */ new_ply = get_next_ply_for_leaf (m, old_leaf); - set_leaf (m, a, new_ply - m->ply_pool, + set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1); } + /* + * else + * the route we are adding is less specific than the leaf currently + * occupying this slot. leave it there + */ } } else { - ip4_fib_mtrie_ply_t *old_ply, *new_ply; + /* The address to insert requires us to move down at a lower level of + * the trie - recurse on down */ + ip4_fib_mtrie_8_ply_t *new_ply; u8 ply_base_len; ply_base_len = 8 * (dst_address_byte_index + 1); - old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + old_leaf = old_ply->leaves[dst_byte]; + if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) { + /* There is a leaf occupying the slot. Replace it with a new ply */ old_ply->n_non_empty_leafs -= ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte); @@ -328,28 +372,143 @@ set_leaf (ip4_fib_mtrie_t * m, new_ply = get_next_ply_for_leaf (m, new_leaf); /* Refetch since ply_create may move pool. */ - old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index); __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, new_leaf); ASSERT (old_ply->leaves[dst_byte] == new_leaf); old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len; - /* Account for the ply we just created. */ - old_ply->n_non_empty_leafs += 1; + old_ply->n_non_empty_leafs += + ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte); ASSERT (old_ply->n_non_empty_leafs >= 0); } else new_ply = get_next_ply_for_leaf (m, old_leaf); - set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1); + set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1); + } +} + +static void +set_root_leaf (ip4_fib_mtrie_t * m, + const ip4_fib_mtrie_set_unset_leaf_args_t * a) +{ + ip4_fib_mtrie_leaf_t old_leaf, new_leaf; + ip4_fib_mtrie_16_ply_t *old_ply; + i32 n_dst_bits_next_plies; + u16 dst_byte; + + old_ply = &m->root_ply; + + ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); + + /* how many bits of the destination address are in the next PLY */ + n_dst_bits_next_plies = a->dst_address_length - BITS (u16); + + dst_byte = a->dst_address.as_u16[0]; + + /* Number of bits next plies <= 0 => insert leaves this ply. 
*/ + if (n_dst_bits_next_plies <= 0) + { + /* The mask length of the address to insert maps to this ply */ + uword i, n_dst_bits_this_ply, old_leaf_is_terminal; + + /* The number of bits, and hence slots/buckets, we will fill */ + n_dst_bits_this_ply = 16 - a->dst_address_length; + ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) & + pow2_mask (n_dst_bits_this_ply)) == 0); + + /* Starting at the value of the byte at this section of the v4 address + * fill the buckets/slots of the ply */ + for (i = 0; i < (1 << n_dst_bits_this_ply); i++) + { + ip4_fib_mtrie_8_ply_t *new_ply; + u16 slot; + + slot = clib_net_to_host_u16 (dst_byte); + slot += i; + slot = clib_host_to_net_u16 (slot); + + old_leaf = old_ply->leaves[slot]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + if (a->dst_address_length >= + old_ply->dst_address_bits_of_leaves[slot]) + { + /* The new leaf is more or equally specific than the one currently + * occupying the slot */ + new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + if (old_leaf_is_terminal) + { + /* The current leaf is terminal, we can replace it with + * the new one */ + old_ply->dst_address_bits_of_leaves[slot] = + a->dst_address_length; + __sync_val_compare_and_swap (&old_ply->leaves[slot], + old_leaf, new_leaf); + ASSERT (old_ply->leaves[slot] == new_leaf); + } + else + { + /* Existing leaf points to another ply. We need to place + * new_leaf into all more specific slots. */ + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_ply_with_more_specific_leaf (m, new_ply, new_leaf, + a->dst_address_length); + } + } + else if (!old_leaf_is_terminal) + { + /* The current leaf is less specific and not termial (i.e. a ply), + * recurse on down the trie */ + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_leaf (m, a, new_ply - ip4_ply_pool, 2); + } + /* + * else + * the route we are adding is less specific than the leaf currently + * occupying this slot. leave it there + */ + } + } + else + { + /* The address to insert requires us to move down at a lower level of + * the trie - recurse on down */ + ip4_fib_mtrie_8_ply_t *new_ply; + u8 ply_base_len; + + ply_base_len = 16; + + old_leaf = old_ply->leaves[dst_byte]; + + if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) + { + /* There is a leaf occupying the slot. Replace it with a new ply */ + new_leaf = ply_create (m, old_leaf, + clib_max (old_ply->dst_address_bits_of_leaves + [dst_byte], ply_base_len), + ply_base_len); + new_ply = get_next_ply_for_leaf (m, new_leaf); + + __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, + new_leaf); + ASSERT (old_ply->leaves[dst_byte] == new_leaf); + old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len; + } + else + new_ply = get_next_ply_for_leaf (m, old_leaf); + + set_leaf (m, a, new_ply - ip4_ply_pool, 2); } } static uword unset_leaf (ip4_fib_mtrie_t * m, - ip4_fib_mtrie_set_unset_leaf_args_t * a, - ip4_fib_mtrie_ply_t * old_ply, u32 dst_address_byte_index) + const ip4_fib_mtrie_set_unset_leaf_args_t * a, + ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index) { ip4_fib_mtrie_leaf_t old_leaf, del_leaf; i32 n_dst_bits_next_plies; @@ -397,7 +556,7 @@ unset_leaf (ip4_fib_mtrie_t * m, ASSERT (old_ply->n_non_empty_leafs >= 0); if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0) { - pool_put (m->ply_pool, old_ply); + pool_put (ip4_ply_pool, old_ply); /* Old ply was deleted. 
*/ return 1; } @@ -419,106 +578,120 @@ unset_leaf (ip4_fib_mtrie_t * m, return 0; } -void -ip4_mtrie_init (ip4_fib_mtrie_t * m) +static void +unset_root_leaf (ip4_fib_mtrie_t * m, + const ip4_fib_mtrie_set_unset_leaf_args_t * a) { - ip4_fib_mtrie_leaf_t root; - memset (m, 0, sizeof (m[0])); - root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, 0, 0); - ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0); -} + ip4_fib_mtrie_leaf_t old_leaf, del_leaf; + i32 n_dst_bits_next_plies; + i32 i, n_dst_bits_this_ply, old_leaf_is_terminal; + u16 dst_byte; + ip4_fib_mtrie_16_ply_t *old_ply; -void -ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, - ip4_address_t dst_address, - u32 dst_address_length, - u32 adj_index, u32 is_del) -{ - ip4_fib_mtrie_t *m = &fib->mtrie; - ip4_fib_mtrie_ply_t *root_ply; - ip4_fib_mtrie_set_unset_leaf_args_t a; - ip4_main_t *im = &ip4_main; + ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); - ASSERT (m->ply_pool != 0); + old_ply = &m->root_ply; + n_dst_bits_next_plies = a->dst_address_length - BITS (u16); - root_ply = pool_elt_at_index (m->ply_pool, 0); + dst_byte = a->dst_address.as_u16[0]; - /* Honor dst_address_length. Fib masks are in network byte order */ - dst_address.as_u32 &= im->fib_masks[dst_address_length]; - a.dst_address = dst_address; - a.dst_address_length = dst_address_length; - a.adj_index = adj_index; + n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ? + (16 - a->dst_address_length) : 0); - if (!is_del) - { - set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0); - } - else + del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + /* Starting at the value of the byte at this section of the v4 address + * fill the buckets/slots of the ply */ + for (i = 0; i < (1 << n_dst_bits_this_ply); i++) { - ip4_main_t *im = &ip4_main; + u16 slot; + + slot = clib_net_to_host_u16 (dst_byte); + slot += i; + slot = clib_host_to_net_u16 (slot); - if (dst_address_length) + old_leaf = old_ply->leaves[slot]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + if (old_leaf == del_leaf + || (!old_leaf_is_terminal + && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2))) { - word i; + old_ply->leaves[slot] = + ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index); + old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length; + } + } +} - /* If the ply was not deleted, then we need to fill the - * bucket just reset will the leaf from the less specfic - * cover. - * Find next less specific route and insert into mtrie. */ - for (i = dst_address_length - 1; i >= 0; i--) - { - uword *p; - index_t lbi; - ip4_address_t key; +void +ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, u32 adj_index) +{ + ip4_fib_mtrie_set_unset_leaf_args_t a; + ip4_main_t *im = &ip4_main; - if (!fib->fib_entry_by_dst_address[i]) - continue; + /* Honor dst_address_length. 
Fib masks are in network byte order */ + a.dst_address.as_u32 = (dst_address->as_u32 & + im->fib_masks[dst_address_length]); + a.dst_address_length = dst_address_length; + a.adj_index = adj_index; - key.as_u32 = dst_address.as_u32 & im->fib_masks[i]; - p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32); - if (p) - { - lbi = fib_entry_contribute_ip_forwarding (p[0])->dpoi_index; - if (INDEX_INVALID == lbi) - continue; + set_root_leaf (m, &a); +} - a.cover_adj_index = lbi; - a.cover_address_length = i; +void +ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, + u32 adj_index, + u32 cover_address_length, u32 cover_adj_index) +{ + ip4_fib_mtrie_set_unset_leaf_args_t a; + ip4_main_t *im = &ip4_main; - break; - } - } - } - else - { - a.cover_adj_index = 0; - a.cover_address_length = 0; - } + /* Honor dst_address_length. Fib masks are in network byte order */ + a.dst_address.as_u32 = (dst_address->as_u32 & + im->fib_masks[dst_address_length]); + a.dst_address_length = dst_address_length; + a.adj_index = adj_index; + a.cover_adj_index = cover_adj_index; + a.cover_address_length = cover_address_length; - /* the top level ply is never removed, so we can ignore the return code */ - unset_leaf (m, &a, root_ply, 0); - } + /* the top level ply is never removed */ + unset_root_leaf (m, &a); } /* Returns number of bytes of memory used by mtrie. */ static uword -mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) +mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p) { uword bytes, i; - if (!p) - { - if (pool_is_free_index (m->ply_pool, 0)) - return 0; - p = pool_elt_at_index (m->ply_pool, 0); - } - bytes = sizeof (p[0]); for (i = 0; i < ARRAY_LEN (p->leaves); i++) { ip4_fib_mtrie_leaf_t l = p->leaves[i]; if (ip4_fib_mtrie_leaf_is_next_ply (l)) - bytes += mtrie_memory_usage (m, get_next_ply_for_leaf (m, l)); + bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l)); + } + + return bytes; +} + +/* Returns number of bytes of memory used by mtrie. 
*/ +static uword +mtrie_memory_usage (ip4_fib_mtrie_t * m) +{ + uword bytes, i; + + bytes = sizeof (*m); + for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++) + { + ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i]; + if (ip4_fib_mtrie_leaf_is_next_ply (l)) + bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l)); } return bytes; @@ -536,47 +709,49 @@ format_ip4_fib_mtrie_leaf (u8 * s, va_list * va) return s; } +#define FORMAT_PLY(s, _p, _i, _base_address, _ply_max_len, _indent) \ +({ \ + u32 a, ia_length; \ + ip4_address_t ia; \ + ip4_fib_mtrie_leaf_t _l = p->leaves[(_i)]; \ + \ + a = (_base_address) + ((_i) << (32 - (_ply_max_len))); \ + ia.as_u32 = clib_host_to_net_u32 (a); \ + ia_length = (_p)->dst_address_bits_of_leaves[(_i)]; \ + s = format (s, "\n%U%20U %U", \ + format_white_space, (_indent) + 2, \ + format_ip4_address_and_length, &ia, ia_length, \ + format_ip4_fib_mtrie_leaf, _l); \ + \ + if (ip4_fib_mtrie_leaf_is_next_ply (_l)) \ + s = format (s, "\n%U%U", \ + format_white_space, (_indent) + 2, \ + format_ip4_fib_mtrie_ply, m, a, \ + ip4_fib_mtrie_leaf_get_next_ply_index (_l)); \ + s; \ +}) + static u8 * format_ip4_fib_mtrie_ply (u8 * s, va_list * va) { ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *); u32 base_address = va_arg (*va, u32); u32 ply_index = va_arg (*va, u32); - u32 dst_address_byte_index = va_arg (*va, u32); - ip4_fib_mtrie_ply_t *p; - uword i, indent; + ip4_fib_mtrie_8_ply_t *p; + uword indent; + int i; - p = pool_elt_at_index (m->ply_pool, ply_index); + p = pool_elt_at_index (ip4_ply_pool, ply_index); indent = format_get_indent (s); - s = - format (s, "ply index %d, %d non-empty leaves", ply_index, - p->n_non_empty_leafs); + s = format (s, "ply index %d, %d non-empty leaves", ply_index, + p->n_non_empty_leafs); + for (i = 0; i < ARRAY_LEN (p->leaves); i++) { - ip4_fib_mtrie_leaf_t l = p->leaves[i]; - if (ip4_fib_mtrie_leaf_is_non_empty (p, i)) { - u32 a, ia_length; - ip4_address_t ia; - - a = base_address + (i << (24 - 8 * dst_address_byte_index)); - ia.as_u32 = clib_host_to_net_u32 (a); - if (ip4_fib_mtrie_leaf_is_terminal (l)) - ia_length = p->dst_address_bits_of_leaves[i]; - else - ia_length = 8 * (1 + dst_address_byte_index); - s = format (s, "\n%U%20U %U", - format_white_space, indent + 2, - format_ip4_address_and_length, &ia, ia_length, - format_ip4_fib_mtrie_leaf, l); - - if (ip4_fib_mtrie_leaf_is_next_ply (l)) - s = format (s, "\n%U%U", - format_white_space, indent + 2, - format_ip4_fib_mtrie_ply, m, a, - ip4_fib_mtrie_leaf_get_next_ply_index (l), - dst_address_byte_index + 1); + FORMAT_PLY (s, p, i, base_address, + p->dst_address_bits_base + 8, indent); } } @@ -587,22 +762,44 @@ u8 * format_ip4_fib_mtrie (u8 * s, va_list * va) { ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *); + ip4_fib_mtrie_16_ply_t *p; + u32 base_address = 0; + int i; - s = format (s, "%d plies, memory usage %U", - pool_elts (m->ply_pool), - format_memory_size, mtrie_memory_usage (m, 0)); + s = format (s, "%d plies, memory usage %U\n", + pool_elts (ip4_ply_pool), + format_memory_size, mtrie_memory_usage (m)); + s = format (s, "root-ply"); + p = &m->root_ply; - if (pool_elts (m->ply_pool) > 0) + for (i = 0; i < ARRAY_LEN (p->leaves); i++) { - ip4_address_t base_address; - base_address.as_u32 = 0; - s = - format (s, "\n %U", format_ip4_fib_mtrie_ply, m, base_address, 0, 0); + u16 slot; + + slot = clib_host_to_net_u16 (i); + + if (p->dst_address_bits_of_leaves[slot] > 0) + { + FORMAT_PLY (s, p, slot, base_address, 16, 2); + } } return s; } +static clib_error_t * 
+ip4_mtrie_module_init (vlib_main_t * vm) +{ + /* Burn one ply so index 0 is taken */ + CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p); + + pool_get (ip4_ply_pool, p); + + return (NULL); +} + +VLIB_INIT_FUNCTION (ip4_mtrie_module_init); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h index 128195d3..be262c2c 100644 --- a/src/vnet/ip/ip4_mtrie.h +++ b/src/vnet/ip/ip4_mtrie.h @@ -47,16 +47,43 @@ /* ip4 fib leafs: 4 ply 8-8-8-8 mtrie. 1 + 2*adj_index for terminal leaves. - 0 + 2*next_ply_index for non-terminals. + 0 + 2*next_ply_index for non-terminals, i.e. PLYs 1 => empty (adjacency index of zero is special miss adjacency). */ typedef u32 ip4_fib_mtrie_leaf_t; #define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0) +/** + * @brief the 16 way stride that is the top PLY of the mtrie + * We do not maintain the count of 'real' leaves in this PLY, since + * it is never removed. The FIB will destroy the mtrie and the ply once + * the FIB is destroyed. + */ +#define PLY_16_SIZE (1<<16) +typedef struct ip4_fib_mtrie_16_ply_t_ +{ + /** + * The leaves/slots/buckets to be filed with leafs + */ + union + { + ip4_fib_mtrie_leaf_t leaves[PLY_16_SIZE]; + +#ifdef CLIB_HAVE_VEC128 + u32x4 leaves_as_u32x4[PLY_16_SIZE / 4]; +#endif + }; + + /** + * Prefix length for terminal leaves. + */ + u8 dst_address_bits_of_leaves[PLY_16_SIZE]; +} ip4_fib_mtrie_16_ply_t; + /** * @brief One ply of the 4 ply mtrie fib. */ -typedef struct +typedef struct ip4_fib_mtrie_8_ply_t_ { /** * The leaves/slots/buckets to be filed with leafs @@ -90,34 +117,72 @@ typedef struct /* Pad to cache line boundary. */ u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)]; } -ip4_fib_mtrie_ply_t; +ip4_fib_mtrie_8_ply_t; -STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_ply_t) % CLIB_CACHE_LINE_BYTES, +STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES, "IP4 Mtrie ply cache line"); +/** + * @brief The mutiway-TRIE. + * There is no data associated with the mtrie apart from the top PLY + */ typedef struct { - /* Pool of plies. Index zero is root ply. */ - ip4_fib_mtrie_ply_t *ply_pool; + /** + * Embed the PLY with the mtrie struct. This means that the Data-plane + * 'get me the mtrie' returns the first ply, and not an indirect 'pointer' + * to it. therefore no cachline misses in the data-path. + */ + ip4_fib_mtrie_16_ply_t root_ply; } ip4_fib_mtrie_t; -void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m); +/** + * @brief Initialise an mtrie + */ +void ip4_mtrie_init (ip4_fib_mtrie_t * m); -struct ip4_fib_t; +/** + * @brief Free an mtrie, It must be emty when free'd + */ +void ip4_mtrie_free (ip4_fib_mtrie_t * m); -void ip4_fib_mtrie_add_del_route (struct ip4_fib_t *f, - ip4_address_t dst_address, - u32 dst_address_length, - u32 adj_index, u32 is_del); +/** + * @brief Add a route/rntry to the mtrie + */ +void ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, u32 adj_index); +/** + * @brief remove a route/rntry to the mtrie + */ +void ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, + u32 adj_index, + u32 cover_address_length, u32 cover_adj_index); +/** + * @brief Format/display the contents of the mtrie + */ format_function_t format_ip4_fib_mtrie; +/** + * @brief A global pool of 8bit stride plys + */ +extern ip4_fib_mtrie_8_ply_t *ip4_ply_pool; + +/** + * Is the leaf terminal (i.e. an LB index) or non-terminak (i.e. 
a PLY index) + */ always_inline u32 ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n) { return n & 1; } +/** + * From the stored slot value extract the LB index value + */ always_inline u32 ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n) { @@ -125,35 +190,38 @@ ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n) return n >> 1; } -/* Lookup step. Processes 1 byte of 4 byte ip4 address. */ +/** + * @brief Lookup step. Processes 1 byte of 4 byte ip4 address. + */ always_inline ip4_fib_mtrie_leaf_t ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t current_leaf, const ip4_address_t * dst_address, u32 dst_address_byte_index) { - ip4_fib_mtrie_ply_t *ply; + ip4_fib_mtrie_8_ply_t *ply; + uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf); if (!current_is_terminal) { - ply = m->ply_pool + (current_leaf >> 1); + ply = ip4_ply_pool + (current_leaf >> 1); return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]); } return current_leaf; } -/* Lookup step. Processes 1 byte of 4 byte ip4 address. */ +/** + * @brief Lookup step number 1. Processes 2 bytes of 4 byte ip4 address. + */ always_inline ip4_fib_mtrie_leaf_t ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m, const ip4_address_t * dst_address) { ip4_fib_mtrie_leaf_t next_leaf; - ip4_fib_mtrie_ply_t *ply; - ply = m->ply_pool; - next_leaf = ply->leaves[dst_address->as_u8[0]]; + next_leaf = m->root_ply.leaves[dst_address->as_u16[0]]; return next_leaf; } diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h index b2c1fcd4..1ff9fbdb 100644 --- a/src/vnet/ip/ip4_packet.h +++ b/src/vnet/ip/ip4_packet.h @@ -52,6 +52,7 @@ typedef union u32 data_u32; /* Aliases. */ u8 as_u8[4]; + u16 as_u16[2]; u32 as_u32; } ip4_address_t; diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c index 7c2b7be8..6831066e 100644 --- a/src/vnet/ip/ip4_source_check.c +++ b/src/vnet/ip/ip4_source_check.c @@ -165,11 +165,6 @@ ip4_source_check_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); - leaf1 = - ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); leaf1 = @@ -248,9 +243,6 @@ ip4_source_check_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index 8fa9a479..bf7ec7d5 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -153,6 +153,9 @@ typedef struct ip6_main_t /* Pool of FIBs. */ struct fib_table_t_ *fibs; + /* Pool of V6 FIBs. */ + ip6_fib_t *v6_fibs; + /** Vector of MFIBs. 
*/ struct mfib_table_t_ *mfibs; diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index e3a1fee8..b9f1782b 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -240,6 +240,21 @@ send_ip_fib_details (vpe_api_main_t * am, vl_msg_api_send_shmem (q, (u8 *) & mp); } +typedef struct vl_api_ip_fib_dump_walk_ctx_t_ +{ + fib_node_index_t *feis; +} vl_api_ip_fib_dump_walk_ctx_t; + +static int +vl_api_ip_fib_dump_walk (fib_node_index_t fei, void *arg) +{ + vl_api_ip_fib_dump_walk_ctx_t *ctx = arg; + + vec_add1 (ctx->feis, fei); + + return (1); +} + static void vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) { @@ -247,12 +262,13 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) unix_shared_memory_queue_t *q; ip4_main_t *im = &ip4_main; fib_table_t *fib_table; - fib_node_index_t lfei, *lfeip, *lfeis = NULL; - mpls_label_t key; + fib_node_index_t *lfeip; fib_prefix_t pfx; u32 fib_index; fib_route_path_encode_t *api_rpaths; - int i; + vl_api_ip_fib_dump_walk_ctx_t ctx = { + .feis = NULL, + }; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) @@ -261,19 +277,16 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) /* *INDENT-OFF* */ pool_foreach (fib_table, im->fibs, ({ - for (i = 0; i < ARRAY_LEN (fib_table->v4.fib_entry_by_dst_address); i++) - { - hash_foreach(key, lfei, fib_table->v4.fib_entry_by_dst_address[i], - ({ - vec_add1(lfeis, lfei); - })); - } + fib_table_walk(fib_table->ft_index, + FIB_PROTOCOL_IP4, + vl_api_ip_fib_dump_walk, + &ctx); })); /* *INDENT-ON* */ - vec_sort_with_function (lfeis, fib_entry_cmp_for_sort); + vec_sort_with_function (ctx.feis, fib_entry_cmp_for_sort); - vec_foreach (lfeip, lfeis) + vec_foreach (lfeip, ctx.feis) { fib_entry_get_prefix (*lfeip, &pfx); fib_index = fib_entry_get_fib_index (*lfeip); @@ -286,7 +299,7 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) vec_free (api_rpaths); } - vec_free (lfeis); + vec_free (ctx.feis); } static void @@ -377,10 +390,10 @@ api_ip6_fib_table_get_all (unix_shared_memory_queue_t * q, { vpe_api_main_t *am = &vpe_api_main; ip6_main_t *im6 = &ip6_main; - ip6_fib_t *fib = &fib_table->v6; fib_node_index_t *fib_entry_index; api_ip6_fib_show_ctx_t ctx = { - .fib_index = fib->index,.entries = NULL, + .fib_index = fib_table->ft_index, + .entries = NULL, }; fib_route_path_encode_t *api_rpaths; fib_prefix_t pfx; diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c index f631dc76..a085aaa2 100644 --- a/src/vnet/mpls/interface.c +++ b/src/vnet/mpls/interface.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h index 300f2cfd..b0125e60 100644 --- a/src/vnet/mpls/mpls.h +++ b/src/vnet/mpls/mpls.h @@ -30,29 +30,6 @@ typedef enum { MPLS_N_ERROR, } mpls_error_t; -#define MPLS_FIB_DEFAULT_TABLE_ID 0 - -/** - * Type exposure is to allow the DP fast/inlined access - */ -#define MPLS_FIB_KEY_SIZE 21 -#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1)) - -typedef struct mpls_fib_t_ -{ - /** - * A hash table of entries. 21 bit key - * Hash table for reduced memory footprint - */ - uword * mf_entries; - - /** - * The load-balance indeices keyed by 21 bit label+eos bit. - * A flat array for maximum lookup performace. 
- */ - index_t mf_lbs[MPLS_FIB_DB_SIZE]; -} mpls_fib_t; - /** * @brief Definition of a callback for receiving MPLS interface state change * notifications @@ -67,6 +44,9 @@ typedef struct { /** A pool of all the MPLS FIBs */ struct fib_table_t_ *fibs; + /** A pool of all the MPLS FIBs */ + struct mpls_fib_t_ *mpls_fibs; + /** A hash table to lookup the mpls_fib by table ID */ uword *fib_index_by_table_id; diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index a36a5046..f1aef6c9 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -369,6 +370,21 @@ send_mpls_fib_details (vpe_api_main_t * am, vl_msg_api_send_shmem (q, (u8 *) & mp); } +typedef struct vl_api_mpls_fib_dump_table_walk_ctx_t_ +{ + fib_node_index_t *lfeis; +} vl_api_mpls_fib_dump_table_walk_ctx_t; + +static int +vl_api_mpls_fib_dump_table_walk (fib_node_index_t fei, void *arg) +{ + vl_api_mpls_fib_dump_table_walk_ctx_t *ctx = arg; + + vec_add1 (ctx->lfeis, fei); + + return (1); +} + static void vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) { @@ -376,28 +392,30 @@ vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) unix_shared_memory_queue_t *q; mpls_main_t *mm = &mpls_main; fib_table_t *fib_table; - fib_node_index_t lfei, *lfeip, *lfeis = NULL; - mpls_label_t key; + mpls_fib_t *mpls_fib; + fib_node_index_t *lfeip = NULL; fib_prefix_t pfx; u32 fib_index; fib_route_path_encode_t *api_rpaths; + vl_api_mpls_fib_dump_table_walk_ctx_t ctx = { + .lfeis = NULL, + }; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) return; /* *INDENT-OFF* */ - pool_foreach (fib_table, mm->fibs, + pool_foreach (mpls_fib, mm->mpls_fibs, ({ - hash_foreach(key, lfei, fib_table->mpls.mf_entries, - ({ - vec_add1(lfeis, lfei); - })); + mpls_fib_table_walk (mpls_fib, + vl_api_mpls_fib_dump_table_walk, + &ctx); })); /* *INDENT-ON* */ - vec_sort_with_function (lfeis, fib_entry_cmp_for_sort); + vec_sort_with_function (ctx.lfeis, fib_entry_cmp_for_sort); - vec_foreach (lfeip, lfeis) + vec_foreach (lfeip, ctx.lfeis) { fib_entry_get_prefix (*lfeip, &pfx); fib_index = fib_entry_get_fib_index (*lfeip); @@ -410,7 +428,7 @@ vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) vec_free (api_rpaths); } - vec_free (lfeis); + vec_free (ctx.lfeis); } /* diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 14ccd864..09ae8b8f 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -896,9 +896,10 @@ ip4_reset_fib_t_handler (vl_api_reset_fib_t * mp) /* *INDENT-OFF* */ pool_foreach (fib_table, im4->fibs, ({ - fib = &fib_table->v4; vnet_sw_interface_t * si; + fib = pool_elt_at_index (im4->v4_fibs, fib_table->ft_index); + if (fib->table_id != target_fib_id) continue; @@ -964,7 +965,8 @@ ip6_reset_fib_t_handler (vl_api_reset_fib_t * mp) pool_foreach (fib_table, im6->fibs, ({ vnet_sw_interface_t * si; - fib = &(fib_table->v6); + + fib = pool_elt_at_index (im6->v6_fibs, fib_table->ft_index); if (fib->table_id != target_fib_id) continue; diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c index 1927da0b..042d02e2 100644 --- a/src/vpp/stats/stats.c +++ b/src/vpp/stats/stats.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #define STATS_DEBUG 0 @@ -576,6 +577,7 @@ do_ip4_fibs (stats_main_t * sm) static ip4_route_t *routes; ip4_route_t *r; fib_table_t *fib; + ip4_fib_t *v4_fib; ip_lookup_main_t *lm = &im4->lookup_main; static uword *results; vl_api_vnet_ip4_fib_counters_t *mp = 0; @@ -592,6 +594,8 @@ again: while 
((fib - im4->fibs) < start_at_fib_index) continue; + v4_fib = pool_elt_at_index (im4->v4_fibs, fib->ft_index); + if (mp == 0) { items_this_message = IP4_FIB_COUNTER_BATCH_SIZE; @@ -615,9 +619,9 @@ again: vec_reset_length (routes); vec_reset_length (results); - for (i = 0; i < ARRAY_LEN (fib->v4.fib_entry_by_dst_address); i++) + for (i = 0; i < ARRAY_LEN (v4_fib->fib_entry_by_dst_address); i++) { - uword *hash = fib->v4.fib_entry_by_dst_address[i]; + uword *hash = v4_fib->fib_entry_by_dst_address[i]; hash_pair_t *p; ip4_route_t x; -- cgit 1.2.3-korg From f060930e7f71bde107dc8f07c8693e2b3df99c46 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 11 Apr 2017 09:13:39 -0700 Subject: MTRIE coverity fixes Change-Id: If98355bebe823f45b11b0908a8d7700ab273a6db Signed-off-by: Neale Ranns --- src/vnet/ip/ip4_mtrie.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/vnet/ip/ip4_mtrie.c') diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c index adc95125..e1987d56 100644 --- a/src/vnet/ip/ip4_mtrie.c +++ b/src/vnet/ip/ip4_mtrie.c @@ -271,7 +271,7 @@ set_leaf (ip4_fib_mtrie_t * m, old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index); - ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); + ASSERT (a->dst_address_length <= 32); ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); /* how many bits of the destination address are in the next PLY */ @@ -284,7 +284,8 @@ set_leaf (ip4_fib_mtrie_t * m, if (n_dst_bits_next_plies <= 0) { /* The mask length of the address to insert maps to this ply */ - uword i, n_dst_bits_this_ply, old_leaf_is_terminal; + uword i, old_leaf_is_terminal; + u32 n_dst_bits_this_ply; /* The number of bits, and hence slots/buckets, we will fill */ n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies); @@ -401,7 +402,7 @@ set_root_leaf (ip4_fib_mtrie_t * m, old_ply = &m->root_ply; - ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); + ASSERT (a->dst_address_length <= 32); /* how many bits of the destination address are in the next PLY */ n_dst_bits_next_plies = a->dst_address_length - BITS (u16); @@ -412,7 +413,8 @@ set_root_leaf (ip4_fib_mtrie_t * m, if (n_dst_bits_next_plies <= 0) { /* The mask length of the address to insert maps to this ply */ - uword i, n_dst_bits_this_ply, old_leaf_is_terminal; + uword i, old_leaf_is_terminal; + u32 n_dst_bits_this_ply; /* The number of bits, and hence slots/buckets, we will fill */ n_dst_bits_this_ply = 16 - a->dst_address_length; @@ -515,7 +517,7 @@ unset_leaf (ip4_fib_mtrie_t * m, i32 i, n_dst_bits_this_ply, old_leaf_is_terminal; u8 dst_byte; - ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); + ASSERT (a->dst_address_length <= 32); ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); n_dst_bits_next_plies = @@ -588,7 +590,7 @@ unset_root_leaf (ip4_fib_mtrie_t * m, u16 dst_byte; ip4_fib_mtrie_16_ply_t *old_ply; - ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); + ASSERT (a->dst_address_length <= 32); old_ply = &m->root_ply; n_dst_bits_next_plies = a->dst_address_length - BITS (u16); -- cgit 1.2.3-korg From 6ff05499ab14d1dd5979a210847612fa2b293cf4 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 6 Jun 2017 06:52:14 -0700 Subject: Fix coverity error in IP4 Mtrie. 
Change-Id: I586a758a8b4b0ea5ca030b2df2796f5acb49e154
Signed-off-by: Neale Ranns
---
 src/vnet/ip/ip4_mtrie.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/vnet/ip/ip4_mtrie.c')

diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
index e1987d56..cc82384d 100644
--- a/src/vnet/ip/ip4_mtrie.c
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -284,8 +284,8 @@ set_leaf (ip4_fib_mtrie_t * m,
   if (n_dst_bits_next_plies <= 0)
     {
       /* The mask length of the address to insert maps to this ply */
-      uword i, old_leaf_is_terminal;
-      u32 n_dst_bits_this_ply;
+      uword old_leaf_is_terminal;
+      u32 i, n_dst_bits_this_ply;

       /* The number of bits, and hence slots/buckets, we will fill */
       n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
@@ -413,8 +413,8 @@ set_root_leaf (ip4_fib_mtrie_t * m,
   if (n_dst_bits_next_plies <= 0)
     {
       /* The mask length of the address to insert maps to this ply */
-      uword i, old_leaf_is_terminal;
-      u32 n_dst_bits_this_ply;
+      uword old_leaf_is_terminal;
+      u32 i, n_dst_bits_this_ply;

       /* The number of bits, and hence slots/buckets, we will fill */
       n_dst_bits_this_ply = 16 - a->dst_address_length;
--
cgit 1.2.3-korg
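
Editor's note: the series above replaces the original 8-8-8-8 mtrie with a 16-8-8 layout: a 64K-entry root ply embedded directly in ip4_fib_mtrie_t (so step one needs no pool dereference), further 8-bit plies drawn from the global ip4_ply_pool, and 32-bit leaves that encode either a terminal (1 + 2*load-balance index) or the next ply (0 + 2*ply index). The standalone sketch below models only that lookup path under those assumptions; every toy_* name is invented for illustration, and the way the two leading address bytes are combined into the 16-bit index is simplified relative to the real code, which indexes with the raw as_u16[0] load.

/*
 * Minimal sketch (not VPP code) of the 16-8-8 mtrie lookup.
 * Leaf encoding, per the comments in ip4_mtrie.h above:
 *   terminal:      1 + 2 * load_balance_index
 *   non-terminal:  0 + 2 * next_ply_index
 */
#include <stdint.h>

#define TOY_PLY16_SIZE (1 << 16)
#define TOY_PLY8_SIZE  (1 << 8)

typedef uint32_t toy_leaf_t;

typedef struct { toy_leaf_t leaves[TOY_PLY8_SIZE]; } toy_ply8_t;
typedef struct { toy_leaf_t leaves[TOY_PLY16_SIZE]; } toy_ply16_t;

typedef struct
{
  toy_ply16_t root;   /* embedded, so step one needs no pool dereference */
  toy_ply8_t *plys;   /* stand-in for the global 8-bit ply pool */
} toy_mtrie_t;

static inline int
toy_leaf_is_terminal (toy_leaf_t l)
{
  return l & 1;       /* low bit set => carries a load-balance index */
}

static inline uint32_t
toy_leaf_get_index (toy_leaf_t l)
{
  return l >> 1;      /* LB index for terminals, ply index otherwise */
}

/* Full lookup: at most three ply accesses, even for a /32. */
static uint32_t
toy_mtrie_lookup (const toy_mtrie_t * m, const uint8_t dst[4])
{
  /* Step one: 16-bit stride over the embedded root ply. */
  uint16_t idx16 = (uint16_t) ((dst[0] << 8) | dst[1]);
  toy_leaf_t leaf = m->root.leaves[idx16];

  /* Steps two and three: 8-bit strides, skipped once a terminal is hit. */
  if (!toy_leaf_is_terminal (leaf))
    leaf = m->plys[toy_leaf_get_index (leaf)].leaves[dst[2]];
  if (!toy_leaf_is_terminal (leaf))
    leaf = m->plys[toy_leaf_get_index (leaf)].leaves[dst[3]];

  return toy_leaf_get_index (leaf);   /* load-balance/adjacency index */
}

If this sketch is roughly right, it also explains why the patches delete the byte-index-1 calls to ip4_fib_mtrie_lookup_step in ip4_forward.c, ip4_source_check.c and ip4_fib.h: the first two address bytes are now consumed in a single step over the embedded root ply, trading a larger root table for one fewer dependent memory access per lookup.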