aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/ip/ip4_mtrie.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/ip/ip4_mtrie.c')
-rw-r--r--src/vnet/ip/ip4_mtrie.c568
1 files changed, 568 insertions, 0 deletions
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
new file mode 100644
index 00000000..6e3d0e80
--- /dev/null
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -0,0 +1,568 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_mtrie.c: ip4 mtrie fib
+ *
+ * Copyright (c) 2012 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/fib/fib_entry.h>
+
+/*
+ * Reset a ply so that every slot holds the leaf `init` and records
+ * `prefix_len` as the prefix length that produced it.
+ *
+ * p          ply to (re)initialise.
+ * init       leaf value written to every slot.
+ * prefix_len value written to every dst_address_bits_of_leaves entry.
+ */
+static void
+ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init,
+          uword prefix_len)
+{
+  uword slot;
+
+  /* All slots get the same leaf, so either none or all are non-empty. */
+  if (ip4_fib_mtrie_leaf_is_empty (init))
+    p->n_non_empty_leafs = 0;
+  else
+    p->n_non_empty_leafs = ARRAY_LEN (p->leaves);
+
+  memset (p->dst_address_bits_of_leaves, prefix_len,
+          sizeof (p->dst_address_bits_of_leaves));
+
+#ifdef CLIB_HAVE_VEC128
+  {
+    /* Fill the leaf array four leaves at a time through its vector view. */
+    u32x4 fill;
+
+#ifndef __ALTIVEC__
+    fill = u32x4_splat (init);
+#else
+    {
+      /* Build the splat by hand, one lane at a time. */
+      u32x4_union_t lanes;
+
+      for (slot = 0; slot < 4; slot++)
+        lanes.as_u32[slot] = init;
+      fill = lanes.as_u32x4;
+    }
+#endif
+
+    for (slot = 0; slot < ARRAY_LEN (p->leaves_as_u32x4); slot++)
+      p->leaves_as_u32x4[slot] = fill;
+  }
+#else
+  /* Scalar fallback: write each leaf individually. */
+  for (slot = 0; slot < ARRAY_LEN (p->leaves); slot++)
+    p->leaves[slot] = init;
+#endif
+}
+
+/*
+ * Allocate a ply from m->ply_pool, initialise all of its slots to
+ * init_leaf / prefix_len, and return a leaf that refers to the new ply.
+ *
+ * Note: allocating from the pool may move it, so callers must not keep
+ * ply pointers across this call (set_leaf refetches for this reason).
+ */
+static ip4_fib_mtrie_leaf_t
+ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf,
+            uword prefix_len)
+{
+  ip4_fib_mtrie_ply_t *fresh;
+  uword fresh_index;
+
+  /* The alignment requested is the size of one ply. */
+  pool_get_aligned (m->ply_pool, fresh, sizeof (fresh[0]));
+  ply_init (fresh, init_leaf, prefix_len);
+
+  /* Leaves refer to plies by their index in the pool. */
+  fresh_index = fresh - m->ply_pool;
+  return ip4_fib_mtrie_leaf_set_next_ply_index (fresh_index);
+}
+
+/*
+ * Resolve a non-terminal leaf to the ply it refers to.
+ * Pool index 0 is the root ply; a leaf must never refer to it.
+ */
+always_inline ip4_fib_mtrie_ply_t *
+get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
+{
+  uword ply_index = ip4_fib_mtrie_leaf_get_next_ply_index (l);
+
+  ASSERT (0 != ply_index);
+  return pool_elt_at_index (m->ply_pool, ply_index);
+}
+
+/*
+ * Release the ply `p` and, recursively, every ply reachable from it.
+ * The root ply (pool index 0) is never returned to the pool; it is
+ * re-initialised to all-empty leaves instead.
+ */
+static void
+ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
+{
+  uword slot;
+  uword freeing_root = (p == m->ply_pool);
+
+  /* Depth first: dispose of the sub-plies before this ply itself. */
+  for (slot = 0; slot < ARRAY_LEN (p->leaves); slot++)
+    {
+      ip4_fib_mtrie_leaf_t leaf = p->leaves[slot];
+
+      if (!ip4_fib_mtrie_leaf_is_next_ply (leaf))
+        continue;
+      ply_free (m, get_next_ply_for_leaf (m, leaf));
+    }
+
+  if (!freeing_root)
+    {
+      pool_put (m->ply_pool, p);
+      return;
+    }
+
+  ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0);
+}
+
+/*
+ * Tear down the whole mtrie: all sub-plies are returned to the pool and
+ * the root ply (pool index 0) is reset to empty by ply_free.
+ */
+void
+ip4_fib_free (ip4_fib_mtrie_t * m)
+{
+  ply_free (m, pool_elt_at_index (m->ply_pool, 0));
+}
+
+/*
+ * Look up `dst` in the mtrie and return the adjacency index held by the
+ * terminal leaf that matches it.
+ *
+ * The walk starts at the root ply and consumes one address byte per
+ * ply. It stops at the first terminal leaf; the leaf selected by the
+ * fourth byte is required (by assertion) to be terminal.
+ */
+u32
+ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst)
+{
+  ip4_fib_mtrie_ply_t *ply = pool_elt_at_index (m->ply_pool, 0);
+  ip4_fib_mtrie_leaf_t leaf;
+  uword byte_index;
+
+  /* Bytes 0..2 may either terminate the walk or lead to a deeper ply. */
+  for (byte_index = 0; byte_index < 3; byte_index++)
+    {
+      leaf = ply->leaves[dst.as_u8[byte_index]];
+      if (ip4_fib_mtrie_leaf_is_terminal (leaf))
+        return ip4_fib_mtrie_leaf_get_adj_index (leaf);
+
+      ply = get_next_ply_for_leaf (m, leaf);
+    }
+
+  /* Byte 3 is the last level, so the leaf has to be terminal. */
+  leaf = ply->leaves[dst.as_u8[3]];
+  ASSERT (ip4_fib_mtrie_leaf_is_terminal (leaf));
+  return ip4_fib_mtrie_leaf_get_adj_index (leaf);
+}
+
+typedef struct /* Route description handed down the set_leaf / unset_leaf recursion. */
+{
+ ip4_address_t dst_address; /* Route prefix; ip4_fib_mtrie_add_del_route masks it to the prefix length before use. */
+ u32 dst_address_length; /* Prefix length in bits; set_leaf and unset_leaf assert it is in 1..32. */
+ u32 adj_index; /* Index encoded into the terminal leaf via ip4_fib_mtrie_leaf_set_adj_index. */
+} ip4_fib_mtrie_set_unset_leaf_args_t;
+
+/*
+ * Push new_leaf down into `ply` and every ply below it.
+ *
+ * Each terminal slot whose recorded prefix length is not longer than
+ * new_leaf_dst_address_bits is overwritten with new_leaf; slots that
+ * hold a longer (more specific) prefix are left alone. Non-terminal
+ * slots are followed recursively.
+ *
+ * new_leaf must be a terminal, non-empty leaf.
+ */
+static void
+set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
+                                 ip4_fib_mtrie_ply_t * ply,
+                                 ip4_fib_mtrie_leaf_t new_leaf,
+                                 uword new_leaf_dst_address_bits)
+{
+  uword slot;
+
+  ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
+  ASSERT (!ip4_fib_mtrie_leaf_is_empty (new_leaf));
+
+  for (slot = 0; slot < ARRAY_LEN (ply->leaves); slot++)
+    {
+      ip4_fib_mtrie_leaf_t current = ply->leaves[slot];
+
+      /* A slot that points at a sub-ply is handled by descending into it. */
+      if (!ip4_fib_mtrie_leaf_is_terminal (current))
+        {
+          set_ply_with_more_specific_leaf (m,
+                                           get_next_ply_for_leaf (m,
+                                                                  current),
+                                           new_leaf,
+                                           new_leaf_dst_address_bits);
+          continue;
+        }
+
+      /* The slot already holds a longer prefix: keep it. */
+      if (new_leaf_dst_address_bits < ply->dst_address_bits_of_leaves[slot])
+        continue;
+
+      /* Swap the leaf in with a single atomic operation. */
+      __sync_val_compare_and_swap (&ply->leaves[slot], current, new_leaf);
+      ASSERT (ply->leaves[slot] == new_leaf);
+      ply->dst_address_bits_of_leaves[slot] = new_leaf_dst_address_bits;
+      /* Only a previously empty slot changes the non-empty count. */
+      ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (current);
+    }
+}
+
+/*
+ * Insert the route described by `a` into the ply at pool index
+ * old_ply_index. That ply is the one selected after consuming
+ * dst_address_byte_index bytes of the destination address.
+ *
+ * The ply is identified by index rather than by pointer because
+ * ply_create may move the pool; the pointer is re-derived where needed.
+ *
+ * If the prefix ends inside this ply's address byte, every slot the
+ * prefix covers is updated (or, for slots that point to sub-plies, the
+ * new leaf is pushed down into them). Otherwise the slot for this
+ * byte is followed - creating a sub-ply first if it is still terminal -
+ * and the insert continues one byte deeper.
+ */
+static void
+set_leaf (ip4_fib_mtrie_t * m,
+          ip4_fib_mtrie_set_unset_leaf_args_t * a,
+          u32 old_ply_index, u32 dst_address_byte_index)
+{
+  ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
+  i32 n_dst_bits_next_plies;
+  u8 dst_byte;
+
+  ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
+  ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
+
+  /* Prefix bits that remain for plies below this one. */
+  n_dst_bits_next_plies =
+    a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
+
+  dst_byte = a->dst_address.as_u8[dst_address_byte_index];
+
+  /* Number of bits next plies <= 0 => insert leaves this ply. */
+  if (n_dst_bits_next_plies <= 0)
+    {
+      uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
+
+      /*
+       * Number of "don't care" bits in this byte, i.e. log2 of the number
+       * of slots to fill. A ply is indexed by a single address byte, so
+       * never let this exceed 8: otherwise the loop below would walk past
+       * the slots that one byte can address. unset_leaf applies the same
+       * clamp.
+       */
+      n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
+      ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
+               pow2_mask (n_dst_bits_this_ply)) == 0);
+
+      for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
+        {
+          ip4_fib_mtrie_ply_t *old_ply, *new_ply;
+
+          old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+
+          old_leaf = old_ply->leaves[i];
+          old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+          /* Is leaf to be inserted more specific? */
+          if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
+            {
+              new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+              if (old_leaf_is_terminal)
+                {
+                  old_ply->dst_address_bits_of_leaves[i] =
+                    a->dst_address_length;
+                  /* Swap the leaf in with a single atomic operation. */
+                  __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
+                                               new_leaf);
+                  ASSERT (old_ply->leaves[i] == new_leaf);
+                  /* Only a previously empty slot changes the count. */
+                  old_ply->n_non_empty_leafs +=
+                    ip4_fib_mtrie_leaf_is_empty (old_leaf);
+                  ASSERT (old_ply->n_non_empty_leafs <=
+                          ARRAY_LEN (old_ply->leaves));
+                }
+              else
+                {
+                  /* Existing leaf points to another ply. We need to place
+                     new_leaf into all more specific slots. */
+                  new_ply = get_next_ply_for_leaf (m, old_leaf);
+                  set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+                                                   a->dst_address_length);
+                }
+            }
+
+          else if (!old_leaf_is_terminal)
+            {
+              /* Slot records a longer prefix but leads to a sub-ply:
+                 continue the insert one level down. */
+              new_ply = get_next_ply_for_leaf (m, old_leaf);
+              set_leaf (m, a, new_ply - m->ply_pool,
+                        dst_address_byte_index + 1);
+            }
+        }
+    }
+  else
+    {
+      ip4_fib_mtrie_ply_t *old_ply, *new_ply;
+
+      old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+      old_leaf = old_ply->leaves[dst_byte];
+      if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+        {
+          /* Replace the terminal leaf by a sub-ply whose slots all start
+             out as copies of that leaf (same value, same prefix length). */
+          new_leaf =
+            ply_create (m, old_leaf,
+                        old_ply->dst_address_bits_of_leaves[dst_byte]);
+          new_ply = get_next_ply_for_leaf (m, new_leaf);
+
+          /* Refetch since ply_create may move pool. */
+          old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+
+          __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
+                                       new_leaf);
+          ASSERT (old_ply->leaves[dst_byte] == new_leaf);
+          /* Slots that point to a sub-ply record a prefix length of 0. */
+          old_ply->dst_address_bits_of_leaves[dst_byte] = 0;
+
+          old_ply->n_non_empty_leafs -=
+            ip4_fib_mtrie_leaf_is_non_empty (old_leaf);
+          ASSERT (old_ply->n_non_empty_leafs >= 0);
+
+          /* Account for the ply we just created. */
+          old_ply->n_non_empty_leafs += 1;
+        }
+      else
+        new_ply = get_next_ply_for_leaf (m, old_leaf);
+
+      set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1);
+    }
+}
+
+/*
+ * Remove the route described by `a` from old_ply, the ply selected
+ * after consuming dst_address_byte_index bytes of the destination.
+ *
+ * Every slot of old_ply covered by the prefix is visited. A slot is
+ * cleared when it holds exactly the leaf for a->adj_index, or when it
+ * points to a sub-ply that the recursive call emptied and freed.
+ *
+ * Returns 1 if old_ply itself became empty and was returned to the pool
+ * (never done for the top-level ply, dst_address_byte_index == 0),
+ * otherwise 0.
+ */
+static uword
+unset_leaf (ip4_fib_mtrie_t * m,
+            ip4_fib_mtrie_set_unset_leaf_args_t * a,
+            ip4_fib_mtrie_ply_t * old_ply, u32 dst_address_byte_index)
+{
+  ip4_fib_mtrie_leaf_t leaf, leaf_to_delete;
+  i32 bits_below, bits_here;
+  i32 slot, first_slot, end_slot;
+  u8 masked_byte;
+
+  ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
+  ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
+
+  /* Prefix bits that remain for plies below this one. */
+  bits_below =
+    a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
+
+  /* Drop the bits of this byte that lie beyond the prefix. */
+  masked_byte = a->dst_address.as_u8[dst_address_byte_index];
+  if (bits_below < 0)
+    masked_byte &= ~pow2_mask (-bits_below);
+
+  /* log2 of the number of slots covered in this ply, at most 8. */
+  if (bits_below <= 0)
+    bits_here = clib_min (8, -bits_below);
+  else
+    bits_here = 0;
+
+  leaf_to_delete = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+  first_slot = masked_byte;
+  end_slot = masked_byte + (1 << bits_here);
+
+  for (slot = first_slot; slot < end_slot; slot++)
+    {
+      leaf = old_ply->leaves[slot];
+
+      if (leaf != leaf_to_delete)
+        {
+          /* Some other route's terminal leaf: not ours to remove. */
+          if (ip4_fib_mtrie_leaf_is_terminal (leaf))
+            continue;
+
+          /* Sub-ply: the slot is cleared only if the sub-ply went away. */
+          if (!unset_leaf (m, a, get_next_ply_for_leaf (m, leaf),
+                           dst_address_byte_index + 1))
+            continue;
+        }
+
+      old_ply->leaves[slot] = IP4_FIB_MTRIE_LEAF_EMPTY;
+      old_ply->dst_address_bits_of_leaves[slot] = 0;
+
+      /* Either way, what was removed counted as a non-empty leaf. */
+      ASSERT (!ip4_fib_mtrie_leaf_is_empty (leaf));
+      old_ply->n_non_empty_leafs -= 1;
+      ASSERT (old_ply->n_non_empty_leafs >= 0);
+
+      /* An empty ply is given back to the pool, except at the top level. */
+      if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
+        {
+          pool_put (m->ply_pool, old_ply);
+          return 1;
+        }
+    }
+
+  /* old_ply is still in use. */
+  return 0;
+}
+
+/*
+ * Initialise an mtrie: zero the structure, mark the default leaf as
+ * empty and create the root ply with all slots empty. The root ply is
+ * expected to land at pool index 0, which the rest of the file relies on.
+ */
+void
+ip4_mtrie_init (ip4_fib_mtrie_t * m)
+{
+  ip4_fib_mtrie_leaf_t root_leaf;
+
+  memset (m, 0, sizeof (*m));
+  m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
+
+  root_leaf = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY,
+                          /* dst_address_bits_of_leaves */ 0);
+  ASSERT (0 == ip4_fib_mtrie_leaf_get_next_ply_index (root_leaf));
+}
+
+/*
+ * Add or delete a route in the mtrie of `fib`.
+ *
+ * fib                 FIB whose mtrie is updated.
+ * dst_address         route prefix; bits beyond the prefix length are
+ *                     masked off here.
+ * dst_address_length  prefix length in bits (0..32).
+ * adj_index           index stored in the leaves of the route.
+ * is_del              non-zero to delete, zero to add.
+ *
+ * A zero-length prefix only touches m->default_leaf. Deleting a longer
+ * prefix removes its leaves and then re-inserts the next less specific
+ * route found in fib->fib_entry_by_dst_address, so that the slots just
+ * emptied fall back to the covering route.
+ */
+void
+ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
+                             ip4_address_t dst_address,
+                             u32 dst_address_length,
+                             u32 adj_index, u32 is_del)
+{
+  ip4_fib_mtrie_t *m = &fib->mtrie;
+  ip4_fib_mtrie_set_unset_leaf_args_t a;
+  ip4_main_t *im = &ip4_main;
+  uword i;
+
+  ASSERT (m->ply_pool != 0);
+  /* The length indexes im->fib_masks below; longer prefixes are invalid. */
+  ASSERT (dst_address_length <= 32);
+
+  /* Honor dst_address_length. Fib masks are in network byte order */
+  dst_address.as_u32 &= im->fib_masks[dst_address_length];
+  a.dst_address = dst_address;
+  a.dst_address_length = dst_address_length;
+  a.adj_index = adj_index;
+
+  /* The default route is kept outside the plies. */
+  if (dst_address_length == 0)
+    {
+      if (is_del)
+        m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
+      else
+        m->default_leaf = ip4_fib_mtrie_leaf_set_adj_index (adj_index);
+      return;
+    }
+
+  if (!is_del)
+    {
+      set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
+      return;
+    }
+
+  /* Look the root ply up right where it is used, so that no ply pointer
+     is held across calls that can grow the pool. */
+  unset_leaf (m, &a, pool_elt_at_index (m->ply_pool, 0), 0);
+
+  /* Find next less specific route and insert into mtrie. */
+  for (i = dst_address_length - 1; i >= 1; i--)
+    {
+      uword *p;
+      index_t lbi;
+      ip4_address_t key;
+
+      if (!fib->fib_entry_by_dst_address[i])
+        continue;
+
+      key.as_u32 = dst_address.as_u32 & im->fib_masks[i];
+      p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32);
+      if (!p)
+        continue;
+
+      lbi = fib_entry_contribute_ip_forwarding (p[0])->dpoi_index;
+      if (INDEX_INVALID == lbi)
+        continue;
+
+      a.dst_address = key;
+      a.adj_index = lbi;
+      a.dst_address_length = i;
+
+      set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
+      break;
+    }
+}
+
+/*
+ * Returns the number of bytes used by the ply `p` and all plies below
+ * it. Passing p == 0 means "start at the root ply"; in that case 0 is
+ * returned if pool index 0 is currently free.
+ */
+static uword
+mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
+{
+  uword total, slot;
+
+  if (p == 0)
+    {
+      if (pool_is_free_index (m->ply_pool, 0))
+        return 0;
+      p = pool_elt_at_index (m->ply_pool, 0);
+    }
+
+  /* This ply itself, plus whatever hangs below each non-terminal slot. */
+  total = sizeof (*p);
+  for (slot = 0; slot < ARRAY_LEN (p->leaves); slot++)
+    {
+      ip4_fib_mtrie_leaf_t leaf = p->leaves[slot];
+
+      if (!ip4_fib_mtrie_leaf_is_next_ply (leaf))
+        continue;
+      total += mtrie_memory_usage (m, get_next_ply_for_leaf (m, leaf));
+    }
+
+  return total;
+}
+
+/*
+ * format() helper for a single leaf. Consumes one ip4_fib_mtrie_leaf_t
+ * argument and appends "miss", "adj <index>" or "next ply <index>".
+ */
+static u8 *
+format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
+{
+  ip4_fib_mtrie_leaf_t leaf = va_arg (*va, ip4_fib_mtrie_leaf_t);
+
+  if (ip4_fib_mtrie_leaf_is_empty (leaf))
+    return format (s, "miss");
+
+  if (ip4_fib_mtrie_leaf_is_terminal (leaf))
+    return format (s, "adj %d", ip4_fib_mtrie_leaf_get_adj_index (leaf));
+
+  return format (s, "next ply %d",
+                 ip4_fib_mtrie_leaf_get_next_ply_index (leaf));
+}
+
+/*
+ * format() helper that prints one ply and, recursively, its sub-plies.
+ *
+ * Arguments consumed, in order:
+ *   ip4_fib_mtrie_t *  the mtrie,
+ *   u32                base address of the ply, in host byte order,
+ *   u32                pool index of the ply,
+ *   u32                index of the address byte this ply decodes.
+ *
+ * One line is printed per non-empty slot; slots that refer to a sub-ply
+ * are followed by that sub-ply's own listing.
+ */
+static u8 *
+format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
+{
+  ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+  u32 base_address = va_arg (*va, u32);
+  u32 ply_index = va_arg (*va, u32);
+  u32 dst_address_byte_index = va_arg (*va, u32);
+  ip4_fib_mtrie_ply_t *ply = pool_elt_at_index (m->ply_pool, ply_index);
+  uword slot;
+  uword indent = format_get_indent (s);
+
+  s = format (s, "ply index %d, %d non-empty leaves", ply_index,
+              ply->n_non_empty_leafs);
+
+  for (slot = 0; slot < ARRAY_LEN (ply->leaves); slot++)
+    {
+      ip4_fib_mtrie_leaf_t leaf = ply->leaves[slot];
+      u32 host_addr, prefix_bits;
+      ip4_address_t net_addr;
+
+      if (ip4_fib_mtrie_leaf_is_empty (leaf))
+        continue;
+
+      /* Place the slot number into the byte this ply is responsible for. */
+      host_addr =
+        base_address + (slot << (24 - 8 * dst_address_byte_index));
+      net_addr.as_u32 = clib_host_to_net_u32 (host_addr);
+
+      /* Terminal leaves show their recorded prefix length; links to a
+         sub-ply show the number of bits decoded so far. */
+      prefix_bits = ip4_fib_mtrie_leaf_is_terminal (leaf)
+        ? ply->dst_address_bits_of_leaves[slot]
+        : 8 * (1 + dst_address_byte_index);
+
+      s = format (s, "\n%U%20U %U",
+                  format_white_space, indent + 2,
+                  format_ip4_address_and_length, &net_addr, prefix_bits,
+                  format_ip4_fib_mtrie_leaf, leaf);
+
+      if (ip4_fib_mtrie_leaf_is_next_ply (leaf))
+        s = format (s, "\n%U%U",
+                    format_white_space, indent + 2,
+                    format_ip4_fib_mtrie_ply, m, host_addr,
+                    ip4_fib_mtrie_leaf_get_next_ply_index (leaf),
+                    dst_address_byte_index + 1);
+    }
+
+  return s;
+}
+
+/*
+ * format() function for a whole mtrie. Consumes one ip4_fib_mtrie_t *
+ * argument and prints the number of plies, the memory they use and,
+ * if there is at least one ply, the contents starting at the root ply.
+ */
+u8 *
+format_ip4_fib_mtrie (u8 * s, va_list * va)
+{
+  ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+
+  s = format (s, "%d plies, memory usage %U",
+              pool_elts (m->ply_pool),
+              format_memory_size, mtrie_memory_usage (m, 0));
+
+  if (pool_elts (m->ply_pool) > 0)
+    {
+      /*
+       * format_ip4_fib_mtrie_ply fetches its base address with
+       * va_arg (*va, u32), so a u32 has to be passed here - not an
+       * ip4_address_t aggregate, whose type would not match.
+       */
+      u32 base_address = 0;
+
+      s = format (s, "\n %U", format_ip4_fib_mtrie_ply, m, base_address,
+                  /* ply_index */ 0, /* dst_address_byte_index */ 0);
+    }
+
+  return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */