summaryrefslogtreecommitdiffstats
path: root/src/vnet/adj
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/adj')
-rw-r--r--src/vnet/adj/adj.c454
-rw-r--r--src/vnet/adj/adj.h122
-rw-r--r--src/vnet/adj/adj_glean.c285
-rw-r--r--src/vnet/adj/adj_glean.h61
-rw-r--r--src/vnet/adj/adj_internal.h104
-rw-r--r--src/vnet/adj/adj_l2.c194
-rw-r--r--src/vnet/adj/adj_l2.h24
-rw-r--r--src/vnet/adj/adj_midchain.c559
-rw-r--r--src/vnet/adj/adj_midchain.h102
-rw-r--r--src/vnet/adj/adj_nbr.c1087
-rw-r--r--src/vnet/adj/adj_nbr.h176
-rw-r--r--src/vnet/adj/adj_rewrite.c53
-rw-r--r--src/vnet/adj/adj_rewrite.h49
-rw-r--r--src/vnet/adj/adj_types.h53
14 files changed, 3323 insertions, 0 deletions
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
new file mode 100644
index 00000000000..e740c4cb79b
--- /dev/null
+++ b/src/vnet/adj/adj.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/adj/adj_glean.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/fib/fib_node_list.h>
+
+/*
+ * Special Adj with index zero. we need to define this since the v4 mtrie
+ * assumes an index of 0 implies the ply is empty. therefore all 'real'
+ * adjs need a non-zero index.
+ */
+static ip_adjacency_t *special_v4_miss_adj_with_index_zero;
+
+/* Adjacency packet/byte counters indexed by adjacency index. */
+vlib_combined_counter_main_t adjacency_counters;
+
+/*
+ * the single adj pool
+ */
+ip_adjacency_t *adj_pool;
+
+always_inline void
+adj_poison (ip_adjacency_t * adj)
+{
+ if (CLIB_DEBUG > 0)
+ {
+ memset (adj, 0xfe, sizeof (adj[0]));
+ }
+}
+
+ip_adjacency_t *
+adj_alloc (fib_protocol_t proto)
+{
+ ip_adjacency_t *adj;
+
+ pool_get(adj_pool, adj);
+
+ adj_poison(adj);
+
+ /* Make sure certain fields are always initialized. */
+ /* Validate adjacency counters. */
+ vlib_validate_combined_counter(&adjacency_counters,
+ adj_get_index(adj));
+
+ adj->rewrite_header.sw_if_index = ~0;
+ adj->mcast_group_index = ~0;
+ adj->saved_lookup_next_index = 0;
+ adj->n_adj = 1;
+ adj->lookup_next_index = 0;
+
+ fib_node_init(&adj->ia_node,
+ FIB_NODE_TYPE_ADJ);
+ adj->ia_nh_proto = proto;
+ adj->ia_flags = 0;
+
+ ip4_main.lookup_main.adjacency_heap = adj_pool;
+ ip6_main.lookup_main.adjacency_heap = adj_pool;
+
+ return (adj);
+}
+
+static int
+adj_index_is_special (adj_index_t adj_index)
+{
+ if (ADJ_INDEX_INVALID == adj_index)
+ return (!0);
+
+ return (0);
+}
+
+/**
+ * @brief Pretty print helper function for formatting specific adjacencies.
+ * @param s - input string to format
+ * @param args - other args passed to format function such as:
+ * - vnet_main_t
+ * - ip_lookup_main_t
+ * - adj_index
+ */
+u8 *
+format_ip_adjacency (u8 * s, va_list * args)
+{
+ format_ip_adjacency_flags_t fiaf;
+ ip_adjacency_t * adj;
+ u32 adj_index;
+
+ adj_index = va_arg (*args, u32);
+ fiaf = va_arg (*args, format_ip_adjacency_flags_t);
+ adj = adj_get(adj_index);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_REWRITE:
+ s = format (s, "%U", format_adj_nbr, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_ARP:
+ s = format (s, "%U", format_adj_nbr_incomplete, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_GLEAN:
+ s = format (s, "%U", format_adj_glean, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ s = format (s, "%U", format_adj_midchain, adj_index, 2);
+ break;
+ default:
+ break;
+ }
+
+ if (fiaf & FORMAT_IP_ADJACENCY_DETAIL)
+ {
+ s = format (s, "\n locks:%d", adj->ia_node.fn_locks);
+ s = format (s, " node:[%d]:%U",
+ adj->rewrite_header.node_index,
+ format_vlib_node_name, vlib_get_main(),
+ adj->rewrite_header.node_index);
+ s = format (s, " next:[%d]:%U",
+ adj->rewrite_header.next_index,
+ format_vlib_next_node_name,
+ vlib_get_main(),
+ adj->rewrite_header.node_index,
+ adj->rewrite_header.next_index);
+ s = format(s, "\n children:\n ");
+ s = fib_node_children_format(adj->ia_node.fn_children, s);
+ }
+
+ return s;
+}
+
+/*
+ * adj_last_lock_gone
+ *
+ * last lock/reference to the adj has gone, we no longer need it.
+ */
+static void
+adj_last_lock_gone (ip_adjacency_t *adj)
+{
+ vlib_main_t * vm = vlib_get_main();
+
+ ASSERT(0 == fib_node_list_get_size(adj->ia_node.fn_children));
+ ADJ_DBG(adj, "last-lock-gone");
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ dpo_reset(&adj->sub_type.midchain.next_dpo);
+ /* FALL THROUGH */
+ case IP_LOOKUP_NEXT_ARP:
+ case IP_LOOKUP_NEXT_REWRITE:
+ /*
+ * complete and incomplete nbr adjs
+ */
+ adj_nbr_remove(adj_get_index(adj),
+ adj->ia_nh_proto,
+ adj->ia_link,
+ &adj->sub_type.nbr.next_hop,
+ adj->rewrite_header.sw_if_index);
+ break;
+ case IP_LOOKUP_NEXT_GLEAN:
+ adj_glean_remove(adj->ia_nh_proto,
+ adj->rewrite_header.sw_if_index);
+ break;
+ default:
+ /*
+ * type not stored in any DB from which we need to remove it
+ */
+ break;
+ }
+
+ vlib_worker_thread_barrier_release(vm);
+
+ fib_node_deinit(&adj->ia_node);
+ pool_put(adj_pool, adj);
+}
+
+void
+adj_lock (adj_index_t adj_index)
+{
+ ip_adjacency_t *adj;
+
+ if (adj_index_is_special(adj_index))
+ {
+ return;
+ }
+
+ adj = adj_get(adj_index);
+ ASSERT(adj);
+
+ ADJ_DBG(adj, "lock");
+ fib_node_lock(&adj->ia_node);
+}
+
+void
+adj_unlock (adj_index_t adj_index)
+{
+ ip_adjacency_t *adj;
+
+ if (adj_index_is_special(adj_index))
+ {
+ return;
+ }
+
+ adj = adj_get(adj_index);
+ ASSERT(adj);
+
+ ADJ_DBG(adj, "unlock");
+ ASSERT(adj);
+
+ fib_node_unlock(&adj->ia_node);
+}
+
+u32
+adj_child_add (adj_index_t adj_index,
+ fib_node_type_t child_type,
+ fib_node_index_t child_index)
+{
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+ if (adj_index_is_special(adj_index))
+ {
+ return (~0);
+ }
+
+ return (fib_node_child_add(FIB_NODE_TYPE_ADJ,
+ adj_index,
+ child_type,
+ child_index));
+}
+
+void
+adj_child_remove (adj_index_t adj_index,
+ u32 sibling_index)
+{
+ if (adj_index_is_special(adj_index))
+ {
+ return;
+ }
+
+ fib_node_child_remove(FIB_NODE_TYPE_ADJ,
+ adj_index,
+ sibling_index);
+}
+
+/**
+ * @brief Return the link type of the adjacency
+ */
+vnet_link_t
+adj_get_link_type (adj_index_t ai)
+{
+ const ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+
+ return (adj->ia_link);
+}
+
+/**
+ * @brief Return the sw interface index of the adjacency.
+ */
+u32
+adj_get_sw_if_index (adj_index_t ai)
+{
+ const ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+
+ return (adj->rewrite_header.sw_if_index);
+}
+
+/**
+ * @brief Return the link type of the adjacency
+ */
+const u8*
+adj_get_rewrite (adj_index_t ai)
+{
+ vnet_rewrite_header_t *rw;
+ ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+ rw = &adj->rewrite_header;
+
+ ASSERT (rw->data_bytes != 0xfefe);
+
+ return (rw->data - rw->data_bytes);
+}
+
+static fib_node_t *
+adj_get_node (fib_node_index_t index)
+{
+ ip_adjacency_t *adj;
+
+ adj = adj_get(index);
+
+ return (&adj->ia_node);
+}
+
+#define ADJ_FROM_NODE(_node) \
+ ((ip_adjacency_t*)((char*)_node - STRUCT_OFFSET_OF(ip_adjacency_t, ia_node)))
+
+static void
+adj_node_last_lock_gone (fib_node_t *node)
+{
+ adj_last_lock_gone(ADJ_FROM_NODE(node));
+}
+
+static fib_node_back_walk_rc_t
+adj_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ /*
+ * Que pasa. yo soj en el final!
+ */
+ ASSERT(0);
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/*
+ * Adjacency's graph node virtual function table
+ */
+static const fib_node_vft_t adj_vft = {
+ .fnv_get = adj_get_node,
+ .fnv_last_lock = adj_node_last_lock_gone,
+ .fnv_back_walk = adj_back_walk_notify,
+};
+
+static clib_error_t *
+adj_module_init (vlib_main_t * vm)
+{
+ fib_node_register_type(FIB_NODE_TYPE_ADJ, &adj_vft);
+
+ adj_nbr_module_init();
+ adj_glean_module_init();
+ adj_midchain_module_init();
+
+ /*
+ * one special adj to reserve index 0
+ */
+ special_v4_miss_adj_with_index_zero = adj_alloc(FIB_PROTOCOL_IP4);
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (adj_module_init);
+
+static clib_error_t *
+adj_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ adj_index_t ai = ADJ_INDEX_INVALID;
+ u32 sw_if_index = ~0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &ai))
+ ;
+ else if (unformat (input, "%U",
+ unformat_vnet_sw_interface, vnet_get_main(),
+ &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (ADJ_INDEX_INVALID != ai)
+ {
+ if (pool_is_free_index(adj_pool, ai))
+ {
+ vlib_cli_output (vm, "adjacency %d invalid", ai);
+ return 0;
+ }
+
+ vlib_cli_output (vm, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_DETAIL);
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ pool_foreach_index(ai, adj_pool,
+ ({
+ if (~0 != sw_if_index &&
+ sw_if_index != adj_get_sw_if_index(ai))
+ {
+ }
+ else
+ {
+ vlib_cli_output (vm, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_NONE);
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ return 0;
+}
+
+/*?
+ * Show all adjacencies.
+ * @cliexpar
+ * @cliexstart{sh adj}
+ * [@0]
+ * [@1] glean: loop0
+ * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (adj_show_command, static) = {
+ .path = "show adj",
+ .short_help = "show adj [<adj_index>] [interface]",
+ .function = adj_show,
+};
+
+/*
+ * DEPRECATED: DO NOT USE
+ */
+ip_adjacency_t *
+ip_add_adjacency (ip_lookup_main_t * lm,
+ ip_adjacency_t * copy_adj,
+ u32 n_adj,
+ u32 * adj_index_return)
+{
+ ip_adjacency_t * adj;
+
+ ASSERT(1==n_adj);
+
+ adj = adj_alloc(FIB_PROTOCOL_IP4);
+
+ if (copy_adj)
+ *adj = *copy_adj;
+
+ *adj_index_return = adj_get_index(adj);
+ return adj;
+}
diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h
new file mode 100644
index 00000000000..e85625db7ee
--- /dev/null
+++ b/src/vnet/adj/adj.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * An adjacency is a representation of an attached L3 peer.
+ *
+ * Adjacency Sub-types:
+ * - neighbour: a representation of an attached L3 peer.
+ * Key:{addr,interface,link/ether-type}
+ * SHARED
+ * - glean: used to drive ARP/ND for packets destined to a local sub-net.
+ * 'glean' mean use the packet's destination address as the target
+ * address in the ARP packet.
+ * UNSHARED. Only one per-interface.
+ * - midchain: a nighbour adj on a virtual/tunnel interface.
+ * - rewrite: an adj with no key, but with a rewrite string.
+ *
+ * The API to create and update the adjacency is very sub-type specific. This
+ * is intentional as it encourages the user to carefully consider which adjacency
+ * sub-type they are really using, and hence assign it data in the appropriate
+ * sub-type space in the union of sub-types. This prevents the adj becoming a
+ * disorganised dumping group for 'my features needs a u16 somewhere' data. It
+ * is important to enforce this approach as space in the adjacency is a premium,
+ * as we need it to fit in 1 cache line.
+ *
+ * the API is also based around an index to an ajdacency not a raw pointer. This
+ * is so the user doesn't suffer the same limp inducing firearm injuries that
+ * the author suffered as the adjacenices can realloc.
+ */
+
+#ifndef __ADJ_H__
+#define __ADJ_H__
+
+#include <vnet/ip/lookup.h>
+#include <vnet/adj/adj_types.h>
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_rewrite.h>
+#include <vnet/adj/adj_glean.h>
+
+/**
+ * @brief
+ * Take a reference counting lock on the adjacency
+ */
+extern void adj_lock(adj_index_t adj_index);
+/**
+ * @brief
+ * Release a reference counting lock on the adjacency
+ */
+extern void adj_unlock(adj_index_t adj_index);
+
+/**
+ * @brief
+ * Add a child dependent to an adjacency. The child will
+ * thus be informed via its registerd back-walk function
+ * when the adjacency state changes.
+ */
+extern u32 adj_child_add(adj_index_t adj_index,
+ fib_node_type_t type,
+ fib_node_index_t child_index);
+/**
+ * @brief
+ * Remove a child dependent
+ */
+extern void adj_child_remove(adj_index_t adj_index,
+ u32 sibling_index);
+
+/**
+ * @brief Walk the Adjacencies on a given interface
+ */
+extern void adj_walk (u32 sw_if_index,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Return the link type of the adjacency
+ */
+extern vnet_link_t adj_get_link_type (adj_index_t ai);
+
+/**
+ * @brief Return the sw interface index of the adjacency.
+ */
+extern u32 adj_get_sw_if_index (adj_index_t ai);
+
+/**
+ * @brief Return the link type of the adjacency
+ */
+extern const u8* adj_get_rewrite (adj_index_t ai);
+
+/**
+ * @brief
+ * The global adjacnecy pool. Exposed for fast/inline data-plane access
+ */
+extern ip_adjacency_t *adj_pool;
+
+/**
+ * @brief
+ * Adjacency packet counters
+ */
+extern vlib_combined_counter_main_t adjacency_counters;
+
+/**
+ * @brief
+ * Get a pointer to an adjacency object from its index
+ */
+static inline ip_adjacency_t *
+adj_get (adj_index_t adj_index)
+{
+ return (vec_elt_at_index(adj_pool, adj_index));
+}
+
+#endif
diff --git a/src/vnet/adj/adj_glean.c b/src/vnet/adj/adj_glean.c
new file mode 100644
index 00000000000..8d86e2a9f00
--- /dev/null
+++ b/src/vnet/adj/adj_glean.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/fib/fib_walk.h>
+
+/*
+ * The 'DB' of all glean adjs.
+ * There is only one glean per-interface per-protocol, so this is a per-interface
+ * vector
+ */
+static adj_index_t *adj_gleans[FIB_PROTOCOL_MAX];
+
+static inline vlib_node_registration_t*
+adj_get_glean_node (fib_protocol_t proto)
+{
+ switch (proto) {
+ case FIB_PROTOCOL_IP4:
+ return (&ip4_glean_node);
+ case FIB_PROTOCOL_IP6:
+ return (&ip6_glean_node);
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ ASSERT(0);
+ return (NULL);
+}
+
+/*
+ * adj_glean_add_or_lock
+ *
+ * The next_hop address here is used for source address selection in the DP.
+ * The glean adj is added to an interface's connected prefix, the next-hop
+ * passed here is the local prefix on the same interface.
+ */
+adj_index_t
+adj_glean_add_or_lock (fib_protocol_t proto,
+ u32 sw_if_index,
+ const ip46_address_t *nh_addr)
+{
+ ip_adjacency_t * adj;
+
+ vec_validate_init_empty(adj_gleans[proto], sw_if_index, ADJ_INDEX_INVALID);
+
+ if (ADJ_INDEX_INVALID == adj_gleans[proto][sw_if_index])
+ {
+ adj = adj_alloc(proto);
+
+ adj->lookup_next_index = IP_LOOKUP_NEXT_GLEAN;
+ adj->ia_nh_proto = proto;
+ adj_gleans[proto][sw_if_index] = adj_get_index(adj);
+
+ if (NULL != nh_addr)
+ {
+ adj->sub_type.glean.receive_addr = *nh_addr;
+ }
+
+ adj->rewrite_header.data_bytes = 0;
+
+ vnet_rewrite_for_sw_interface(vnet_get_main(),
+ adj_fib_proto_2_nd(proto),
+ sw_if_index,
+ adj_get_glean_node(proto)->index,
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
+ &adj->rewrite_header,
+ sizeof (adj->rewrite_data));
+ }
+ else
+ {
+ adj = adj_get(adj_gleans[proto][sw_if_index]);
+ }
+
+ adj_lock(adj_get_index(adj));
+
+ return (adj_get_index(adj));
+}
+
+void
+adj_glean_remove (fib_protocol_t proto,
+ u32 sw_if_index)
+{
+ ASSERT(sw_if_index < vec_len(adj_gleans[proto]));
+
+ adj_gleans[proto][sw_if_index] = ADJ_INDEX_INVALID;
+}
+
+static clib_error_t *
+adj_glean_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 flags)
+{
+ /*
+ * for each glean on the interface trigger a walk back to the children
+ */
+ fib_protocol_t proto;
+ ip_adjacency_t *adj;
+
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ if (sw_if_index >= vec_len(adj_gleans[proto]) ||
+ ADJ_INDEX_INVALID == adj_gleans[proto][sw_if_index])
+ continue;
+
+ adj = adj_get(adj_gleans[proto][sw_if_index]);
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ?
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_glean_interface_state_change);
+
+/**
+ * @brief Invoked on each SW interface of a HW interface when the
+ * HW interface state changes
+ */
+static void
+adj_nbr_hw_sw_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ void *arg)
+{
+ adj_glean_interface_state_change(vnm, sw_if_index, (uword) arg);
+}
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+adj_glean_hw_interface_state_change (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ /*
+ * walk SW interfaces on the HW
+ */
+ uword sw_flags;
+
+ sw_flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ?
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP :
+ 0);
+
+ vnet_hw_interface_walk_sw(vnm, hw_if_index,
+ adj_nbr_hw_sw_interface_state_change,
+ (void*) sw_flags);
+
+ return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(
+ adj_glean_hw_interface_state_change);
+
+static clib_error_t *
+adj_glean_interface_delete (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 is_add)
+{
+ /*
+ * for each glean on the interface trigger a walk back to the children
+ */
+ fib_protocol_t proto;
+ ip_adjacency_t *adj;
+
+ if (is_add)
+ {
+ /*
+ * not interested in interface additions. we will not back walk
+ * to resolve paths through newly added interfaces. Why? The control
+ * plane should have the brains to add interfaces first, then routes.
+ * So the case where there are paths with a interface that matches
+ * one just created is the case where the path resolved through an
+ * interface that was deleted, and still has not been removed. The
+ * new interface added, is NO GUARANTEE that the interface being
+ * added now, even though it may have the same sw_if_index, is the
+ * same interface that the path needs. So tough!
+ * If the control plane wants these routes to resolve it needs to
+ * remove and add them again.
+ */
+ return (NULL);
+ }
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ if (sw_if_index >= vec_len(adj_gleans[proto]) ||
+ ADJ_INDEX_INVALID == adj_gleans[proto][sw_if_index])
+ continue;
+
+ adj = adj_get(adj_gleans[proto][sw_if_index]);
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_glean_interface_delete);
+
+u8*
+format_adj_glean (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ ip_adjacency_t * adj = adj_get(index);
+
+ return (format(s, "%U-glean: %U",
+ format_fib_protocol, adj->ia_nh_proto,
+ format_vnet_sw_interface_name,
+ vnm,
+ vnet_get_sw_interface(vnm,
+ adj->rewrite_header.sw_if_index)));
+}
+
+
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+ adj_lock(dpo->dpoi_index);
+}
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+ adj_unlock(dpo->dpoi_index);
+}
+
+const static dpo_vft_t adj_glean_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_glean,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a glean
+ * object.
+ *
+ * this means that these graph nodes are ones from which a glean is the
+ * parent object in the DPO-graph.
+ */
+const static char* const glean_ip4_nodes[] =
+{
+ "ip4-glean",
+ NULL,
+};
+const static char* const glean_ip6_nodes[] =
+{
+ "ip6-glean",
+ NULL,
+};
+
+const static char* const * const glean_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = glean_ip4_nodes,
+ [DPO_PROTO_IP6] = glean_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+adj_glean_module_init (void)
+{
+ dpo_register(DPO_ADJACENCY_GLEAN, &adj_glean_dpo_vft, glean_nodes);
+}
diff --git a/src/vnet/adj/adj_glean.h b/src/vnet/adj/adj_glean.h
new file mode 100644
index 00000000000..640bd2f91eb
--- /dev/null
+++ b/src/vnet/adj/adj_glean.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @brief Glean Adjacency
+ *
+ * A gleean adjacency represent the need to discover new peers on an
+ * attached link. Packets that hit a glean adjacency will generate an
+ * ARP/ND packet addessesed to the packet's destination address.
+ * Note this is different to an incomplete neighbour adjacency, which
+ * does not send ARP/ND requests to the packet's destination address,
+ * but instead to the next-hop address of the adjacency itself.
+ */
+
+#ifndef __ADJ_GLEAN_H__
+#define __ADJ_GLEAN_H__
+
+#include <vnet/adj/adj_types.h>
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing glean adjacency
+ *
+ * @param proto
+ * The protocol for the neighbours that we wish to glean
+ *
+ * @param sw_if_index
+ * The interface on which to glean
+ *
+ * @param nh_addr
+ * the address applied to the interface on which to glean. This
+ * as the source address in packets when the ARP/ND packet is sent
+ */
+extern adj_index_t adj_glean_add_or_lock(fib_protocol_t proto,
+ u32 sw_if_index,
+ const ip46_address_t *nh_addr);
+
+/**
+ * @brief Format/display a glean adjacency.
+ */
+extern u8* format_adj_glean(u8* s, va_list *ap);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_glean_module_init(void);
+
+#endif
diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h
new file mode 100644
index 00000000000..833bc7c9e01
--- /dev/null
+++ b/src/vnet/adj/adj_internal.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_INTERNAL_H__
+#define __ADJ_INTERNAL_H__
+
+#include <vnet/adj/adj.h>
+#include <vnet/ip/ip.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/adj/adj_l2.h>
+
+
+/**
+ * big switch to turn on Adjacency debugging
+ */
+#undef ADJ_DEBUG
+
+/*
+ * Debug macro
+ */
+#ifdef ADJ_DEBUG
+#define ADJ_DBG(_adj, _fmt, _args...) \
+{ \
+ clib_warning("adj:[%d:%p]:" _fmt, \
+ _adj - adj_pool, _adj, \
+ ##_args); \
+}
+#else
+#define ADJ_DBG(_e, _fmt, _args...)
+#endif
+
+static inline u32
+adj_get_rewrite_node (vnet_link_t linkt)
+{
+ switch (linkt) {
+ case VNET_LINK_IP4:
+ return (ip4_rewrite_node.index);
+ case VNET_LINK_IP6:
+ return (ip6_rewrite_node.index);
+ case VNET_LINK_MPLS:
+ return (mpls_output_node.index);
+ case VNET_LINK_ETHERNET:
+ return (adj_l2_rewrite_node.index);
+ case VNET_LINK_ARP:
+ break;
+ }
+ ASSERT(0);
+ return (0);
+}
+
+static inline vnet_link_t
+adj_fib_proto_2_nd (fib_protocol_t fp)
+{
+ switch (fp)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (VNET_LINK_ARP);
+ case FIB_PROTOCOL_IP6:
+ return (VNET_LINK_IP6);
+ case FIB_PROTOCOL_MPLS:
+ return (VNET_LINK_MPLS);
+ }
+ return (0);
+}
+
+/**
+ * @brief
+ * Get a pointer to an adjacency object from its index
+ */
+static inline adj_index_t
+adj_get_index (ip_adjacency_t *adj)
+{
+ return (adj - adj_pool);
+}
+
+extern void adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
+ ip_lookup_next_t adj_next_index,
+ u32 complete_next_index,
+ u32 next_index,
+ u8 *rewrite);
+
+extern ip_adjacency_t * adj_alloc(fib_protocol_t proto);
+
+extern void adj_nbr_remove(adj_index_t ai,
+ fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index);
+extern void adj_glean_remove(fib_protocol_t proto,
+ u32 sw_if_index);
+
+#endif
diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c
new file mode 100644
index 00000000000..4d2dd7082f1
--- /dev/null
+++ b/src/vnet/adj/adj_l2.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/adj/adj_l2.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+
+/**
+ * @brief Trace data for a L2 Midchain
+ */
+typedef struct adj_l2_trace_t_ {
+ /** Adjacency index taken. */
+ u32 adj_index;
+} adj_l2_trace_t;
+
+static u8 *
+format_adj_l2_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ adj_l2_trace_t * t = va_arg (*args, adj_l2_trace_t *);
+
+ s = format (s, "adj-idx %d : %U",
+ t->adj_index,
+ format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE);
+ return s;
+}
+
+typedef enum adj_l2_rewrite_next_t_
+{
+ ADJ_L2_REWRITE_NEXT_DROP,
+} adj_l2_rewrite_next_t;
+
+always_inline uword
+adj_l2_rewrite_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int is_midchain)
+{
+ u32 * from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, * to_next, next_index;
+ u32 cpu_index = os_get_cpu_number();
+ ethernet_main_t * em = &ethernet_main;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_adjacency_t * adj0;
+ vlib_buffer_t * p0;
+ char *h0;
+ u32 pi0, rw_len0, adj_index0, next0 = 0;
+ u32 tx_sw_if_index0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ h0 = vlib_buffer_get_current (p0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ /* We should never rewrite a pkt using the MISS adjacency */
+ ASSERT(adj_index0);
+
+ adj0 = adj_get (adj_index0);
+
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], h0,
+ sizeof (ethernet_header_t));
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
+
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ cpu_index, adj_index0,
+ /* packet increment */ 0,
+ /* byte increment */ rw_len0-sizeof(ethernet_header_t));
+
+ /* Check MTU of outgoing interface. */
+ if (PREDICT_TRUE((vlib_buffer_length_in_chain (vm, p0) <=
+ adj0[0].rewrite_header.max_l3_packet_bytes)))
+ {
+ /* Don't adjust the buffer for ttl issue; icmp-error node wants
+ * to see the IP headerr */
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+ }
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+ /*
+ * Follow the feature ARC. this will result eventually in
+ * the midchain-tx node
+ */
+ vnet_feature_arc_start(em->output_feature_arc_index, tx_sw_if_index0, &next0, p0);
+ }
+ else
+ {
+ /* can't fragment L2 */
+ next0 = ADJ_L2_REWRITE_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_l2_trace_t *tr = vlib_add_trace (vm, node,
+ p0, sizeof (*tr));
+ tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+adj_l2_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return adj_l2_rewrite_inline (vm, node, frame, 0);
+}
+
+static uword
+adj_l2_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return adj_l2_rewrite_inline (vm, node, frame, 1);
+}
+
+VLIB_REGISTER_NODE (adj_l2_rewrite_node) = {
+ .function = adj_l2_rewrite,
+ .name = "adj-l2-rewrite",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_l2_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ADJ_L2_REWRITE_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (adj_l2_rewrite_node, adj_l2_rewrite)
+
+VLIB_REGISTER_NODE (adj_l2_midchain_node) = {
+ .function = adj_l2_midchain,
+ .name = "adj-l2-midchain",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_l2_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ADJ_L2_REWRITE_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (adj_l2_midchain_node, adj_l2_midchain)
diff --git a/src/vnet/adj/adj_l2.h b/src/vnet/adj/adj_l2.h
new file mode 100644
index 00000000000..3aa1c74b224
--- /dev/null
+++ b/src/vnet/adj/adj_l2.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_L2_H__
+#define __ADJ_L2_H__
+
+#include <vnet/adj/adj.h>
+
+extern vlib_node_registration_t adj_l2_midchain_node;
+extern vlib_node_registration_t adj_l2_rewrite_node;
+
+#endif
diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
new file mode 100644
index 00000000000..8c6ab5aa17b
--- /dev/null
+++ b/src/vnet/adj/adj_midchain.c
@@ -0,0 +1,559 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/adj/adj_l2.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/fib/fib_walk.h>
+
+/**
+ * The two midchain tx feature node indices
+ */
+static u32 adj_midchain_tx_feature_node[VNET_LINK_NUM];
+static u32 adj_midchain_tx_no_count_feature_node[VNET_LINK_NUM];
+
+/**
+ * @brief Trace data for packets traversing the midchain tx node
+ */
+typedef struct adj_midchain_tx_trace_t_
+{
+ /**
+ * @brief the midchain adj we are traversing
+ */
+ adj_index_t ai;
+} adj_midchain_tx_trace_t;
+
+always_inline uword
+adj_midchain_tx_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int interface_count)
+{
+ u32 * from, * to_next, n_left_from, n_left_to_next;
+ u32 next_index;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ u32 cpu_index = vm->cpu_index;
+
+ /* Vector of buffer / pkt indices we're supposed to process */
+ from = vlib_frame_vector_args (frame);
+
+ /* Number of buffers / pkts */
+ n_left_from = frame->n_vectors;
+
+ /* Speculatively send the first buffer to the last disposition we used */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ /* set up to enqueue to our disposition with index = next_index */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+
+ while (n_left_from >= 4 && n_left_to_next > 2)
+ {
+ u32 bi0, adj_index0, next0;
+ const ip_adjacency_t * adj0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+ u32 bi1, adj_index1, next1;
+ const ip_adjacency_t * adj1;
+ const dpo_id_t *dpo1;
+ vlib_buffer_t * b1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ bi1 = from[1];
+ to_next[1] = bi1;
+
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer(vm, bi0);
+ b1 = vlib_get_buffer(vm, bi1);
+
+ /* Follow the DPO on which the midchain is stacked */
+ adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get(adj_index0);
+ adj1 = adj_get(adj_index1);
+
+ dpo0 = &adj0->sub_type.midchain.next_dpo;
+ dpo1 = &adj1->sub_type.midchain.next_dpo;
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (interface_count)
+ {
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ cpu_index,
+ adj0->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ cpu_index,
+ adj1->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b1));
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->ai = adj_index0;
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->ai = adj_index1;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1,
+ next0, next1);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, adj_index0, next0;
+ const ip_adjacency_t * adj0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer(vm, bi0);
+
+ /* Follow the DPO on which the midchain is stacked */
+ adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get(adj_index0);
+ dpo0 = &adj0->sub_type.midchain.next_dpo;
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (interface_count)
+ {
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ cpu_index,
+ adj0->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->ai = adj_index0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static u8 *
+format_adj_midchain_tx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ adj_midchain_tx_trace_t *tr = va_arg (*args, adj_midchain_tx_trace_t*);
+
+ s = format(s, "adj-midchain:[%d]:%U", tr->ai,
+ format_ip_adjacency, tr->ai,
+ FORMAT_IP_ADJACENCY_NONE);
+
+ return (s);
+}
+
+static uword
+adj_midchain_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (adj_midchain_tx_inline(vm, node, frame, 1));
+}
+
+VLIB_REGISTER_NODE (adj_midchain_tx_node, static) = {
+ .function = adj_midchain_tx,
+ .name = "adj-midchain-tx",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_midchain_tx_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+static uword
+adj_midchain_tx_no_count (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (adj_midchain_tx_inline(vm, node, frame, 0));
+}
+
+VLIB_REGISTER_NODE (adj_midchain_tx_no_count_node, static) = {
+ .function = adj_midchain_tx_no_count,
+ .name = "adj-midchain-tx-no-count",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_midchain_tx_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VNET_FEATURE_INIT (adj_midchain_tx_ip4, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_IP4],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_ip4, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_IP4],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_ip6, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_IP6],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_ip6, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_IP6],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_mpls, static) = {
+ .arc_name = "mpls-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_MPLS],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_mpls, static) = {
+ .arc_name = "mpls-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_MPLS],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_ethernet, static) = {
+ .arc_name = "ethernet-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("error-drop"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_ETHERNET],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_ethernet, static) = {
+ .arc_name = "ethernet-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("error-drop"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_ETHERNET],
+};
+
+static inline u32
+adj_get_midchain_node (vnet_link_t link)
+{
+ switch (link) {
+ case VNET_LINK_IP4:
+ return (ip4_midchain_node.index);
+ case VNET_LINK_IP6:
+ return (ip6_midchain_node.index);
+ case VNET_LINK_MPLS:
+ return (mpls_midchain_node.index);
+ case VNET_LINK_ETHERNET:
+ return (adj_l2_midchain_node.index);
+ case VNET_LINK_ARP:
+ break;
+ }
+ ASSERT(0);
+ return (0);
+}
+
+static u8
+adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj)
+{
+ u8 arc = (u8) ~0;
+ switch (adj->ia_link)
+ {
+ case VNET_LINK_IP4:
+ {
+ arc = ip4_main.lookup_main.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_IP6:
+ {
+ arc = ip6_main.lookup_main.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_MPLS:
+ {
+ arc = mpls_main.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_ETHERNET:
+ {
+ arc = ethernet_main.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_ARP:
+ ASSERT(0);
+ break;
+ }
+
+ ASSERT (arc != (u8) ~0);
+
+ return (arc);
+}
+
+/**
+ * adj_nbr_midchain_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewrite is reset (i.e. when ARP/ND etnry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_midchain_flag_t flags,
+ u8 *rewrite)
+{
+ ip_adjacency_t *adj;
+ u8 arc_index;
+ u32 feature_index;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ /*
+ * one time only update. since we don't support chainging the tunnel
+ * src,dst, this is all we need.
+ */
+ ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP);
+ /*
+ * tunnels can always provide a rewrite.
+ */
+ ASSERT(NULL != rewrite);
+
+ adj->sub_type.midchain.fixup_func = fixup;
+
+ arc_index = adj_midchain_get_feature_arc_index_for_link_type (adj);
+ feature_index = (flags & ADJ_MIDCHAIN_FLAG_NO_COUNT) ?
+ adj_midchain_tx_no_count_feature_node[adj->ia_link] :
+ adj_midchain_tx_feature_node[adj->ia_link];
+
+ adj->sub_type.midchain.tx_function_node = (flags & ADJ_MIDCHAIN_FLAG_NO_COUNT) ?
+ adj_midchain_tx_no_count_node.index :
+ adj_midchain_tx_node.index;
+
+ vnet_feature_enable_disable_with_index (arc_index, feature_index,
+ adj->rewrite_header.sw_if_index,
+ 1 /* enable */, 0, 0);
+
+ /*
+ * stack the midchain on the drop so it's ready to forward in the adj-midchain-tx.
+ * The graph arc used/created here is from the midchain-tx node to the
+ * child's registered node. This is because post adj processing the next
+ * node are any output features, then the midchain-tx. from there we
+ * need to get to the stacked child's node.
+ */
+ dpo_stack_from_node(adj->sub_type.midchain.tx_function_node,
+ &adj->sub_type.midchain.next_dpo,
+ drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
+
+ /*
+ * update the rewirte with the workers paused.
+ */
+ adj_nbr_update_rewrite_internal(adj,
+ IP_LOOKUP_NEXT_MIDCHAIN,
+ adj_get_midchain_node(adj->ia_link),
+ adj->sub_type.midchain.tx_function_node,
+ rewrite);
+}
+
+/**
+ * adj_nbr_midchain_unstack
+ *
+ * Unstack the adj. stack it on drop
+ */
+void
+adj_nbr_midchain_unstack (adj_index_t adj_index)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ /*
+ * stack on the drop
+ */
+ dpo_stack(DPO_ADJACENCY_MIDCHAIN,
+ vnet_link_to_dpo_proto(adj->ia_link),
+ &adj->sub_type.midchain.next_dpo,
+ drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
+
+ CLIB_MEMORY_BARRIER();
+}
+
+/**
+ * adj_nbr_midchain_stack
+ */
+void
+adj_nbr_midchain_stack (adj_index_t adj_index,
+ const dpo_id_t *next)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ ASSERT(IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index);
+
+ dpo_stack_from_node(adj->sub_type.midchain.tx_function_node,
+ &adj->sub_type.midchain.next_dpo,
+ next);
+}
+
+u8*
+format_adj_midchain (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ u32 indent = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format (s, "%U", format_vnet_link, adj->ia_link);
+ s = format (s, " via %U ",
+ format_ip46_address, &adj->sub_type.nbr.next_hop);
+ s = format (s, " %U",
+ format_vnet_rewrite,
+ vnm->vlib_main, &adj->rewrite_header,
+ sizeof (adj->rewrite_data), indent);
+ s = format (s, "\n%Ustacked-on:\n%U%U",
+ format_white_space, indent,
+ format_white_space, indent+2,
+ format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2);
+
+ return (s);
+}
+
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+ adj_lock(dpo->dpoi_index);
+}
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+ adj_unlock(dpo->dpoi_index);
+}
+
+const static dpo_vft_t adj_midchain_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_midchain,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a midchain
+ * object.
+ *
+ * this means that these graph nodes are ones from which a midchain is the
+ * parent object in the DPO-graph.
+ */
+const static char* const midchain_ip4_nodes[] =
+{
+ "ip4-midchain",
+ NULL,
+};
+const static char* const midchain_ip6_nodes[] =
+{
+ "ip6-midchain",
+ NULL,
+};
+const static char* const midchain_mpls_nodes[] =
+{
+ "mpls-midchain",
+ NULL,
+};
+const static char* const midchain_ethernet_nodes[] =
+{
+ "adj-l2-midchain",
+ NULL,
+};
+
+const static char* const * const midchain_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = midchain_ip4_nodes,
+ [DPO_PROTO_IP6] = midchain_ip6_nodes,
+ [DPO_PROTO_MPLS] = midchain_mpls_nodes,
+ [DPO_PROTO_ETHERNET] = midchain_ethernet_nodes,
+};
+
+void
+adj_midchain_module_init (void)
+{
+ dpo_register(DPO_ADJACENCY_MIDCHAIN, &adj_midchain_dpo_vft, midchain_nodes);
+}
diff --git a/src/vnet/adj/adj_midchain.h b/src/vnet/adj/adj_midchain.h
new file mode 100644
index 00000000000..ae414aea6dc
--- /dev/null
+++ b/src/vnet/adj/adj_midchain.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Midchain Adjacency sub-type. These adjs represent an L3 peer on a
+ * tunnel interface. The tunnel's adjacency is thus not the end of the chain,
+ * and needs to stack on/link to another chain (or portion of the graph) to
+ * reach the tunnel's destination.
+ */
+
+#ifndef __ADJ_MIDCHAIN_H__
+#define __ADJ_MIDCHAIN_H__
+
+#include <vnet/adj/adj.h>
+
+/**
+ * @brief Flags controlling the midchain adjacency
+ */
+typedef enum adj_midchain_flag_t_
+{
+ /**
+ * No flags
+ */
+ ADJ_MIDCHAIN_FLAG_NONE = 0,
+
+ /**
+ * Packets TX through the midchain do not increment the interface
+ * counters. This should be used when the adj is associated with an L2
+ * interface and that L2 interface is in a bridege domain. In that case
+ * the packet will have traversed the interface's TX node, and hence have
+ * been counted, before it traverses ths midchain
+ */
+ ADJ_MIDCHAIN_FLAG_NO_COUNT = (1 << 0),
+} adj_midchain_flag_t;
+
+/**
+ * @brief
+ * Convert an existing neighbour adjacency into a midchain
+ *
+ * @param adj_index
+ * The index of the neighbour adjacency.
+ *
+ * @param post_rewrite_node
+ * The VLIB graph node that provides the post-encap fixup.
+ * where 'fixup' is e.g., correcting chksum, length, etc.
+ *
+ * @param rewrite
+ * The rewrite.
+ */
+extern void adj_nbr_midchain_update_rewrite(adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_midchain_flag_t flags,
+ u8 *rewrite);
+
+/**
+ * @brief
+ * [re]stack a midchain. 'Stacking' is the act of forming parent-child
+ * relationships in the data-plane graph.
+ *
+ * @param adj_index
+ * The index of the midchain to stack
+ *
+ * @param dpo
+ * The parent DPO to stack onto (i.e. become a child of).
+ */
+extern void adj_nbr_midchain_stack(adj_index_t adj_index,
+ const dpo_id_t *dpo);
+
+/**
+ * @brief
+ * unstack a midchain. This will break the chain between the midchain and
+ * the next graph section. This is a implemented as stack-on-drop
+ *
+ * @param adj_index
+ * The index of the midchain to stack
+ */
+extern void adj_nbr_midchain_unstack(adj_index_t adj_index);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_midchain_module_init(void);
+
+/**
+ * @brief
+ * Format a midchain adjacency
+ */
+extern u8* format_adj_midchain(u8* s, va_list *ap);
+
+#endif
diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c
new file mode 100644
index 00000000000..1344bb67fcc
--- /dev/null
+++ b/src/vnet/adj/adj_nbr.c
@@ -0,0 +1,1087 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/fib/fib_walk.h>
+
+/*
+ * Vector Hash tables of neighbour (traditional) adjacencies
+ * Key: interface(for the vector index), address (and its proto),
+ * link-type/ether-type.
+ */
+static BVT(clib_bihash) **adj_nbr_tables[FIB_PROTOCOL_MAX];
+
+// FIXME SIZE APPROPRIATELY. ASK DAVEB.
+#define ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (64 * 64)
+#define ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+
+#define ADJ_NBR_SET_KEY(_key, _lt, _nh) \
+{ \
+ _key.key[0] = (_nh)->as_u64[0]; \
+ _key.key[1] = (_nh)->as_u64[1]; \
+ _key.key[2] = (_lt); \
+}
+
+#define ADJ_NBR_ITF_OK(_proto, _itf) \
+ (((_itf) < vec_len(adj_nbr_tables[_proto])) && \
+ (NULL != adj_nbr_tables[_proto][sw_if_index]))
+
+static void
+adj_nbr_insert (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index,
+ adj_index_t adj_index)
+{
+ BVT(clib_bihash_kv) kv;
+
+ if (sw_if_index >= vec_len(adj_nbr_tables[nh_proto]))
+ {
+ vec_validate(adj_nbr_tables[nh_proto], sw_if_index);
+ }
+ if (NULL == adj_nbr_tables[nh_proto][sw_if_index])
+ {
+ adj_nbr_tables[nh_proto][sw_if_index] =
+ clib_mem_alloc_aligned(sizeof(BVT(clib_bihash)),
+ CLIB_CACHE_LINE_BYTES);
+ memset(adj_nbr_tables[nh_proto][sw_if_index],
+ 0,
+ sizeof(BVT(clib_bihash)));
+
+ BV(clib_bihash_init) (adj_nbr_tables[nh_proto][sw_if_index],
+ "Adjacency Neighbour table",
+ ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS,
+ ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE);
+ }
+
+ ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
+ kv.value = adj_index;
+
+ BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 1);
+}
+
+void
+adj_nbr_remove (adj_index_t ai,
+ fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ BVT(clib_bihash_kv) kv;
+
+ if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
+ return;
+
+ ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
+ kv.value = ai;
+
+ BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 0);
+}
+
+static adj_index_t
+adj_nbr_find (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ BVT(clib_bihash_kv) kv;
+
+ ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
+
+ if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
+ return (ADJ_INDEX_INVALID);
+
+ if (BV(clib_bihash_search)(adj_nbr_tables[nh_proto][sw_if_index],
+ &kv, &kv) < 0)
+ {
+ return (ADJ_INDEX_INVALID);
+ }
+ else
+ {
+ return (kv.value);
+ }
+}
+
+static inline u32
+adj_get_nd_node (fib_protocol_t proto)
+{
+ switch (proto) {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_arp_node.index);
+ case FIB_PROTOCOL_IP6:
+ return (ip6_discover_neighbor_node.index);
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ ASSERT(0);
+ return (ip4_arp_node.index);
+}
+
+static ip_adjacency_t*
+adj_nbr_alloc (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ ip_adjacency_t *adj;
+
+ adj = adj_alloc(nh_proto);
+
+ adj_nbr_insert(nh_proto, link_type, nh_addr,
+ sw_if_index,
+ adj_get_index(adj));
+
+ /*
+ * since we just added the ADJ we have no rewrite string for it,
+ * so its for ARP
+ */
+ adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
+ adj->sub_type.nbr.next_hop = *nh_addr;
+ adj->ia_link = link_type;
+ adj->ia_nh_proto = nh_proto;
+ adj->rewrite_header.sw_if_index = sw_if_index;
+ memset(&adj->sub_type.midchain.next_dpo, 0,
+ sizeof(adj->sub_type.midchain.next_dpo));
+
+ return (adj);
+}
+
+/*
+ * adj_add_for_nbr
+ *
+ * Add an adjacency for the neighbour requested.
+ *
+ * The key for an adj is:
+ * - the Next-hops protocol (i.e. v4 or v6)
+ * - the address of the next-hop
+ * - the interface the next-hop is reachable through
+ */
+adj_index_t
+adj_nbr_add_or_lock (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ adj_index_t adj_index;
+ ip_adjacency_t *adj;
+
+ adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
+
+ if (ADJ_INDEX_INVALID == adj_index)
+ {
+ vnet_main_t *vnm;
+
+ vnm = vnet_get_main();
+ adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
+ adj_index = adj_get_index(adj);
+ adj_lock(adj_index);
+
+ vnet_rewrite_init(vnm, sw_if_index,
+ adj_get_nd_node(nh_proto),
+ vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
+ &adj->rewrite_header);
+
+ /*
+ * we need a rewrite where the destination IP address is converted
+ * to the appropriate link-layer address. This is interface specific.
+ * So ask the interface to do it.
+ */
+ vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, adj_index);
+ }
+ else
+ {
+ adj_lock(adj_index);
+ }
+
+ return (adj_index);
+}
+
+adj_index_t
+adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index,
+ u8 *rewrite)
+{
+ adj_index_t adj_index;
+ ip_adjacency_t *adj;
+
+ adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
+
+ if (ADJ_INDEX_INVALID == adj_index)
+ {
+ adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
+ adj->rewrite_header.sw_if_index = sw_if_index;
+ }
+ else
+ {
+ adj = adj_get(adj_index);
+ }
+
+ adj_lock(adj_get_index(adj));
+ adj_nbr_update_rewrite(adj_get_index(adj),
+ ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ rewrite);
+
+ return (adj_get_index(adj));
+}
+
+/**
+ * adj_nbr_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewirte is reset (i.e. when ARP/ND etnry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_update_rewrite (adj_index_t adj_index,
+ adj_nbr_rewrite_flag_t flags,
+ u8 *rewrite)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ if (flags & ADJ_NBR_REWRITE_FLAG_COMPLETE)
+ {
+ /*
+ * update the adj's rewrite string and build the arc
+ * from the rewrite node to the interface's TX node
+ */
+ adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_REWRITE,
+ adj_get_rewrite_node(adj->ia_link),
+ vnet_tx_node_index_for_sw_interface(
+ vnet_get_main(),
+ adj->rewrite_header.sw_if_index),
+ rewrite);
+ }
+ else
+ {
+ adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_ARP,
+ adj_get_nd_node(adj->ia_nh_proto),
+ vnet_tx_node_index_for_sw_interface(
+ vnet_get_main(),
+ adj->rewrite_header.sw_if_index),
+ rewrite);
+ }
+}
+
+/**
+ * adj_nbr_update_rewrite_internal
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewirte is reset (i.e. when ARP/ND etnry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
+ u32 adj_next_index,
+ u32 this_node,
+ u32 next_node,
+ u8 *rewrite)
+{
+ ip_adjacency_t *walk_adj;
+ adj_index_t walk_ai;
+ vlib_main_t * vm;
+ u32 old_next;
+ int do_walk;
+
+ vm = vlib_get_main();
+ old_next = adj->lookup_next_index;
+
+ walk_ai = adj_get_index(adj);
+ if (VNET_LINK_MPLS == adj->ia_link)
+ {
+ /*
+ * The link type MPLS has no children in the control plane graph, it only
+ * has children in the data-palne graph. The backwalk is up the former.
+ * So we need to walk from its IP cousin.
+ */
+ walk_ai = adj_nbr_find(adj->ia_nh_proto,
+ fib_proto_to_link(adj->ia_nh_proto),
+ &adj->sub_type.nbr.next_hop,
+ adj->rewrite_header.sw_if_index);
+ }
+
+ /*
+ * Don't call the walk re-entrantly
+ */
+ if (ADJ_INDEX_INVALID != walk_ai)
+ {
+ walk_adj = adj_get(walk_ai);
+ if (IP_ADJ_SYNC_WALK_ACTIVE & walk_adj->ia_flags)
+ {
+ do_walk = 0;
+ }
+ else
+ {
+ /*
+ * Prevent re-entrant walk of the same adj
+ */
+ walk_adj->ia_flags |= IP_ADJ_SYNC_WALK_ACTIVE;
+ do_walk = 1;
+ }
+ }
+ else
+ {
+ do_walk = 0;
+ }
+
+ /*
+ * lock the adjacencies that are affected by updates this walk will provoke.
+ * Since the aim of the walk is to update children to link to a different
+ * DPO, this adj will no longer be in use and its lock count will drop to 0.
+ * We don't want it to be deleted as part of this endevour.
+ */
+ adj_lock(adj_get_index(adj));
+ adj_lock(walk_ai);
+
+ /*
+ * Updating a rewrite string is not atomic;
+ * - the rewrite string is too long to write in one instruction
+ * - when swapping from incomplete to complete, we also need to update
+ * the VLIB graph next-index of the adj.
+ * ideally we would only want to suspend forwarding via this adj whilst we
+ * do this, but we do not have that level of granularity - it's suspend all
+ * worker threads or nothing.
+ * The other chioces are:
+ * - to mark the adj down and back walk so child load-balances drop this adj
+ * from the set.
+ * - update the next_node index of this adj to point to error-drop
+ * both of which will mean for MAC change we will drop for this adj
+ * which is not acceptable. However, when the adj changes type (from
+ * complete to incomplete and vice-versa) the child DPOs, which have the
+ * VLIB graph next node index, will be sending packets to the wrong graph
+ * node. So from the options above, updating the next_node of the adj to
+ * be drop will work, but it relies on each graph node v4/v6/mpls, rewrite/
+ * arp/midchain always be valid w.r.t. a mis-match of adj type and node type
+ * (i.e. a rewrite adj in the arp node). This is not enforcable. Getting it
+ * wrong will lead to hard to find bugs since its a race condition. So we
+ * choose the more reliable method of updating the children to use the drop,
+ * then switching adj's type, then updating the children again. Did I mention
+ * that this doesn't happen often...
+ * So we need to distinguish between the two cases:
+ * 1 - mac change
+ * 2 - adj type change
+ */
+ if (do_walk &&
+ old_next != adj_next_index &&
+ ADJ_INDEX_INVALID != walk_ai)
+ {
+ /*
+ * the adj is changing type. we need to fix all children so that they
+ * stack momentarily on a drop, while the adj changes. If we don't do
+ * this the children will send packets to a VLIB graph node that does
+ * not correspond to the adj's type - and it goes downhill from there.
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_DOWN,
+ /*
+ * force this walk to be synchrous. if we don't and a node in the graph
+ * (a heavily shared path-list) chooses to back-ground the walk (make it
+ * async) then it will pause and we will do the adj update below, before
+ * all the children are updated. not good.
+ */
+ .fnbw_flags = FIB_NODE_BW_FLAG_FORCE_SYNC,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
+ }
+
+ /*
+ * If we are just updating the MAC string of the adj (which we also can't
+ * do atomically), then we need to stop packets switching through the adj.
+ * We can't do that on a per-adj basis, so it's all the packets.
+ * If we are updating the type, and we walked back to the children above,
+ * then this barrier serves to flush the queues/frames.
+ */
+ vlib_worker_thread_barrier_sync(vm);
+
+ adj->lookup_next_index = adj_next_index;
+
+ if (NULL != rewrite)
+ {
+ /*
+ * new rewrite provided.
+ * fill in the adj's rewrite string, and build the VLIB graph arc.
+ */
+ vnet_rewrite_set_data_internal(&adj->rewrite_header,
+ sizeof(adj->rewrite_data),
+ rewrite,
+ vec_len(rewrite));
+ vec_free(rewrite);
+ }
+ else
+ {
+ vnet_rewrite_clear_data_internal(&adj->rewrite_header,
+ sizeof(adj->rewrite_data));
+ }
+ adj->rewrite_header.node_index = this_node;
+ adj->rewrite_header.next_index = vlib_node_add_next(vlib_get_main(),
+ this_node,
+ next_node);
+
+ /*
+ * done with the rewirte update - let the workers loose.
+ */
+ vlib_worker_thread_barrier_release(vm);
+
+ if (do_walk &&
+ (old_next != adj->lookup_next_index) &&
+ (ADJ_INDEX_INVALID != walk_ai))
+ {
+ /*
+ * backwalk to the children so they can stack on the now updated
+ * adjacency
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
+ }
+ /*
+ * Prevent re-entrant walk of the same adj
+ */
+ if (do_walk)
+ {
+ walk_adj->ia_flags &= ~IP_ADJ_SYNC_WALK_ACTIVE;
+ }
+
+ adj_unlock(adj_get_index(adj));
+ adj_unlock(walk_ai);
+}
+
+typedef struct adj_db_count_ctx_t_ {
+ u64 count;
+} adj_db_count_ctx_t;
+
+static void
+adj_db_count (BVT(clib_bihash_kv) * kvp,
+ void *arg)
+{
+ adj_db_count_ctx_t * ctx = arg;
+ ctx->count++;
+}
+
+u32
+adj_nbr_db_size (void)
+{
+ adj_db_count_ctx_t ctx = {
+ .count = 0,
+ };
+ fib_protocol_t proto;
+ u32 sw_if_index = 0;
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
+ {
+ if (NULL != adj_nbr_tables[proto][sw_if_index])
+ {
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[proto][sw_if_index],
+ adj_db_count,
+ &ctx);
+ }
+ }
+ }
+ return (ctx.count);
+}
+
+/**
+ * @brief Context for a walk of the adjacency neighbour DB
+ */
+typedef struct adj_walk_ctx_t_
+{
+ adj_walk_cb_t awc_cb;
+ void *awc_ctx;
+} adj_walk_ctx_t;
+
+static void
+adj_nbr_walk_cb (BVT(clib_bihash_kv) * kvp,
+ void *arg)
+{
+ adj_walk_ctx_t *ctx = arg;
+
+ // FIXME: can't stop early...
+ ctx->awc_cb(kvp->value, ctx->awc_ctx);
+}
+
+void
+adj_nbr_walk (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
+ return;
+
+ adj_walk_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[adj_nh_proto][sw_if_index],
+ adj_nbr_walk_cb,
+ &awc);
+}
+
+/**
+ * @brief Context for a walk of the adjacency neighbour DB
+ */
+typedef struct adj_walk_nh_ctx_t_
+{
+ adj_walk_cb_t awc_cb;
+ void *awc_ctx;
+ const ip46_address_t *awc_nh;
+} adj_walk_nh_ctx_t;
+
+static void
+adj_nbr_walk_nh_cb (BVT(clib_bihash_kv) * kvp,
+ void *arg)
+{
+ ip_adjacency_t *adj;
+ adj_walk_nh_ctx_t *ctx = arg;
+
+ adj = adj_get(kvp->value);
+
+ if (!ip46_address_cmp(&adj->sub_type.nbr.next_hop, ctx->awc_nh))
+ ctx->awc_cb(kvp->value, ctx->awc_ctx);
+}
+
+/**
+ * @brief Walk adjacencies on a link with a given v4 next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh4 (u32 sw_if_index,
+ const ip4_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP4, sw_if_index))
+ return;
+
+ ip46_address_t nh = {
+ .ip4 = *addr,
+ };
+
+ adj_walk_nh_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ .awc_nh = &nh,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[FIB_PROTOCOL_IP4][sw_if_index],
+ adj_nbr_walk_nh_cb,
+ &awc);
+}
+
+/**
+ * @brief Walk adjacencies on a link with a given v6 next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh6 (u32 sw_if_index,
+ const ip6_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP6, sw_if_index))
+ return;
+
+ ip46_address_t nh = {
+ .ip6 = *addr,
+ };
+
+ adj_walk_nh_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ .awc_nh = &nh,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[FIB_PROTOCOL_IP6][sw_if_index],
+ adj_nbr_walk_nh_cb,
+ &awc);
+}
+
+/**
+ * @brief Walk adjacencies on a link with a given next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ const ip46_address_t *nh,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
+ return;
+
+ adj_walk_nh_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ .awc_nh = nh,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[adj_nh_proto][sw_if_index],
+ adj_nbr_walk_nh_cb,
+ &awc);
+}
+
+/**
+ * Flags associated with the interface state walks
+ */
+typedef enum adj_nbr_interface_flags_t_
+{
+ ADJ_NBR_INTERFACE_UP = (1 << 0),
+} adj_nbr_interface_flags_t;
+
+/**
+ * Context for the state change walk of the DB
+ */
+typedef struct adj_nbr_interface_state_change_ctx_t_
+{
+ /**
+ * Flags on the interface
+ */
+ adj_nbr_interface_flags_t flags;
+} adj_nbr_interface_state_change_ctx_t;
+
+static adj_walk_rc_t
+adj_nbr_interface_state_change_one (adj_index_t ai,
+ void *arg)
+{
+ /*
+ * Back walk the graph to inform the forwarding entries
+ * that this interface state has changed. Do this synchronously
+ * since this is the walk that provides convergence
+ */
+ adj_nbr_interface_state_change_ctx_t *ctx = arg;
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = ((ctx->flags & ADJ_NBR_INTERFACE_UP) ?
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
+ /*
+ * the force sync applies only as far as the first fib_entry.
+ * And it's the fib_entry's we need to converge away from
+ * the adjacencies on the now down link
+ */
+ .fnbw_flags = (!(ctx->flags & ADJ_NBR_INTERFACE_UP) ?
+ FIB_NODE_BW_FLAG_FORCE_SYNC :
+ 0),
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * @brief Registered function for SW interface state changes
+ */
+static clib_error_t *
+adj_nbr_sw_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 flags)
+{
+ fib_protocol_t proto;
+
+ /*
+ * walk each adj on the interface and trigger a walk from that adj
+ */
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_interface_state_change_ctx_t ctx = {
+ .flags = ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
+ ADJ_NBR_INTERFACE_UP :
+ 0),
+ };
+
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_interface_state_change_one,
+ &ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION_PRIO(
+ adj_nbr_sw_interface_state_change,
+ VNET_ITF_FUNC_PRIORITY_HIGH);
+
+/**
+ * @brief Invoked on each SW interface of a HW interface when the
+ * HW interface state changes
+ */
+static void
+adj_nbr_hw_sw_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ void *arg)
+{
+ adj_nbr_interface_state_change_ctx_t *ctx = arg;
+ fib_protocol_t proto;
+
+ /*
+ * walk each adj on the interface and trigger a walk from that adj
+ */
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_interface_state_change_one,
+ ctx);
+ }
+}
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+adj_nbr_hw_interface_state_change (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ /*
+ * walk SW interface on the HW
+ */
+ adj_nbr_interface_state_change_ctx_t ctx = {
+ .flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ?
+ ADJ_NBR_INTERFACE_UP :
+ 0),
+ };
+
+ vnet_hw_interface_walk_sw(vnm, hw_if_index,
+ adj_nbr_hw_sw_interface_state_change,
+ &ctx);
+
+ return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION_PRIO(
+ adj_nbr_hw_interface_state_change,
+ VNET_ITF_FUNC_PRIORITY_HIGH);
+
+static adj_walk_rc_t
+adj_nbr_interface_delete_one (adj_index_t ai,
+ void *arg)
+{
+ /*
+ * Back walk the graph to inform the forwarding entries
+ * that this interface has been deleted.
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * adj_nbr_interface_add_del
+ *
+ * Registered to receive interface Add and delete notifications
+ */
+static clib_error_t *
+adj_nbr_interface_add_del (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 is_add)
+{
+ fib_protocol_t proto;
+
+ if (is_add)
+ {
+ /*
+ * not interested in interface additions. we will not back walk
+ * to resolve paths through newly added interfaces. Why? The control
+ * plane should have the brains to add interfaces first, then routes.
+ * So the case where there are paths with a interface that matches
+ * one just created is the case where the path resolved through an
+ * interface that was deleted, and still has not been removed. The
+ * new interface added, is NO GUARANTEE that the interface being
+ * added now, even though it may have the same sw_if_index, is the
+ * same interface that the path needs. So tough!
+ * If the control plane wants these routes to resolve it needs to
+ * remove and add them again.
+ */
+ return (NULL);
+ }
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_interface_delete_one,
+ NULL);
+ }
+
+ return (NULL);
+
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_nbr_interface_add_del);
+
+
+static adj_walk_rc_t
+adj_nbr_show_one (adj_index_t ai,
+ void *arg)
+{
+ vlib_cli_output (arg, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_NONE);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static clib_error_t *
+adj_nbr_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ adj_index_t ai = ADJ_INDEX_INVALID;
+ u32 sw_if_index = ~0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &ai))
+ ;
+ else if (unformat (input, "%U",
+ unformat_vnet_sw_interface, vnet_get_main(),
+ &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (ADJ_INDEX_INVALID != ai)
+ {
+ vlib_cli_output (vm, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_DETAIL);
+ }
+ else if (~0 != sw_if_index)
+ {
+ fib_protocol_t proto;
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_show_one,
+ vm);
+ }
+ }
+ else
+ {
+ fib_protocol_t proto;
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_show_one,
+ vm);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*?
+ * Show all neighbour adjacencies.
+ * @cliexpar
+ * @cliexstart{sh adj nbr}
+ * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
+ .path = "show adj nbr",
+ .short_help = "show adj nbr [<adj_index>] [interface]",
+ .function = adj_nbr_show,
+};
+
+static ip46_type_t
+adj_proto_to_46 (fib_protocol_t proto)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (IP46_TYPE_IP4);
+ case FIB_PROTOCOL_IP6:
+ return (IP46_TYPE_IP6);
+ default:
+ return (IP46_TYPE_IP4);
+ }
+ return (IP46_TYPE_IP4);
+}
+
+u8*
+format_adj_nbr_incomplete (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format (s, "arp-%U", format_vnet_link, adj->ia_link);
+ s = format (s, ": via %U",
+ format_ip46_address, &adj->sub_type.nbr.next_hop,
+ adj_proto_to_46(adj->ia_nh_proto));
+ s = format (s, " %U",
+ format_vnet_sw_interface_name,
+ vnm,
+ vnet_get_sw_interface(vnm,
+ adj->rewrite_header.sw_if_index));
+
+ return (s);
+}
+
+u8*
+format_adj_nbr (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format (s, "%U", format_vnet_link, adj->ia_link);
+ s = format (s, " via %U ",
+ format_ip46_address, &adj->sub_type.nbr.next_hop,
+ adj_proto_to_46(adj->ia_nh_proto));
+ s = format (s, "%U",
+ format_vnet_rewrite,
+ vnm->vlib_main, &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
+
+ return (s);
+}
+
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+ adj_lock(dpo->dpoi_index);
+}
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+ adj_unlock(dpo->dpoi_index);
+}
+
+static void
+adj_mem_show (void)
+{
+ fib_show_memory_usage("Adjacency",
+ pool_elts(adj_pool),
+ pool_len(adj_pool),
+ sizeof(ip_adjacency_t));
+}
+
+const static dpo_vft_t adj_nbr_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_nbr,
+ .dv_mem_show = adj_mem_show,
+};
+const static dpo_vft_t adj_nbr_incompl_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_nbr_incomplete,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to an adjacency
+ * object.
+ *
+ * this means that these graph nodes are ones from which a nbr is the
+ * parent object in the DPO-graph.
+ */
+const static char* const nbr_ip4_nodes[] =
+{
+ "ip4-rewrite",
+ NULL,
+};
+const static char* const nbr_ip6_nodes[] =
+{
+ "ip6-rewrite",
+ NULL,
+};
+const static char* const nbr_mpls_nodes[] =
+{
+ "mpls-output",
+ NULL,
+};
+const static char* const nbr_ethernet_nodes[] =
+{
+ "adj-l2-rewrite",
+ NULL,
+};
+const static char* const * const nbr_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = nbr_ip4_nodes,
+ [DPO_PROTO_IP6] = nbr_ip6_nodes,
+ [DPO_PROTO_MPLS] = nbr_mpls_nodes,
+ [DPO_PROTO_ETHERNET] = nbr_ethernet_nodes,
+};
+
+const static char* const nbr_incomplete_ip4_nodes[] =
+{
+ "ip4-arp",
+ NULL,
+};
+const static char* const nbr_incomplete_ip6_nodes[] =
+{
+ "ip6-discover-neighbor",
+ NULL,
+};
+const static char* const nbr_incomplete_mpls_nodes[] =
+{
+ "mpls-adj-incomplete",
+ NULL,
+};
+
+const static char* const * const nbr_incomplete_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = nbr_incomplete_ip4_nodes,
+ [DPO_PROTO_IP6] = nbr_incomplete_ip6_nodes,
+ [DPO_PROTO_MPLS] = nbr_incomplete_mpls_nodes,
+};
+
+void
+adj_nbr_module_init (void)
+{
+ dpo_register(DPO_ADJACENCY,
+ &adj_nbr_dpo_vft,
+ nbr_nodes);
+ dpo_register(DPO_ADJACENCY_INCOMPLETE,
+ &adj_nbr_incompl_dpo_vft,
+ nbr_incomplete_nodes);
+}
diff --git a/src/vnet/adj/adj_nbr.h b/src/vnet/adj/adj_nbr.h
new file mode 100644
index 00000000000..293766b8519
--- /dev/null
+++ b/src/vnet/adj/adj_nbr.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * Neighbour Adjacency sub-type. These adjs represent an L3 peer on a
+ * connected link.
+ */
+
+#ifndef __ADJ_NBR_H__
+#define __ADJ_NBR_H__
+
+#include <vnet/vnet.h>
+#include <vnet/adj/adj_types.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing neighbour adjacency
+ *
+ * @param nh_proto
+ * The protocol for the next-hop address (v4 or v6)
+ *
+ * @param link_type
+ * A description of the protocol of the packets that will forward
+ * through this adj. On an ethernet interface this is the MAC header's
+ * ether-type
+ *
+ * @param nh_addr
+ * The address of the next-hop/peer to send the packet to
+ *
+ * @param sw_if_index
+ * The interface on which the peer resides
+ */
+extern adj_index_t adj_nbr_add_or_lock(fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index);
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing neighbour adjacency
+ *
+ * @param nh_proto
+ * The protocol for the next-hop address (v4 or v6)
+ *
+ * @param link_type
+ * A description of the protocol of the packets that will forward
+ * through this adj. On an ethernet interface this is the MAC header's
+ * ether-type
+ *
+ * @param nh_addr
+ * The address of the next-hop/peer to send the packet to
+ *
+ * @param sw_if_index
+ * The interface on which the peer resides
+ *
+ * @param rewrite
+ * The rewrite to prepend to packets
+ */
+extern adj_index_t adj_nbr_add_or_lock_w_rewrite(fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index,
+ u8 *rewrite);
+/**
+ * @brief When adding a rewrite to an adjacency these are flags that
+ * apply to that rewrite
+ */
+typedef enum adj_nbr_rewrite_flag_t_
+{
+ ADJ_NBR_REWRITE_FLAG_NONE,
+
+ /**
+ * An indication that the rewrite is incomplete, i.e. that it describes the
+ * ARP/ND rewrite when probing.
+ */
+ ADJ_NBR_REWRITE_FLAG_INCOMPLETE = ADJ_NBR_REWRITE_FLAG_NONE,
+
+ /**
+ * An indication that the rewrite is complete, i.e. that it fully describes
+ * the link-layer addressing for the desintation.
+ * The opposite of this is an incomplete rewrite that describes the ARP/ND
+ * rewrite when probing.
+ */
+ ADJ_NBR_REWRITE_FLAG_COMPLETE = (1 << 0),
+} adj_nbr_rewrite_flag_t;
+
+/**
+ * @brief
+ * Update the rewrite string for an existing adjacecny.
+ *
+ * @param
+ * The index of the adj to update
+ *
+ * @param
+ * The new rewrite
+ */
+extern void adj_nbr_update_rewrite(adj_index_t adj_index,
+ adj_nbr_rewrite_flag_t flags,
+ u8 *rewrite);
+
+/**
+ * @brief
+ * Format aa incomplete neigbour (ARP) adjacency
+ */
+extern u8* format_adj_nbr_incomplete(u8* s, va_list *ap);
+
+/**
+ * @brief
+ * Format a neigbour (REWRITE) adjacency
+ */
+extern u8* format_adj_nbr(u8* s, va_list *ap);
+
+/**
+ * @brief Walk the neighbour Adjacencies on a given interface
+ */
+extern void adj_nbr_walk (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ adj_walk_cb_t cb,
+ void *ctx);
+/**
+ * @brief Walk the neighbour Adjacencies on a given interface with a given next-hop
+ */
+void
+adj_nbr_walk_nh (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ const ip46_address_t *nh,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Walk adjacencies on a link with a given v4 next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh4 (u32 sw_if_index,
+ const ip4_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Walk adjacencies on a link with a given v6 next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh6 (u32 sw_if_index,
+ const ip6_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_nbr_module_init(void);
+
+/**
+ * @brief
+ * Return the size of the adjacency database. for testing purposes
+ */
+extern u32 adj_nbr_db_size(void);
+
+#endif
diff --git a/src/vnet/adj/adj_rewrite.c b/src/vnet/adj/adj_rewrite.c
new file mode 100644
index 00000000000..7d792557724
--- /dev/null
+++ b/src/vnet/adj/adj_rewrite.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_internal.h>
+
+/**
+ * adj_rewrite_add_and_lock
+ *
+ * A rewrite sub-type has the rewrite string provided, but no key
+ */
+adj_index_t
+adj_rewrite_add_and_lock (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ u32 sw_if_index,
+ u8 *rewrite)
+{
+ ip_adjacency_t *adj;
+
+ adj = adj_alloc(nh_proto);
+
+ adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+ memset(&adj->sub_type.nbr.next_hop, 0, sizeof(adj->sub_type.nbr.next_hop));
+ adj->ia_link = link_type;
+ adj->ia_nh_proto = nh_proto;
+ adj->rewrite_header.sw_if_index = sw_if_index;
+
+ ASSERT(NULL != rewrite);
+
+ vnet_rewrite_for_sw_interface(vnet_get_main(),
+ link_type,
+ adj->rewrite_header.sw_if_index,
+ adj_get_rewrite_node(link_type),
+ rewrite,
+ &adj->rewrite_header,
+ sizeof (adj->rewrite_data));
+
+ adj_lock(adj_get_index(adj));
+
+ return (adj_get_index(adj));
+}
diff --git a/src/vnet/adj/adj_rewrite.h b/src/vnet/adj/adj_rewrite.h
new file mode 100644
index 00000000000..25e6bba8868
--- /dev/null
+++ b/src/vnet/adj/adj_rewrite.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * A rewrite adjacency has no key, and thus cannot be 'found' from the
+ * FIB resolution code. the client therefore needs to maange these adjacencies
+ */
+
+#ifndef __ADJ_REWRITE_H__
+#define __ADJ_REWRITE_H__
+
+#include <vnet/adj/adj_types.h>
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing neighbour adjacency
+ *
+ * @param nh_proto
+ * The protocol for the next-hop address (v4 or v6)
+ *
+ * @param link_type
+ * A description of the protocol of the packets that will forward
+ * through this adj. On an ethernet interface this is the MAC header's
+ * ether-type
+ *
+ * @param sw_if_index
+ * The interface on which the peer resides
+ *
+ * @param rewrite
+ * The rewrite to prepend to packets
+ */
+extern adj_index_t adj_rewrite_add_and_lock(fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ u32 sw_if_index,
+ u8 *rewrite);
+
+#endif
diff --git a/src/vnet/adj/adj_types.h b/src/vnet/adj/adj_types.h
new file mode 100644
index 00000000000..cf90c08418d
--- /dev/null
+++ b/src/vnet/adj/adj_types.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_TYPES_H__
+#define __ADJ_TYPES_H__
+
+#include <vnet/vnet.h>
+
+/**
+ * @brief An index for adjacencies.
+ * Alas 'C' is not typesafe enough to b0rk when a u32 is used instead of
+ * an adi_index_t. However, for us humans, we can glean much more intent
+ * from the declaration
+ * foo bar(adj_index_t t);
+ * than we can from
+ * foo bar(u32 t);
+ */
+typedef u32 adj_index_t;
+
+/**
+ * @brief Invalid ADJ index - used when no adj is known
+ * likewise blazoned capitals INVALID speak volumes where ~0 does not.
+ */
+#define ADJ_INDEX_INVALID ((u32)~0)
+
+/**
+ * @brief return codes from a adjacency walker callback function
+ */
+typedef enum adj_walk_rc_t_
+{
+ ADJ_WALK_RC_STOP,
+ ADJ_WALK_RC_CONTINUE,
+} adj_walk_rc_t;
+
+/**
+ * @brief Call back function when walking adjacencies
+ */
+typedef adj_walk_rc_t (*adj_walk_cb_t)(adj_index_t ai,
+ void *ctx);
+
+#endif