aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/adj
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/adj')
-rw-r--r--src/vnet/adj/adj.c607
-rw-r--r--src/vnet/adj/adj.h379
-rw-r--r--src/vnet/adj/adj_bfd.c184
-rw-r--r--src/vnet/adj/adj_delegate.c144
-rw-r--r--src/vnet/adj/adj_delegate.h104
-rw-r--r--src/vnet/adj/adj_glean.c285
-rw-r--r--src/vnet/adj/adj_glean.h61
-rw-r--r--src/vnet/adj/adj_internal.h112
-rw-r--r--src/vnet/adj/adj_l2.c192
-rw-r--r--src/vnet/adj/adj_l2.h24
-rw-r--r--src/vnet/adj/adj_mcast.c483
-rw-r--r--src/vnet/adj/adj_mcast.h114
-rw-r--r--src/vnet/adj/adj_midchain.c666
-rw-r--r--src/vnet/adj/adj_midchain.h82
-rw-r--r--src/vnet/adj/adj_nbr.c1124
-rw-r--r--src/vnet/adj/adj_nbr.h176
-rw-r--r--src/vnet/adj/adj_nsh.c211
-rw-r--r--src/vnet/adj/adj_nsh.h31
-rw-r--r--src/vnet/adj/adj_types.h53
-rw-r--r--src/vnet/adj/rewrite.c234
-rw-r--r--src/vnet/adj/rewrite.h350
21 files changed, 5616 insertions, 0 deletions
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
new file mode 100644
index 00000000..f8496913
--- /dev/null
+++ b/src/vnet/adj/adj.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/adj/adj_glean.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/adj/adj_delegate.h>
+#include <vnet/fib/fib_node_list.h>
+
+/* Adjacency packet/byte counters indexed by adjacency index. */
+vlib_combined_counter_main_t adjacency_counters;
+
+/*
+ * the single adj pool
+ */
+ip_adjacency_t *adj_pool;
+
+/**
+ * @brief Global Config for enabling per-adjacency counters.
+ * By default these are disabled.
+ */
+int adj_per_adj_counters;
+
+always_inline void
+adj_poison (ip_adjacency_t * adj)
+{
+ if (CLIB_DEBUG > 0)
+ {
+ memset (adj, 0xfe, sizeof (adj[0]));
+ }
+}
+
+ip_adjacency_t *
+adj_alloc (fib_protocol_t proto)
+{
+ ip_adjacency_t *adj;
+
+ pool_get_aligned(adj_pool, adj, CLIB_CACHE_LINE_BYTES);
+
+ adj_poison(adj);
+
+ /* Make sure certain fields are always initialized. */
+ /* Validate adjacency counters. */
+ vlib_validate_combined_counter(&adjacency_counters,
+ adj_get_index(adj));
+
+ fib_node_init(&adj->ia_node,
+ FIB_NODE_TYPE_ADJ);
+
+ adj->ia_nh_proto = proto;
+ adj->ia_flags = 0;
+ adj->rewrite_header.sw_if_index = ~0;
+ adj->rewrite_header.flags = 0;
+ adj->lookup_next_index = 0;
+ adj->ia_delegates = NULL;
+
+ /* lest it become a midchain in the future */
+ memset(&adj->sub_type.midchain.next_dpo, 0,
+ sizeof(adj->sub_type.midchain.next_dpo));
+
+ return (adj);
+}
+
+static int
+adj_index_is_special (adj_index_t adj_index)
+{
+ if (ADJ_INDEX_INVALID == adj_index)
+ return (!0);
+
+ return (0);
+}
+
+/**
+ * @brief Pretty print helper function for formatting specific adjacencies.
+ * @param s - input string to format
+ * @param args - other args passed to format function such as:
+ * - vnet_main_t
+ * - ip_lookup_main_t
+ * - adj_index
+ */
+u8 *
+format_ip_adjacency (u8 * s, va_list * args)
+{
+ format_ip_adjacency_flags_t fiaf;
+ ip_adjacency_t * adj;
+ u32 adj_index;
+
+ adj_index = va_arg (*args, u32);
+ fiaf = va_arg (*args, format_ip_adjacency_flags_t);
+ adj = adj_get(adj_index);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_REWRITE:
+ s = format (s, "%U", format_adj_nbr, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_ARP:
+ s = format (s, "%U", format_adj_nbr_incomplete, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_GLEAN:
+ s = format (s, "%U", format_adj_glean, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ s = format (s, "%U", format_adj_midchain, adj_index, 2);
+ break;
+ case IP_LOOKUP_NEXT_MCAST:
+ s = format (s, "%U", format_adj_mcast, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+ s = format (s, "%U", format_adj_mcast_midchain, adj_index, 0);
+ break;
+ default:
+ break;
+ }
+
+ if (fiaf & FORMAT_IP_ADJACENCY_DETAIL)
+ {
+ adj_delegate_type_t adt;
+ adj_delegate_t *aed;
+ vlib_counter_t counts;
+
+ vlib_get_combined_counter(&adjacency_counters, adj_index, &counts);
+ s = format (s, "\n counts:[%Ld:%Ld]", counts.packets, counts.bytes);
+ s = format (s, "\n locks:%d", adj->ia_node.fn_locks);
+ s = format(s, "\n delegates:\n ");
+ FOR_EACH_ADJ_DELEGATE(adj, adt, aed,
+ {
+ s = format(s, " %U\n", format_adj_deletegate, aed);
+ });
+
+ s = format(s, "\n children:\n ");
+ s = fib_node_children_format(adj->ia_node.fn_children, s);
+ }
+
+ return s;
+}
+
+/*
+ * adj_last_lock_gone
+ *
+ * last lock/reference to the adj has gone, we no longer need it.
+ */
+static void
+adj_last_lock_gone (ip_adjacency_t *adj)
+{
+ vlib_main_t * vm = vlib_get_main();
+
+ ASSERT(0 == fib_node_list_get_size(adj->ia_node.fn_children));
+ ADJ_DBG(adj, "last-lock-gone");
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ dpo_reset(&adj->sub_type.midchain.next_dpo);
+ /* FALL THROUGH */
+ case IP_LOOKUP_NEXT_ARP:
+ case IP_LOOKUP_NEXT_REWRITE:
+ /*
+ * complete and incomplete nbr adjs
+ */
+ adj_nbr_remove(adj_get_index(adj),
+ adj->ia_nh_proto,
+ adj->ia_link,
+ &adj->sub_type.nbr.next_hop,
+ adj->rewrite_header.sw_if_index);
+ break;
+ case IP_LOOKUP_NEXT_GLEAN:
+ adj_glean_remove(adj->ia_nh_proto,
+ adj->rewrite_header.sw_if_index);
+ break;
+ case IP_LOOKUP_NEXT_MCAST:
+ case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+ adj_mcast_remove(adj->ia_nh_proto,
+ adj->rewrite_header.sw_if_index);
+ break;
+ case IP_LOOKUP_NEXT_DROP:
+ case IP_LOOKUP_NEXT_PUNT:
+ case IP_LOOKUP_NEXT_LOCAL:
+ case IP_LOOKUP_NEXT_ICMP_ERROR:
+ case IP_LOOKUP_N_NEXT:
+ /*
+ * type not stored in any DB from which we need to remove it
+ */
+ break;
+ }
+
+ vlib_worker_thread_barrier_release(vm);
+
+ fib_node_deinit(&adj->ia_node);
+ ASSERT(0 == vec_len(adj->ia_delegates));
+ vec_free(adj->ia_delegates);
+ pool_put(adj_pool, adj);
+}
+
+void
+adj_lock (adj_index_t adj_index)
+{
+ ip_adjacency_t *adj;
+
+ if (adj_index_is_special(adj_index))
+ {
+ return;
+ }
+
+ adj = adj_get(adj_index);
+ ASSERT(adj);
+
+ ADJ_DBG(adj, "lock");
+ fib_node_lock(&adj->ia_node);
+}
+
+void
+adj_unlock (adj_index_t adj_index)
+{
+ ip_adjacency_t *adj;
+
+ if (adj_index_is_special(adj_index))
+ {
+ return;
+ }
+
+ adj = adj_get(adj_index);
+ ASSERT(adj);
+
+ ADJ_DBG(adj, "unlock");
+ ASSERT(adj);
+
+ fib_node_unlock(&adj->ia_node);
+}
+
+u32
+adj_child_add (adj_index_t adj_index,
+ fib_node_type_t child_type,
+ fib_node_index_t child_index)
+{
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+ if (adj_index_is_special(adj_index))
+ {
+ return (~0);
+ }
+
+ return (fib_node_child_add(FIB_NODE_TYPE_ADJ,
+ adj_index,
+ child_type,
+ child_index));
+}
+
+void
+adj_child_remove (adj_index_t adj_index,
+ u32 sibling_index)
+{
+ if (adj_index_is_special(adj_index))
+ {
+ return;
+ }
+
+ fib_node_child_remove(FIB_NODE_TYPE_ADJ,
+ adj_index,
+ sibling_index);
+}
+
+/*
+ * Context for the walk to update the cached feture flags.
+ */
+typedef struct adj_feature_update_t_
+{
+ u8 arc;
+ u8 enable;
+} adj_feature_update_ctx_t;
+
+static adj_walk_rc_t
+adj_feature_update_walk_cb (adj_index_t ai,
+ void *arg)
+{
+ adj_feature_update_ctx_t *ctx = arg;
+ ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+
+ /*
+ * this ugly mess matches the feature arc that is changing with affected
+ * adjacencies
+ */
+ if (((ctx->arc == ip6_main.lookup_main.output_feature_arc_index) &&
+ (VNET_LINK_IP6 == adj->ia_link)) ||
+ ((ctx->arc == ip4_main.lookup_main.output_feature_arc_index) &&
+ (VNET_LINK_IP4 == adj->ia_link)) ||
+ ((ctx->arc == mpls_main.output_feature_arc_index) &&
+ (VNET_LINK_MPLS == adj->ia_link)))
+ {
+ if (ctx->enable)
+ adj->rewrite_header.flags |= VNET_REWRITE_HAS_FEATURES;
+ else
+ adj->rewrite_header.flags &= ~VNET_REWRITE_HAS_FEATURES;
+ }
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+void
+adj_feature_update (u32 sw_if_index,
+ u8 arc_index,
+ u8 is_enable)
+{
+ /*
+ * Walk all the adjacencies on the interface to update the cached
+ * 'has-features' flag
+ */
+ adj_feature_update_ctx_t ctx = {
+ .arc = arc_index,
+ .enable = is_enable,
+ };
+ adj_walk (sw_if_index, adj_feature_update_walk_cb, &ctx);
+}
+
+/**
+ * @brief Walk the Adjacencies on a given interface
+ */
+void
+adj_walk (u32 sw_if_index,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ /*
+ * walk all the neighbor adjacencies
+ */
+ fib_protocol_t proto;
+
+ FOR_EACH_FIB_IP_PROTOCOL(proto)
+ {
+ adj_nbr_walk(sw_if_index, proto, cb, ctx);
+ adj_mcast_walk(sw_if_index, proto, cb, ctx);
+ }
+}
+
+/**
+ * @brief Return the link type of the adjacency
+ */
+vnet_link_t
+adj_get_link_type (adj_index_t ai)
+{
+ const ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+
+ return (adj->ia_link);
+}
+
+/**
+ * @brief Return the sw interface index of the adjacency.
+ */
+u32
+adj_get_sw_if_index (adj_index_t ai)
+{
+ const ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+
+ return (adj->rewrite_header.sw_if_index);
+}
+
+/**
+ * @brief Return true if the adjacency is 'UP', i.e. can be used for forwarding
+ * 0 is down, !0 is up.
+ */
+int
+adj_is_up (adj_index_t ai)
+{
+ const adj_delegate_t *aed;
+
+ aed = adj_delegate_get(adj_get(ai), ADJ_DELEGATE_BFD);
+
+ if (NULL == aed)
+ {
+ /*
+ * no BFD tracking - resolved
+ */
+ return (!0);
+ }
+ else
+ {
+ /*
+ * defer to the state of the BFD tracking
+ */
+ return (ADJ_BFD_STATE_UP == aed->ad_bfd_state);
+ }
+}
+
+/**
+ * @brief Return the rewrite string of the adjacency
+ */
+const u8*
+adj_get_rewrite (adj_index_t ai)
+{
+ vnet_rewrite_header_t *rw;
+ ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+ rw = &adj->rewrite_header;
+
+ ASSERT (rw->data_bytes != 0xfefe);
+
+ return (rw->data - rw->data_bytes);
+}
+
+static fib_node_t *
+adj_get_node (fib_node_index_t index)
+{
+ ip_adjacency_t *adj;
+
+ adj = adj_get(index);
+
+ return (&adj->ia_node);
+}
+
+#define ADJ_FROM_NODE(_node) \
+ ((ip_adjacency_t*)((char*)_node - STRUCT_OFFSET_OF(ip_adjacency_t, ia_node)))
+
+static void
+adj_node_last_lock_gone (fib_node_t *node)
+{
+ adj_last_lock_gone(ADJ_FROM_NODE(node));
+}
+
+static fib_node_back_walk_rc_t
+adj_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ /*
+ * Que pasa. yo soj en el final!
+ */
+ ASSERT(0);
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/*
+ * Adjacency's graph node virtual function table
+ */
+static const fib_node_vft_t adj_vft = {
+ .fnv_get = adj_get_node,
+ .fnv_last_lock = adj_node_last_lock_gone,
+ .fnv_back_walk = adj_back_walk_notify,
+};
+
+static clib_error_t *
+adj_module_init (vlib_main_t * vm)
+{
+ fib_node_register_type(FIB_NODE_TYPE_ADJ, &adj_vft);
+
+ adj_nbr_module_init();
+ adj_glean_module_init();
+ adj_midchain_module_init();
+ adj_mcast_module_init();
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (adj_module_init);
+
+static clib_error_t *
+adj_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ adj_index_t ai = ADJ_INDEX_INVALID;
+ u32 sw_if_index = ~0;
+ int summary = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &ai))
+ ;
+ else if (unformat (input, "sum"))
+ summary = 1;
+ else if (unformat (input, "summary"))
+ summary = 1;
+ else if (unformat (input, "%U",
+ unformat_vnet_sw_interface, vnet_get_main(),
+ &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (summary)
+ {
+ vlib_cli_output (vm, "Number of adjacenies: %d", pool_elts(adj_pool));
+ vlib_cli_output (vm, "Per-adjacency counters: %s",
+ (adj_are_counters_enabled() ?
+ "enabled":
+ "disabled"));
+ }
+ else
+ {
+ if (ADJ_INDEX_INVALID != ai)
+ {
+ if (pool_is_free_index(adj_pool, ai))
+ {
+ vlib_cli_output (vm, "adjacency %d invalid", ai);
+ return 0;
+ }
+
+ vlib_cli_output (vm, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_DETAIL);
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ pool_foreach_index(ai, adj_pool,
+ ({
+ if (~0 != sw_if_index &&
+ sw_if_index != adj_get_sw_if_index(ai))
+ {
+ }
+ else
+ {
+ vlib_cli_output (vm, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_NONE);
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+ }
+ return 0;
+}
+
+/*?
+ * Show all adjacencies.
+ * @cliexpar
+ * @cliexstart{sh adj}
+ * [@0]
+ * [@1] glean: loop0
+ * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@3] mpls via 1.0.0.2 loop0: MPLS: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@5] mpls via 1.0.0.3 loop0: MPLS: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (adj_show_command, static) = {
+ .path = "show adj",
+ .short_help = "show adj [<adj_index>] [interface] [summary]",
+ .function = adj_show,
+};
+
+/**
+ * @brief CLI invoked function to enable/disable per-adj counters
+ */
+static clib_error_t *
+adj_cli_counters_set (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = NULL;
+ int enable = ~0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "enable"))
+ enable = 1;
+ else if (unformat (input, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (enable != ~0)
+ {
+ /* user requested something sensible */
+ adj_per_adj_counters = enable;
+ }
+ else
+ {
+ error = clib_error_return (0, "specify 'enable' or 'disable'");
+ }
+
+ return (error);
+}
+
+/*?
+ * Enabe/disble per-adjacency counters. This is optional because it comes with
+ * a non-negligible performance cost.
+ ?*/
+VLIB_CLI_COMMAND (adj_cli_counters_set_command, static) = {
+ .path = "adjacency counters",
+ .short_help = "adjacency counters [enable|disable]",
+ .function = adj_cli_counters_set,
+};
diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h
new file mode 100644
index 00000000..ed5eb1f1
--- /dev/null
+++ b/src/vnet/adj/adj.h
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * An adjacency is a representation of an attached L3 peer.
+ *
+ * Adjacency Sub-types:
+ * - neighbour: a representation of an attached L3 peer.
+ * Key:{addr,interface,link/ether-type}
+ * SHARED
+ * - glean: used to drive ARP/ND for packets destined to a local sub-net.
+ * 'glean' mean use the packet's destination address as the target
+ * address in the ARP packet.
+ * UNSHARED. Only one per-interface.
+ * - midchain: a nighbour adj on a virtual/tunnel interface.
+ *
+ * The API to create and update the adjacency is very sub-type specific. This
+ * is intentional as it encourages the user to carefully consider which adjacency
+ * sub-type they are really using, and hence assign it data in the appropriate
+ * sub-type space in the union of sub-types. This prevents the adj becoming a
+ * disorganised dumping group for 'my features needs a u16 somewhere' data. It
+ * is important to enforce this approach as space in the adjacency is a premium,
+ * as we need it to fit in 1 cache line.
+ *
+ * the API is also based around an index to an ajdacency not a raw pointer. This
+ * is so the user doesn't suffer the same limp inducing firearm injuries that
+ * the author suffered as the adjacenices can realloc.
+ */
+
+#ifndef __ADJ_H__
+#define __ADJ_H__
+
+#include <vnet/adj/adj_types.h>
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_glean.h>
+#include <vnet/adj/rewrite.h>
+
+/** @brief Common (IP4/IP6) next index stored in adjacency. */
+typedef enum
+{
+ /** Adjacency to drop this packet. */
+ IP_LOOKUP_NEXT_DROP,
+ /** Adjacency to punt this packet. */
+ IP_LOOKUP_NEXT_PUNT,
+
+ /** This packet is for one of our own IP addresses. */
+ IP_LOOKUP_NEXT_LOCAL,
+
+ /** This packet matches an "incomplete adjacency" and packets
+ need to be passed to ARP to find rewrite string for
+ this destination. */
+ IP_LOOKUP_NEXT_ARP,
+
+ /** This packet matches an "interface route" and packets
+ need to be passed to ARP to find rewrite string for
+ this destination. */
+ IP_LOOKUP_NEXT_GLEAN,
+
+ /** This packet is to be rewritten and forwarded to the next
+ processing node. This is typically the output interface but
+ might be another node for further output processing. */
+ IP_LOOKUP_NEXT_REWRITE,
+
+ /** This packets follow a mid-chain adjacency */
+ IP_LOOKUP_NEXT_MIDCHAIN,
+
+ /** This packets needs to go to ICMP error */
+ IP_LOOKUP_NEXT_ICMP_ERROR,
+
+ /** Multicast Adjacency. */
+ IP_LOOKUP_NEXT_MCAST,
+
+ /** Multicast Midchain Adjacency. An Adjacency for sending macst packets
+ * on a tunnel/virtual interface */
+ IP_LOOKUP_NEXT_MCAST_MIDCHAIN,
+
+ IP_LOOKUP_N_NEXT,
+} __attribute__ ((packed)) ip_lookup_next_t;
+
+typedef enum
+{
+ IP4_LOOKUP_N_NEXT = IP_LOOKUP_N_NEXT,
+} ip4_lookup_next_t;
+
+typedef enum
+{
+ /* Hop-by-hop header handling */
+ IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT,
+ IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP,
+ IP6_LOOKUP_NEXT_POP_HOP_BY_HOP,
+ IP6_LOOKUP_N_NEXT,
+} ip6_lookup_next_t;
+
+#define IP4_LOOKUP_NEXT_NODES { \
+ [IP_LOOKUP_NEXT_DROP] = "ip4-drop", \
+ [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", \
+ [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \
+ [IP_LOOKUP_NEXT_ARP] = "ip4-arp", \
+ [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \
+ [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite", \
+ [IP_LOOKUP_NEXT_MCAST] = "ip4-rewrite-mcast", \
+ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \
+ [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip4-mcast-midchain", \
+ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \
+}
+
+#define IP6_LOOKUP_NEXT_NODES { \
+ [IP_LOOKUP_NEXT_DROP] = "ip6-drop", \
+ [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", \
+ [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", \
+ [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", \
+ [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean", \
+ [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \
+ [IP_LOOKUP_NEXT_MCAST] = "ip6-rewrite-mcast", \
+ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \
+ [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip6-mcast-midchain", \
+ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \
+ [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \
+ [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \
+ [IP6_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", \
+}
+
+/**
+ * Forward delcartion
+ */
+struct ip_adjacency_t_;
+
+/**
+ * @brief A function type for post-rewrite fixups on midchain adjacency
+ */
+typedef void (*adj_midchain_fixup_t) (vlib_main_t * vm,
+ struct ip_adjacency_t_ * adj,
+ vlib_buffer_t * b0);
+
+/**
+ * @brief Flags on an IP adjacency
+ */
+typedef enum ip_adjacency_flags_t_
+{
+ ADJ_FLAG_NONE = 0,
+
+ /**
+ * Currently a sync walk is active. Used to prevent re-entrant walking
+ */
+ ADJ_FLAG_SYNC_WALK_ACTIVE = (1 << 0),
+
+ /**
+ * Packets TX through the midchain do not increment the interface
+ * counters. This should be used when the adj is associated with an L2
+ * interface and that L2 interface is in a bridege domain. In that case
+ * the packet will have traversed the interface's TX node, and hence have
+ * been counted, before it traverses ths midchain
+ */
+ ADJ_FLAG_MIDCHAIN_NO_COUNT = (1 << 1),
+} __attribute__ ((packed)) adj_flags_t;
+
+/**
+ * @brief IP unicast adjacency.
+ * @note cache aligned.
+ *
+ * An adjacency is a represenation of a peer on a particular link.
+ */
+typedef struct ip_adjacency_t_
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+ /**
+ * Linkage into the FIB node grpah. First member since this type
+ * has 8 byte alignment requirements.
+ */
+ fib_node_t ia_node;
+
+ /**
+ * Next hop after ip4-lookup.
+ * This is not accessed in the rewrite nodes.
+ * 1-bytes
+ */
+ ip_lookup_next_t lookup_next_index;
+
+ /**
+ * link/ether-type
+ * 1 bytes
+ */
+ vnet_link_t ia_link;
+
+ /**
+ * The protocol of the neighbor/peer. i.e. the protocol with
+ * which to interpret the 'next-hop' attirbutes of the sub-types.
+ * 1-btyes
+ */
+ fib_protocol_t ia_nh_proto;
+
+ /**
+ * Flags on the adjacency
+ * 1-bytes
+ */
+ adj_flags_t ia_flags;
+
+ union
+ {
+ /**
+ * IP_LOOKUP_NEXT_ARP/IP_LOOKUP_NEXT_REWRITE
+ *
+ * neighbour adjacency sub-type;
+ */
+ struct
+ {
+ ip46_address_t next_hop;
+ } nbr;
+ /**
+ * IP_LOOKUP_NEXT_MIDCHAIN
+ *
+ * A nbr adj that is also recursive. Think tunnels.
+ * A nbr adj can transition to be of type MDICHAIN
+ * so be sure to leave the two structs with the next_hop
+ * fields aligned.
+ */
+ struct
+ {
+ /**
+ * The recursive next-hop.
+ * This field MUST be at the same memory location as
+ * sub_type.nbr.next_hop
+ */
+ ip46_address_t next_hop;
+ /**
+ * The next DPO to use
+ */
+ dpo_id_t next_dpo;
+ /**
+ * A function to perform the post-rewrite fixup
+ */
+ adj_midchain_fixup_t fixup_func;
+ } midchain;
+ /**
+ * IP_LOOKUP_NEXT_GLEAN
+ *
+ * Glean the address to ARP for from the packet's destination.
+ * Technically these aren't adjacencies, i.e. they are not a
+ * representation of a peer. One day we might untangle this coupling
+ * and use a new Glean DPO.
+ */
+ struct
+ {
+ ip46_address_t receive_addr;
+ } glean;
+ } sub_type;
+
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+
+ /* Rewrite in second/third cache lines */
+ vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE);
+
+ /**
+ * more control plane members that do not fit on the first cacheline
+ */
+ /**
+ * A sorted vector of delegates
+ */
+ struct adj_delegate_t_ *ia_delegates;
+
+} ip_adjacency_t;
+
+STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline0) == 0),
+ "IP adjacency cachline 0 is not offset");
+STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline1) ==
+ CLIB_CACHE_LINE_BYTES),
+ "IP adjacency cachline 1 is more than one cachline size offset");
+
+/**
+ * @brief
+ * Take a reference counting lock on the adjacency
+ */
+extern void adj_lock(adj_index_t adj_index);
+/**
+ * @brief
+ * Release a reference counting lock on the adjacency
+ */
+extern void adj_unlock(adj_index_t adj_index);
+
+/**
+ * @brief
+ * Add a child dependent to an adjacency. The child will
+ * thus be informed via its registerd back-walk function
+ * when the adjacency state changes.
+ */
+extern u32 adj_child_add(adj_index_t adj_index,
+ fib_node_type_t type,
+ fib_node_index_t child_index);
+/**
+ * @brief
+ * Remove a child dependent
+ */
+extern void adj_child_remove(adj_index_t adj_index,
+ u32 sibling_index);
+
+/**
+ * @brief Walk the Adjacencies on a given interface
+ */
+extern void adj_walk (u32 sw_if_index,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Return the link type of the adjacency
+ */
+extern vnet_link_t adj_get_link_type (adj_index_t ai);
+
+/**
+ * @brief Return the sw interface index of the adjacency.
+ */
+extern u32 adj_get_sw_if_index (adj_index_t ai);
+
+/**
+ * @brief Return true if the adjacency is 'UP', i.e. can be used for forwarding.
+ * 0 is down, !0 is up.
+ */
+extern int adj_is_up (adj_index_t ai);
+
+/**
+ * @brief Return the link type of the adjacency
+ */
+extern const u8* adj_get_rewrite (adj_index_t ai);
+
+/**
+ * @brief Notify the adjacency subsystem that the features settings for
+ * an interface have changed
+ */
+extern void adj_feature_update (u32 sw_if_index, u8 arc_index, u8 is_enable);
+
+/**
+ * @brief
+ * The global adjacnecy pool. Exposed for fast/inline data-plane access
+ */
+extern ip_adjacency_t *adj_pool;
+
+/**
+ * @brief
+ * Adjacency packet counters
+ */
+extern vlib_combined_counter_main_t adjacency_counters;
+
+/**
+ * @brief Global Config for enabling per-adjacency counters
+ * This is configurable because it comes with a non-negligible
+ * performance cost. */
+extern int adj_per_adj_counters;
+
+/**
+ * @brief
+ * Get a pointer to an adjacency object from its index
+ */
+static inline ip_adjacency_t *
+adj_get (adj_index_t adj_index)
+{
+ return (vec_elt_at_index(adj_pool, adj_index));
+}
+
+/**
+ * @brief Get the global configuration option for enabling per-adj counters
+ */
+static inline int
+adj_are_counters_enabled (void)
+{
+ return (adj_per_adj_counters);
+}
+
+#endif
diff --git a/src/vnet/adj/adj_bfd.c b/src/vnet/adj/adj_bfd.c
new file mode 100644
index 00000000..3d294c46
--- /dev/null
+++ b/src/vnet/adj/adj_bfd.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/bfd/bfd_main.h>
+
+#include <vnet/adj/adj_delegate.h>
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/fib/fib_walk.h>
+
+static adj_bfd_state_t
+adj_bfd_bfd_state_to_fib (bfd_state_e bstate)
+{
+ switch (bstate)
+ {
+ case BFD_STATE_up:
+ return (ADJ_BFD_STATE_UP);
+ case BFD_STATE_down:
+ case BFD_STATE_admin_down:
+ case BFD_STATE_init:
+ return (ADJ_BFD_STATE_DOWN);
+ }
+ return (ADJ_BFD_STATE_DOWN);
+}
+
+static void
+adj_bfd_update_walk (adj_index_t ai)
+{
+ /*
+ * initiate a backwalk of dependent children
+ * to notify of the state change of this adj.
+ */
+ fib_node_back_walk_ctx_t ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
+ };
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &ctx);
+}
+
+/**
+ * @brief Callback function registered with BFD module to receive notifications
+ * of the CRUD of BFD sessions
+ * would be static but for the fact it's called from the unit-tests
+ */
+void
+adj_bfd_notify (bfd_listen_event_e event,
+ const bfd_session_t *session)
+{
+ const bfd_udp_key_t *key;
+ fib_protocol_t fproto;
+ adj_delegate_t *aed;
+ adj_index_t ai;
+
+ if (BFD_HOP_TYPE_SINGLE != session->hop_type)
+ {
+ /*
+ * multi-hop BFD sessions attach directly to the FIB entry
+ * single-hop adj to the associate adjacency.
+ */
+ return;
+ }
+
+ key = &session->udp.key;
+
+ fproto = (ip46_address_is_ip4 (&key->peer_addr) ?
+ FIB_PROTOCOL_IP4:
+ FIB_PROTOCOL_IP6);
+
+ /*
+ * find the adj that corresponds to the BFD session.
+ */
+ ai = adj_nbr_add_or_lock(fproto,
+ fib_proto_to_link(fproto),
+ &key->peer_addr,
+ key->sw_if_index);
+
+ switch (event)
+ {
+ case BFD_LISTEN_EVENT_CREATE:
+ /*
+ * The creation of a new session
+ */
+ if ((ADJ_INDEX_INVALID != ai) &&
+ (aed = adj_delegate_get(adj_get(ai),
+ ADJ_DELEGATE_BFD)))
+ {
+ /*
+ * already got state for this adj
+ */
+ }
+ else
+ {
+ /*
+ * lock the adj. add the delegate.
+ * Lockinging the adj prevents it being removed and thus maintains
+ * the BFD derived states
+ */
+ adj_lock(ai);
+
+ aed = adj_delegate_find_or_add(adj_get(ai), ADJ_DELEGATE_BFD);
+
+ /*
+ * pretend the session is up and skip the walk.
+ * If we set it down then we get traffic loss on new children.
+ * if we walk then we lose traffic for existing children. Wait
+ * for the first BFD UP/DOWN before we let the session's state
+ * influence forwarding.
+ */
+ aed->ad_bfd_state = ADJ_BFD_STATE_UP;
+ aed->ad_bfd_index = session->bs_idx;
+ }
+ break;
+
+ case BFD_LISTEN_EVENT_UPDATE:
+ /*
+ * state change up/dowm and
+ */
+ aed = adj_delegate_get(adj_get(ai), ADJ_DELEGATE_BFD);
+
+ if (NULL != aed)
+ {
+ aed->ad_bfd_state = adj_bfd_bfd_state_to_fib(session->local_state);
+ adj_bfd_update_walk(ai);
+ }
+ /*
+ * else
+ * not an adj with BFD state
+ */
+ break;
+
+ case BFD_LISTEN_EVENT_DELETE:
+ /*
+ * session has been removed.
+ */
+
+ if (adj_delegate_get(adj_get(ai), ADJ_DELEGATE_BFD))
+ {
+ /*
+ * has an associated BFD tracking delegate
+ * remove the BFD tracking deletgate, update children, then
+ * unlock the adj
+ */
+ adj_delegate_remove(adj_get(ai), ADJ_DELEGATE_BFD);
+
+ adj_bfd_update_walk(ai);
+ adj_unlock(ai);
+ }
+ /*
+ * else
+ * no BFD associated state
+ */
+ break;
+ }
+
+ /*
+ * unlock match of the add-or-lock at the start
+ */
+ adj_unlock(ai);
+}
+
+static clib_error_t *
+adj_bfd_main_init (vlib_main_t * vm)
+{
+ clib_error_t * error = NULL;
+
+ if ((error = vlib_call_init_function (vm, bfd_main_init)))
+ return (error);
+
+ bfd_register_listener(adj_bfd_notify);
+
+ return (error);
+}
+
+VLIB_INIT_FUNCTION (adj_bfd_main_init);
diff --git a/src/vnet/adj/adj_delegate.c b/src/vnet/adj/adj_delegate.c
new file mode 100644
index 00000000..701b36e2
--- /dev/null
+++ b/src/vnet/adj/adj_delegate.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_delegate.h>
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_internal.h>
+
+static adj_delegate_t *
+adj_delegate_find_i (const ip_adjacency_t *adj,
+ adj_delegate_type_t type,
+ u32 *index)
+{
+ adj_delegate_t *delegate;
+ int ii;
+
+ ii = 0;
+ vec_foreach(delegate, adj->ia_delegates)
+ {
+ if (delegate->ad_type == type)
+ {
+ if (NULL != index)
+ *index = ii;
+
+ return (delegate);
+ }
+ else
+ {
+ ii++;
+ }
+ }
+
+ return (NULL);
+}
+
+adj_delegate_t *
+adj_delegate_get (const ip_adjacency_t *adj,
+ adj_delegate_type_t type)
+{
+ return (adj_delegate_find_i(adj, type, NULL));
+}
+
+void
+adj_delegate_remove (ip_adjacency_t *adj,
+ adj_delegate_type_t type)
+{
+ adj_delegate_t *aed;
+ u32 index = ~0;
+
+ aed = adj_delegate_find_i(adj, type, &index);
+
+ ASSERT(NULL != aed);
+
+ vec_del1(adj->ia_delegates, index);
+}
+
+static int
+adj_delegate_cmp_for_sort (void * v1,
+ void * v2)
+{
+ adj_delegate_t *delegate1 = v1, *delegate2 = v2;
+
+ return (delegate1->ad_type - delegate2->ad_type);
+}
+
+static void
+adj_delegate_init (ip_adjacency_t *adj,
+ adj_delegate_type_t type)
+
+{
+ adj_delegate_t delegate = {
+ .ad_adj_index = adj_get_index(adj),
+ .ad_type = type,
+ };
+
+ vec_add1(adj->ia_delegates, delegate);
+ vec_sort_with_function(adj->ia_delegates,
+ adj_delegate_cmp_for_sort);
+}
+
+adj_delegate_t *
+adj_delegate_find_or_add (ip_adjacency_t *adj,
+ adj_delegate_type_t adt)
+{
+ adj_delegate_t *delegate;
+
+ delegate = adj_delegate_get(adj, adt);
+
+ if (NULL == delegate)
+ {
+ adj_delegate_init(adj, adt);
+ }
+
+ return (adj_delegate_get(adj, adt));
+}
+
+/**
+ * typedef for printing a delegate
+ */
+typedef u8 * (*adj_delegate_format_t)(const adj_delegate_t *aed,
+ u8 *s);
+
+/**
+ * Print a delegate that represents BFD tracking
+ */
+static u8 *
+adj_delegate_fmt_bfd (const adj_delegate_t *aed,
+ u8 *s)
+{
+ s = format(s, "BFD:[state:%d index:%d]",
+ aed->ad_bfd_state,
+ aed->ad_bfd_index);
+
+ return (s);
+}
+
+/**
+ * A delegate type to formatter map
+ */
+static adj_delegate_format_t aed_formatters[] =
+{
+ [ADJ_DELEGATE_BFD] = adj_delegate_fmt_bfd,
+};
+
+u8 *
+format_adj_deletegate (u8 * s, va_list * args)
+{
+ adj_delegate_t *aed;
+
+ aed = va_arg (*args, adj_delegate_t *);
+
+ return (aed_formatters[aed->ad_type](aed, s));
+}
diff --git a/src/vnet/adj/adj_delegate.h b/src/vnet/adj/adj_delegate.h
new file mode 100644
index 00000000..17651203
--- /dev/null
+++ b/src/vnet/adj/adj_delegate.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_DELEGATE_T__
+#define __ADJ_DELEGATE_T__
+
+#include <vnet/adj/adj.h>
+
+/**
+ * Delegate types
+ */
+typedef enum adj_delegate_type_t_ {
+ /**
+ * BFD session state
+ */
+ ADJ_DELEGATE_BFD,
+} adj_delegate_type_t;
+
+#define FOR_EACH_ADJ_DELEGATE(_adj, _adt, _aed, _body) \
+{ \
+ for (_adt = ADJ_DELEGATE_BFD; \
+ _adt <= ADJ_DELEGATE_BFD; \
+ _adt++) \
+ { \
+ _aed = adj_delegate_get(_adj, _adt); \
+ if (NULL != _aed) { \
+ _body; \
+ } \
+ } \
+}
+
+/**
+ * Distillation of the BFD session states into a go/no-go for using
+ * the associated tracked adjacency
+ */
+typedef enum adj_bfd_state_t_
+{
+ ADJ_BFD_STATE_DOWN,
+ ADJ_BFD_STATE_UP,
+} adj_bfd_state_t;
+
+/**
+ * A Delagate is a means to implement the Delagation design pattern;
+ * the extension of an object's functionality through the composition of,
+ * and delgation to, other objects.
+ * These 'other' objects are delegates. Delagates are thus attached to
+ * ADJ objects to extend their functionality.
+ */
+typedef struct adj_delegate_t_
+{
+ /**
+ * The ADJ entry object to which the delagate is attached
+ */
+ adj_index_t ad_adj_index;
+
+ /**
+ * The delagate type
+ */
+ adj_delegate_type_t ad_type;
+
+ /**
+ * A union of data for the different delegate types
+ */
+ union
+ {
+ /**
+ * BFD delegate daa
+ */
+ struct {
+ /**
+ * BFD session state
+ */
+ adj_bfd_state_t ad_bfd_state;
+ /**
+ * BFD session index
+ */
+ u32 ad_bfd_index;
+ };
+ };
+} adj_delegate_t;
+
+extern void adj_delegate_remove(ip_adjacency_t *adj,
+ adj_delegate_type_t type);
+
+extern adj_delegate_t *adj_delegate_find_or_add(ip_adjacency_t *adj,
+ adj_delegate_type_t fdt);
+extern adj_delegate_t *adj_delegate_get(const ip_adjacency_t *adj,
+ adj_delegate_type_t type);
+
+extern u8 *format_adj_deletegate(u8 * s, va_list * args);
+
+#endif
diff --git a/src/vnet/adj/adj_glean.c b/src/vnet/adj/adj_glean.c
new file mode 100644
index 00000000..8d86e2a9
--- /dev/null
+++ b/src/vnet/adj/adj_glean.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/fib/fib_walk.h>
+
+/*
+ * The 'DB' of all glean adjs.
+ * There is only one glean per-interface per-protocol, so this is a per-interface
+ * vector
+ */
+static adj_index_t *adj_gleans[FIB_PROTOCOL_MAX];
+
+static inline vlib_node_registration_t*
+adj_get_glean_node (fib_protocol_t proto)
+{
+ switch (proto) {
+ case FIB_PROTOCOL_IP4:
+ return (&ip4_glean_node);
+ case FIB_PROTOCOL_IP6:
+ return (&ip6_glean_node);
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ ASSERT(0);
+ return (NULL);
+}
+
+/*
+ * adj_glean_add_or_lock
+ *
+ * The next_hop address here is used for source address selection in the DP.
+ * The glean adj is added to an interface's connected prefix, the next-hop
+ * passed here is the local prefix on the same interface.
+ */
+adj_index_t
+adj_glean_add_or_lock (fib_protocol_t proto,
+ u32 sw_if_index,
+ const ip46_address_t *nh_addr)
+{
+ ip_adjacency_t * adj;
+
+ vec_validate_init_empty(adj_gleans[proto], sw_if_index, ADJ_INDEX_INVALID);
+
+ if (ADJ_INDEX_INVALID == adj_gleans[proto][sw_if_index])
+ {
+ adj = adj_alloc(proto);
+
+ adj->lookup_next_index = IP_LOOKUP_NEXT_GLEAN;
+ adj->ia_nh_proto = proto;
+ adj_gleans[proto][sw_if_index] = adj_get_index(adj);
+
+ if (NULL != nh_addr)
+ {
+ adj->sub_type.glean.receive_addr = *nh_addr;
+ }
+
+ adj->rewrite_header.data_bytes = 0;
+
+ vnet_rewrite_for_sw_interface(vnet_get_main(),
+ adj_fib_proto_2_nd(proto),
+ sw_if_index,
+ adj_get_glean_node(proto)->index,
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
+ &adj->rewrite_header,
+ sizeof (adj->rewrite_data));
+ }
+ else
+ {
+ adj = adj_get(adj_gleans[proto][sw_if_index]);
+ }
+
+ adj_lock(adj_get_index(adj));
+
+ return (adj_get_index(adj));
+}
+
+void
+adj_glean_remove (fib_protocol_t proto,
+ u32 sw_if_index)
+{
+ ASSERT(sw_if_index < vec_len(adj_gleans[proto]));
+
+ adj_gleans[proto][sw_if_index] = ADJ_INDEX_INVALID;
+}
+
+static clib_error_t *
+adj_glean_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 flags)
+{
+ /*
+ * for each glean on the interface trigger a walk back to the children
+ */
+ fib_protocol_t proto;
+ ip_adjacency_t *adj;
+
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ if (sw_if_index >= vec_len(adj_gleans[proto]) ||
+ ADJ_INDEX_INVALID == adj_gleans[proto][sw_if_index])
+ continue;
+
+ adj = adj_get(adj_gleans[proto][sw_if_index]);
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ?
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_glean_interface_state_change);
+
+/**
+ * @brief Invoked on each SW interface of a HW interface when the
+ * HW interface state changes
+ */
+static void
+adj_nbr_hw_sw_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ void *arg)
+{
+ adj_glean_interface_state_change(vnm, sw_if_index, (uword) arg);
+}
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+adj_glean_hw_interface_state_change (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ /*
+ * walk SW interfaces on the HW
+ */
+ uword sw_flags;
+
+ sw_flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ?
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP :
+ 0);
+
+ vnet_hw_interface_walk_sw(vnm, hw_if_index,
+ adj_nbr_hw_sw_interface_state_change,
+ (void*) sw_flags);
+
+ return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(
+ adj_glean_hw_interface_state_change);
+
+static clib_error_t *
+adj_glean_interface_delete (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 is_add)
+{
+ /*
+ * for each glean on the interface trigger a walk back to the children
+ */
+ fib_protocol_t proto;
+ ip_adjacency_t *adj;
+
+ if (is_add)
+ {
+ /*
+ * not interested in interface additions. we will not back walk
+ * to resolve paths through newly added interfaces. Why? The control
+ * plane should have the brains to add interfaces first, then routes.
+ * So the case where there are paths with a interface that matches
+ * one just created is the case where the path resolved through an
+ * interface that was deleted, and still has not been removed. The
+ * new interface added, is NO GUARANTEE that the interface being
+ * added now, even though it may have the same sw_if_index, is the
+ * same interface that the path needs. So tough!
+ * If the control plane wants these routes to resolve it needs to
+ * remove and add them again.
+ */
+ return (NULL);
+ }
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ if (sw_if_index >= vec_len(adj_gleans[proto]) ||
+ ADJ_INDEX_INVALID == adj_gleans[proto][sw_if_index])
+ continue;
+
+ adj = adj_get(adj_gleans[proto][sw_if_index]);
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_glean_interface_delete);
+
+u8*
+format_adj_glean (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ ip_adjacency_t * adj = adj_get(index);
+
+ return (format(s, "%U-glean: %U",
+ format_fib_protocol, adj->ia_nh_proto,
+ format_vnet_sw_interface_name,
+ vnm,
+ vnet_get_sw_interface(vnm,
+ adj->rewrite_header.sw_if_index)));
+}
+
+
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+ adj_lock(dpo->dpoi_index);
+}
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+ adj_unlock(dpo->dpoi_index);
+}
+
+const static dpo_vft_t adj_glean_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_glean,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a glean
+ * object.
+ *
+ * this means that these graph nodes are ones from which a glean is the
+ * parent object in the DPO-graph.
+ */
+const static char* const glean_ip4_nodes[] =
+{
+ "ip4-glean",
+ NULL,
+};
+const static char* const glean_ip6_nodes[] =
+{
+ "ip6-glean",
+ NULL,
+};
+
+const static char* const * const glean_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = glean_ip4_nodes,
+ [DPO_PROTO_IP6] = glean_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+adj_glean_module_init (void)
+{
+ dpo_register(DPO_ADJACENCY_GLEAN, &adj_glean_dpo_vft, glean_nodes);
+}
diff --git a/src/vnet/adj/adj_glean.h b/src/vnet/adj/adj_glean.h
new file mode 100644
index 00000000..640bd2f9
--- /dev/null
+++ b/src/vnet/adj/adj_glean.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @brief Glean Adjacency
+ *
+ * A gleean adjacency represent the need to discover new peers on an
+ * attached link. Packets that hit a glean adjacency will generate an
+ * ARP/ND packet addessesed to the packet's destination address.
+ * Note this is different to an incomplete neighbour adjacency, which
+ * does not send ARP/ND requests to the packet's destination address,
+ * but instead to the next-hop address of the adjacency itself.
+ */
+
+#ifndef __ADJ_GLEAN_H__
+#define __ADJ_GLEAN_H__
+
+#include <vnet/adj/adj_types.h>
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing glean adjacency
+ *
+ * @param proto
+ * The protocol for the neighbours that we wish to glean
+ *
+ * @param sw_if_index
+ * The interface on which to glean
+ *
+ * @param nh_addr
+ * the address applied to the interface on which to glean. This
+ * as the source address in packets when the ARP/ND packet is sent
+ */
+extern adj_index_t adj_glean_add_or_lock(fib_protocol_t proto,
+ u32 sw_if_index,
+ const ip46_address_t *nh_addr);
+
+/**
+ * @brief Format/display a glean adjacency.
+ */
+extern u8* format_adj_glean(u8* s, va_list *ap);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_glean_module_init(void);
+
+#endif
diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h
new file mode 100644
index 00000000..2c123c54
--- /dev/null
+++ b/src/vnet/adj/adj_internal.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_INTERNAL_H__
+#define __ADJ_INTERNAL_H__
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/ip/ip.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/adj/adj_l2.h>
+#include <vnet/adj/adj_nsh.h>
+
+/**
+ * big switch to turn on Adjacency debugging
+ */
+#undef ADJ_DEBUG
+
+/*
+ * Debug macro
+ */
+#ifdef ADJ_DEBUG
+#define ADJ_DBG(_adj, _fmt, _args...) \
+{ \
+ clib_warning("adj:[%d:%p]:" _fmt, \
+ _adj - adj_pool, _adj, \
+ ##_args); \
+}
+#else
+#define ADJ_DBG(_e, _fmt, _args...)
+#endif
+
+static inline u32
+adj_get_rewrite_node (vnet_link_t linkt)
+{
+ switch (linkt) {
+ case VNET_LINK_IP4:
+ return (ip4_rewrite_node.index);
+ case VNET_LINK_IP6:
+ return (ip6_rewrite_node.index);
+ case VNET_LINK_MPLS:
+ return (mpls_output_node.index);
+ case VNET_LINK_ETHERNET:
+ return (adj_l2_rewrite_node.index);
+ case VNET_LINK_NSH:
+ return (adj_nsh_rewrite_node.index);
+ case VNET_LINK_ARP:
+ break;
+ }
+ ASSERT(0);
+ return (0);
+}
+
+static inline vnet_link_t
+adj_fib_proto_2_nd (fib_protocol_t fp)
+{
+ switch (fp)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (VNET_LINK_ARP);
+ case FIB_PROTOCOL_IP6:
+ return (VNET_LINK_IP6);
+ case FIB_PROTOCOL_MPLS:
+ return (VNET_LINK_MPLS);
+ }
+ return (0);
+}
+
+/**
+ * @brief
+ * Get a pointer to an adjacency object from its index
+ */
+static inline adj_index_t
+adj_get_index (ip_adjacency_t *adj)
+{
+ return (adj - adj_pool);
+}
+
+extern void adj_nbr_update_rewrite_internal(ip_adjacency_t *adj,
+ ip_lookup_next_t adj_next_index,
+ u32 complete_next_index,
+ u32 next_index,
+ u8 *rewrite);
+extern void adj_midchain_setup(adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_flags_t flags);
+
+extern ip_adjacency_t * adj_alloc(fib_protocol_t proto);
+
+extern void adj_nbr_remove(adj_index_t ai,
+ fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index);
+extern void adj_glean_remove(fib_protocol_t proto,
+ u32 sw_if_index);
+extern void adj_mcast_remove(fib_protocol_t proto,
+ u32 sw_if_index);
+
+#endif
diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c
new file mode 100644
index 00000000..20d70dd4
--- /dev/null
+++ b/src/vnet/adj/adj_l2.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/adj/adj_l2.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+
+/**
+ * @brief Trace data for a L2 Midchain
+ */
+typedef struct adj_l2_trace_t_ {
+ /** Adjacency index taken. */
+ u32 adj_index;
+} adj_l2_trace_t;
+
+static u8 *
+format_adj_l2_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ adj_l2_trace_t * t = va_arg (*args, adj_l2_trace_t *);
+
+ s = format (s, "adj-idx %d : %U",
+ t->adj_index,
+ format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE);
+ return s;
+}
+
+typedef enum adj_l2_rewrite_next_t_
+{
+ ADJ_L2_REWRITE_NEXT_DROP,
+} adj_l2_rewrite_next_t;
+
+always_inline uword
+adj_l2_rewrite_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int is_midchain)
+{
+ u32 * from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, * to_next, next_index;
+ u32 thread_index = vlib_get_thread_index();
+ ethernet_main_t * em = &ethernet_main;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_adjacency_t * adj0;
+ vlib_buffer_t * p0;
+ char *h0;
+ u32 pi0, rw_len0, adj_index0, next0 = 0;
+ u32 tx_sw_if_index0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ h0 = vlib_buffer_get_current (p0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get (adj_index0);
+
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], h0,
+ sizeof (ethernet_header_t));
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
+ vnet_buffer(p0)->sw_if_index[VLIB_TX] = adj0->rewrite_header.sw_if_index;
+
+ vlib_increment_combined_counter(&adjacency_counters,
+ thread_index,
+ adj_index0,
+ /* packet increment */ 0,
+ /* byte increment */ rw_len0);
+
+ /* Check MTU of outgoing interface. */
+ if (PREDICT_TRUE((vlib_buffer_length_in_chain (vm, p0) <=
+ adj0[0].rewrite_header.max_l3_packet_bytes)))
+ {
+ /* Don't adjust the buffer for ttl issue; icmp-error node wants
+ * to see the IP headerr */
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+ }
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+ /*
+ * Follow the feature ARC. this will result eventually in
+ * the midchain-tx node
+ */
+ vnet_feature_arc_start(em->output_feature_arc_index, tx_sw_if_index0, &next0, p0);
+ }
+ else
+ {
+ /* can't fragment L2 */
+ next0 = ADJ_L2_REWRITE_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_l2_trace_t *tr = vlib_add_trace (vm, node,
+ p0, sizeof (*tr));
+ tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+adj_l2_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return adj_l2_rewrite_inline (vm, node, frame, 0);
+}
+
+static uword
+adj_l2_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return adj_l2_rewrite_inline (vm, node, frame, 1);
+}
+
+VLIB_REGISTER_NODE (adj_l2_rewrite_node) = {
+ .function = adj_l2_rewrite,
+ .name = "adj-l2-rewrite",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_l2_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ADJ_L2_REWRITE_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (adj_l2_rewrite_node, adj_l2_rewrite)
+
+VLIB_REGISTER_NODE (adj_l2_midchain_node) = {
+ .function = adj_l2_midchain,
+ .name = "adj-l2-midchain",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_l2_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ADJ_L2_REWRITE_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (adj_l2_midchain_node, adj_l2_midchain)
diff --git a/src/vnet/adj/adj_l2.h b/src/vnet/adj/adj_l2.h
new file mode 100644
index 00000000..3aa1c74b
--- /dev/null
+++ b/src/vnet/adj/adj_l2.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_L2_H__
+#define __ADJ_L2_H__
+
+#include <vnet/adj/adj.h>
+
+extern vlib_node_registration_t adj_l2_midchain_node;
+extern vlib_node_registration_t adj_l2_rewrite_node;
+
+#endif
diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c
new file mode 100644
index 00000000..da06cd00
--- /dev/null
+++ b/src/vnet/adj/adj_mcast.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/fib/fib_walk.h>
+#include <vnet/ip/ip.h>
+
+/*
+ * The 'DB' of all mcast adjs.
+ * There is only one mcast per-interface per-protocol, so this is a per-interface
+ * vector
+ */
+static adj_index_t *adj_mcasts[FIB_PROTOCOL_MAX];
+
+static u32
+adj_get_mcast_node (fib_protocol_t proto)
+{
+ switch (proto) {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_rewrite_mcast_node.index);
+ case FIB_PROTOCOL_IP6:
+ return (ip6_rewrite_mcast_node.index);
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ ASSERT(0);
+ return (0);
+}
+
+/*
+ * adj_mcast_add_or_lock
+ *
+ * The next_hop address here is used for source address selection in the DP.
+ * The mcast adj is added to an interface's connected prefix, the next-hop
+ * passed here is the local prefix on the same interface.
+ */
+adj_index_t
+adj_mcast_add_or_lock (fib_protocol_t proto,
+ vnet_link_t link_type,
+ u32 sw_if_index)
+{
+ ip_adjacency_t * adj;
+
+ vec_validate_init_empty(adj_mcasts[proto], sw_if_index, ADJ_INDEX_INVALID);
+
+ if (ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index])
+ {
+ vnet_main_t *vnm;
+
+ vnm = vnet_get_main();
+ adj = adj_alloc(proto);
+
+ adj->lookup_next_index = IP_LOOKUP_NEXT_MCAST;
+ adj->ia_nh_proto = proto;
+ adj->ia_link = link_type;
+ adj_mcasts[proto][sw_if_index] = adj_get_index(adj);
+ adj_lock(adj_get_index(adj));
+
+ vnet_rewrite_init(vnm, sw_if_index,
+ adj_get_mcast_node(proto),
+ vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
+ &adj->rewrite_header);
+
+ /*
+ * we need a rewrite where the destination IP address is converted
+ * to the appropriate link-layer address. This is interface specific.
+ * So ask the interface to do it.
+ */
+ vnet_update_adjacency_for_sw_interface(vnm, sw_if_index,
+ adj_get_index(adj));
+ }
+ else
+ {
+ adj = adj_get(adj_mcasts[proto][sw_if_index]);
+ adj_lock(adj_get_index(adj));
+ }
+
+ return (adj_get_index(adj));
+}
+
+/**
+ * adj_mcast_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewirte is reset (i.e. when ARP/ND etnry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_mcast_update_rewrite (adj_index_t adj_index,
+ u8 *rewrite,
+ u8 offset,
+ u32 mask)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ /*
+ * update the adj's rewrite string and build the arc
+ * from the rewrite node to the interface's TX node
+ */
+ adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MCAST,
+ adj_get_mcast_node(adj->ia_nh_proto),
+ vnet_tx_node_index_for_sw_interface(
+ vnet_get_main(),
+ adj->rewrite_header.sw_if_index),
+ rewrite);
+ /*
+ * set the fields corresponding to the mcast IP address rewrite
+ * The mask must be stored in network byte order, since the packet's
+ * IP address will also be in network order.
+ */
+ adj->rewrite_header.dst_mcast_offset = offset;
+ adj->rewrite_header.dst_mcast_mask = clib_host_to_net_u32(mask);
+}
+
+/**
+ * adj_mcast_midchain_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewirte is reset (i.e. when ARP/ND etnry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_mcast_midchain_update_rewrite (adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_flags_t flags,
+ u8 *rewrite,
+ u8 offset,
+ u32 mask)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ /*
+ * one time only update. since we don't support chainging the tunnel
+ * src,dst, this is all we need.
+ */
+ ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_MCAST);
+ /*
+ * tunnels can always provide a rewrite.
+ */
+ ASSERT(NULL != rewrite);
+
+ adj_midchain_setup(adj_index, fixup, flags);
+
+ /*
+ * update the adj's rewrite string and build the arc
+ * from the rewrite node to the interface's TX node
+ */
+ adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MCAST_MIDCHAIN,
+ adj_get_mcast_node(adj->ia_nh_proto),
+ vnet_tx_node_index_for_sw_interface(
+ vnet_get_main(),
+ adj->rewrite_header.sw_if_index),
+ rewrite);
+
+ /*
+ * set the fields corresponding to the mcast IP address rewrite
+ * The mask must be stored in network byte order, since the packet's
+ * IP address will also be in network order.
+ */
+ adj->rewrite_header.dst_mcast_offset = offset;
+ adj->rewrite_header.dst_mcast_mask = clib_host_to_net_u32(mask);
+}
+
+void
+adj_mcast_remove (fib_protocol_t proto,
+ u32 sw_if_index)
+{
+ ASSERT(sw_if_index < vec_len(adj_mcasts[proto]));
+
+ adj_mcasts[proto][sw_if_index] = ADJ_INDEX_INVALID;
+}
+
+static clib_error_t *
+adj_mcast_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 flags)
+{
+ /*
+ * for each mcast on the interface trigger a walk back to the children
+ */
+ fib_protocol_t proto;
+ ip_adjacency_t *adj;
+
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ if (sw_if_index >= vec_len(adj_mcasts[proto]) ||
+ ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index])
+ continue;
+
+ adj = adj_get(adj_mcasts[proto][sw_if_index]);
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ?
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_mcast_interface_state_change);
+
+/**
+ * @brief Invoked on each SW interface of a HW interface when the
+ * HW interface state changes
+ */
+static void
+adj_mcast_hw_sw_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ void *arg)
+{
+ adj_mcast_interface_state_change(vnm, sw_if_index, (uword) arg);
+}
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+adj_mcast_hw_interface_state_change (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ /*
+ * walk SW interfaces on the HW
+ */
+ uword sw_flags;
+
+ sw_flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ?
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP :
+ 0);
+
+ vnet_hw_interface_walk_sw(vnm, hw_if_index,
+ adj_mcast_hw_sw_interface_state_change,
+ (void*) sw_flags);
+
+ return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(
+ adj_mcast_hw_interface_state_change);
+
+static clib_error_t *
+adj_mcast_interface_delete (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 is_add)
+{
+ /*
+ * for each mcast on the interface trigger a walk back to the children
+ */
+ fib_protocol_t proto;
+ ip_adjacency_t *adj;
+
+ if (is_add)
+ {
+ /*
+ * not interested in interface additions. we will not back walk
+ * to resolve paths through newly added interfaces. Why? The control
+ * plane should have the brains to add interfaces first, then routes.
+ * So the case where there are paths with a interface that matches
+ * one just created is the case where the path resolved through an
+ * interface that was deleted, and still has not been removed. The
+ * new interface added, is NO GUARANTEE that the interface being
+ * added now, even though it may have the same sw_if_index, is the
+ * same interface that the path needs. So tough!
+ * If the control plane wants these routes to resolve it needs to
+ * remove and add them again.
+ */
+ return (NULL);
+ }
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ if (sw_if_index >= vec_len(adj_mcasts[proto]) ||
+ ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index])
+ continue;
+
+ adj = adj_get(adj_mcasts[proto][sw_if_index]);
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_mcast_interface_delete);
+
+/**
+ * @brief Walk the multicast Adjacencies on a given interface
+ */
+void
+adj_mcast_walk (u32 sw_if_index,
+ fib_protocol_t proto,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (vec_len(adj_mcasts[proto]) > sw_if_index)
+ {
+ if (ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index])
+ {
+ cb(adj_mcasts[proto][sw_if_index], ctx);
+ }
+ }
+}
+
+u8*
+format_adj_mcast (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format(s, "%U-mcast: ",
+ format_fib_protocol, adj->ia_nh_proto);
+ if (adj->rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)
+ s = format(s, "[features] ");
+ s = format (s, "%U",
+ format_vnet_rewrite,
+ &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
+
+ return (s);
+}
+
+u8*
+format_adj_mcast_midchain (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format(s, "%U-mcast-midchain: ",
+ format_fib_protocol, adj->ia_nh_proto);
+ s = format (s, "%U",
+ format_vnet_rewrite,
+ vnm->vlib_main, &adj->rewrite_header,
+ sizeof (adj->rewrite_data), 0);
+ s = format (s, "\n%Ustacked-on:\n%U%U",
+ format_white_space, indent,
+ format_white_space, indent+2,
+ format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2);
+
+ return (s);
+}
+
+
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+ adj_lock(dpo->dpoi_index);
+}
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+ adj_unlock(dpo->dpoi_index);
+}
+
+const static dpo_vft_t adj_mcast_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_mcast,
+};
+const static dpo_vft_t adj_mcast_midchain_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_mcast_midchain,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a mcast
+ * object.
+ *
+ * this means that these graph nodes are ones from which a mcast is the
+ * parent object in the DPO-graph.
+ */
+const static char* const adj_mcast_ip4_nodes[] =
+{
+ "ip4-rewrite-mcast",
+ NULL,
+};
+const static char* const adj_mcast_ip6_nodes[] =
+{
+ "ip6-rewrite-mcast",
+ NULL,
+};
+
+const static char* const * const adj_mcast_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = adj_mcast_ip4_nodes,
+ [DPO_PROTO_IP6] = adj_mcast_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a mcast
+ * object.
+ *
+ * this means that these graph nodes are ones from which a mcast is the
+ * parent object in the DPO-graph.
+ */
+const static char* const adj_mcast_midchain_ip4_nodes[] =
+{
+ "ip4-mcast-midchain",
+ NULL,
+};
+const static char* const adj_mcast_midchain_ip6_nodes[] =
+{
+ "ip6-mcast-midchain",
+ NULL,
+};
+
+const static char* const * const adj_mcast_midchain_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = adj_mcast_midchain_ip4_nodes,
+ [DPO_PROTO_IP6] = adj_mcast_midchain_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+/**
+ * @brief Return the size of the adj DB.
+ * This is only for testing purposes so an efficient implementation is not needed
+ */
+u32
+adj_mcast_db_size (void)
+{
+ u32 n_adjs, sw_if_index;
+ fib_protocol_t proto;
+
+ n_adjs = 0;
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ for (sw_if_index = 0;
+ sw_if_index < vec_len(adj_mcasts[proto]);
+ sw_if_index++)
+ {
+ if (ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index])
+ {
+ n_adjs++;
+ }
+ }
+ }
+
+ return (n_adjs);
+}
+
+void
+adj_mcast_module_init (void)
+{
+ dpo_register(DPO_ADJACENCY_MCAST,
+ &adj_mcast_dpo_vft,
+ adj_mcast_nodes);
+ dpo_register(DPO_ADJACENCY_MCAST_MIDCHAIN,
+ &adj_mcast_midchain_dpo_vft,
+ adj_mcast_midchain_nodes);
+}
diff --git a/src/vnet/adj/adj_mcast.h b/src/vnet/adj/adj_mcast.h
new file mode 100644
index 00000000..bfb0d6f6
--- /dev/null
+++ b/src/vnet/adj/adj_mcast.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @brief Mcast Adjacency
+ *
+ * The multicast adjacency forwards IP traffic on an interface toward a multicast
+ * group address. This is a different type of adjacency to a unicast adjacency
+ * since the application of the MAC header is different, and so the VLIB node
+ * visited is also different. DPO types have different VLIB nodes.
+ */
+
+#ifndef __ADJ_MCAST_H__
+#define __ADJ_MCAST_H__
+
+#include <vnet/adj/adj_types.h>
+#include <vnet/adj/adj_midchain.h>
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing mcast adjacency
+ *
+ * @param proto
+ * The protocol for the neighbours that we wish to mcast
+ *
+ * @param link_type
+ * A description of the protocol of the packets that will forward
+ * through this adj. On an ethernet interface this is the MAC header's
+ * ether-type
+ *
+ * @param sw_if_index
+ * The interface on which to mcast
+ */
+extern adj_index_t adj_mcast_add_or_lock(fib_protocol_t proto,
+ vnet_link_t link_type,
+ u32 sw_if_index);
+
+/**
+ * @brief
+ * Update the rewrite string for an existing adjacecny.
+ *
+ * @param
+ * The index of the adj to update
+ *
+ * @param
+ * The new rewrite
+ *
+ * @param
+ * The offset in the rewrite a which to write in packet's
+ * IP Address
+ *
+ * @param
+ * The mask to apply to the packet berfore the rewrite.
+ */
+extern void adj_mcast_update_rewrite(adj_index_t adj_index,
+ u8 *rewrite,
+ u8 offset,
+ u32 mask);
+
+/**
+ * @brief
+ * Update the rewrite string for an existing adjacecny and
+ * Convert the adjacency into a midchain
+ *
+ * @param
+ * The index of the adj to update
+ *
+ * @param
+ * The new rewrite
+ */
+extern void adj_mcast_midchain_update_rewrite(adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_flags_t flags,
+ u8 *rewrite,
+ u8 offset,
+ u32 mask);
+/**
+ * @brief Walk the multicast Adjacencies on a given interface
+ */
+extern void adj_mcast_walk (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Format/display a mcast adjacency.
+ */
+extern u8* format_adj_mcast(u8* s, va_list *ap);
+extern u8* format_adj_mcast_midchain(u8* s, va_list *ap);
+
+/**
+ * @brief Get the sze of the mcast adj DB. Test purposes only.
+ */
+extern u32 adj_mcast_db_size(void);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_mcast_module_init(void);
+
+#endif
diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
new file mode 100644
index 00000000..e9a510b0
--- /dev/null
+++ b/src/vnet/adj/adj_midchain.c
@@ -0,0 +1,666 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/adj/adj_l2.h>
+#include <vnet/adj/adj_nsh.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/fib/fib_walk.h>
+
+/**
+ * The two midchain tx feature node indices
+ */
+static u32 adj_midchain_tx_feature_node[VNET_LINK_NUM];
+static u32 adj_midchain_tx_no_count_feature_node[VNET_LINK_NUM];
+
+/**
+ * @brief Trace data for packets traversing the midchain tx node
+ */
+typedef struct adj_midchain_tx_trace_t_
+{
+ /**
+ * @brief the midchain adj we are traversing
+ */
+ adj_index_t ai;
+} adj_midchain_tx_trace_t;
+
+always_inline uword
+adj_midchain_tx_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int interface_count)
+{
+ u32 * from, * to_next, n_left_from, n_left_to_next;
+ u32 next_index;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ u32 thread_index = vm->thread_index;
+
+ /* Vector of buffer / pkt indices we're supposed to process */
+ from = vlib_frame_vector_args (frame);
+
+ /* Number of buffers / pkts */
+ n_left_from = frame->n_vectors;
+
+ /* Speculatively send the first buffer to the last disposition we used */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ /* set up to enqueue to our disposition with index = next_index */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next > 4)
+ {
+ u32 bi0, adj_index0, next0;
+ const ip_adjacency_t * adj0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+ u32 bi1, adj_index1, next1;
+ const ip_adjacency_t * adj1;
+ const dpo_id_t *dpo1;
+ vlib_buffer_t * b1;
+ u32 bi2, adj_index2, next2;
+ const ip_adjacency_t * adj2;
+ const dpo_id_t *dpo2;
+ vlib_buffer_t * b2;
+ u32 bi3, adj_index3, next3;
+ const ip_adjacency_t * adj3;
+ const dpo_id_t *dpo3;
+ vlib_buffer_t * b3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p4, * p5;
+ vlib_buffer_t * p6, * p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+ }
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ bi1 = from[1];
+ to_next[1] = bi1;
+ bi2 = from[2];
+ to_next[2] = bi2;
+ bi3 = from[3];
+ to_next[3] = bi3;
+
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer(vm, bi0);
+ b1 = vlib_get_buffer(vm, bi1);
+ b2 = vlib_get_buffer(vm, bi2);
+ b3 = vlib_get_buffer(vm, bi3);
+
+ /* Follow the DPO on which the midchain is stacked */
+ adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+ adj_index2 = vnet_buffer(b2)->ip.adj_index[VLIB_TX];
+ adj_index3 = vnet_buffer(b3)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get(adj_index0);
+ adj1 = adj_get(adj_index1);
+ adj2 = adj_get(adj_index2);
+ adj3 = adj_get(adj_index3);
+
+ dpo0 = &adj0->sub_type.midchain.next_dpo;
+ dpo1 = &adj1->sub_type.midchain.next_dpo;
+ dpo2 = &adj2->sub_type.midchain.next_dpo;
+ dpo3 = &adj3->sub_type.midchain.next_dpo;
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+ next2 = dpo2->dpoi_next_node;
+ next3 = dpo3->dpoi_next_node;
+
+ vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer(b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+ vnet_buffer(b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
+
+ if (interface_count)
+ {
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ thread_index,
+ adj0->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ thread_index,
+ adj1->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b1));
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ thread_index,
+ adj2->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b2));
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ thread_index,
+ adj3->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b3));
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->ai = adj_index0;
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->ai = adj_index1;
+ }
+ if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b2, sizeof (*tr));
+ tr->ai = adj_index2;
+ }
+ if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b3, sizeof (*tr));
+ tr->ai = adj_index3;
+ }
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, adj_index0, next0;
+ const ip_adjacency_t * adj0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer(vm, bi0);
+
+ /* Follow the DPO on which the midchain is stacked */
+ adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get(adj_index0);
+ dpo0 = &adj0->sub_type.midchain.next_dpo;
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (interface_count)
+ {
+ vlib_increment_combined_counter (im->combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ thread_index,
+ adj0->rewrite_header.sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->ai = adj_index0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static u8 *
+format_adj_midchain_tx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ adj_midchain_tx_trace_t *tr = va_arg (*args, adj_midchain_tx_trace_t*);
+
+ s = format(s, "adj-midchain:[%d]:%U", tr->ai,
+ format_ip_adjacency, tr->ai,
+ FORMAT_IP_ADJACENCY_NONE);
+
+ return (s);
+}
+
+static uword
+adj_midchain_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (adj_midchain_tx_inline(vm, node, frame, 1));
+}
+
+VLIB_REGISTER_NODE (adj_midchain_tx_node, static) = {
+ .function = adj_midchain_tx,
+ .name = "adj-midchain-tx",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_midchain_tx_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+static uword
+adj_midchain_tx_no_count (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (adj_midchain_tx_inline(vm, node, frame, 0));
+}
+
+VLIB_REGISTER_NODE (adj_midchain_tx_no_count_node, static) = {
+ .function = adj_midchain_tx_no_count,
+ .name = "adj-midchain-tx-no-count",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_midchain_tx_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VNET_FEATURE_INIT (adj_midchain_tx_ip4, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_IP4],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_ip4, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_IP4],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_ip6, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_IP6],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_ip6, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_IP6],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_mpls, static) = {
+ .arc_name = "mpls-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_MPLS],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_mpls, static) = {
+ .arc_name = "mpls-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("interface-output"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_MPLS],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_ethernet, static) = {
+ .arc_name = "ethernet-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("error-drop"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_ETHERNET],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_ethernet, static) = {
+ .arc_name = "ethernet-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("error-drop"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_ETHERNET],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_nsh, static) = {
+ .arc_name = "nsh-output",
+ .node_name = "adj-midchain-tx",
+ .runs_before = VNET_FEATURES ("error-drop"),
+ .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_NSH],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_nsh, static) = {
+ .arc_name = "nsh-output",
+ .node_name = "adj-midchain-tx-no-count",
+ .runs_before = VNET_FEATURES ("error-drop"),
+ .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_NSH],
+};
+
+static inline u32
+adj_get_midchain_node (vnet_link_t link)
+{
+ switch (link) {
+ case VNET_LINK_IP4:
+ return (ip4_midchain_node.index);
+ case VNET_LINK_IP6:
+ return (ip6_midchain_node.index);
+ case VNET_LINK_MPLS:
+ return (mpls_midchain_node.index);
+ case VNET_LINK_ETHERNET:
+ return (adj_l2_midchain_node.index);
+ case VNET_LINK_NSH:
+ return (adj_nsh_midchain_node.index);
+ case VNET_LINK_ARP:
+ break;
+ }
+ ASSERT(0);
+ return (0);
+}
+
+static u8
+adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj)
+{
+ u8 arc = (u8) ~0;
+ switch (adj->ia_link)
+ {
+ case VNET_LINK_IP4:
+ {
+ arc = ip4_main.lookup_main.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_IP6:
+ {
+ arc = ip6_main.lookup_main.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_MPLS:
+ {
+ arc = mpls_main.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_ETHERNET:
+ {
+ arc = ethernet_main.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_NSH:
+ {
+ arc = nsh_main_dummy.output_feature_arc_index;
+ break;
+ }
+ case VNET_LINK_ARP:
+ ASSERT(0);
+ break;
+ }
+
+ ASSERT (arc != (u8) ~0);
+
+ return (arc);
+}
+
+static u32
+adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj)
+{
+ return ((adj->ia_flags & ADJ_FLAG_MIDCHAIN_NO_COUNT) ?
+ adj_midchain_tx_no_count_node.index :
+ adj_midchain_tx_node.index);
+}
+
+static u32
+adj_nbr_midchain_get_feature_node (ip_adjacency_t *adj)
+{
+ if (adj->ia_flags & ADJ_FLAG_MIDCHAIN_NO_COUNT)
+ {
+ return (adj_midchain_tx_no_count_feature_node[adj->ia_link]);
+ }
+
+ return (adj_midchain_tx_feature_node[adj->ia_link]);
+}
+
+/**
+ * adj_midchain_setup
+ *
+ * Setup the adj as a mid-chain
+ */
+void
+adj_midchain_setup (adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_flags_t flags)
+{
+ u32 feature_index, tx_node;
+ ip_adjacency_t *adj;
+ u8 arc_index;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ adj->sub_type.midchain.fixup_func = fixup;
+ adj->ia_flags |= flags;
+
+ arc_index = adj_midchain_get_feature_arc_index_for_link_type (adj);
+ feature_index = adj_nbr_midchain_get_feature_node(adj);
+ tx_node = adj_nbr_midchain_get_tx_node(adj);
+
+ vnet_feature_enable_disable_with_index (arc_index, feature_index,
+ adj->rewrite_header.sw_if_index,
+ 1 /* enable */, 0, 0);
+
+ /*
+ * stack the midchain on the drop so it's ready to forward in the adj-midchain-tx.
+ * The graph arc used/created here is from the midchain-tx node to the
+ * child's registered node. This is because post adj processing the next
+ * node are any output features, then the midchain-tx. from there we
+ * need to get to the stacked child's node.
+ */
+ dpo_stack_from_node(tx_node,
+ &adj->sub_type.midchain.next_dpo,
+ drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
+}
+
+/**
+ * adj_nbr_midchain_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewrite is reset (i.e. when ARP/ND etnry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_flags_t flags,
+ u8 *rewrite)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ /*
+ * one time only update. since we don't support chainging the tunnel
+ * src,dst, this is all we need.
+ */
+ ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP);
+ /*
+ * tunnels can always provide a rewrite.
+ */
+ ASSERT(NULL != rewrite);
+
+ adj_midchain_setup(adj_index, fixup, flags);
+
+ /*
+ * update the rewirte with the workers paused.
+ */
+ adj_nbr_update_rewrite_internal(adj,
+ IP_LOOKUP_NEXT_MIDCHAIN,
+ adj_get_midchain_node(adj->ia_link),
+ adj_nbr_midchain_get_tx_node(adj),
+ rewrite);
+}
+
+/**
+ * adj_nbr_midchain_unstack
+ *
+ * Unstack the adj. stack it on drop
+ */
+void
+adj_nbr_midchain_unstack (adj_index_t adj_index)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ /*
+ * stack on the drop
+ */
+ dpo_stack(DPO_ADJACENCY_MIDCHAIN,
+ vnet_link_to_dpo_proto(adj->ia_link),
+ &adj->sub_type.midchain.next_dpo,
+ drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
+ CLIB_MEMORY_BARRIER();
+}
+
+/**
+ * adj_nbr_midchain_stack
+ */
+void
+adj_nbr_midchain_stack (adj_index_t adj_index,
+ const dpo_id_t *next)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ ASSERT((IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index) ||
+ (IP_LOOKUP_NEXT_MCAST_MIDCHAIN == adj->lookup_next_index));
+
+ dpo_stack_from_node(adj_nbr_midchain_get_tx_node(adj),
+ &adj->sub_type.midchain.next_dpo,
+ next);
+}
+
+u8*
+format_adj_midchain (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ u32 indent = va_arg(*ap, u32);
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format (s, "%U", format_vnet_link, adj->ia_link);
+ s = format (s, " via %U ",
+ format_ip46_address, &adj->sub_type.nbr.next_hop);
+ s = format (s, " %U",
+ format_vnet_rewrite,
+ &adj->rewrite_header, sizeof (adj->rewrite_data), indent);
+ s = format (s, "\n%Ustacked-on:\n%U%U",
+ format_white_space, indent,
+ format_white_space, indent+2,
+ format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2);
+
+ return (s);
+}
+
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+ adj_lock(dpo->dpoi_index);
+}
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+ adj_unlock(dpo->dpoi_index);
+}
+
+const static dpo_vft_t adj_midchain_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_midchain,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a midchain
+ * object.
+ *
+ * this means that these graph nodes are ones from which a midchain is the
+ * parent object in the DPO-graph.
+ */
+const static char* const midchain_ip4_nodes[] =
+{
+ "ip4-midchain",
+ NULL,
+};
+const static char* const midchain_ip6_nodes[] =
+{
+ "ip6-midchain",
+ NULL,
+};
+const static char* const midchain_mpls_nodes[] =
+{
+ "mpls-midchain",
+ NULL,
+};
+const static char* const midchain_ethernet_nodes[] =
+{
+ "adj-l2-midchain",
+ NULL,
+};
+const static char* const midchain_nsh_nodes[] =
+{
+ "adj-nsh-midchain",
+ NULL,
+};
+
+const static char* const * const midchain_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = midchain_ip4_nodes,
+ [DPO_PROTO_IP6] = midchain_ip6_nodes,
+ [DPO_PROTO_MPLS] = midchain_mpls_nodes,
+ [DPO_PROTO_ETHERNET] = midchain_ethernet_nodes,
+ [DPO_PROTO_NSH] = midchain_nsh_nodes,
+};
+
+void
+adj_midchain_module_init (void)
+{
+ dpo_register(DPO_ADJACENCY_MIDCHAIN, &adj_midchain_dpo_vft, midchain_nodes);
+}
diff --git a/src/vnet/adj/adj_midchain.h b/src/vnet/adj/adj_midchain.h
new file mode 100644
index 00000000..27ca1d33
--- /dev/null
+++ b/src/vnet/adj/adj_midchain.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Midchain Adjacency sub-type. These adjs represent an L3 peer on a
+ * tunnel interface. The tunnel's adjacency is thus not the end of the chain,
+ * and needs to stack on/link to another chain (or portion of the graph) to
+ * reach the tunnel's destination.
+ */
+
+#ifndef __ADJ_MIDCHAIN_H__
+#define __ADJ_MIDCHAIN_H__
+
+#include <vnet/adj/adj.h>
+
+/**
+ * @brief
+ * Convert an existing neighbour adjacency into a midchain
+ *
+ * @param adj_index
+ * The index of the neighbour adjacency.
+ *
+ * @param post_rewrite_node
+ * The VLIB graph node that provides the post-encap fixup.
+ * where 'fixup' is e.g., correcting chksum, length, etc.
+ *
+ * @param rewrite
+ * The rewrite.
+ */
+extern void adj_nbr_midchain_update_rewrite(adj_index_t adj_index,
+ adj_midchain_fixup_t fixup,
+ adj_flags_t flags,
+ u8 *rewrite);
+
+/**
+ * @brief
+ * [re]stack a midchain. 'Stacking' is the act of forming parent-child
+ * relationships in the data-plane graph.
+ *
+ * @param adj_index
+ * The index of the midchain to stack
+ *
+ * @param dpo
+ * The parent DPO to stack onto (i.e. become a child of).
+ */
+extern void adj_nbr_midchain_stack(adj_index_t adj_index,
+ const dpo_id_t *dpo);
+
+/**
+ * @brief
+ * unstack a midchain. This will break the chain between the midchain and
+ * the next graph section. This is a implemented as stack-on-drop
+ *
+ * @param adj_index
+ * The index of the midchain to stack
+ */
+extern void adj_nbr_midchain_unstack(adj_index_t adj_index);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_midchain_module_init(void);
+
+/**
+ * @brief
+ * Format a midchain adjacency
+ */
+extern u8* format_adj_midchain(u8* s, va_list *ap);
+
+#endif
diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c
new file mode 100644
index 00000000..3d450d1f
--- /dev/null
+++ b/src/vnet/adj/adj_nbr.c
@@ -0,0 +1,1124 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/fib/fib_walk.h>
+
+/*
+ * Vector Hash tables of neighbour (traditional) adjacencies
+ * Key: interface(for the vector index), address (and its proto),
+ * link-type/ether-type.
+ */
+static BVT(clib_bihash) **adj_nbr_tables[FIB_PROTOCOL_MAX];
+
+// FIXME SIZE APPROPRIATELY. ASK DAVEB.
+#define ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (64 * 64)
+#define ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+
+#define ADJ_NBR_SET_KEY(_key, _lt, _nh) \
+{ \
+ _key.key[0] = (_nh)->as_u64[0]; \
+ _key.key[1] = (_nh)->as_u64[1]; \
+ _key.key[2] = (_lt); \
+}
+
+#define ADJ_NBR_ITF_OK(_proto, _itf) \
+ (((_itf) < vec_len(adj_nbr_tables[_proto])) && \
+ (NULL != adj_nbr_tables[_proto][sw_if_index]))
+
+static void
+adj_nbr_insert (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index,
+ adj_index_t adj_index)
+{
+ BVT(clib_bihash_kv) kv;
+
+ if (sw_if_index >= vec_len(adj_nbr_tables[nh_proto]))
+ {
+ vec_validate(adj_nbr_tables[nh_proto], sw_if_index);
+ }
+ if (NULL == adj_nbr_tables[nh_proto][sw_if_index])
+ {
+ adj_nbr_tables[nh_proto][sw_if_index] =
+ clib_mem_alloc_aligned(sizeof(BVT(clib_bihash)),
+ CLIB_CACHE_LINE_BYTES);
+ memset(adj_nbr_tables[nh_proto][sw_if_index],
+ 0,
+ sizeof(BVT(clib_bihash)));
+
+ BV(clib_bihash_init) (adj_nbr_tables[nh_proto][sw_if_index],
+ "Adjacency Neighbour table",
+ ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS,
+ ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE);
+ }
+
+ ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
+ kv.value = adj_index;
+
+ BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 1);
+}
+
+void
+adj_nbr_remove (adj_index_t ai,
+ fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ BVT(clib_bihash_kv) kv;
+
+ if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
+ return;
+
+ ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
+ kv.value = ai;
+
+ BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 0);
+}
+
+static adj_index_t
+adj_nbr_find (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ BVT(clib_bihash_kv) kv;
+
+ ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
+
+ if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
+ return (ADJ_INDEX_INVALID);
+
+ if (BV(clib_bihash_search)(adj_nbr_tables[nh_proto][sw_if_index],
+ &kv, &kv) < 0)
+ {
+ return (ADJ_INDEX_INVALID);
+ }
+ else
+ {
+ return (kv.value);
+ }
+}
+
+static inline u32
+adj_get_nd_node (fib_protocol_t proto)
+{
+ switch (proto) {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_arp_node.index);
+ case FIB_PROTOCOL_IP6:
+ return (ip6_discover_neighbor_node.index);
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ ASSERT(0);
+ return (ip4_arp_node.index);
+}
+
+/**
+ * @brief Check and set feature flags if o/p interface has any o/p features.
+ */
+static void
+adj_nbr_evaluate_feature (adj_index_t ai)
+{
+ ip_adjacency_t *adj;
+ vnet_feature_main_t *fm = &feature_main;
+ i16 feature_count;
+ u8 arc_index;
+ u32 sw_if_index;
+
+ adj = adj_get(ai);
+
+ switch (adj->ia_link)
+ {
+ case VNET_LINK_IP4:
+ arc_index = ip4_main.lookup_main.output_feature_arc_index;
+ break;
+ case VNET_LINK_IP6:
+ arc_index = ip6_main.lookup_main.output_feature_arc_index;
+ break;
+ case VNET_LINK_MPLS:
+ arc_index = mpls_main.output_feature_arc_index;
+ break;
+ default:
+ return;
+ }
+
+ sw_if_index = adj->rewrite_header.sw_if_index;
+ if (vec_len(fm->feature_count_by_sw_if_index[arc_index]) > sw_if_index)
+ {
+ feature_count = fm->feature_count_by_sw_if_index[arc_index][sw_if_index];
+ if (feature_count > 0)
+ adj->rewrite_header.flags |= VNET_REWRITE_HAS_FEATURES;
+ }
+
+ return;
+}
+
+static ip_adjacency_t*
+adj_nbr_alloc (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ ip_adjacency_t *adj;
+
+ adj = adj_alloc(nh_proto);
+
+ adj_nbr_insert(nh_proto, link_type, nh_addr,
+ sw_if_index,
+ adj_get_index(adj));
+
+ /*
+ * since we just added the ADJ we have no rewrite string for it,
+ * so its for ARP
+ */
+ adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
+ adj->sub_type.nbr.next_hop = *nh_addr;
+ adj->ia_link = link_type;
+ adj->ia_nh_proto = nh_proto;
+ adj->rewrite_header.sw_if_index = sw_if_index;
+
+ adj_nbr_evaluate_feature (adj_get_index(adj));
+ return (adj);
+}
+
+/*
+ * adj_nbr_add_or_lock
+ *
+ * Add an adjacency for the neighbour requested.
+ *
+ * The key for an adj is:
+ * - the Next-hops protocol (i.e. v4 or v6)
+ * - the address of the next-hop
+ * - the interface the next-hop is reachable through
+ */
+adj_index_t
+adj_nbr_add_or_lock (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index)
+{
+ adj_index_t adj_index;
+ ip_adjacency_t *adj;
+
+ adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
+
+ if (ADJ_INDEX_INVALID == adj_index)
+ {
+ vnet_main_t *vnm;
+
+ vnm = vnet_get_main();
+ adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
+ adj_index = adj_get_index(adj);
+ adj_lock(adj_index);
+
+ vnet_rewrite_init(vnm, sw_if_index,
+ adj_get_nd_node(nh_proto),
+ vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
+ &adj->rewrite_header);
+
+ /*
+ * we need a rewrite where the destination IP address is converted
+ * to the appropriate link-layer address. This is interface specific.
+ * So ask the interface to do it.
+ */
+ vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, adj_index);
+ }
+ else
+ {
+ adj_lock(adj_index);
+ }
+
+ return (adj_index);
+}
+
+adj_index_t
+adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index,
+ u8 *rewrite)
+{
+ adj_index_t adj_index;
+ ip_adjacency_t *adj;
+
+ adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
+
+ if (ADJ_INDEX_INVALID == adj_index)
+ {
+ adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
+ adj->rewrite_header.sw_if_index = sw_if_index;
+ }
+ else
+ {
+ adj = adj_get(adj_index);
+ }
+
+ adj_lock(adj_get_index(adj));
+ adj_nbr_update_rewrite(adj_get_index(adj),
+ ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ rewrite);
+
+ return (adj_get_index(adj));
+}
+
+/**
+ * adj_nbr_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewirte is reset (i.e. when ARP/ND etnry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_update_rewrite (adj_index_t adj_index,
+ adj_nbr_rewrite_flag_t flags,
+ u8 *rewrite)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+ adj = adj_get(adj_index);
+
+ if (flags & ADJ_NBR_REWRITE_FLAG_COMPLETE)
+ {
+ /*
+ * update the adj's rewrite string and build the arc
+ * from the rewrite node to the interface's TX node
+ */
+ adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_REWRITE,
+ adj_get_rewrite_node(adj->ia_link),
+ vnet_tx_node_index_for_sw_interface(
+ vnet_get_main(),
+ adj->rewrite_header.sw_if_index),
+ rewrite);
+ }
+ else
+ {
+ adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_ARP,
+ adj_get_nd_node(adj->ia_nh_proto),
+ vnet_tx_node_index_for_sw_interface(
+ vnet_get_main(),
+ adj->rewrite_header.sw_if_index),
+ rewrite);
+ }
+}
+
+/**
+ * adj_nbr_update_rewrite_internal
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewirte is reset (i.e. when ARP/ND etnry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
+ ip_lookup_next_t adj_next_index,
+ u32 this_node,
+ u32 next_node,
+ u8 *rewrite)
+{
+ ip_adjacency_t *walk_adj;
+ adj_index_t walk_ai;
+ vlib_main_t * vm;
+ u32 old_next;
+ int do_walk;
+
+ vm = vlib_get_main();
+ old_next = adj->lookup_next_index;
+
+ walk_ai = adj_get_index(adj);
+ if (VNET_LINK_MPLS == adj->ia_link)
+ {
+ /*
+ * The link type MPLS has no children in the control plane graph, it only
+ * has children in the data-palne graph. The backwalk is up the former.
+ * So we need to walk from its IP cousin.
+ */
+ walk_ai = adj_nbr_find(adj->ia_nh_proto,
+ fib_proto_to_link(adj->ia_nh_proto),
+ &adj->sub_type.nbr.next_hop,
+ adj->rewrite_header.sw_if_index);
+ }
+
+ /*
+ * Don't call the walk re-entrantly
+ */
+ if (ADJ_INDEX_INVALID != walk_ai)
+ {
+ walk_adj = adj_get(walk_ai);
+ if (ADJ_FLAG_SYNC_WALK_ACTIVE & walk_adj->ia_flags)
+ {
+ do_walk = 0;
+ }
+ else
+ {
+ /*
+ * Prevent re-entrant walk of the same adj
+ */
+ walk_adj->ia_flags |= ADJ_FLAG_SYNC_WALK_ACTIVE;
+ do_walk = 1;
+ }
+ }
+ else
+ {
+ do_walk = 0;
+ }
+
+ /*
+ * lock the adjacencies that are affected by updates this walk will provoke.
+ * Since the aim of the walk is to update children to link to a different
+ * DPO, this adj will no longer be in use and its lock count will drop to 0.
+ * We don't want it to be deleted as part of this endevour.
+ */
+ adj_lock(adj_get_index(adj));
+ adj_lock(walk_ai);
+
+ /*
+ * Updating a rewrite string is not atomic;
+ * - the rewrite string is too long to write in one instruction
+ * - when swapping from incomplete to complete, we also need to update
+ * the VLIB graph next-index of the adj.
+ * ideally we would only want to suspend forwarding via this adj whilst we
+ * do this, but we do not have that level of granularity - it's suspend all
+ * worker threads or nothing.
+ * The other chioces are:
+ * - to mark the adj down and back walk so child load-balances drop this adj
+ * from the set.
+ * - update the next_node index of this adj to point to error-drop
+ * both of which will mean for MAC change we will drop for this adj
+ * which is not acceptable. However, when the adj changes type (from
+ * complete to incomplete and vice-versa) the child DPOs, which have the
+ * VLIB graph next node index, will be sending packets to the wrong graph
+ * node. So from the options above, updating the next_node of the adj to
+ * be drop will work, but it relies on each graph node v4/v6/mpls, rewrite/
+ * arp/midchain always be valid w.r.t. a mis-match of adj type and node type
+ * (i.e. a rewrite adj in the arp node). This is not enforcable. Getting it
+ * wrong will lead to hard to find bugs since its a race condition. So we
+ * choose the more reliable method of updating the children to use the drop,
+ * then switching adj's type, then updating the children again. Did I mention
+ * that this doesn't happen often...
+ * So we need to distinguish between the two cases:
+ * 1 - mac change
+ * 2 - adj type change
+ */
+ if (do_walk &&
+ old_next != adj_next_index &&
+ ADJ_INDEX_INVALID != walk_ai)
+ {
+ /*
+ * the adj is changing type. we need to fix all children so that they
+ * stack momentarily on a drop, while the adj changes. If we don't do
+ * this the children will send packets to a VLIB graph node that does
+ * not correspond to the adj's type - and it goes downhill from there.
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_DOWN,
+ /*
+ * force this walk to be synchrous. if we don't and a node in the graph
+ * (a heavily shared path-list) chooses to back-ground the walk (make it
+ * async) then it will pause and we will do the adj update below, before
+ * all the children are updated. not good.
+ */
+ .fnbw_flags = FIB_NODE_BW_FLAG_FORCE_SYNC,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
+ }
+
+ /*
+ * If we are just updating the MAC string of the adj (which we also can't
+ * do atomically), then we need to stop packets switching through the adj.
+ * We can't do that on a per-adj basis, so it's all the packets.
+ * If we are updating the type, and we walked back to the children above,
+ * then this barrier serves to flush the queues/frames.
+ */
+ vlib_worker_thread_barrier_sync(vm);
+
+ adj->lookup_next_index = adj_next_index;
+
+ if (NULL != rewrite)
+ {
+ /*
+ * new rewrite provided.
+ * fill in the adj's rewrite string, and build the VLIB graph arc.
+ */
+ vnet_rewrite_set_data_internal(&adj->rewrite_header,
+ sizeof(adj->rewrite_data),
+ rewrite,
+ vec_len(rewrite));
+ vec_free(rewrite);
+ }
+ else
+ {
+ vnet_rewrite_clear_data_internal(&adj->rewrite_header,
+ sizeof(adj->rewrite_data));
+ }
+ adj->rewrite_header.next_index = vlib_node_add_next(vlib_get_main(),
+ this_node,
+ next_node);
+
+ /*
+ * done with the rewirte update - let the workers loose.
+ */
+ vlib_worker_thread_barrier_release(vm);
+
+ if (do_walk &&
+ (old_next != adj->lookup_next_index) &&
+ (ADJ_INDEX_INVALID != walk_ai))
+ {
+ /*
+ * backwalk to the children so they can stack on the now updated
+ * adjacency
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
+ }
+ /*
+ * Prevent re-entrant walk of the same adj
+ */
+ if (do_walk)
+ {
+ walk_adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
+ }
+
+ adj_unlock(adj_get_index(adj));
+ adj_unlock(walk_ai);
+}
+
+typedef struct adj_db_count_ctx_t_ {
+ u64 count;
+} adj_db_count_ctx_t;
+
+static void
+adj_db_count (BVT(clib_bihash_kv) * kvp,
+ void *arg)
+{
+ adj_db_count_ctx_t * ctx = arg;
+ ctx->count++;
+}
+
+u32
+adj_nbr_db_size (void)
+{
+ adj_db_count_ctx_t ctx = {
+ .count = 0,
+ };
+ fib_protocol_t proto;
+ u32 sw_if_index = 0;
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
+ {
+ if (NULL != adj_nbr_tables[proto][sw_if_index])
+ {
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[proto][sw_if_index],
+ adj_db_count,
+ &ctx);
+ }
+ }
+ }
+ return (ctx.count);
+}
+
+/**
+ * @brief Context for a walk of the adjacency neighbour DB
+ */
+typedef struct adj_walk_ctx_t_
+{
+ adj_walk_cb_t awc_cb;
+ void *awc_ctx;
+} adj_walk_ctx_t;
+
+static void
+adj_nbr_walk_cb (BVT(clib_bihash_kv) * kvp,
+ void *arg)
+{
+ adj_walk_ctx_t *ctx = arg;
+
+ // FIXME: can't stop early...
+ ctx->awc_cb(kvp->value, ctx->awc_ctx);
+}
+
+void
+adj_nbr_walk (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
+ return;
+
+ adj_walk_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[adj_nh_proto][sw_if_index],
+ adj_nbr_walk_cb,
+ &awc);
+}
+
+/**
+ * @brief Context for a walk of the adjacency neighbour DB
+ */
+typedef struct adj_walk_nh_ctx_t_
+{
+ adj_walk_cb_t awc_cb;
+ void *awc_ctx;
+ const ip46_address_t *awc_nh;
+} adj_walk_nh_ctx_t;
+
+static void
+adj_nbr_walk_nh_cb (BVT(clib_bihash_kv) * kvp,
+ void *arg)
+{
+ ip_adjacency_t *adj;
+ adj_walk_nh_ctx_t *ctx = arg;
+
+ adj = adj_get(kvp->value);
+
+ if (!ip46_address_cmp(&adj->sub_type.nbr.next_hop, ctx->awc_nh))
+ ctx->awc_cb(kvp->value, ctx->awc_ctx);
+}
+
+/**
+ * @brief Walk adjacencies on a link with a given v4 next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh4 (u32 sw_if_index,
+ const ip4_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP4, sw_if_index))
+ return;
+
+ ip46_address_t nh = {
+ .ip4 = *addr,
+ };
+
+ adj_walk_nh_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ .awc_nh = &nh,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[FIB_PROTOCOL_IP4][sw_if_index],
+ adj_nbr_walk_nh_cb,
+ &awc);
+}
+
+/**
+ * @brief Walk adjacencies on a link with a given v6 next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh6 (u32 sw_if_index,
+ const ip6_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP6, sw_if_index))
+ return;
+
+ ip46_address_t nh = {
+ .ip6 = *addr,
+ };
+
+ adj_walk_nh_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ .awc_nh = &nh,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[FIB_PROTOCOL_IP6][sw_if_index],
+ adj_nbr_walk_nh_cb,
+ &awc);
+}
+
+/**
+ * @brief Walk adjacencies on a link with a given next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ const ip46_address_t *nh,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
+ return;
+
+ adj_walk_nh_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ .awc_nh = nh,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[adj_nh_proto][sw_if_index],
+ adj_nbr_walk_nh_cb,
+ &awc);
+}
+
+/**
+ * Flags associated with the interface state walks
+ */
+typedef enum adj_nbr_interface_flags_t_
+{
+ ADJ_NBR_INTERFACE_UP = (1 << 0),
+} adj_nbr_interface_flags_t;
+
+/**
+ * Context for the state change walk of the DB
+ */
+typedef struct adj_nbr_interface_state_change_ctx_t_
+{
+ /**
+ * Flags on the interface
+ */
+ adj_nbr_interface_flags_t flags;
+} adj_nbr_interface_state_change_ctx_t;
+
+static adj_walk_rc_t
+adj_nbr_interface_state_change_one (adj_index_t ai,
+ void *arg)
+{
+ /*
+ * Back walk the graph to inform the forwarding entries
+ * that this interface state has changed. Do this synchronously
+ * since this is the walk that provides convergence
+ */
+ adj_nbr_interface_state_change_ctx_t *ctx = arg;
+
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = ((ctx->flags & ADJ_NBR_INTERFACE_UP) ?
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
+ /*
+ * the force sync applies only as far as the first fib_entry.
+ * And it's the fib_entry's we need to converge away from
+ * the adjacencies on the now down link
+ */
+ .fnbw_flags = (!(ctx->flags & ADJ_NBR_INTERFACE_UP) ?
+ FIB_NODE_BW_FLAG_FORCE_SYNC :
+ 0),
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * @brief Registered function for SW interface state changes
+ */
+static clib_error_t *
+adj_nbr_sw_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 flags)
+{
+ fib_protocol_t proto;
+
+ /*
+ * walk each adj on the interface and trigger a walk from that adj
+ */
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_interface_state_change_ctx_t ctx = {
+ .flags = ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
+ ADJ_NBR_INTERFACE_UP :
+ 0),
+ };
+
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_interface_state_change_one,
+ &ctx);
+ }
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION_PRIO(
+ adj_nbr_sw_interface_state_change,
+ VNET_ITF_FUNC_PRIORITY_HIGH);
+
+/**
+ * @brief Invoked on each SW interface of a HW interface when the
+ * HW interface state changes
+ */
+static void
+adj_nbr_hw_sw_interface_state_change (vnet_main_t * vnm,
+ u32 sw_if_index,
+ void *arg)
+{
+ adj_nbr_interface_state_change_ctx_t *ctx = arg;
+ fib_protocol_t proto;
+
+ /*
+ * walk each adj on the interface and trigger a walk from that adj
+ */
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_interface_state_change_one,
+ ctx);
+ }
+}
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+adj_nbr_hw_interface_state_change (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ /*
+ * walk SW interface on the HW
+ */
+ adj_nbr_interface_state_change_ctx_t ctx = {
+ .flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ?
+ ADJ_NBR_INTERFACE_UP :
+ 0),
+ };
+
+ vnet_hw_interface_walk_sw(vnm, hw_if_index,
+ adj_nbr_hw_sw_interface_state_change,
+ &ctx);
+
+ return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION_PRIO(
+ adj_nbr_hw_interface_state_change,
+ VNET_ITF_FUNC_PRIORITY_HIGH);
+
+static adj_walk_rc_t
+adj_nbr_interface_delete_one (adj_index_t ai,
+ void *arg)
+{
+ /*
+ * Back walk the graph to inform the forwarding entries
+ * that this interface has been deleted.
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * adj_nbr_interface_add_del
+ *
+ * Registered to receive interface Add and delete notifications
+ */
+static clib_error_t *
+adj_nbr_interface_add_del (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 is_add)
+{
+ fib_protocol_t proto;
+
+ if (is_add)
+ {
+ /*
+ * not interested in interface additions. we will not back walk
+ * to resolve paths through newly added interfaces. Why? The control
+ * plane should have the brains to add interfaces first, then routes.
+ * So the case where there are paths with a interface that matches
+ * one just created is the case where the path resolved through an
+ * interface that was deleted, and still has not been removed. The
+ * new interface added, is NO GUARANTEE that the interface being
+ * added now, even though it may have the same sw_if_index, is the
+ * same interface that the path needs. So tough!
+ * If the control plane wants these routes to resolve it needs to
+ * remove and add them again.
+ */
+ return (NULL);
+ }
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_interface_delete_one,
+ NULL);
+ }
+
+ return (NULL);
+
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_nbr_interface_add_del);
+
+
+static adj_walk_rc_t
+adj_nbr_show_one (adj_index_t ai,
+ void *arg)
+{
+ vlib_cli_output (arg, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_NONE);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static clib_error_t *
+adj_nbr_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ adj_index_t ai = ADJ_INDEX_INVALID;
+ u32 sw_if_index = ~0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &ai))
+ ;
+ else if (unformat (input, "%U",
+ unformat_vnet_sw_interface, vnet_get_main(),
+ &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (ADJ_INDEX_INVALID != ai)
+ {
+ vlib_cli_output (vm, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_DETAIL);
+ }
+ else if (~0 != sw_if_index)
+ {
+ fib_protocol_t proto;
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_show_one,
+ vm);
+ }
+ }
+ else
+ {
+ fib_protocol_t proto;
+
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_show_one,
+ vm);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*?
+ * Show all neighbour adjacencies.
+ * @cliexpar
+ * @cliexstart{sh adj nbr}
+ * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
+ .path = "show adj nbr",
+ .short_help = "show adj nbr [<adj_index>] [interface]",
+ .function = adj_nbr_show,
+};
+
+static ip46_type_t
+adj_proto_to_46 (fib_protocol_t proto)
+{
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (IP46_TYPE_IP4);
+ case FIB_PROTOCOL_IP6:
+ return (IP46_TYPE_IP6);
+ default:
+ return (IP46_TYPE_IP4);
+ }
+ return (IP46_TYPE_IP4);
+}
+
+u8*
+format_adj_nbr_incomplete (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format (s, "arp-%U", format_vnet_link, adj->ia_link);
+ s = format (s, ": via %U",
+ format_ip46_address, &adj->sub_type.nbr.next_hop,
+ adj_proto_to_46(adj->ia_nh_proto));
+ s = format (s, " %U",
+ format_vnet_sw_interface_name,
+ vnm,
+ vnet_get_sw_interface(vnm,
+ adj->rewrite_header.sw_if_index));
+
+ return (s);
+}
+
+u8*
+format_adj_nbr (u8* s, va_list *ap)
+{
+ index_t index = va_arg(*ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+ ip_adjacency_t * adj = adj_get(index);
+
+ s = format (s, "%U", format_vnet_link, adj->ia_link);
+ s = format (s, " via %U ",
+ format_ip46_address, &adj->sub_type.nbr.next_hop,
+ adj_proto_to_46(adj->ia_nh_proto));
+ s = format (s, "%U",
+ format_vnet_rewrite,
+ &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
+
+ return (s);
+}
+
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{
+ adj_lock(dpo->dpoi_index);
+}
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{
+ adj_unlock(dpo->dpoi_index);
+}
+
+static void
+adj_mem_show (void)
+{
+ fib_show_memory_usage("Adjacency",
+ pool_elts(adj_pool),
+ pool_len(adj_pool),
+ sizeof(ip_adjacency_t));
+}
+
+const static dpo_vft_t adj_nbr_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_nbr,
+ .dv_mem_show = adj_mem_show,
+};
+const static dpo_vft_t adj_nbr_incompl_dpo_vft = {
+ .dv_lock = adj_dpo_lock,
+ .dv_unlock = adj_dpo_unlock,
+ .dv_format = format_adj_nbr_incomplete,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to an adjacency
+ * object.
+ *
+ * this means that these graph nodes are ones from which a nbr is the
+ * parent object in the DPO-graph.
+ */
+const static char* const nbr_ip4_nodes[] =
+{
+ "ip4-rewrite",
+ NULL,
+};
+const static char* const nbr_ip6_nodes[] =
+{
+ "ip6-rewrite",
+ NULL,
+};
+const static char* const nbr_mpls_nodes[] =
+{
+ "mpls-output",
+ NULL,
+};
+const static char* const nbr_ethernet_nodes[] =
+{
+ "adj-l2-rewrite",
+ NULL,
+};
+const static char* const * const nbr_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = nbr_ip4_nodes,
+ [DPO_PROTO_IP6] = nbr_ip6_nodes,
+ [DPO_PROTO_MPLS] = nbr_mpls_nodes,
+ [DPO_PROTO_ETHERNET] = nbr_ethernet_nodes,
+};
+
+const static char* const nbr_incomplete_ip4_nodes[] =
+{
+ "ip4-arp",
+ NULL,
+};
+const static char* const nbr_incomplete_ip6_nodes[] =
+{
+ "ip6-discover-neighbor",
+ NULL,
+};
+const static char* const nbr_incomplete_mpls_nodes[] =
+{
+ "mpls-adj-incomplete",
+ NULL,
+};
+
+const static char* const * const nbr_incomplete_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = nbr_incomplete_ip4_nodes,
+ [DPO_PROTO_IP6] = nbr_incomplete_ip6_nodes,
+ [DPO_PROTO_MPLS] = nbr_incomplete_mpls_nodes,
+};
+
+void
+adj_nbr_module_init (void)
+{
+ dpo_register(DPO_ADJACENCY,
+ &adj_nbr_dpo_vft,
+ nbr_nodes);
+ dpo_register(DPO_ADJACENCY_INCOMPLETE,
+ &adj_nbr_incompl_dpo_vft,
+ nbr_incomplete_nodes);
+}
diff --git a/src/vnet/adj/adj_nbr.h b/src/vnet/adj/adj_nbr.h
new file mode 100644
index 00000000..293766b8
--- /dev/null
+++ b/src/vnet/adj/adj_nbr.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * Neighbour Adjacency sub-type. These adjs represent an L3 peer on a
+ * connected link.
+ */
+
+#ifndef __ADJ_NBR_H__
+#define __ADJ_NBR_H__
+
+#include <vnet/vnet.h>
+#include <vnet/adj/adj_types.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing neighbour adjacency
+ *
+ * @param nh_proto
+ * The protocol for the next-hop address (v4 or v6)
+ *
+ * @param link_type
+ * A description of the protocol of the packets that will forward
+ * through this adj. On an ethernet interface this is the MAC header's
+ * ether-type
+ *
+ * @param nh_addr
+ * The address of the next-hop/peer to send the packet to
+ *
+ * @param sw_if_index
+ * The interface on which the peer resides
+ */
+extern adj_index_t adj_nbr_add_or_lock(fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index);
+
+/**
+ * @brief
+ * Add (and lock) a new or lock an existing neighbour adjacency
+ *
+ * @param nh_proto
+ * The protocol for the next-hop address (v4 or v6)
+ *
+ * @param link_type
+ * A description of the protocol of the packets that will forward
+ * through this adj. On an ethernet interface this is the MAC header's
+ * ether-type
+ *
+ * @param nh_addr
+ * The address of the next-hop/peer to send the packet to
+ *
+ * @param sw_if_index
+ * The interface on which the peer resides
+ *
+ * @param rewrite
+ * The rewrite to prepend to packets
+ */
+extern adj_index_t adj_nbr_add_or_lock_w_rewrite(fib_protocol_t nh_proto,
+ vnet_link_t link_type,
+ const ip46_address_t *nh_addr,
+ u32 sw_if_index,
+ u8 *rewrite);
+/**
+ * @brief When adding a rewrite to an adjacency these are flags that
+ * apply to that rewrite
+ */
+typedef enum adj_nbr_rewrite_flag_t_
+{
+ ADJ_NBR_REWRITE_FLAG_NONE,
+
+ /**
+ * An indication that the rewrite is incomplete, i.e. that it describes the
+ * ARP/ND rewrite when probing.
+ */
+ ADJ_NBR_REWRITE_FLAG_INCOMPLETE = ADJ_NBR_REWRITE_FLAG_NONE,
+
+ /**
+ * An indication that the rewrite is complete, i.e. that it fully describes
+ * the link-layer addressing for the desintation.
+ * The opposite of this is an incomplete rewrite that describes the ARP/ND
+ * rewrite when probing.
+ */
+ ADJ_NBR_REWRITE_FLAG_COMPLETE = (1 << 0),
+} adj_nbr_rewrite_flag_t;
+
+/**
+ * @brief
+ * Update the rewrite string for an existing adjacecny.
+ *
+ * @param
+ * The index of the adj to update
+ *
+ * @param
+ * The new rewrite
+ */
+extern void adj_nbr_update_rewrite(adj_index_t adj_index,
+ adj_nbr_rewrite_flag_t flags,
+ u8 *rewrite);
+
+/**
+ * @brief
+ * Format aa incomplete neigbour (ARP) adjacency
+ */
+extern u8* format_adj_nbr_incomplete(u8* s, va_list *ap);
+
+/**
+ * @brief
+ * Format a neigbour (REWRITE) adjacency
+ */
+extern u8* format_adj_nbr(u8* s, va_list *ap);
+
+/**
+ * @brief Walk the neighbour Adjacencies on a given interface
+ */
+extern void adj_nbr_walk (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ adj_walk_cb_t cb,
+ void *ctx);
+/**
+ * @brief Walk the neighbour Adjacencies on a given interface with a given next-hop
+ */
+void
+adj_nbr_walk_nh (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ const ip46_address_t *nh,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Walk adjacencies on a link with a given v4 next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh4 (u32 sw_if_index,
+ const ip4_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Walk adjacencies on a link with a given v6 next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh6 (u32 sw_if_index,
+ const ip6_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief
+ * Module initialisation
+ */
+extern void adj_nbr_module_init(void);
+
+/**
+ * @brief
+ * Return the size of the adjacency database. for testing purposes
+ */
+extern u32 adj_nbr_db_size(void);
+
+#endif
diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c
new file mode 100644
index 00000000..128570b0
--- /dev/null
+++ b/src/vnet/adj/adj_nsh.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/adj/adj_nsh.h>
+#include <vnet/ip/ip.h>
+
+nsh_main_dummy_t nsh_main_dummy;
+
+/**
+ * @brief Trace data for a NSH Midchain
+ */
+typedef struct adj_nsh_trace_t_ {
+ /** Adjacency index taken. */
+ u32 adj_index;
+} adj_nsh_trace_t;
+
+static u8 *
+format_adj_nsh_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ adj_nsh_trace_t * t = va_arg (*args, adj_nsh_trace_t *);
+
+ s = format (s, "adj-idx %d : %U",
+ t->adj_index,
+ format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE);
+ return s;
+}
+
+typedef enum adj_nsh_rewrite_next_t_
+{
+ ADJ_NSH_REWRITE_NEXT_DROP,
+} adj_gpe_rewrite_next_t;
+
+always_inline uword
+adj_nsh_rewrite_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int is_midchain)
+{
+ u32 * from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, * to_next, next_index;
+ u32 thread_index = vlib_get_thread_index();
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_adjacency_t * adj0;
+ vlib_buffer_t * p0;
+ char *h0;
+ u32 pi0, rw_len0, adj_index0, next0 = 0;
+ u32 tx_sw_if_index0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ h0 = vlib_buffer_get_current (p0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ /* We should never rewrite a pkt using the MISS adjacency */
+ ASSERT(adj_index0);
+
+ adj0 = adj_get (adj_index0);
+
+ /* Guess we are only writing on simple IP4 header. */
+ vnet_rewrite_one_header(adj0[0], h0, sizeof(ip4_header_t));
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
+
+ vlib_increment_combined_counter(&adjacency_counters,
+ thread_index,
+ adj_index0,
+ /* packet increment */ 0,
+ /* byte increment */ rw_len0);
+
+ /* Check MTU of outgoing interface. */
+ if (PREDICT_TRUE((vlib_buffer_length_in_chain (vm, p0) <=
+ adj0[0].rewrite_header.max_l3_packet_bytes)))
+ {
+ /* Don't adjust the buffer for ttl issue; icmp-error node wants
+ * to see the IP headerr */
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+ }
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+ /*
+ * Follow the feature ARC. this will result eventually in
+ * the midchain-tx node
+ */
+ vnet_feature_arc_start (nsh_main_dummy.output_feature_arc_index,
+ tx_sw_if_index0, &next0, p0);
+ }
+ else
+ {
+ /* can't fragment NSH */
+ next0 = ADJ_NSH_REWRITE_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ adj_nsh_trace_t *tr = vlib_add_trace (vm, node,
+ p0, sizeof (*tr));
+ tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+adj_nsh_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return adj_nsh_rewrite_inline (vm, node, frame, 0);
+}
+
+static uword
+adj_nsh_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return adj_nsh_rewrite_inline (vm, node, frame, 1);
+}
+
+VLIB_REGISTER_NODE (adj_nsh_rewrite_node) = {
+ .function = adj_nsh_rewrite,
+ .name = "adj-nsh-rewrite",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_nsh_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ADJ_NSH_REWRITE_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (adj_nsh_rewrite_node, adj_nsh_rewrite)
+
+VLIB_REGISTER_NODE (adj_nsh_midchain_node) = {
+ .function = adj_nsh_midchain,
+ .name = "adj-nsh-midchain",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_adj_nsh_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ADJ_NSH_REWRITE_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (adj_nsh_midchain_node, adj_nsh_midchain)
+
+/* Built-in ip4 tx feature path definition */
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (nsh_output, static) =
+{
+ .arc_name = "nsh-output",
+ .start_nodes = VNET_FEATURES ("adj-nsh-midchain"),
+ .arc_index_ptr = &nsh_main_dummy.output_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (nsh_tx_drop, static) =
+{
+ .arc_name = "nsh-output",
+ .node_name = "error-drop",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/adj/adj_nsh.h b/src/vnet/adj/adj_nsh.h
new file mode 100644
index 00000000..5501fbb9
--- /dev/null
+++ b/src/vnet/adj/adj_nsh.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_NSH_H__
+#define __ADJ_NSH_H__
+
+#include <vnet/adj/adj.h>
+
+extern vlib_node_registration_t adj_nsh_midchain_node;
+extern vlib_node_registration_t adj_nsh_rewrite_node;
+
+typedef struct _nsh_main_dummy
+{
+ u8 output_feature_arc_index;
+} nsh_main_dummy_t;
+
+extern nsh_main_dummy_t nsh_main_dummy;
+
+#endif
diff --git a/src/vnet/adj/adj_types.h b/src/vnet/adj/adj_types.h
new file mode 100644
index 00000000..cf90c084
--- /dev/null
+++ b/src/vnet/adj/adj_types.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_TYPES_H__
+#define __ADJ_TYPES_H__
+
+#include <vnet/vnet.h>
+
+/**
+ * @brief An index for adjacencies.
+ * Alas 'C' is not typesafe enough to b0rk when a u32 is used instead of
+ * an adi_index_t. However, for us humans, we can glean much more intent
+ * from the declaration
+ * foo bar(adj_index_t t);
+ * than we can from
+ * foo bar(u32 t);
+ */
+typedef u32 adj_index_t;
+
+/**
+ * @brief Invalid ADJ index - used when no adj is known
+ * likewise blazoned capitals INVALID speak volumes where ~0 does not.
+ */
+#define ADJ_INDEX_INVALID ((u32)~0)
+
+/**
+ * @brief return codes from a adjacency walker callback function
+ */
+typedef enum adj_walk_rc_t_
+{
+ ADJ_WALK_RC_STOP,
+ ADJ_WALK_RC_CONTINUE,
+} adj_walk_rc_t;
+
+/**
+ * @brief Call back function when walking adjacencies
+ */
+typedef adj_walk_rc_t (*adj_walk_cb_t)(adj_index_t ai,
+ void *ctx);
+
+#endif
diff --git a/src/vnet/adj/rewrite.c b/src/vnet/adj/rewrite.c
new file mode 100644
index 00000000..47fb74df
--- /dev/null
+++ b/src/vnet/adj/rewrite.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * rewrite.c: packet rewrite
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/lookup.h>
+
+void
+vnet_rewrite_copy_slow_path (vnet_rewrite_data_t * p0,
+ vnet_rewrite_data_t * rw0,
+ word n_left, uword most_likely_size)
+{
+ uword n_done =
+ round_pow2 (most_likely_size, sizeof (rw0[0])) / sizeof (rw0[0]);
+
+ p0 -= n_done;
+ rw0 -= n_done;
+
+ /* As we enter the cleanup loop, p0 and rw0 point to the last chunk written
+ by the fast path. Hence, the constant 1, which the
+ vnet_rewrite_copy_one macro renders as p0[-1] = rw0[-1]. */
+
+ while (n_left > 0)
+ {
+ vnet_rewrite_copy_one (p0, rw0, 1);
+ p0--;
+ rw0--;
+ n_left--;
+ }
+}
+
+u8 *
+format_vnet_rewrite (u8 * s, va_list * args)
+{
+ vnet_rewrite_header_t *rw = va_arg (*args, vnet_rewrite_header_t *);
+ u32 max_data_bytes = va_arg (*args, u32);
+ CLIB_UNUSED (uword indent) = va_arg (*args, u32);
+ vnet_main_t *vnm = vnet_get_main ();
+
+ if (rw->sw_if_index != ~0)
+ {
+ vnet_sw_interface_t *si;
+ si = vnet_get_sw_interface_safe (vnm, rw->sw_if_index);
+ if (NULL != si)
+ s = format (s, "%U: ", format_vnet_sw_interface_name, vnm, si);
+ else
+ s = format (s, "DELETED:%d", rw->sw_if_index);
+ }
+
+ /* Format rewrite string. */
+ if (rw->data_bytes > 0)
+
+ s = format (s, "%U",
+ format_hex_bytes,
+ rw->data + max_data_bytes - rw->data_bytes, rw->data_bytes);
+
+ return s;
+}
+
+u32
+vnet_tx_node_index_for_sw_interface (vnet_main_t * vnm, u32 sw_if_index)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ return (hw->output_node_index);
+}
+
+void
+vnet_rewrite_init (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 this_node, u32 next_node, vnet_rewrite_header_t * rw)
+{
+ rw->sw_if_index = sw_if_index;
+ rw->next_index = vlib_node_add_next (vnm->vlib_main, this_node, next_node);
+ rw->max_l3_packet_bytes =
+ vnet_sw_interface_get_mtu (vnm, sw_if_index, VLIB_TX);
+}
+
+void
+vnet_rewrite_for_sw_interface (vnet_main_t * vnm,
+ vnet_link_t link_type,
+ u32 sw_if_index,
+ u32 node_index,
+ void *dst_address,
+ vnet_rewrite_header_t * rw,
+ u32 max_rewrite_bytes)
+{
+
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ vnet_hw_interface_class_t *hc =
+ vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+ u8 *rewrite = NULL;
+
+ vnet_rewrite_init (vnm, sw_if_index, node_index,
+ vnet_tx_node_index_for_sw_interface (vnm, sw_if_index),
+ rw);
+
+ ASSERT (hc->build_rewrite);
+ rewrite = hc->build_rewrite (vnm, sw_if_index, link_type, dst_address);
+
+ ASSERT (vec_len (rewrite) < max_rewrite_bytes);
+ vnet_rewrite_set_data_internal (rw, max_rewrite_bytes, rewrite,
+ vec_len (rewrite));
+ vec_free (rewrite);
+}
+
+void
+vnet_rewrite_for_tunnel (vnet_main_t * vnm,
+ u32 tx_sw_if_index,
+ u32 rewrite_node_index,
+ u32 post_rewrite_node_index,
+ vnet_rewrite_header_t * rw,
+ u8 * rewrite_data, u32 rewrite_length)
+{
+ ip_adjacency_t *adj = 0;
+ /*
+ * Installed into vnet_buffer(b)->sw_if_index[VLIB_TX] e.g.
+ * by ip4_rewrite_inline. If the post-rewrite node injects into
+ * ipX-forward, this will be interpreted as a FIB number.
+ */
+ rw->sw_if_index = tx_sw_if_index;
+ rw->next_index = vlib_node_add_next (vnm->vlib_main, rewrite_node_index,
+ post_rewrite_node_index);
+ rw->max_l3_packet_bytes = (u16) ~ 0; /* we can't know at this point */
+
+ ASSERT (rewrite_length < sizeof (adj->rewrite_data));
+ /* Leave room for ethernet + VLAN tag */
+ vnet_rewrite_set_data_internal (rw, sizeof (adj->rewrite_data),
+ rewrite_data, rewrite_length);
+}
+
+void
+serialize_vnet_rewrite (serialize_main_t * m, va_list * va)
+{
+ vnet_rewrite_header_t *rw = va_arg (*va, vnet_rewrite_header_t *);
+ u32 max_data_bytes = va_arg (*va, u32);
+ u8 *p;
+
+ serialize_integer (m, rw->sw_if_index, sizeof (rw->sw_if_index));
+ serialize_integer (m, rw->data_bytes, sizeof (rw->data_bytes));
+ serialize_integer (m, rw->max_l3_packet_bytes,
+ sizeof (rw->max_l3_packet_bytes));
+ p = serialize_get (m, rw->data_bytes);
+ clib_memcpy (p, vnet_rewrite_get_data_internal (rw, max_data_bytes),
+ rw->data_bytes);
+}
+
+void
+unserialize_vnet_rewrite (serialize_main_t * m, va_list * va)
+{
+ vnet_rewrite_header_t *rw = va_arg (*va, vnet_rewrite_header_t *);
+ u32 max_data_bytes = va_arg (*va, u32);
+ u8 *p;
+
+ /* It is up to user to fill these in. */
+ rw->next_index = ~0;
+
+ unserialize_integer (m, &rw->sw_if_index, sizeof (rw->sw_if_index));
+ unserialize_integer (m, &rw->data_bytes, sizeof (rw->data_bytes));
+ unserialize_integer (m, &rw->max_l3_packet_bytes,
+ sizeof (rw->max_l3_packet_bytes));
+ p = unserialize_get (m, rw->data_bytes);
+ clib_memcpy (vnet_rewrite_get_data_internal (rw, max_data_bytes), p,
+ rw->data_bytes);
+}
+
+u8 *
+vnet_build_rewrite_for_sw_interface (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_address)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ vnet_hw_interface_class_t *hc =
+ vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+
+ ASSERT (hc->build_rewrite);
+ return (hc->build_rewrite (vnm, sw_if_index, link_type, dst_address));
+}
+
+
+void
+vnet_update_adjacency_for_sw_interface (vnet_main_t * vnm,
+ u32 sw_if_index, u32 ai)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ vnet_hw_interface_class_t *hc =
+ vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+
+ ASSERT (hc->update_adjacency);
+ hc->update_adjacency (vnm, sw_if_index, ai);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/adj/rewrite.h b/src/vnet/adj/rewrite.h
new file mode 100644
index 00000000..1dea72f5
--- /dev/null
+++ b/src/vnet/adj/rewrite.h
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * rewrite.h: packet rewrite
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_rewrite_h
+#define included_vnet_rewrite_h
+
+#include <vlib/vlib.h>
+#include <vnet/l3_types.h>
+
+/* Consider using vector types for speed? */
+typedef uword vnet_rewrite_data_t;
+
+/**
+ * Flags associated with the rewrite/adjacency
+ */
+typedef enum vnet_rewrite_flags_t_
+{
+ /**
+ * This adjacency/interface has output features configured
+ */
+ VNET_REWRITE_HAS_FEATURES = (1 << 0),
+} __attribute__ ((packed)) vnet_rewrite_flags_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ /* Interface to mark re-written packets with. */
+ u32 sw_if_index;
+
+ /* Next node to feed after packet rewrite is done. */
+ u16 next_index;
+
+ /* Number of bytes in rewrite data. */
+ u16 data_bytes;
+
+ /* Max packet size layer 3 (MTU) for output interface.
+ Used for MTU check after packet rewrite. */
+ u16 max_l3_packet_bytes;
+
+ /* Data-plane flags on the adjacency/rewrite */
+ vnet_rewrite_flags_t flags;
+
+ /* When dynamically writing a multicast destination L2 addresss
+ * this is the offset from the IP address at which to write in the
+ * IP->MAC address translation.
+ */
+ u8 dst_mcast_offset;
+
+ /* The mask to apply to the lower 4 bytes of the IP address before ORing
+ * into the destinaiton MAC address */
+ u32 dst_mcast_mask;
+
+ /* Rewrite string starting at end and going backwards. */
+ u8 data[0];
+}) vnet_rewrite_header_t;
+/* *INDENT-ON* */
+
+/**
+ * At 16 bytes of rewrite herader we have enought space left for a IPv6
+ * (40 bytes) + LISP-GPE (8 bytes) in the cache line
+ */
+STATIC_ASSERT (sizeof (vnet_rewrite_header_t) <= 16,
+ "Rewrite header too big");
+
+/*
+ Helper macro for declaring rewrite string w/ given max-size.
+
+ Typical usage:
+ typedef struct {
+ //
+ int a, b;
+
+ // Total adjacency is 64 bytes.
+ vnet_rewrite_declare(64 - 2*sizeof(int)) rw;
+ } my_adjacency_t;
+*/
+#define vnet_declare_rewrite(total_bytes) \
+struct { \
+ vnet_rewrite_header_t rewrite_header; \
+ \
+ u8 rewrite_data[(total_bytes) - sizeof (vnet_rewrite_header_t)]; \
+}
+
+always_inline void
+vnet_rewrite_clear_data_internal (vnet_rewrite_header_t * rw, int max_size)
+{
+ /* Sanity check values carefully for this memset operation */
+ ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE));
+
+ rw->data_bytes = 0;
+ memset (rw->data, 0xfe, max_size);
+}
+
+always_inline void
+vnet_rewrite_set_data_internal (vnet_rewrite_header_t * rw,
+ int max_size, void *data, int data_bytes)
+{
+ /* Sanity check values carefully for this memset operation */
+ ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE));
+ ASSERT ((data_bytes >= 0) && (data_bytes < max_size));
+
+ rw->data_bytes = data_bytes;
+ clib_memcpy (rw->data + max_size - data_bytes, data, data_bytes);
+ memset (rw->data, 0xfe, max_size - data_bytes);
+}
+
+#define vnet_rewrite_set_data(rw,data,data_bytes) \
+ vnet_rewrite_set_data_internal (&((rw).rewrite_header), \
+ sizeof ((rw).rewrite_data), \
+ (data), \
+ (data_bytes))
+
+always_inline void *
+vnet_rewrite_get_data_internal (vnet_rewrite_header_t * rw, int max_size)
+{
+ ASSERT (rw->data_bytes <= max_size);
+ return rw->data + max_size - rw->data_bytes;
+}
+
+#define vnet_rewrite_get_data(rw) \
+ vnet_rewrite_get_data_internal (&((rw).rewrite_header), sizeof ((rw).rewrite_data))
+
+always_inline void
+vnet_rewrite_copy_one (vnet_rewrite_data_t * p0, vnet_rewrite_data_t * rw0,
+ int i)
+{
+ p0[-i] = rw0[-i];
+}
+
+void vnet_rewrite_copy_slow_path (vnet_rewrite_data_t * p0,
+ vnet_rewrite_data_t * rw0,
+ word n_left, uword most_likely_size);
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u64 a;
+ u32 b;
+ u16 c;
+}) eh_copy_t;
+/* *INDENT-ON* */
+
+always_inline void
+_vnet_rewrite_one_header (vnet_rewrite_header_t * h0,
+ void *packet0, int max_size, int most_likely_size)
+{
+ vnet_rewrite_data_t *p0 = packet0;
+ vnet_rewrite_data_t *rw0 = (vnet_rewrite_data_t *) (h0->data + max_size);
+ word n_left0;
+
+ /* 0xfefe => poisoned adjacency => crash */
+ ASSERT (h0->data_bytes != 0xfefe);
+
+ if (PREDICT_TRUE (h0->data_bytes == sizeof (eh_copy_t)))
+ {
+ eh_copy_t *s, *d;
+ s = (eh_copy_t *) (h0->data + max_size - sizeof (eh_copy_t));
+ d = (eh_copy_t *) (((u8 *) packet0) - sizeof (eh_copy_t));
+ clib_memcpy (d, s, sizeof (eh_copy_t));
+ return;
+ }
+
+
+#define _(i) \
+ do { \
+ if (most_likely_size > ((i)-1)*sizeof (vnet_rewrite_data_t)) \
+ vnet_rewrite_copy_one (p0, rw0, (i)); \
+ } while (0)
+
+ _(4);
+ _(3);
+ _(2);
+ _(1);
+
+#undef _
+
+ n_left0 = (int)
+ (((int) h0->data_bytes - most_likely_size) + (sizeof (rw0[0]) - 1))
+ / (int) sizeof (rw0[0]);
+ if (PREDICT_FALSE (n_left0 > 0))
+ vnet_rewrite_copy_slow_path (p0, rw0, n_left0, most_likely_size);
+}
+
+always_inline void
+_vnet_rewrite_two_headers (vnet_rewrite_header_t * h0,
+ vnet_rewrite_header_t * h1,
+ void *packet0,
+ void *packet1, int max_size, int most_likely_size)
+{
+ vnet_rewrite_data_t *p0 = packet0;
+ vnet_rewrite_data_t *p1 = packet1;
+ vnet_rewrite_data_t *rw0 = (vnet_rewrite_data_t *) (h0->data + max_size);
+ vnet_rewrite_data_t *rw1 = (vnet_rewrite_data_t *) (h1->data + max_size);
+ word n_left0, n_left1;
+ int slow_path;
+
+ /* 0xfefe => poisoned adjacency => crash */
+ ASSERT (h0->data_bytes != 0xfefe);
+ ASSERT (h1->data_bytes != 0xfefe);
+
+ /* Arithmetic calculation: bytes0 == bytes1 == 14 */
+ slow_path = h0->data_bytes ^ h1->data_bytes;
+ slow_path += h0->data_bytes ^ sizeof (eh_copy_t);
+
+ if (PREDICT_TRUE (slow_path == 0))
+ {
+ eh_copy_t *s0, *d0, *s1, *d1;
+ s0 = (eh_copy_t *) (h0->data + max_size - sizeof (eh_copy_t));
+ d0 = (eh_copy_t *) (((u8 *) packet0) - sizeof (eh_copy_t));
+ clib_memcpy (d0, s0, sizeof (eh_copy_t));
+ s1 = (eh_copy_t *) (h1->data + max_size - sizeof (eh_copy_t));
+ d1 = (eh_copy_t *) (((u8 *) packet1) - sizeof (eh_copy_t));
+ clib_memcpy (d1, s1, sizeof (eh_copy_t));
+ return;
+ }
+
+#define _(i) \
+ do { \
+ if (most_likely_size > ((i)-1)*sizeof (vnet_rewrite_data_t)) \
+ { \
+ vnet_rewrite_copy_one (p0, rw0, (i)); \
+ vnet_rewrite_copy_one (p1, rw1, (i)); \
+ } \
+ } while (0)
+
+ _(4);
+ _(3);
+ _(2);
+ _(1);
+
+#undef _
+
+ n_left0 = (int)
+ (((int) h0->data_bytes - most_likely_size) + (sizeof (rw0[0]) - 1))
+ / (int) sizeof (rw0[0]);
+ n_left1 = (int)
+ (((int) h1->data_bytes - most_likely_size) + (sizeof (rw1[0]) - 1))
+ / (int) sizeof (rw1[0]);
+
+ if (PREDICT_FALSE (n_left0 > 0 || n_left1 > 0))
+ {
+ vnet_rewrite_copy_slow_path (p0, rw0, n_left0, most_likely_size);
+ vnet_rewrite_copy_slow_path (p1, rw1, n_left1, most_likely_size);
+ }
+}
+
+#define vnet_rewrite_one_header(rw0,p0,most_likely_size) \
+ _vnet_rewrite_one_header (&((rw0).rewrite_header), (p0), \
+ sizeof ((rw0).rewrite_data), \
+ (most_likely_size))
+
+#define vnet_rewrite_two_headers(rw0,rw1,p0,p1,most_likely_size) \
+ _vnet_rewrite_two_headers (&((rw0).rewrite_header), &((rw1).rewrite_header), \
+ (p0), (p1), \
+ sizeof ((rw0).rewrite_data), \
+ (most_likely_size))
+
+always_inline void
+_vnet_fixup_one_header (vnet_rewrite_header_t * h0,
+ u8 * addr, u32 addr_len, u8 * packet0)
+{
+ if (PREDICT_TRUE (h0->dst_mcast_mask))
+ {
+ /* location to write to in the packet */
+ u8 *p0 = packet0 - h0->dst_mcast_offset;
+ u32 *p1 = (u32 *) p0;
+ /* location to copy from in the L3 dest address */
+ u32 *a0 = (u32 *) (addr + addr_len - sizeof (h0->dst_mcast_mask));
+
+ *p1 |= (*a0 & h0->dst_mcast_mask);
+ }
+}
+
+#define vnet_fixup_one_header(rw0,addr,p0) \
+ _vnet_fixup_one_header (&((rw0).rewrite_header), \
+ (u8*)(addr), sizeof((*addr)), \
+ (u8*)(p0))
+
+#define VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST ((void *) 0)
+/** Deprecated */
+void vnet_rewrite_for_sw_interface (struct vnet_main_t *vnm,
+ vnet_link_t packet_type,
+ u32 sw_if_index,
+ u32 node_index,
+ void *dst_address,
+ vnet_rewrite_header_t * rw,
+ u32 max_rewrite_bytes);
+
+u32 vnet_tx_node_index_for_sw_interface (struct vnet_main_t *vnm,
+ u32 sw_if_index);
+
+void vnet_rewrite_init (struct vnet_main_t *vnm,
+ u32 sw_if_index,
+ u32 this_node,
+ u32 next_node, vnet_rewrite_header_t * rw);
+
+u8 *vnet_build_rewrite_for_sw_interface (struct vnet_main_t *vnm,
+ u32 sw_if_index,
+ vnet_link_t packet_type,
+ const void *dst_address);
+void vnet_update_adjacency_for_sw_interface (struct vnet_main_t *vnm,
+ u32 sw_if_index, u32 ai);
+
+format_function_t format_vnet_rewrite;
+
+serialize_function_t serialize_vnet_rewrite, unserialize_vnet_rewrite;
+
+#endif /* included_vnet_rewrite_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */