From 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 Mon Sep 17 00:00:00 2001
From: Damjan Marion <damarion@cisco.com>
Date: Mon, 19 Dec 2016 23:05:39 +0100
Subject: Reorganize source tree to use single autotools instance

Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23
Signed-off-by: Damjan Marion <damarion@cisco.com>
---
 src/vnet/dpo/classify_dpo.c     |  131 +++++
 src/vnet/dpo/classify_dpo.h     |   56 ++
 src/vnet/dpo/dpo.c              |  500 +++++++++++++++++
 src/vnet/dpo/dpo.h              |  381 +++++++++++++
 src/vnet/dpo/drop_dpo.c         |  106 ++++
 src/vnet/dpo/drop_dpo.h         |   31 +
 src/vnet/dpo/ip_null_dpo.c      |  408 ++++++++++++++
 src/vnet/dpo/ip_null_dpo.h      |   56 ++
 src/vnet/dpo/load_balance.c     |  993 ++++++++++++++++++++++++++++++++
 src/vnet/dpo/load_balance.h     |  211 +++++++
 src/vnet/dpo/load_balance_map.c |  575 +++++++++++++++++++
 src/vnet/dpo/load_balance_map.h |   79 +++
 src/vnet/dpo/lookup_dpo.c       | 1185 +++++++++++++++++++++++++++++++++++++++
 src/vnet/dpo/lookup_dpo.h       |  108 ++++
 src/vnet/dpo/mpls_label_dpo.c   |  570 +++++++++++++++++++
 src/vnet/dpo/mpls_label_dpo.h   |  101 ++++
 src/vnet/dpo/punt_dpo.c         |  100 ++++
 src/vnet/dpo/punt_dpo.h         |   30 +
 src/vnet/dpo/receive_dpo.c      |  165 ++++++
 src/vnet/dpo/receive_dpo.h      |   62 ++
 20 files changed, 5848 insertions(+)
 create mode 100644 src/vnet/dpo/classify_dpo.c
 create mode 100644 src/vnet/dpo/classify_dpo.h
 create mode 100644 src/vnet/dpo/dpo.c
 create mode 100644 src/vnet/dpo/dpo.h
 create mode 100644 src/vnet/dpo/drop_dpo.c
 create mode 100644 src/vnet/dpo/drop_dpo.h
 create mode 100644 src/vnet/dpo/ip_null_dpo.c
 create mode 100644 src/vnet/dpo/ip_null_dpo.h
 create mode 100644 src/vnet/dpo/load_balance.c
 create mode 100644 src/vnet/dpo/load_balance.h
 create mode 100644 src/vnet/dpo/load_balance_map.c
 create mode 100644 src/vnet/dpo/load_balance_map.h
 create mode 100644 src/vnet/dpo/lookup_dpo.c
 create mode 100644 src/vnet/dpo/lookup_dpo.h
 create mode 100644 src/vnet/dpo/mpls_label_dpo.c
 create mode 100644 src/vnet/dpo/mpls_label_dpo.h
 create mode 100644 src/vnet/dpo/punt_dpo.c
 create mode 100644 src/vnet/dpo/punt_dpo.h
 create mode 100644 src/vnet/dpo/receive_dpo.c
 create mode 100644 src/vnet/dpo/receive_dpo.h

(limited to 'src/vnet/dpo')

diff --git a/src/vnet/dpo/classify_dpo.c b/src/vnet/dpo/classify_dpo.c
new file mode 100644
index 00000000..9e7886c9
--- /dev/null
+++ b/src/vnet/dpo/classify_dpo.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * pool of all classify DPOs
+ */
+classify_dpo_t *classify_dpo_pool;
+
+static classify_dpo_t *
+classify_dpo_alloc (void)
+{
+    classify_dpo_t *new_cd;
+
+    /* take a cache-line aligned element from the pool and hand it back zeroed */
+    pool_get_aligned(classify_dpo_pool, new_cd, CLIB_CACHE_LINE_BYTES);
+    memset(new_cd, 0, sizeof(classify_dpo_t));
+    return new_cd;
+}
+
+static index_t
+classify_dpo_get_index (classify_dpo_t *cd)
+{
+    return (cd - classify_dpo_pool); /* index == element offset from the pool base */
+}
+
+index_t
+classify_dpo_create (dpo_proto_t proto,
+                     u32 classify_table_index)
+{
+    /* the allocator returns a zeroed object, so its lock count starts at 0 */
+    classify_dpo_t *new_cd = classify_dpo_alloc();
+
+    new_cd->cd_table_index = classify_table_index;
+    new_cd->cd_proto = proto;
+
+    return classify_dpo_get_index(new_cd);
+}
+
+u8*
+format_classify_dpo (u8 *s, va_list *args)
+{
+    index_t index = va_arg (*args, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg (*args, u32); /* consumed but not used */
+    classify_dpo_t *cd;
+    /* args are (index_t pool-index, u32 indent); look the object up by index */
+    cd = classify_dpo_get(index);
+    /* appends "<proto>-classify:[<index>]:table:<table-index>" to s */
+    return (format(s, "%U-classify:[%d]:table:%d",
+		   format_dpo_proto, cd->cd_proto,
+		   index, cd->cd_table_index));
+}
+
+static void
+classify_dpo_lock (dpo_id_t *dpo)
+{
+    classify_dpo_t *cd;
+
+    cd = classify_dpo_get(dpo->dpoi_index);
+    /* one more user of this object; undone by classify_dpo_unlock() */
+    cd->cd_locks++;
+}
+
+static void
+classify_dpo_unlock (dpo_id_t *dpo)
+{
+    classify_dpo_t *cd = classify_dpo_get(dpo->dpoi_index);
+
+    /* drop one reference; while others remain there is nothing more to do */
+    if (0 != --cd->cd_locks)
+    {
+	return;
+    }
+
+    /* that was the last reference - give the object back to the pool */
+    pool_put(classify_dpo_pool, cd);
+}
+
+static void
+classify_dpo_mem_show (void)
+{
+    fib_show_memory_usage("Classify",                  /* row name */
+			  pool_elts(classify_dpo_pool), /* presumably the in-use count - confirm */
+			  pool_len(classify_dpo_pool),  /* presumably the allocated count - confirm */
+			  sizeof(classify_dpo_t));      /* bytes per object */
+}
+
+const static dpo_vft_t cd_vft = { /* callbacks registered for DPO_CLASSIFY */
+    .dv_lock = classify_dpo_lock,
+    .dv_unlock = classify_dpo_unlock,
+    .dv_format = format_classify_dpo,
+    .dv_mem_show = classify_dpo_mem_show,
+};
+
+const static char* const classify_ip4_nodes[] =
+{
+    "ip4-classify",
+    NULL, /* terminator */
+};
+const static char* const classify_ip6_nodes[] =
+{
+    "ip6-classify",
+    NULL, /* terminator */
+};
+const static char* const * const classify_nodes[DPO_PROTO_NUM] =
+{ /* per-protocol node-name lists; unlisted protocols (ethernet) are NULL too */
+    [DPO_PROTO_IP4]  = classify_ip4_nodes,
+    [DPO_PROTO_IP6]  = classify_ip6_nodes,
+    [DPO_PROTO_MPLS] = NULL, /* no classify node registered for MPLS */
+};
+
+void
+classify_dpo_module_init (void)
+{
+    dpo_register(DPO_CLASSIFY, &cd_vft, classify_nodes); /* vft + node names for this type */
+}
diff --git a/src/vnet/dpo/classify_dpo.h b/src/vnet/dpo/classify_dpo.h
new file mode 100644
index 00000000..48f4b2bf
--- /dev/null
+++ b/src/vnet/dpo/classify_dpo.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CLASSIFY_DPO_H__
+#define __CLASSIFY_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A classify DPO: a data-path protocol paired with a classify table index
+ */
+typedef struct classify_dpo_t
+{
+    dpo_proto_t cd_proto;   /* protocol given to classify_dpo_create() */
+
+    u32 cd_table_index;     /* classify table index given to classify_dpo_create() */
+
+    /**
+     * Number of locks/users of the classify DPO
+     */
+    u16 cd_locks;
+} classify_dpo_t;
+
+extern index_t classify_dpo_create(dpo_proto_t proto,
+                                   u32 classify_table_index);
+
+extern u8* format_classify_dpo(u8 *s, va_list *args);
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern classify_dpo_t *classify_dpo_pool;
+
+static inline classify_dpo_t *
+classify_dpo_get (index_t index)
+{
+    return (pool_elt_at_index(classify_dpo_pool, index)); /* index as returned by classify_dpo_create() */
+}
+
+extern void classify_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
new file mode 100644
index 00000000..688d2892
--- /dev/null
+++ b/src/vnet/dpo/dpo.c
@@ -0,0 +1,500 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * A Data-Path Object is an object that represents actions that are
+ * applied to packets as they are switched through VPP.
+ *
+ * The DPO is a base class that is specialised by other objects to provide
+ * concrete actions
+ *
+ * The VLIB graph nodes are a graph of types, the DPO graph is a graph of instances.
+ */
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/ip/format.h>
+#include <vnet/adj/adj.h>
+
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/punt_dpo.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/dpo/ip_null_dpo.h>
+
+/**
+ * Arrays of char* names for the DPO types and protos
+ */
+static const char* dpo_type_names[] = DPO_TYPES;   /* indexed by dpo_type_t */
+static const char* dpo_proto_names[] = DPO_PROTOS; /* indexed by dpo_proto_t */
+
+/**
+ * @brief Vector of virtual function tables for the DPO types
+ *
+ * This is a vector so we can dynamically register new DPO types in plugins.
+ */
+static dpo_vft_t *dpo_vfts; /* indexed by dpo_type_t; filled in by dpo_register() */
+
+/**
+ * @brief vector of graph node names associated with each DPO type and protocol.
+ *
+ *   dpo_nodes[child_type][child_proto][node_X] = node_name;
+ * i.e.
+ *   dpo_nodes[DPO_LOAD_BALANCE][DPO_PROTO_IP4][0] = "ip4-lookup"
+ *   dpo_nodes[DPO_LOAD_BALANCE][DPO_PROTO_IP4][1] = "ip4-load-balance"
+ *
+ * This is a vector so we can dynamically register new DPO types in plugins.
+ */
+static const char* const * const ** dpo_nodes;
+
+/**
+ * @brief Vector of edge indices from parent DPO nodes to child
+ *
+ * dpo_edges[child_type][child_proto][parent_type][parent_proto] = edge_index
+ *
+ * This array is filled in on demand from the dpo_nodes above. Note that
+ * the third dimension in dpo_nodes is lost, hence, the edge index from each
+ * node MUST be the same.
+ * Including both the child and parent protocol is required to support the
+ * case where it changes as the graph is traversed, most notably when an
+ * MPLS label is popped.
+ *
+ * Note that this array is child type specific, not child instance specific.
+ */
+static u32 ****dpo_edges;
+
+/**
+ * @brief The DPO type value that can be assigned to the next dynamic
+ *        type registration.
+ */
+static dpo_type_t dpo_dynamic = DPO_LAST;
+
+dpo_proto_t
+vnet_link_to_dpo_proto (vnet_link_t linkt)
+{
+    switch (linkt) /* map a link type onto the matching data-path protocol */
+    {
+    case VNET_LINK_IP6:
+        return (DPO_PROTO_IP6);
+    case VNET_LINK_IP4:
+        return (DPO_PROTO_IP4);
+    case VNET_LINK_MPLS:
+        return (DPO_PROTO_MPLS);
+    case VNET_LINK_ETHERNET:
+        return (DPO_PROTO_ETHERNET);
+    case VNET_LINK_ARP: /* ARP has no DPO protocol: falls through to the assert */
+	break;
+    }
+    ASSERT(0);
+    return (0); /* only reached if the assert does not stop us; 0 is not a named protocol constant */
+}
+
+u8 *
+format_dpo_type (u8 * s, va_list * args)
+{
+    dpo_type_t type = va_arg (*args, int); /* the argument is fetched as an int */
+    /* dynamically registered types (>= DPO_LAST) have no entry in the name table */
+    if (type >= (sizeof(dpo_type_names) / sizeof(dpo_type_names[0])))
+        return (format(s, "unknown"));
+    return (format(s, "%s", dpo_type_names[type]));
+}
+
+u8 *
+format_dpo_id (u8 * s, va_list * args)
+{
+    dpo_id_t *dpo = va_arg (*args, dpo_id_t*);
+    u32 indent = va_arg (*args, u32);
+    /* every DPO is prefixed with the next-node (edge) index it carries */
+    s = format(s, "[@%d]: ", dpo->dpoi_next_node);
+    /* delegate to the formatter registered for this type, if there is one */
+    if (NULL != dpo_vfts[dpo->dpoi_type].dv_format)
+    {
+        return (format(s, "%U",
+                       dpo_vfts[dpo->dpoi_type].dv_format,
+                       dpo->dpoi_index,
+                       indent));
+    }
+    /* no formatter registered: fall back to a fixed string */
+    switch (dpo->dpoi_type)
+    {
+    case DPO_FIRST:
+	s = format(s, "unset");
+	break;
+    default:
+	s = format(s, "unknown");
+	break;
+    }
+    return (s);
+}
+
+u8 *
+format_dpo_proto (u8 * s, va_list * args)
+{
+    dpo_proto_t proto = va_arg (*args, int);
+    int named = (proto < DPO_PROTO_NUM && NULL != dpo_proto_names[proto]); /* NONE is past the table; debug slot 0 is NULL */
+    return (format(s, "%s", named ? dpo_proto_names[proto] : "unknown"));
+}
+
+void
+dpo_set (dpo_id_t *dpo,
+	 dpo_type_t type,
+	 dpo_proto_t proto,
+	 index_t index)
+{
+    dpo_id_t tmp = *dpo;
+    /* tmp keeps the previous identity so its lock can be dropped at the end */
+    dpo->dpoi_type = type;
+    dpo->dpoi_proto = proto;
+    dpo->dpoi_index = index;
+
+    if (DPO_ADJACENCY == type)
+    {
+	/*
+	 * set the adj subtype from the adjacency's lookup_next_index
+	 */
+	ip_adjacency_t *adj;
+
+	adj = adj_get(index);
+
+	switch (adj->lookup_next_index)
+	{
+	case IP_LOOKUP_NEXT_ARP:
+	    dpo->dpoi_type = DPO_ADJACENCY_INCOMPLETE;
+	    break;
+	case IP_LOOKUP_NEXT_MIDCHAIN:
+	    dpo->dpoi_type = DPO_ADJACENCY_MIDCHAIN;
+	    break;
+	default: /* anything else stays a plain DPO_ADJACENCY */
+	    break;
+	}
+    }
+    dpo_lock(dpo);     /* lock the new object first ... */
+    dpo_unlock(&tmp);  /* ... then release whatever was referenced before */
+}
+
+void
+dpo_reset (dpo_id_t *dpo)
+{
+    dpo_id_t tmp = DPO_INVALID;
+
+    /*
+     * use the atomic copy operation; it also unlocks what dpo referenced.
+     */
+    dpo_copy(dpo, &tmp);
+}
+
+/**
+ * \brief
+ * Compare two Data-path objects by type, then by index
+ *
+ * like memcmp, return 0 if matching, !0 otherwise.
+ */
+int
+dpo_cmp (const dpo_id_t *dpo1,
+	 const dpo_id_t *dpo2)
+{
+    int res;
+
+    res = dpo1->dpoi_type - dpo2->dpoi_type;
+
+    if (0 != res) return (res);
+
+    return (dpo1->dpoi_index - dpo2->dpoi_index); /* proto and next-node are not compared */
+}
+
+void
+dpo_copy (dpo_id_t *dst,
+	  const dpo_id_t *src)
+{
+    dpo_id_t tmp = *dst;
+    /* tmp remembers what dst referenced so that lock can be released below */
+    /*
+     * the destination is written in a single u64 write - hence atomically w.r.t
+     * any packets inflight.
+     */
+    *((u64*)dst) = *(u64*)src; /* both are accessed as one u64 */
+
+    dpo_lock(dst);      /* take a lock on the newly referenced object */
+    dpo_unlock(&tmp);    /* release the object dst used to reference */
+}
+
+int
+dpo_is_adj (const dpo_id_t *dpo)
+{
+    const dpo_type_t type = dpo->dpoi_type;
+    /* any of the four adjacency flavours counts */
+    return ((DPO_ADJACENCY == type) || (DPO_ADJACENCY_INCOMPLETE == type) ||
+	    (DPO_ADJACENCY_MIDCHAIN == type) || (DPO_ADJACENCY_GLEAN == type));
+}
+
+void
+dpo_register (dpo_type_t type,
+	      const dpo_vft_t *vft,
+              const char * const * const * nodes)
+{
+    vec_validate(dpo_vfts, type);   /* make sure slot 'type' exists */
+    dpo_vfts[type] = *vft;          /* the vft is copied by value */
+    /* for the node names only the pointer is stored, not a copy */
+    vec_validate(dpo_nodes, type);
+    dpo_nodes[type] = nodes;
+}
+
+dpo_type_t
+dpo_register_new_type (const dpo_vft_t *vft,
+                       const char * const * const * nodes)
+{
+    /* claim the next unused type value; dynamic values start at DPO_LAST */
+    const dpo_type_t new_type = dpo_dynamic;
+    dpo_dynamic++;
+    dpo_register(new_type, vft, nodes);
+    return (new_type);
+}
+
+void
+dpo_lock (dpo_id_t *dpo)
+{
+    /* an unset DPO references nothing, so there is nothing to lock; */
+    /* otherwise dispatch to the lock function registered for its type */
+    if (dpo_id_is_valid(dpo))
+	dpo_vfts[dpo->dpoi_type].dv_lock(dpo);
+}
+
+void
+dpo_unlock (dpo_id_t *dpo)
+{
+    /* an unset DPO references nothing, so there is nothing to release; */
+    /* otherwise dispatch to the unlock function registered for its type */
+    if (dpo_id_is_valid(dpo))
+	dpo_vfts[dpo->dpoi_type].dv_unlock(dpo);
+}
+
+
+static u32
+dpo_get_next_node (dpo_type_t child_type,
+                   dpo_proto_t child_proto,
+                   const dpo_id_t *parent_dpo)
+{
+    dpo_proto_t parent_proto;
+    dpo_type_t parent_type;
+    /* returns the edge index from the child's nodes to the parent's; created and cached on first use */
+    parent_type = parent_dpo->dpoi_type;
+    parent_proto = parent_dpo->dpoi_proto;
+    /* grow the 4-D edge table on demand; unset leaf entries are ~0 */
+    vec_validate(dpo_edges, child_type);
+    vec_validate(dpo_edges[child_type], child_proto);
+    vec_validate(dpo_edges[child_type][child_proto], parent_type);
+    vec_validate_init_empty(
+        dpo_edges[child_type][child_proto][parent_type],
+        parent_proto, ~0);
+
+    /*
+     * if the edge index has not yet been created for this node to node transition
+     */
+    if (~0 == dpo_edges[child_type][child_proto][parent_type][parent_proto])
+    {
+        vlib_node_t *parent_node, *child_node;
+        vlib_main_t *vm;
+        u32 edge ,pp, cc;
+
+        vm = vlib_get_main();
+        /* both ends must have registered node names via dpo_register() */
+        ASSERT(NULL != dpo_nodes[child_type]);
+        ASSERT(NULL != dpo_nodes[child_type][child_proto]);
+        ASSERT(NULL != dpo_nodes[parent_type]);
+        ASSERT(NULL != dpo_nodes[parent_type][parent_proto]);
+
+        cc = 0;
+
+        /*
+         * create a graph arc from each of the child's registered nodes,
+         * to each of the parent's.
+         */
+        while (NULL != dpo_nodes[child_type][child_proto][cc])
+        {
+            child_node =
+                vlib_get_node_by_name(vm,
+                                      (u8*) dpo_nodes[child_type][child_proto][cc]);
+
+            pp = 0;
+
+            while (NULL != dpo_nodes[parent_type][parent_proto][pp])
+            {
+                parent_node =
+                    vlib_get_node_by_name(vm,
+                                          (u8*) dpo_nodes[parent_type][parent_proto][pp]);
+
+                edge = vlib_node_add_next(vm,
+                                          child_node->index,
+                                          parent_node->index);
+                /* only one index is stored, so every pair must yield the same edge */
+                if (~0 == dpo_edges[child_type][child_proto][parent_type][parent_proto])
+                {
+                    dpo_edges[child_type][child_proto][parent_type][parent_proto] = edge;
+                }
+                else
+                {
+                    ASSERT(dpo_edges[child_type][child_proto][parent_type][parent_proto] == edge);
+                }
+                pp++;
+            }
+            cc++;
+        }
+    }
+
+    return (dpo_edges[child_type][child_proto][parent_type][parent_proto]);
+}
+
+/**
+ * @brief Stack one DPO object on another, and thus establish a child parent
+ * relationship, using an edge index the caller has already resolved: the
+ * child DPO is set to reference the parent with dpoi_next_node set to edge.
+ */
+static void
+dpo_stack_i (u32 edge,
+             dpo_id_t *dpo,
+             const dpo_id_t *parent)
+{
+    /*
+     * in order to get an atomic update of the child we create a temporary,
+     * from a copy of the parent, and add the next_node. then we copy to the child
+     */
+    dpo_id_t tmp = DPO_INVALID;
+    dpo_copy(&tmp, parent);
+
+    /*
+     * set the edge index for the child to parent VLIB graph transition
+     */
+    tmp.dpoi_next_node = edge; /* note: u32 edge stored in a u16 field */
+
+    /*
+     * this update is atomic.
+     */
+    dpo_copy(dpo, &tmp);
+    /* release the lock the temporary took on the parent */
+    dpo_reset(&tmp);
+}
+
+/**
+ * @brief Stack one DPO object on another, and thus establish a child-parent
+ * relationship. The VLIB graph arc used is looked up from the child type and
+ * proto passed and from the type and proto carried by the parent DPO.
+ */
+void
+dpo_stack (dpo_type_t child_type,
+           dpo_proto_t child_proto,
+           dpo_id_t *dpo,
+           const dpo_id_t *parent)
+{
+    dpo_stack_i(dpo_get_next_node(child_type, child_proto, parent), dpo, parent);
+}
+
+/**
+ * @brief Stack one DPO object on another, and thus establish a child parent
+ * relationship. A new VLIB graph arc is created from the child node passed
+ * to the first node registered by the parent. The VLIB infra will ensure this
+ * arc is added only once.
+ */
+void
+dpo_stack_from_node (u32 child_node_index,
+                     dpo_id_t *dpo,
+                     const dpo_id_t *parent)
+{
+    dpo_proto_t parent_proto;
+    vlib_node_t *parent_node;
+    dpo_type_t parent_type;
+    vlib_main_t *vm;
+    u32 edge;
+
+    parent_type = parent->dpoi_type;
+    parent_proto = parent->dpoi_proto;
+
+    vm = vlib_get_main();
+
+    ASSERT(NULL != dpo_nodes[parent_type]);
+    ASSERT(NULL != dpo_nodes[parent_type][parent_proto]);
+    /* only the first node registered for the parent's type/proto is used */
+    parent_node =
+        vlib_get_node_by_name(vm, (u8*) dpo_nodes[parent_type][parent_proto][0]);
+
+    edge = vlib_node_add_next(vm,
+                              child_node_index,
+                              parent_node->index);
+    /* the edge is not cached in dpo_edges here */
+    dpo_stack_i(edge, dpo, parent);
+}
+
+static clib_error_t *
+dpo_module_init (vlib_main_t * vm)
+{
+    drop_dpo_module_init(); /* run each built-in DPO type's init in turn */
+    punt_dpo_module_init();
+    receive_dpo_module_init();
+    load_balance_module_init();
+    mpls_label_dpo_module_init();
+    classify_dpo_module_init();
+    lookup_dpo_module_init();
+    ip_null_dpo_module_init();
+
+    return (NULL); /* no error is ever reported */
+}
+
+VLIB_INIT_FUNCTION(dpo_module_init);
+
+static clib_error_t *
+dpo_memory_show (vlib_main_t * vm,
+		 unformat_input_t * input,
+		 vlib_cli_command_t * cmd)
+{
+    dpo_vft_t *vft;
+    /* CLI handler: print a header, then let each registered type print its own row */
+    vlib_cli_output (vm, "DPO memory");
+    vlib_cli_output (vm, "%=30s %=5s %=8s/%=9s   totals",
+		     "Name","Size", "in-use", "allocated");
+    /* types that registered no dv_mem_show are skipped */
+    vec_foreach(vft, dpo_vfts)
+    {
+	if (NULL != vft->dv_mem_show)
+	    vft->dv_mem_show();
+    }
+
+    return (NULL);
+}
+
+/* *INDENT-OFF* */
+/*?
+ * The '<em>show dpo memory</em>' command displays the memory usage for each
+ * data-plane object type.
+ *
+ * @cliexpar
+ * @cliexstart{show dpo memory}
+ * DPO memory
+ *             Name               Size  in-use /allocated   totals
+ *         load-balance            64     12   /    12      768/768
+ *           Adjacency            256      1   /    1       256/256
+ *            Receive              24      5   /    5       120/120
+ *            Lookup               12      0   /    0       0/0
+ *           Classify              12      0   /    0       0/0
+ *          MPLS label             24      0   /    0       0/0
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (show_fib_memory, static) = { /* NOTE(review): symbol says "fib" but this is the dpo command */
+    .path = "show dpo memory",
+    .function = dpo_memory_show,
+    .short_help = "show dpo memory",
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h
new file mode 100644
index 00000000..1efcbc88
--- /dev/null
+++ b/src/vnet/dpo/dpo.h
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * A Data-Path Object is an object that represents actions that are
+ * applied to packets as they are switched through VPP's data-path.
+ *
+ * The DPO can be considered to be like a base class that is specialised
+ * by other objects to provide concrete actions
+ *
+ * The VLIB graph nodes are a graph of DPO types, the DPO graph is a graph of
+ * instances.
+ */
+
+#ifndef __DPO_H__
+#define __DPO_H__
+
+#include <vnet/vnet.h>
+
+/**
+ * @brief An index for adjacencies.
+ * Alas 'C' is not typesafe enough to b0rk when a u32 is used instead of
+ * an index_t. However, for us humans, we can glean much more intent
+ * from the declaration
+ *  foo bar(index_t t);
+ * than we can from
+ *  foo bar(u32 t);
+ */
+typedef u32 index_t;
+
+/**
+ * @brief Invalid index - used when no index is known
+ * blazoned capitals INVALID speak volumes where ~0 does not.
+ */
+#define INDEX_INVALID ((index_t)(~0))
+
+/**
+ * @brief Data path protocol.
+ * Actions performed on packets in the data-plane can be described and represented
+ * by protocol independent objects, i.e. ADJACENCY, but the specific actions
+ * required during ADJACENCY processing can be protocol dependent. For example,
+ * the adjacency rewrite node performs a ip4 checksum calculation,  ip6 and MPLS
+ * do not, all 3 perform a TTL decrement. The VLIB graph nodes are thus protocol
+ * dependent, and thus each graph edge/arc is too.
+ * When programming a DPO's next node arc from child to parent it is thus required
+ * to know the parent's data-path protocol so the correct arc index can be used.
+ */
+typedef enum dpo_proto_t_
+{
+#if CLIB_DEBUG > 0
+    DPO_PROTO_IP4 = 1, /* debug builds: 0 is no protocol - presumably to catch uninitialised use */
+#else
+    DPO_PROTO_IP4 = 0,
+#endif
+    DPO_PROTO_IP6,
+    DPO_PROTO_ETHERNET,
+    DPO_PROTO_MPLS,
+} __attribute__((packed)) dpo_proto_t;
+
+#define DPO_PROTO_NUM ((dpo_proto_t)(DPO_PROTO_MPLS+1))  /* one past the last protocol */
+#define DPO_PROTO_NONE ((dpo_proto_t)(DPO_PROTO_NUM+1)) /* "no protocol"; note NUM+1, not NUM */
+/* names for the protocols, indexed by dpo_proto_t */
+#define DPO_PROTOS {		\
+    [DPO_PROTO_IP4]  = "ip4",	\
+    [DPO_PROTO_IP6]  = "ip6",	\
+    [DPO_PROTO_ETHERNET]  = "ethernet", \
+    [DPO_PROTO_MPLS] = "mpls",	\
+}
+/* iterate _proto over every protocol, DPO_PROTO_IP4 through DPO_PROTO_MPLS inclusive */
+#define FOR_EACH_DPO_PROTO(_proto)    \
+    for (_proto = DPO_PROTO_IP4;      \
+	 _proto <= DPO_PROTO_MPLS;    \
+	 _proto++)
+
+/**
+ * @brief Common types of data-path objects
+ * New types can be dynamically added using dpo_register_new_type()
+ */
+typedef enum dpo_type_t_ {
+    /**
+     * The zero value is reserved as "unset" so we can spot uninitialisation errors
+     */
+    DPO_FIRST,
+    DPO_DROP,
+    DPO_IP_NULL,
+    DPO_PUNT,
+    /**
+     * @brief load-balancing over a choice of [un]equal cost paths
+     */
+    DPO_LOAD_BALANCE,
+    DPO_ADJACENCY,
+    DPO_ADJACENCY_INCOMPLETE,
+    DPO_ADJACENCY_MIDCHAIN,
+    DPO_ADJACENCY_GLEAN,
+    DPO_RECEIVE,
+    DPO_LOOKUP,
+    DPO_LISP_CP,
+    DPO_CLASSIFY,
+    DPO_MPLS_LABEL,
+    DPO_LAST, /* not a type itself: the first value handed out to dynamic registrations */
+} __attribute__((packed)) dpo_type_t;
+
+#define DPO_TYPE_NUM DPO_LAST /* number of statically defined types */
+/* display names for the statically defined types, indexed by dpo_type_t */
+#define DPO_TYPES {			\
+    [DPO_FIRST] = "dpo-invalid",	\
+    [DPO_DROP] = "dpo-drop",	\
+    [DPO_IP_NULL] = "dpo-ip-null",		\
+    [DPO_PUNT] = "dpo-punt",	\
+    [DPO_ADJACENCY] = "dpo-adjacency",	\
+    [DPO_ADJACENCY_INCOMPLETE] = "dpo-adjacency-incomplete",	\
+    [DPO_ADJACENCY_MIDCHAIN] = "dpo-adjacency-midchain",	\
+    [DPO_ADJACENCY_GLEAN] = "dpo-glean",	\
+    [DPO_RECEIVE] = "dpo-receive",	\
+    [DPO_LOOKUP] = "dpo-lookup",	\
+    [DPO_LOAD_BALANCE] = "dpo-load-balance",	\
+    [DPO_LISP_CP] = "dpo-lisp-cp",	\
+    [DPO_CLASSIFY] = "dpo-classify",	\
+    [DPO_MPLS_LABEL] = "dpo-mpls-label"	\
+}
+
+/**
+ * @brief The identity of a DPO is a combination of its type and its
+ * instance number/index of objects of that type
+ */
+typedef struct dpo_id_t_ {
+    /**
+     * the type
+     */
+    dpo_type_t dpoi_type;
+    /**
+     * the data-path protocol of the type.
+     */
+    dpo_proto_t dpoi_proto;
+    /**
+     * The next VLIB node to follow.
+     */
+    u16 dpoi_next_node;
+    /**
+     * the index of objects of that type
+     */
+    index_t dpoi_index;
+} __attribute__ ((aligned(sizeof(u64)))) dpo_id_t; /* u64-aligned: dpo_copy() writes it as one u64 */
+
+STATIC_ASSERT(sizeof(dpo_id_t) <= sizeof(u64),
+	      "DPO ID is greater than sizeof u64 "
+	      "atomic updates need to be revisited");
+
+/**
+ * @brief An initialiser for DPOs declared on the stack.
+ * The next node is set to 0 since VLIB graph nodes should set 0 index to drop.
+ */
+#define DPO_INVALID                \
+{                                  \
+    .dpoi_type = DPO_FIRST,        \
+    .dpoi_proto = DPO_PROTO_NONE,  \
+    .dpoi_index = INDEX_INVALID,   \
+    .dpoi_next_node = 0,           \
+}
+
+/**
+ * @brief Return true if the DPO object is valid, i.e. has been initialised.
+ */
+static inline int
+dpo_id_is_valid (const dpo_id_t *dpoi)
+{
+    return (!(DPO_FIRST == dpoi->dpoi_type ||          /* unset type */
+	      INDEX_INVALID == dpoi->dpoi_index));     /* or no index */
+}
+
+extern dpo_proto_t vnet_link_to_dpo_proto(vnet_link_t linkt);
+
+/**
+ * @brief
+ *  Take a reference counting lock on the DPO
+ */
+extern void dpo_lock(dpo_id_t *dpo);
+
+/**
+ * @brief
+ *  Release a reference counting lock on the DPO
+ */
+extern void dpo_unlock(dpo_id_t *dpo);
+
+/**
+ * @brief Set/create a DPO ID
+ * The DPO will be locked.
+ *
+ * @param dpo
+ *  The DPO object to configure
+ *
+ * @param type
+ *  The dpo_type_t of the DPO
+ *
+ * @param proto
+ *  The dpo_proto_t of the DPO
+ *
+ * @param index
+ *  The type specific index of the DPO
+ */
+extern void dpo_set(dpo_id_t *dpo,
+		    dpo_type_t type,
+		    dpo_proto_t proto,
+		    index_t index);
+
+/**
+ * @brief reset a DPO ID
+ * The DPO will be unlocked.
+ *
+ * @param dpo
+ *  The DPO object to reset
+ */
+extern void dpo_reset(dpo_id_t *dpo);
+
+/**
+ * @brief compare two DPOs for equality
+ */
+extern int dpo_cmp(const dpo_id_t *dpo1,
+		   const dpo_id_t *dpo2);
+
+/**
+ * @brief
+ *  atomic copy a data-plane object.
+ * This is safe to use when the dst DPO is currently switching packets
+ */
+extern void dpo_copy(dpo_id_t *dst,
+		     const dpo_id_t *src);
+
+/**
+ * @brief Return TRUE if the DPO is any type of adjacency
+ */
+extern int dpo_is_adj(const dpo_id_t *dpo);
+
+/**
+ * @brief Format a dpo_id_t object
+ */
+extern u8 *format_dpo_id(u8 * s, va_list * args);
+
+/**
+ * @brief format a DPO type
+ */
+extern u8 *format_dpo_type(u8 * s, va_list * args);
+
+/**
+ * @brief format a DPO protocol
+ */
+extern u8 *format_dpo_proto(u8 * s, va_list * args);
+
+/**
+ * @brief
+ *  Set and stack a DPO.
+ *  The DPO passed is set to the parent DPO and the necessary
+ *  VLIB graph arcs are created. The child_type and child_proto
+ * are used to get the VLIB nodes from which the arcs are added.
+ *
+ * @param child_type
+ *  Child DPO type.
+ *
+ * @param child_proto
+ *  Child DPO proto
+ *
+ * @param dpo
+ *  This is the DPO to stack and set.
+ *
+ * @param parent_dpo
+ *  The parent DPO to stack onto.
+ */
+extern void dpo_stack(dpo_type_t child_type,
+                      dpo_proto_t child_proto,
+                      dpo_id_t *dpo,
+                      const dpo_id_t *parent_dpo);
+
+/**
+ * @brief
+ *  Set and stack a DPO.
+ *  The DPO passed is set to the parent DPO and the necessary
+ *  VLIB graph arcs are created, from the child_node passed.
+ *
+ * @param child_node
+ *  The VLIB graph node index to create an arc from to the parent
+ *
+ * @param dpo
+ *  This is the DPO to stack and set.
+ *
+ * @param parent
+ *  The parent DPO to stack onto.
+ */
+extern void dpo_stack_from_node(u32 child_node,
+                                dpo_id_t *dpo,
+                                const dpo_id_t *parent);
+
+/**
+ * @brief  A lock function registered for a DPO type
+ */
+typedef void (*dpo_lock_fn_t)(dpo_id_t *dpo);
+
+/**
+ * @brief An unlock function registered for a DPO type
+ */
+typedef void (*dpo_unlock_fn_t)(dpo_id_t *dpo);
+
+/**
+ * @brief A memory usage show function
+ */
+typedef void (*dpo_mem_show_t)(void);
+
+/**
+ * @brief A virtual function table registered for a DPO type
+ */
+typedef struct dpo_vft_t_
+{
+    /**
+     * A reference counting lock function
+     */
+    dpo_lock_fn_t dv_lock;
+    /**
+     * A reference counting unlock function
+     */
+    dpo_unlock_fn_t dv_unlock;
+    /**
+     * A format function; optional, called with (index_t index, u32 indent)
+     */
+    format_function_t *dv_format;
+    /**
+     * A show memory usage function; optional, NULL entries are skipped
+     */
+    dpo_mem_show_t dv_mem_show;
+} dpo_vft_t;
+
+
+/**
+ * @brief For a given DPO type Register:
+ *   - a virtual function table
+ *   - a NULL terminated array of graph nodes from which that object type
+ *     will originate packets, i.e. the nodes in which the object type will be
+ *     the parent DPO in the DP graph. The nodes are per-data-path protocol
+ *     (see above).
+ *
+ * @param type
+ *  The type being registered. 
+ *
+ * @param vft
+ *  The virtual function table to register for the type.
+ *
+ * @param nodes
+ *  The string description of the per-protocol VLIB graph nodes.
+ */
+extern void dpo_register(dpo_type_t type,
+			 const dpo_vft_t *vft,
+			 const char * const * const * nodes);
+
+/**
+ * @brief Create and register a new DPO type.
+ *
+ * This can be used by plugins to create new DPO types that are not listed
+ * in dpo_type_t enum
+ *
+ * @param vft
+ *  The virtual function table to register for the type.
+ *
+ * @param nodes
+ *  The string description of the per-protocol VLIB graph nodes.
+ *
+ * @return The new dpo_type_t
+ */
+extern dpo_type_t dpo_register_new_type(const dpo_vft_t *vft,
+					const char * const * const * nodes);
+
+#endif
diff --git a/src/vnet/dpo/drop_dpo.c b/src/vnet/dpo/drop_dpo.c
new file mode 100644
index 00000000..5118d2a4
--- /dev/null
+++ b/src/vnet/dpo/drop_dpo.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing dropping the packet
+ */
+
+#include <vnet/dpo/dpo.h>
+
+static dpo_id_t drop_dpos[DPO_PROTO_NUM];
+
+const dpo_id_t *
+drop_dpo_get (dpo_proto_t proto)
+{
+    dpo_id_t *drop = &drop_dpos[proto];
+    dpo_set(drop, DPO_DROP, proto, proto); /* the index doubles as the proto */
+    return (drop);
+}
+
+int
+dpo_is_drop (const dpo_id_t *dpo)
+{
+    return (dpo->dpoi_type == DPO_DROP);
+}
+
+static void
+drop_dpo_lock (dpo_id_t *dpo)
+{
+    /*
+     * not maintaining a lock count on the drop
+     * more trouble than it's worth.
+     * There always needs to be one around. no point in managing its lifetime
+     */
+}
+static void
+drop_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+static u8*
+format_drop_dpo (u8 *s, va_list *ap)
+{
+    /* drop_dpo_get() passes the protocol as the DPO's index, so print it as one */
+    index_t index = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+    return (format(s, "dpo-drop %U", format_dpo_proto, index));
+}
+
+const static dpo_vft_t drop_vft = {
+    .dv_lock   = drop_dpo_lock,
+    .dv_unlock = drop_dpo_unlock,
+    .dv_format = format_drop_dpo,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a drop
+ *        object.
+ *
+ * this means that these graph nodes are ones from which a drop is the
+ * parent object in the DPO-graph.
+ */
+const static char* const drop_ip4_nodes[] =
+{
+    "ip4-drop",
+    NULL,
+};
+const static char* const drop_ip6_nodes[] =
+{
+    "ip6-drop",
+    NULL,
+};
+const static char* const drop_mpls_nodes[] =
+{
+    "mpls-drop",
+    NULL,
+};
+const static char* const drop_ethernet_nodes[] =
+{
+    "error-drop",
+    NULL,
+};
+const static char* const * const drop_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = drop_ip4_nodes,
+    [DPO_PROTO_IP6]  = drop_ip6_nodes,
+    [DPO_PROTO_MPLS] = drop_mpls_nodes,
+    [DPO_PROTO_ETHERNET] = drop_ethernet_nodes,
+};
+
+void
+drop_dpo_module_init (void)
+{
+    dpo_register(DPO_DROP, &drop_vft, drop_nodes);
+}
diff --git a/src/vnet/dpo/drop_dpo.h b/src/vnet/dpo/drop_dpo.h
new file mode 100644
index 00000000..436df36c
--- /dev/null
+++ b/src/vnet/dpo/drop_dpo.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The Drop DPO will drop all packets, no questions asked. It is valid
+ * for any packet protocol.
+ */
+
+#ifndef __DROP_DPO_H__
+#define __DROP_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+extern int dpo_is_drop(const dpo_id_t *dpo);
+
+extern const dpo_id_t *drop_dpo_get(dpo_proto_t proto);
+
+extern void drop_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/ip_null_dpo.c b/src/vnet/dpo/ip_null_dpo.c
new file mode 100644
index 00000000..22682e4e
--- /dev/null
+++ b/src/vnet/dpo/ip_null_dpo.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The data-path object for IP null routes: packets are dropped,
+ * optionally after a rate limited ICMP error is sent to the sender.
+ */
+
+#include <vnet/dpo/ip_null_dpo.h>
+#include <vnet/ip/ip.h>
+
+/**
+ * @brief A representation of the IP_NULL DPO
+ */
+typedef struct ip_null_dpo_t_
+{
+    /**
+     * @brief The action to take on a packet
+     */
+    ip_null_dpo_action_t ind_action;
+    /**
+     * @brief The next VLIB node
+     */
+    u32 ind_next_index;
+    /**
+     * rate limits
+     */
+} ip_null_dpo_t;
+
+/**
+ * @brief the IP_NULL dpos are shared by all routes, hence they are global.
+ * As the name implies this is only for IP, hence 2 (one set of actions per IP version).
+ */
+static ip_null_dpo_t ip_null_dpos[2 * IP_NULL_DPO_ACTION_NUM] = {
+    [0] = {
+	/* proto ip4, no action */
+	.ind_action = IP_NULL_ACTION_NONE,
+    },
+    [1] = {
+	/* proto ip4, action send unreach */
+	.ind_action = IP_NULL_ACTION_SEND_ICMP_UNREACH,
+    },
+    [2] = {
+	/* proto ip4, action send prohibit */
+	.ind_action = IP_NULL_ACTION_SEND_ICMP_PROHIBIT,
+    },
+    [3] = {
+	/* proto ip6, no action */
+	.ind_action = IP_NULL_ACTION_NONE,
+    },
+    [4] = {
+	/* proto ip6, action send unreach */
+	.ind_action = IP_NULL_ACTION_SEND_ICMP_UNREACH,
+    },
+    [5] = {
+	/* proto ip6, action send prohibit */
+	.ind_action = IP_NULL_ACTION_SEND_ICMP_PROHIBIT,
+    },
+};
+
+/**
+ * @brief Action strings
+ */
+const char *ip_null_action_strings[] = IP_NULL_ACTIONS;
+
+void
+ip_null_dpo_add_and_lock (dpo_proto_t proto,
+			  ip_null_dpo_action_t action,
+			  dpo_id_t *dpo)
+{
+    int i;
+
+    ASSERT((proto == DPO_PROTO_IP4) ||
+	   (proto == DPO_PROTO_IP6));
+    ASSERT(action < IP_NULL_DPO_ACTION_NUM);
+
+    i = (proto == DPO_PROTO_IP4 ? 0 : 1);
+
+    dpo_set(dpo, DPO_IP_NULL, proto, (i*IP_NULL_DPO_ACTION_NUM) + action);
+}
+
+always_inline const ip_null_dpo_t*
+ip_null_dpo_get (index_t indi)
+{
+    return (&ip_null_dpos[indi]);
+}
+
+static void
+ip_null_dpo_lock (dpo_id_t *dpo)
+{
+    /*
+     * not maintaining a lock count on the ip_null, they are const global and
+     * never die.
+     */
+}
+static void
+ip_null_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+static u8*
+format_ip_null_dpo (u8 *s, va_list *ap)
+{
+    index_t index = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+    const ip_null_dpo_t *ind;
+    dpo_proto_t proto;
+
+    ind = ip_null_dpo_get(index);
+    proto = (index < IP_NULL_DPO_ACTION_NUM ? DPO_PROTO_IP4 : DPO_PROTO_IP6);
+
+    return (format(s, "%U-null action:%s",
+		   format_dpo_proto, proto,
+		   ip_null_action_strings[ind->ind_action]));
+}
+
+const static dpo_vft_t ip_null_vft = {
+    .dv_lock   = ip_null_dpo_lock,
+    .dv_unlock = ip_null_dpo_unlock,
+    .dv_format = format_ip_null_dpo,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a ip_null
+ *        object.
+ *
+ * this means that these graph nodes are ones from which a ip_null is the
+ * parent object in the DPO-graph.
+ */
+const static char* const ip4_null_nodes[] =
+{
+    "ip4-null",
+    NULL,
+};
+const static char* const ip6_null_nodes[] =
+{
+    "ip6-null",
+    NULL,
+};
+
+const static char* const * const ip_null_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4] = ip4_null_nodes,
+    [DPO_PROTO_IP6] = ip6_null_nodes,
+};
+
+typedef struct ip_null_dpo_trace_t_
+{
+    index_t ind_index;
+} ip_null_dpo_trace_t;
+
+/**
+ * @brief Exit nodes from a IP_NULL
+ */
+typedef enum ip_null_next_t_
+{
+    IP_NULL_NEXT_DROP,
+    IP_NULL_NEXT_ICMP,
+    IP_NULL_NEXT_NUM,
+} ip_null_next_t;
+
+always_inline uword
+ip_null_dpo_switch (vlib_main_t * vm,
+		    vlib_node_runtime_t * node,
+		    vlib_frame_t * frame,
+		    u8 is_ip4)
+{
+    u32 n_left_from, next_index, *from, *to_next;
+    static f64 time_last_seed_change = -1e100;
+    static u32 hash_seeds[3];
+    static uword hash_bitmap[256 / BITS (uword)];
+    f64 time_now;
+
+    from = vlib_frame_vector_args (frame);
+    n_left_from = frame->n_vectors;
+
+    time_now = vlib_time_now (vm);
+    if (time_now - time_last_seed_change > 1e-1)
+    {
+	uword i;
+	u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
+					       sizeof (hash_seeds));
+	for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
+	    hash_seeds[i] = r[i];
+
+	/* Mark all hash keys as been not-seen before. */
+	for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
+	    hash_bitmap[i] = 0;
+
+	time_last_seed_change = time_now;
+    }
+
+    next_index = node->cached_next_index;
+
+    while (n_left_from > 0)
+    {
+	u32 n_left_to_next;
+
+	vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+	while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	    u32 a0, b0, c0, m0, drop0;
+	    vlib_buffer_t *p0;
+	    u32 bi0, indi0, next0;
+	    const ip_null_dpo_t *ind0;
+	    uword bm0;
+
+	    bi0 = from[0];
+	    to_next[0] = bi0;
+	    from += 1;
+	    to_next += 1;
+	    n_left_from -= 1;
+	    n_left_to_next -= 1;
+
+	    p0 = vlib_get_buffer (vm, bi0);
+
+	    /* lookup dst + src mac */
+	    indi0 =  vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+	    ind0 = ip_null_dpo_get(indi0);
+	    next0 = IP_NULL_NEXT_DROP;
+
+	    /*
+	     * rate limit - don't DoS the sender.
+	     */
+	    a0 = hash_seeds[0];
+	    b0 = hash_seeds[1];
+	    c0 = hash_seeds[2];
+
+	    if (is_ip4)
+	    {
+		ip4_header_t *ip0 = vlib_buffer_get_current (p0);
+
+		a0 ^= ip0->dst_address.data_u32;
+		b0 ^= ip0->src_address.data_u32;
+
+		hash_v3_finalize32 (a0, b0, c0);
+	    }
+	    else
+	    {
+		ip6_header_t *ip0 = vlib_buffer_get_current (p0);
+
+		a0 ^= ip0->dst_address.as_u32[0];
+		b0 ^= ip0->src_address.as_u32[0];
+		c0 ^= ip0->src_address.as_u32[1];
+
+		hash_v3_mix32 (a0, b0, c0);
+
+		a0 ^= ip0->dst_address.as_u32[1];
+		b0 ^= ip0->src_address.as_u32[2];
+		c0 ^= ip0->src_address.as_u32[3];
+
+		hash_v3_finalize32 (a0, b0, c0);
+	    }
+
+	    c0 &= BITS (hash_bitmap) - 1;
+	    c0 = c0 / BITS (uword);
+	    m0 = (uword) 1 << (c0 % BITS (uword));
+
+	    bm0 = hash_bitmap[c0];
+	    drop0 = (bm0 & m0) != 0;
+
+	    /* Mark it as seen. */
+	    hash_bitmap[c0] = bm0 | m0;
+
+	    if (PREDICT_FALSE(!drop0))
+	    {
+		if (is_ip4)
+		{
+		    /*
+		     * There's a trade-off here. This conditinal statement
+		     * versus a graph node per-condition. Given the number
+		     * expect number of packets to reach a null route is 0
+		     * we favour the run-time cost over the graph complexity
+		     */
+		    if (IP_NULL_ACTION_SEND_ICMP_UNREACH == ind0->ind_action)
+		    {
+			next0 = IP_NULL_NEXT_ICMP;
+			icmp4_error_set_vnet_buffer(
+			    p0,
+			    ICMP4_destination_unreachable,
+			    ICMP4_destination_unreachable_destination_unreachable_host,
+			    0);
+		    }
+		    else if (IP_NULL_ACTION_SEND_ICMP_PROHIBIT == ind0->ind_action)
+		    {
+			next0 = IP_NULL_NEXT_ICMP;
+			icmp4_error_set_vnet_buffer(
+			    p0,
+			    ICMP4_destination_unreachable,
+			    ICMP4_destination_unreachable_host_administratively_prohibited,
+			    0);
+		    }
+		}
+		else
+		{
+		    if (IP_NULL_ACTION_SEND_ICMP_UNREACH == ind0->ind_action)
+		    {
+			next0 = IP_NULL_NEXT_ICMP;
+			icmp6_error_set_vnet_buffer(
+			    p0,
+			    ICMP6_destination_unreachable,
+			    ICMP6_destination_unreachable_no_route_to_destination,
+			    0);
+		    }
+		    else if (IP_NULL_ACTION_SEND_ICMP_PROHIBIT == ind0->ind_action)
+		    {
+			next0 = IP_NULL_NEXT_ICMP;
+			icmp6_error_set_vnet_buffer(
+			    p0,
+			    ICMP6_destination_unreachable,
+			    ICMP6_destination_unreachable_destination_administratively_prohibited,
+			    0);
+		    }
+		}
+	    }
+
+	    if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+		ip_null_dpo_trace_t *tr = vlib_add_trace (vm, node, p0,
+							  sizeof (*tr));
+		tr->ind_index = indi0;
+	    }
+	    vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+					     n_left_to_next, bi0, next0);
+	}
+
+	vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+    return frame->n_vectors;
+}
+
+static u8 *
+format_ip_null_dpo_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip_null_dpo_trace_t *t = va_arg (*args, ip_null_dpo_trace_t *);
+
+  s = format (s, "%U", format_ip_null_dpo, t->ind_index, 0);
+  return s;
+}
+
+static uword
+ip4_null_dpo_switch (vlib_main_t * vm,
+		    vlib_node_runtime_t * node,
+		    vlib_frame_t * frame)
+{
+    return (ip_null_dpo_switch(vm, node, frame, 1));
+}
+
+/**
+ * @brief
+ */
+VLIB_REGISTER_NODE (ip4_null_dpo_node) = {
+  .function = ip4_null_dpo_switch,
+  .name = "ip4-null",
+  .vector_size = sizeof (u32),
+
+  .format_trace = format_ip_null_dpo_trace,
+  .n_next_nodes = IP_NULL_NEXT_NUM,
+  .next_nodes = {
+      [IP_NULL_NEXT_DROP] = "ip4-drop",
+      [IP_NULL_NEXT_ICMP] = "ip4-icmp-error",
+  },
+};
+
+static uword
+ip6_null_dpo_switch (vlib_main_t * vm,
+		    vlib_node_runtime_t * node,
+		    vlib_frame_t * frame)
+{
+    return (ip_null_dpo_switch(vm, node, frame, 0));
+}
+
+/**
+ * @brief
+ */
+VLIB_REGISTER_NODE (ip6_null_dpo_node) = {
+  .function = ip6_null_dpo_switch,
+  .name = "ip6-null",
+  .vector_size = sizeof (u32),
+
+  .format_trace = format_ip_null_dpo_trace,
+  .n_next_nodes = IP_NULL_NEXT_NUM,
+  .next_nodes = {
+      [IP_NULL_NEXT_DROP] = "ip6-drop",
+      [IP_NULL_NEXT_ICMP] = "ip6-icmp-error",
+  },
+};
+
+void
+ip_null_dpo_module_init (void)
+{
+    dpo_register(DPO_IP_NULL, &ip_null_vft, ip_null_nodes);
+}
diff --git a/src/vnet/dpo/ip_null_dpo.h b/src/vnet/dpo/ip_null_dpo.h
new file mode 100644
index 00000000..002a2a70
--- /dev/null
+++ b/src/vnet/dpo/ip_null_dpo.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The IP NULL DPO represents the rubbish bin for IP traffic. Without specifying an
+ * action (i.e. send ICMP type X to sender) it is equivalent to using a drop DPO.
+ * However, in contrast to the drop DPO any route that resolves via a NULL, is
+ * considered to be 'resolved' by FIB, i.e. a IP NULL is used when the control plane
+ * is explicitly expressing the desire to drop packets. Drop DPOs are used
+ * internally by FIB when resolution is not possible.
+ *
+ * Any replies to sender are rate limited.
+ */
+
+#ifndef __IP_NULL_DPO_H__
+#define __IP_NULL_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+/**
+ * @brief Actions to take when a packet encounters the NULL DPO
+ */
+typedef enum ip_null_dpo_action_t_
+{
+    IP_NULL_ACTION_NONE,
+    IP_NULL_ACTION_SEND_ICMP_UNREACH,
+    IP_NULL_ACTION_SEND_ICMP_PROHIBIT,
+} ip_null_dpo_action_t;
+
+#define IP_NULL_ACTIONS {						\
+    [IP_NULL_ACTION_NONE] = "discard",					\
+    [IP_NULL_ACTION_SEND_ICMP_UNREACH] = "send-unreachable",		\
+    [IP_NULL_ACTION_SEND_ICMP_PROHIBIT] = "send-prohibited",		\
+}
+
+#define IP_NULL_DPO_ACTION_NUM (IP_NULL_ACTION_SEND_ICMP_PROHIBIT+1)
+
+extern void ip_null_dpo_add_and_lock (dpo_proto_t proto,
+				      ip_null_dpo_action_t action,
+				      dpo_id_t *dpo);
+
+extern void ip_null_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
new file mode 100644
index 00000000..a244776f
--- /dev/null
+++ b/src/vnet/dpo/load_balance.c
@@ -0,0 +1,993 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vppinfra/math.h>              /* for fabs */
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/fib/fib_urpf_list.h>
+
+/*
+ * distribution error tolerance for load-balancing
+ */
+const f64 multipath_next_hop_error_tolerance = 0.1;
+
+#undef LB_DEBUG
+
+#ifdef LB_DEBUG
+/* Log a message prefixed with the formatted load-balance. The formatted
+ * string is a u8 vector, so it is printed with %v and freed afterwards. */
+#define LB_DBG(_lb, _fmt, _args...)                                     \
+{                                                                       \
+    u8* _tmp = load_balance_format(load_balance_get_index((_lb)),       \
+                                   0, 0, NULL);                         \
+    clib_warning("lb:[%v]:" _fmt, _tmp, ##_args);                       \
+    vec_free(_tmp);                                                     \
+}
+#else
+#define LB_DBG(_p, _fmt, _args...)
+#endif
+
+
+/**
+ * Pool of all DPOs. It's not static so the DP can have fast access
+ */
+load_balance_t *load_balance_pool;
+
+/**
+ * The one instance of load-balance main
+ */
+load_balance_main_t load_balance_main;
+
+f64
+load_balance_get_multipath_tolerance (void)
+{
+    return (multipath_next_hop_error_tolerance);
+}
+
+static inline index_t
+load_balance_get_index (const load_balance_t *lb)
+{
+    return (lb - load_balance_pool);
+}
+
+static inline dpo_id_t*
+load_balance_get_buckets (load_balance_t *lb)
+{
+    /*
+     * The buckets live either inline in the load-balance object or in
+     * the separately allocated lb_buckets vector; pick whichever is in use.
+     */
+    dpo_id_t *buckets;
+
+    buckets = (LB_HAS_INLINE_BUCKETS(lb) ? lb->lb_buckets_inline : lb->lb_buckets);
+    return (buckets);
+}
+
+static load_balance_t *
+load_balance_alloc_i (void)
+{
+    load_balance_t *lb;
+
+    pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES);
+    memset(lb, 0, sizeof(*lb));
+
+    lb->lb_map = INDEX_INVALID;
+    lb->lb_urpf = INDEX_INVALID;
+    vlib_validate_combined_counter(&(load_balance_main.lbm_to_counters),
+                                   load_balance_get_index(lb));
+    vlib_validate_combined_counter(&(load_balance_main.lbm_via_counters),
+                                   load_balance_get_index(lb));
+    vlib_zero_combined_counter(&(load_balance_main.lbm_to_counters),
+                               load_balance_get_index(lb));
+    vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters),
+                               load_balance_get_index(lb));
+
+    return (lb);
+}
+
+static u8*
+load_balance_format (index_t lbi,
+                     load_balance_format_flags_t flags,
+                     u32 indent,
+                     u8 *s)
+{
+    vlib_counter_t to, via;
+    load_balance_t *lb;
+    dpo_id_t *buckets;
+    u32 i;
+
+    lb = load_balance_get(lbi);
+    vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
+    vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
+    buckets = load_balance_get_buckets(lb);
+
+    s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE);
+    s = format(s, "[index:%d buckets:%d ", lbi, lb->lb_n_buckets);
+    s = format(s, "uRPF:%d ", lb->lb_urpf);
+    s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
+    if (0 != via.packets)
+    {
+        s = format(s, " via:[%Ld:%Ld]",
+                   via.packets, via.bytes);
+    }
+    s = format(s, "]");
+
+    if (INDEX_INVALID != lb->lb_map)
+    {
+        s = format(s, "\n%U%U",
+                   format_white_space, indent+4,
+                   format_load_balance_map, lb->lb_map, indent+4);
+    }
+    for (i = 0; i < lb->lb_n_buckets; i++)
+    {
+        s = format(s, "\n%U[%d] %U",
+                   format_white_space, indent+2,
+                   i,
+                   format_dpo_id,
+                   &buckets[i], indent+6);
+    }
+    return (s);
+}
+
+u8*
+format_load_balance (u8 * s, va_list * args)
+{
+    index_t lbi = va_arg(*args, index_t);
+    load_balance_format_flags_t flags = va_arg(*args, load_balance_format_flags_t);
+
+    return (load_balance_format(lbi, flags, 0, s));
+}
+static u8*
+format_load_balance_dpo (u8 * s, va_list * args)
+{
+    index_t lbi = va_arg(*args, index_t);
+    u32 indent = va_arg(*args, u32);
+
+    return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s));
+}
+
+
+static load_balance_t *
+load_balance_create_i (u32 num_buckets,
+                       dpo_proto_t lb_proto,
+                       flow_hash_config_t fhc)
+{
+    load_balance_t *lb;
+
+    lb = load_balance_alloc_i();
+    lb->lb_hash_config = fhc;
+    lb->lb_n_buckets = num_buckets;
+    lb->lb_n_buckets_minus_1 = num_buckets-1;
+    lb->lb_proto = lb_proto;
+
+    if (!LB_HAS_INLINE_BUCKETS(lb))
+    {
+        vec_validate_aligned(lb->lb_buckets,
+                             lb->lb_n_buckets - 1,
+                             CLIB_CACHE_LINE_BYTES);
+    }
+
+    LB_DBG(lb, "create");
+
+    return (lb);
+}
+
+index_t
+load_balance_create (u32 n_buckets,
+                     dpo_proto_t lb_proto,
+                     flow_hash_config_t fhc)
+{
+    return (load_balance_get_index(load_balance_create_i(n_buckets, lb_proto, fhc)));
+}
+
+static inline void
+load_balance_set_bucket_i (load_balance_t *lb,
+                           u32 bucket,
+                           dpo_id_t *buckets,
+                           const dpo_id_t *next)
+{
+    dpo_stack(DPO_LOAD_BALANCE, lb->lb_proto, &buckets[bucket], next);
+}
+
+void
+load_balance_set_bucket (index_t lbi,
+                         u32 bucket,
+                         const dpo_id_t *next)
+{
+    load_balance_t *lb;
+    dpo_id_t *buckets;
+
+    lb = load_balance_get(lbi);
+    buckets = load_balance_get_buckets(lb);
+
+    ASSERT(bucket < lb->lb_n_buckets);
+
+    load_balance_set_bucket_i(lb, bucket, buckets, next);
+}
+
+int
+load_balance_is_drop (const dpo_id_t *dpo)
+{
+    load_balance_t *lb;
+
+    /* anything that is not a load-balance is not a load-balanced drop */
+    if (DPO_LOAD_BALANCE != dpo->dpoi_type)
+    {
+        return (0);
+    }
+    lb = load_balance_get(dpo->dpoi_index);
+
+    /* only a load-balance whose single bucket is a drop DPO qualifies */
+    return ((1 == lb->lb_n_buckets) &&
+            dpo_is_drop(load_balance_get_bucket_i(lb, 0)));
+}
+
+void
+load_balance_set_urpf (index_t lbi,
+		       index_t urpf)
+{
+    load_balance_t *lb = load_balance_get(lbi);
+    index_t old = lb->lb_urpf;
+
+    /*
+     * Replace the uRPF list used by the load-balance.
+     * packets in flight will see this change. but it's atomic, so :P
+     * Lock the new list before unlocking the old one, so that re-setting
+     * the same list can never transiently release its last reference.
+     */
+    fib_urpf_list_lock(urpf);
+    lb->lb_urpf = urpf;
+
+    fib_urpf_list_unlock(old);
+}
+
+index_t
+load_balance_get_urpf (index_t lbi)
+{
+    load_balance_t *lb;
+
+    lb = load_balance_get(lbi);
+
+    return (lb->lb_urpf);
+}
+
+const dpo_id_t *
+load_balance_get_bucket (index_t lbi,
+                         u32 bucket)
+{
+    load_balance_t *lb;
+
+    lb = load_balance_get(lbi);
+
+    return (load_balance_get_bucket_i(lb, bucket));
+}
+
+static int
+next_hop_sort_by_weight (load_balance_path_t * n1,
+                         load_balance_path_t * n2)
+{
+    return ((int) n1->path_weight - (int) n2->path_weight);
+}
+
+/* Writes to *normalized_next_hops a copy of raw_next_hops sorted by weight, with each
+   weight replaced by the number of adjacencies (buckets) given to that next hop; the
+   weight sum used is stored in *sum_weight_in. Returns number of adjacencies in block. */
+u32
+ip_multipath_normalize_next_hops (load_balance_path_t * raw_next_hops,
+                                  load_balance_path_t ** normalized_next_hops,
+                                  u32 *sum_weight_in,
+                                  f64 multipath_next_hop_error_tolerance)
+{
+    load_balance_path_t * nhs;
+    uword n_nhs, n_adj, n_adj_left, i, sum_weight;
+    f64 norm, error;
+
+    n_nhs = vec_len (raw_next_hops);
+    ASSERT (n_nhs > 0);
+    if (n_nhs == 0)
+        return 0;
+
+    /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
+    nhs = *normalized_next_hops;
+    vec_validate (nhs, 2*n_nhs - 1);
+
+    /* Fast path: 1 next hop in block. */
+    n_adj = n_nhs;
+    if (n_nhs == 1)
+    {
+        nhs[0] = raw_next_hops[0];
+        nhs[0].path_weight = 1;
+        _vec_len (nhs) = 1;
+        sum_weight = 1;
+        goto done;
+    }
+
+    else if (n_nhs == 2)
+    {
+        int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;
+
+        /* Fast sort. */
+        nhs[0] = raw_next_hops[cmp];
+        nhs[1] = raw_next_hops[cmp ^ 1];
+
+        /* Fast path: equal cost multipath with 2 next hops. */
+        if (nhs[0].path_weight == nhs[1].path_weight)
+        {
+            nhs[0].path_weight = nhs[1].path_weight = 1;
+            _vec_len (nhs) = 2;
+            sum_weight = 2;
+            goto done;
+        }
+    }
+    else
+    {
+        clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
+        qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
+    }
+
+    /* Find total weight to normalize weights. */
+    sum_weight = 0;
+    for (i = 0; i < n_nhs; i++)
+        sum_weight += nhs[i].path_weight;
+
+    /* In the unlikely case that all weights are given as 0, set them all to 1. */
+    if (sum_weight == 0)
+    {
+        for (i = 0; i < n_nhs; i++)
+            nhs[i].path_weight = 1;
+        sum_weight = n_nhs;
+    }
+
+    /* Save copies of all next hop weights to avoid being overwritten in loop below. */
+    for (i = 0; i < n_nhs; i++)
+        nhs[n_nhs + i].path_weight = nhs[i].path_weight;
+
+    /* Try larger and larger power of 2 sized adjacency blocks until we find one
+       where traffic flows to within the error tolerance of the specified weights. */
+    for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
+    {
+        error = 0;
+
+        norm = n_adj / ((f64) sum_weight);
+        n_adj_left = n_adj;
+        for (i = 0; i < n_nhs; i++)
+        {
+            f64 nf = nhs[n_nhs + i].path_weight * norm; /* use saved weights */
+            word n = flt_round_nearest (nf);
+
+            n = n > n_adj_left ? n_adj_left : n;
+            n_adj_left -= n;
+            error += fabs (nf - n);
+            nhs[i].path_weight = n;
+
+            if (0 == nhs[i].path_weight)
+            {
+                /*
+                 * when the weight skew is high (norm is small) and n == nf.
+                 * without this correction the path with a low weight would have
+                 * no representation in the load-balance - don't want that.
+                 * So set the error to its maximum, which sends us round again
+                 * with more buckets. pays ya money takes ya choice.
+                 */
+                error = n_adj;
+                break;
+            }
+        }
+
+        nhs[0].path_weight += n_adj_left;
+
+        /* Average error per adjacency within tolerance with this size adjacency block? */
+        if (error <= multipath_next_hop_error_tolerance*n_adj)
+        {
+            /* Trim the vector to the i entries filled in (drops the saved weights). */
+            _vec_len (nhs) = i;
+            break;
+        }
+    }
+
+done:
+    /* Save vector for next call. */
+    *normalized_next_hops = nhs;
+    *sum_weight_in = sum_weight;
+    return n_adj;
+}
+
+static load_balance_path_t *
+load_balance_multipath_next_hop_fixup (load_balance_path_t *nhs,
+                                       dpo_proto_t drop_proto)
+{
+    if (0 == vec_len(nhs))
+    {
+        load_balance_path_t *nh;
+
+        /*
+         * we need something for the load-balance. so use the drop
+         */
+        vec_add2(nhs, nh, 1);
+
+        nh->path_weight = 1;
+        dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
+    }
+
+    return (nhs);
+}
+
+/*
+ * Fill in adjacencies in block based on corresponding
+ * next hop adjacencies: each next-hop in nhs is stacked into
+ * path_weight consecutive entries of buckets.
+ */
+static void
+load_balance_fill_buckets (load_balance_t *lb,
+                           load_balance_path_t *nhs,
+                           dpo_id_t *buckets,
+                           u32 n_buckets)
+{
+    load_balance_path_t * nh;
+    /* as wide as n_buckets, so the counters cannot wrap before the ASSERT fires */
+    u32 ii, bucket = 0;
+
+    /*
+     * the next-hops have normalised weights. that means their sum is the number
+     * of buckets we need to fill.
+     */
+    vec_foreach (nh, nhs)
+    {
+        for (ii = 0; ii < nh->path_weight; ii++)
+        {
+            ASSERT(bucket < n_buckets);
+            load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
+        }
+    }
+}
+
+static inline void
+load_balance_set_n_buckets (load_balance_t *lb,
+                            u32 n_buckets)
+{
+    lb->lb_n_buckets = n_buckets;
+    lb->lb_n_buckets_minus_1 = n_buckets-1;
+}
+
+void
+load_balance_multipath_update (const dpo_id_t *dpo,
+                               load_balance_path_t * raw_next_hops,
+                               load_balance_flags_t flags)
+{
+    u32 sum_of_weights,n_buckets, ii;
+    load_balance_path_t * nh, * nhs;
+    index_t lbmi, old_lbmi;
+    load_balance_t *lb;
+    dpo_id_t *tmp_dpo;
+
+    nhs = NULL;
+
+    ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
+    lb = load_balance_get(dpo->dpoi_index);
+    raw_next_hops =
+        load_balance_multipath_next_hop_fixup(raw_next_hops,
+                                              lb->lb_proto);
+    n_buckets =
+        ip_multipath_normalize_next_hops(raw_next_hops,
+                                         &nhs,
+                                         &sum_of_weights,
+                                         multipath_next_hop_error_tolerance);
+
+    ASSERT (n_buckets >= vec_len (raw_next_hops));
+
+    /*
+     * Save the old load-balance map used, and get a new one if required.
+     */
+    old_lbmi = lb->lb_map;
+    if (flags & LOAD_BALANCE_FLAG_USES_MAP)
+    {
+        lbmi = load_balance_map_add_or_lock(n_buckets, sum_of_weights, nhs);
+    }
+    else
+    {
+        lbmi = INDEX_INVALID;
+    }
+
+    if (0 == lb->lb_n_buckets)
+    {
+        /*
+         * first time initialisation. no packets inflight, so we can write
+         * at leisure.
+         */
+        load_balance_set_n_buckets(lb, n_buckets);
+
+        if (!LB_HAS_INLINE_BUCKETS(lb))
+            vec_validate_aligned(lb->lb_buckets,
+                                 lb->lb_n_buckets - 1,
+                                 CLIB_CACHE_LINE_BYTES);
+
+        load_balance_fill_buckets(lb, nhs,
+                                  load_balance_get_buckets(lb),
+                                  n_buckets);
+        lb->lb_map = lbmi;
+    }
+    else
+    {
+        /*
+         * This is a modification of an existing load-balance.
+         * We need to ensure that packets inflight see a consistent state, that
+         * is the number of reported buckets the LB has (read from
+         * lb_n_buckets_minus_1) is not more than it actually has. So if the
+         * number of buckets is increasing, we must update the bucket array first,
+         * then the reported number. vice-versa if the number of buckets goes down.
+         */
+        if (n_buckets == lb->lb_n_buckets)
+        {
+            /*
+             * no change in the number of buckets. we can simply fill what
+             * is new over what is old.
+             */
+            load_balance_fill_buckets(lb, nhs,
+                                      load_balance_get_buckets(lb),
+                                      n_buckets);
+            lb->lb_map = lbmi;
+        }
+        else if (n_buckets > lb->lb_n_buckets)
+        {
+            /*
+             * we have more buckets. the old load-balance map (if there is one)
+             * will remain valid, i.e. mapping to indices within range, so we
+             * update it last.
+             */
+            if (n_buckets > LB_NUM_INLINE_BUCKETS &&
+                lb->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
+            {
+                /*
+                 * the new increased number of buckets is crossing the threshold
+                 * from the inline storage to out-line. Alloc the outline buckets
+                 * first, then fixup the number. then reset the inlines.
+                 */
+                ASSERT(NULL == lb->lb_buckets);
+                vec_validate_aligned(lb->lb_buckets,
+                                     n_buckets - 1,
+                                     CLIB_CACHE_LINE_BYTES);
+
+                load_balance_fill_buckets(lb, nhs,
+                                          lb->lb_buckets,
+                                          n_buckets);
+                CLIB_MEMORY_BARRIER();
+                load_balance_set_n_buckets(lb, n_buckets);
+
+                CLIB_MEMORY_BARRIER();
+
+                for (ii = 0; ii < LB_NUM_INLINE_BUCKETS; ii++)
+                {
+                    dpo_reset(&lb->lb_buckets_inline[ii]);
+                }
+            }
+            else
+            {
+                if (n_buckets <= LB_NUM_INLINE_BUCKETS)
+                {
+                    /*
+                     * we are not crossing the threshold and it's still inline buckets.
+                     * we can write the new on the old..
+                     */
+                    load_balance_fill_buckets(lb, nhs,
+                                              load_balance_get_buckets(lb),
+                                              n_buckets);
+                    CLIB_MEMORY_BARRIER();
+                    load_balance_set_n_buckets(lb, n_buckets);
+                }
+                else
+                {
+                    /*
+                     * we are not crossing the threshold. We need a new bucket array to
+                     * hold the increased number of choices.
+                     */
+                    dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
+
+                    new_buckets = NULL;
+                    old_buckets = load_balance_get_buckets(lb);
+
+                    vec_validate_aligned(new_buckets,
+                                         n_buckets - 1,
+                                         CLIB_CACHE_LINE_BYTES);
+
+                    load_balance_fill_buckets(lb, nhs, new_buckets, n_buckets);
+                    CLIB_MEMORY_BARRIER();
+                    lb->lb_buckets = new_buckets;
+                    CLIB_MEMORY_BARRIER();
+                    load_balance_set_n_buckets(lb, n_buckets);
+
+                    vec_foreach(tmp_dpo, old_buckets)
+                    {
+                        dpo_reset(tmp_dpo);
+                    }
+                    vec_free(old_buckets);
+                }
+            }
+
+            /*
+             * buckets fixed. ready for the MAP update.
+             */
+            lb->lb_map = lbmi;
+        }
+        else
+        {
+            /*
+             * bucket size shrinkage.
+             * Any map we have will be based on the old
+             * larger number of buckets, so will be translating to indices
+             * out of range. So the new MAP must be installed first.
+             */
+            lb->lb_map = lbmi;
+            CLIB_MEMORY_BARRIER();
+
+
+            if (n_buckets <= LB_NUM_INLINE_BUCKETS &&
+                lb->lb_n_buckets > LB_NUM_INLINE_BUCKETS)
+            {
+                /*
+                 * the new decreased number of buckets is crossing the threshold
+                 * from out-line storage to inline:
+                 *   1 - Fill the inline buckets,
+                 *   2 - fixup the number (and this point the inline buckets are
+                 *       used).
+                 *   3 - free the outline buckets
+                 */
+                load_balance_fill_buckets(lb, nhs,
+                                          lb->lb_buckets_inline,
+                                          n_buckets);
+                CLIB_MEMORY_BARRIER();
+                load_balance_set_n_buckets(lb, n_buckets);
+                CLIB_MEMORY_BARRIER();
+
+                vec_foreach(tmp_dpo, lb->lb_buckets)
+                {
+                    dpo_reset(tmp_dpo);
+                }
+                vec_free(lb->lb_buckets);
+            }
+            else
+            {
+                /*
+                 * not crossing the threshold.
+                 *  1 - update the number to the smaller size
+                 *  2 - write the new buckets
+                 *  3 - reset those no longer used.
+                 */
+                dpo_id_t *buckets;
+                u32 old_n_buckets;
+
+                old_n_buckets = lb->lb_n_buckets;
+                buckets = load_balance_get_buckets(lb);
+
+                load_balance_set_n_buckets(lb, n_buckets);
+                CLIB_MEMORY_BARRIER();
+
+                load_balance_fill_buckets(lb, nhs,
+                                          buckets,
+                                          n_buckets);
+
+                for (ii = old_n_buckets-n_buckets; ii < old_n_buckets; ii++)
+                {
+                    dpo_reset(&buckets[ii]);
+                }
+            }
+        }
+    }
+
+    vec_foreach (nh, nhs)
+    {
+        dpo_reset(&nh->path_dpo);
+    }
+    vec_free(nhs);
+
+    load_balance_map_unlock(old_lbmi);
+}
+
+/**
+ * DPO lock callback: take one more reference on the load-balance that
+ * the DPO's index refers to.
+ */
+static void
+load_balance_lock (dpo_id_t *dpo)
+{
+    load_balance_t *lb = load_balance_get(dpo->dpoi_index);
+
+    lb->lb_locks += 1;
+}
+
+/**
+ * Release everything a load-balance holds and return it to the pool.
+ *
+ * Every bucket DPO is reset, the out-of-line bucket vector (if one is in
+ * use) is freed, and the uRPF list and load-balance map are unlocked
+ * before the object itself is put back in load_balance_pool.
+ */
+static void
+load_balance_destroy (load_balance_t *lb)
+{
+    dpo_id_t *bucket_array;
+    u32 bkt;
+
+    bucket_array = load_balance_get_buckets(lb);
+
+    bkt = 0;
+    while (bkt < lb->lb_n_buckets)
+    {
+        dpo_reset(&bucket_array[bkt]);
+        bkt++;
+    }
+
+    LB_DBG(lb, "destroy");
+
+    /* only out-of-line buckets were heap allocated */
+    if (!LB_HAS_INLINE_BUCKETS(lb))
+    {
+        vec_free(lb->lb_buckets);
+    }
+
+    fib_urpf_list_unlock(lb->lb_urpf);
+    load_balance_map_unlock(lb->lb_map);
+
+    pool_put(load_balance_pool, lb);
+}
+
+/**
+ * DPO unlock callback: drop one reference on the load-balance and
+ * destroy it once the lock count reaches zero.
+ */
+static void
+load_balance_unlock (dpo_id_t *dpo)
+{
+    load_balance_t *lb = load_balance_get(dpo->dpoi_index);
+
+    lb->lb_locks -= 1;
+
+    if (0 != lb->lb_locks)
+    {
+        /* still in use by someone else */
+        return;
+    }
+
+    load_balance_destroy(lb);
+}
+
+/**
+ * DPO memory-show callback: report the load-balance pool usage (elements
+ * in use, pool length and per-object size) and then the usage of the
+ * load-balance maps.
+ */
+static void
+load_balance_mem_show (void)
+{
+    fib_show_memory_usage("load-balance",
+			  pool_elts(load_balance_pool),
+			  pool_len(load_balance_pool),
+			  sizeof(load_balance_t));
+    load_balance_map_show_mem();
+}
+
+/**
+ * The virtual function table registered for the DPO_LOAD_BALANCE type:
+ * lock/unlock, formatting and memory reporting.
+ */
+const static dpo_vft_t lb_vft = {
+    .dv_lock = load_balance_lock,
+    .dv_unlock = load_balance_unlock,
+    .dv_format = format_load_balance_dpo,
+    .dv_mem_show = load_balance_mem_show,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a load-balance
+ *        object.
+ *
+ * this means that these graph nodes are ones from which a load-balance is the
+ * parent object in the DPO-graph.
+ *
+ * We do not list all the load-balance nodes, such as the *-lookup. instead
+ * we are relying on the correct use of the .sibling_of field when setting
+ * up these sibling nodes.
+ */
+/* NULL-terminated list of graph node names for IPv4 */
+const static char* const load_balance_ip4_nodes[] =
+{
+    "ip4-load-balance",
+    NULL,
+};
+/* NULL-terminated list of graph node names for IPv6 */
+const static char* const load_balance_ip6_nodes[] =
+{
+    "ip6-load-balance",
+    NULL,
+};
+/* NULL-terminated list of graph node names for MPLS */
+const static char* const load_balance_mpls_nodes[] =
+{
+    "mpls-load-balance",
+    NULL,
+};
+/* NULL-terminated list of graph node names for L2/ethernet */
+const static char* const load_balance_l2_nodes[] =
+{
+    "l2-load-balance",
+    NULL,
+};
+/*
+ * The node-name lists above, indexed by DPO protocol. This is the table
+ * handed to dpo_register() for the load-balance type.
+ */
+const static char* const * const load_balance_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = load_balance_ip4_nodes,
+    [DPO_PROTO_IP6]  = load_balance_ip6_nodes,
+    [DPO_PROTO_MPLS] = load_balance_mpls_nodes,
+    [DPO_PROTO_ETHERNET] = load_balance_l2_nodes,
+};
+
+/**
+ * Module initialisation: register the load-balance DPO type with its
+ * virtual function table and per-protocol graph nodes, then initialise
+ * the load-balance map module.
+ */
+void
+load_balance_module_init (void)
+{
+    dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes);
+
+    load_balance_map_module_init();
+}
+
+/**
+ * CLI handler for "show load-balance [<index>]".
+ *
+ * With an index, the matching load-balance is printed in detail; an
+ * index that does not refer to an in-use pool element is reported
+ * rather than dereferenced. With no index, every load-balance in the
+ * pool is printed in brief form.
+ *
+ * @param vm    vlib main, used for CLI output
+ * @param input the remaining CLI input to parse
+ * @param cmd   the CLI command (unused)
+ * @return always 0 (no error)
+ */
+static clib_error_t *
+load_balance_show (vlib_main_t * vm,
+                   unformat_input_t * input,
+                   vlib_cli_command_t * cmd)
+{
+    index_t lbi = INDEX_INVALID;
+
+    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+        if (unformat (input, "%d", &lbi))
+            ;
+        else
+            break;
+    }
+
+    if (INDEX_INVALID != lbi)
+    {
+        /*
+         * the index comes straight from the user; make sure it names a
+         * live pool element before the formatter looks it up.
+         */
+        if (pool_is_free_index(load_balance_pool, lbi))
+        {
+            vlib_cli_output (vm, "no such load-balance:%d", lbi);
+        }
+        else
+        {
+            vlib_cli_output (vm, "%U", format_load_balance, lbi,
+                             LOAD_BALANCE_FORMAT_DETAIL);
+        }
+    }
+    else
+    {
+        load_balance_t *lb;
+
+        pool_foreach(lb, load_balance_pool,
+        ({
+            vlib_cli_output (vm, "%U", format_load_balance,
+                             load_balance_get_index(lb),
+                             LOAD_BALANCE_FORMAT_NONE);
+        }));
+    }
+
+    return 0;
+}
+
+/* Registers the "show load-balance" CLI command, handled by load_balance_show */
+VLIB_CLI_COMMAND (load_balance_show_command, static) = {
+    .path = "show load-balance",
+    .short_help = "show load-balance [<index>]",
+    .function = load_balance_show,
+};
+
+
+/**
+ * Compute the default flow hash of an IP header of either version.
+ *
+ * The high nibble of the first byte selects the hash: 0x4 is hashed as
+ * IPv4, anything else as IPv6.
+ *
+ * @param data pointer to the start of the IP header
+ * @return the flow hash
+ */
+always_inline u32
+ip_flow_hash (void *data)
+{
+  ip4_header_t *ip4 = (ip4_header_t *) data;
+  u8 version_nibble = ip4->ip_version_and_header_length & 0xF0;
+
+  if (0x40 != version_nibble)
+    return ip6_compute_flow_hash ((ip6_header_t *) data,
+				  IP_FLOW_HASH_DEFAULT);
+
+  return ip4_compute_flow_hash (ip4, IP_FLOW_HASH_DEFAULT);
+}
+
+/**
+ * Load a MAC address as an integer: a u64 is read from the address and
+ * masked down to its low 48 bits.
+ *
+ * NOTE(review): this performs an 8-byte load through a cast u8 pointer,
+ * so two bytes beyond the 6-byte MAC are read and the access may be
+ * unaligned - confirm that is acceptable for all callers/platforms.
+ */
+always_inline u64
+mac_to_u64 (u8 * m)
+{
+  u64 raw = *((u64 *) m);
+
+  return (raw & 0xffffffffffff);
+}
+
+/**
+ * Compute a flow hash for an ethernet frame.
+ *
+ * Three values are mixed together: the IP flow hash of the payload when
+ * the ether-type is IPv4 or IPv6 (otherwise the raw ether-type field),
+ * the destination MAC and the source MAC.
+ *
+ * @param b0 buffer whose current data points at the ethernet header
+ * @return the low 32 bits of the mixed hash
+ */
+always_inline u32
+l2_flow_hash (vlib_buffer_t * b0)
+{
+  ethernet_header_t *eth;
+  uword l2_hdr_len;
+  u16 ether_type;
+  u64 a, b, c;
+
+  eth = vlib_buffer_get_current (b0);
+  ether_type = clib_net_to_host_u16 (eth->type);
+  l2_hdr_len = ethernet_buffer_header_size (b0);
+
+  /* since we have 2 cache lines, use them */
+  if (ether_type != ETHERNET_TYPE_IP4 && ether_type != ETHERNET_TYPE_IP6)
+    {
+      /* not IP: seed with the ether-type as it appears in the header */
+      a = eth->type;
+    }
+  else
+    {
+      a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + l2_hdr_len);
+    }
+
+  b = mac_to_u64 ((u8 *) eth->dst_address);
+  c = mac_to_u64 ((u8 *) eth->src_address);
+  hash_mix64 (a, b, c);
+
+  return (u32) c;
+}
+
+/**
+ * Per-packet trace data recorded by the l2-load-balance node.
+ */
+typedef struct load_balance_trace_t_
+{
+    /* index of the load-balance object the packet arrived with */
+    index_t lb_index;
+} load_balance_trace_t;
+
+/**
+ * Graph node function for "l2-load-balance".
+ *
+ * For each packet: read the load-balance index from the buffer's TX
+ * adjacency slot, compute the L2 flow hash, use it to select one of the
+ * load-balance's buckets, then forward the packet to that bucket's next
+ * node with the bucket's DPO index stored back in the TX adjacency slot.
+ *
+ * @param vm    vlib main
+ * @param node  this node's runtime
+ * @param frame the frame of buffer indices to process
+ * @return the number of packets processed
+ */
+static uword
+l2_load_balance (vlib_main_t * vm,
+		 vlib_node_runtime_t * node,
+		 vlib_frame_t * frame)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  vlib_buffer_t *b0;
+	  u32 bi0, lbi0, next0;
+	  const dpo_id_t *dpo0;
+	  const load_balance_t *lb0;
+
+	  /* speculatively enqueue to the current next frame */
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+
+	  /* the load-balance to use was left in the TX adjacency slot */
+	  lbi0 =  vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+	  lb0 = load_balance_get(lbi0);
+
+	  /* hash over the ethernet header (and IP header, if present) */
+	  vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0);
+
+	  /*
+	   * pick the bucket by masking the hash with (n_buckets - 1);
+	   * presumably relies on the bucket count being a power of 2.
+	   */
+	  dpo0 = load_balance_get_bucket_i(lb0, 
+					   vnet_buffer(b0)->ip.flow_hash &
+					   (lb0->lb_n_buckets_minus_1));
+
+	  next0 = dpo0->dpoi_next_node;
+	  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
+							 sizeof (*tr));
+	      tr->lb_index = lbi0;
+	    }
+	  /* fix up the enqueue if next0 differs from the speculated next */
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/**
+ * Trace formatter for the l2-load-balance node.
+ *
+ * Consumes (vlib_main_t *, vlib_node_t *, load_balance_trace_t *) from
+ * the argument list and appends the traced load-balance index to s.
+ */
+static u8 *
+format_load_balance_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  load_balance_trace_t *trace = va_arg (*args, load_balance_trace_t *);
+
+  return format (s, "L2-load-balance: index %d", trace->lb_index);
+}
+
+/**
+ * @brief Registration of the "l2-load-balance" graph node, which runs
+ * l2_load_balance over frames of u32 buffer indices.
+ *
+ * Only "error-drop" is listed as a static next node, while the node
+ * function takes each packet's next from the chosen bucket's DPO;
+ * presumably the remaining edges are added when DPOs are stacked on the
+ * load-balance - TODO confirm.
+ */
+VLIB_REGISTER_NODE (l2_load_balance_node) = {
+  .function = l2_load_balance,
+  .name = "l2-load-balance",
+  .vector_size = sizeof (u32),
+
+  .format_trace = format_load_balance_trace,
+  .n_next_nodes = 1,
+  .next_nodes = {
+      [0] = "error-drop",
+  },
+};
diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h
new file mode 100644
index 00000000..dc6485e6
--- /dev/null
+++ b/src/vnet/dpo/load_balance.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * \brief
+ * The load-balance object represents an ECMP choice. The buckets of a load
+ * balance object point to the sub-graph after the choice is made.
+ * The load-balance object is also the object type returned from a FIB table lookup.
+ * As such it needs to represent the case where there is only one choice. It may
+ * seem like overkill to use a load-balance object in this case, but the reason
+ * is for performance. If the load-balance object were not the result of the FIB
+ * lookup, then some other object would be. The case where there was ECMP
+ * this other object would need a load-balance as a parent and hence just add
+ * an unnecessary indirection.
+ *
+ * It is also the object in the DP that represents a via-fib-entry in a recursive
+ * route.
+ *
+ */
+
+#ifndef __LOAD_BALANCE_H__
+#define __LOAD_BALANCE_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/fib/fib_types.h>
+
+/**
+ * Load-balance main: the combined counter sets kept for load-balance
+ * objects.
+ */
+typedef struct load_balance_main_t_
+{
+    /* NOTE(review): presumably counts packets forwarded 'to' the LB's prefix - confirm */
+    vlib_combined_counter_main_t lbm_to_counters;
+    /* NOTE(review): presumably counts packets forwarded 'via' the LB (recursion) - confirm */
+    vlib_combined_counter_main_t lbm_via_counters;
+} load_balance_main_t;
+
+/* the single load-balance main instance, defined outside this header */
+extern load_balance_main_t load_balance_main;
+
+/**
+ * The number of buckets that a load-balance object can have and still
+ * fit in one cache-line. Load-balances with at most this many buckets
+ * use the lb_buckets_inline array rather than the lb_buckets vector.
+ */
+#define LB_NUM_INLINE_BUCKETS 4
+
+/**
+ * @brief One path from an [EU]CMP set that the client wants to add to a
+ * load-balance object
+ */
+typedef struct load_balance_path_t_ {
+    /**
+     * ID of the data-path object that packets using this path are sent to.
+     */
+    dpo_id_t path_dpo;
+
+    /**
+     * The index of the FIB path. This is the index load-balance maps
+     * record for the path.
+     */
+    fib_node_index_t path_index;
+
+    /**
+     * Weight for the path.
+     */
+    u32 path_weight;
+} load_balance_path_t;
+
+/**
+ * The FIB DPO provides:
+ *  - load-balancing over the next DPOs in the chain/graph
+ *  - per-route counters
+ */
+typedef struct load_balance_t_ {
+    /**
+     * number of buckets in the load-balance. always a power of 2.
+     */
+    u16 lb_n_buckets;
+    /**
+     * number of buckets in the load-balance - 1. used in the switch path
+     * as part of the hash calculation (it is the mask applied to the
+     * flow hash to select a bucket).
+     */
+    u16 lb_n_buckets_minus_1;
+
+   /**
+     * The protocol of packets that traverse this LB.
+     * Needed in combination with the flow hash config to determine how
+     * to hash.
+     * u8.
+     */
+    dpo_proto_t lb_proto;
+
+    /**
+     * The number of locks, which is approximately the number of users,
+     * of this load-balance.
+     * Load-balance objects of via-entries are heavily shared by recursives,
+     * so the lock count is a u32.
+     */
+    u32 lb_locks;
+
+    /**
+     * index of the load-balance map, INVALID if this LB does not use one
+     */
+    index_t lb_map;
+
+    /**
+     * This is the index of the uRPF list for this LB
+     */
+    index_t lb_urpf;
+
+    /**
+     * the hash config to use when selecting a bucket. this is a u16
+     */
+    flow_hash_config_t lb_hash_config;
+
+    /**
+     * Vector of buckets containing the next DPOs, sized as lb_n_buckets.
+     * Used only when there are more than LB_NUM_INLINE_BUCKETS buckets.
+     */
+    dpo_id_t *lb_buckets;
+
+    /**
+     * The rest of the cache line is used for buckets. In the common case
+     * where there are no more than 4 buckets, the buckets are on the
+     * same cache line and we save ourselves a pointer dereference in
+     * the data-path.
+     */
+    dpo_id_t lb_buckets_inline[LB_NUM_INLINE_BUCKETS];
+} load_balance_t;
+
+/* the inline-bucket optimisation depends on the whole object fitting in one cache line */
+STATIC_ASSERT(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES,
+	      "A load_balance object size exceeds one cachline");
+
+/**
+ * Flags controlling load-balance formatting/display
+ */
+typedef enum load_balance_format_flags_t_ {
+    /* no flags; used by "show load-balance" when listing every object */
+    LOAD_BALANCE_FORMAT_NONE,
+    /* detailed output; used by "show load-balance <index>" */
+    LOAD_BALANCE_FORMAT_DETAIL = (1 << 0),
+} load_balance_format_flags_t;
+
+/**
+ * Flags controlling load-balance creation and modification
+ */
+typedef enum load_balance_flags_t_ {
+    LOAD_BALANCE_FLAG_NONE = 0,
+    /*
+     * when set on a multipath update, a load-balance map is acquired for
+     * the normalised paths; otherwise the LB's map index is INDEX_INVALID.
+     */
+    LOAD_BALANCE_FLAG_USES_MAP = (1 << 0),
+} load_balance_flags_t;
+
+/*
+ * Control-plane API of the load-balance object. The definitions live in
+ * load_balance.c.
+ */
+
+/* create a load-balance; presumably returns the new object's pool index - confirm */
+extern index_t load_balance_create(u32 num_buckets,
+				   dpo_proto_t lb_proto,
+				   flow_hash_config_t fhc);
+/*
+ * (re)program the buckets of the load-balance identified by 'dpo' from
+ * the given set of paths. 'flags' selects whether a load-balance map is
+ * used (LOAD_BALANCE_FLAG_USES_MAP).
+ */
+extern void load_balance_multipath_update(
+    const dpo_id_t *dpo,
+    load_balance_path_t * raw_next_hops,
+    load_balance_flags_t flags);
+
+extern void load_balance_set_bucket(index_t lbi,
+				    u32 bucket,
+				    const dpo_id_t *next);
+extern void load_balance_set_urpf(index_t lbi,
+				  index_t urpf);
+extern index_t load_balance_get_urpf(index_t lbi);
+
+/* format() callback; used with "%U" and passed (index_t, load_balance_format_flags_t) */
+extern u8* format_load_balance(u8 * s, va_list * args);
+
+extern const dpo_id_t *load_balance_get_bucket(index_t lbi,
+					       u32 bucket);
+extern int load_balance_is_drop(const dpo_id_t *dpo);
+
+extern f64 load_balance_get_multipath_tolerance(void);
+
+/**
+ * The load-balance pool is exposed, breaking encapsulation, so that the
+ * data-path can translate an index to an object without a function call.
+ */
+extern load_balance_t *load_balance_pool;
+
+/**
+ * Translate a load-balance index into a pointer to the pool element.
+ */
+static inline load_balance_t*
+load_balance_get (index_t lbi)
+{
+    load_balance_t *lb;
+
+    lb = pool_elt_at_index(load_balance_pool, lbi);
+
+    return (lb);
+}
+
+/*
+ * True when the load-balance has few enough buckets that they are held
+ * in lb_buckets_inline rather than in the lb_buckets vector.
+ */
+#define LB_HAS_INLINE_BUCKETS(_lb)		\
+    ((_lb)->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
+
+/**
+ * Get the DPO stored in a given bucket of a load-balance.
+ *
+ * The bucket is read from the inline array when the load-balance is
+ * small enough to use it (the expected case), else from the bucket
+ * vector.
+ *
+ * @param lb     the load-balance
+ * @param bucket the bucket number; must be less than lb->lb_n_buckets
+ * @return pointer to the bucket's DPO
+ */
+static inline const dpo_id_t *
+load_balance_get_bucket_i (const load_balance_t *lb,
+                           u32 bucket)
+{
+    const dpo_id_t *bucket_array;
+
+    ASSERT(bucket < lb->lb_n_buckets);
+
+    if (PREDICT_TRUE(LB_HAS_INLINE_BUCKETS(lb)))
+        bucket_array = lb->lb_buckets_inline;
+    else
+        bucket_array = lb->lb_buckets;
+
+    return (&bucket_array[bucket]);
+}
+
+/* one-time initialisation of the load-balance module */
+extern void load_balance_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/load_balance_map.c b/src/vnet/dpo/load_balance_map.c
new file mode 100644
index 00000000..70ce1bf7
--- /dev/null
+++ b/src/vnet/dpo/load_balance_map.c
@@ -0,0 +1,575 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
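+/**
+ * @brief Load-balance maps: shared bucket-index translation tables, keyed
+ * by the set of paths, that are re-striped when the state of a path changes.
+ */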
+#include <vnet/fib/fib_path.h>
+#include <vnet/fib/fib_node_list.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/load_balance.h>
+
+/**
+ * A hash-table of load-balance maps by path index.
+ * Each value is a fib_node_list_t holding the indices of all maps that
+ * include the path; this provides the fast lookup of the LB maps when a
+ * path goes down.
+ */
+static uword *lb_maps_by_path_index;
+
+/**
+ * A hash-table of load-balance maps by set of paths.
+ * This provides the LB map sharing.
+ * LB maps do not necessarily use all the paths in the list, since
+ * the entry that is requesting the map, may not have an out-going
+ * label for each of the paths.
+ *
+ * Keys are either a map's pool index (encoded as an odd number) or a
+ * pointer to a map; values are the map's pool index.
+ */
+static uword *load_balance_map_db;
+
+/**
+ * State flags for a path within a load-balance map. Packed so the enum
+ * occupies the smallest integer type that holds its values.
+ */
+typedef enum load_balance_map_path_flags_t_
+{
+    LOAD_BALANCE_MAP_PATH_UP     = (1 << 0),
+    LOAD_BALANCE_MAP_PATH_USABLE = (1 << 1),
+} __attribute__ ((packed)) load_balance_map_path_flags_t;
+
+/**
+ * One path as recorded in a load-balance map.
+ */
+typedef struct load_balance_map_path_t_ {
+    /**
+     * Index of the path
+     */
+    fib_node_index_t lbmp_index;
+
+    /**
+     * Sibling Index in the list of all maps with this path index
+     */
+    fib_node_index_t lbmp_sibling;
+
+    /**
+     * the normalised weight of the path, i.e. the number of buckets
+     * the path occupies in the map
+     */
+    u32 lbmp_weight;
+
+    /**
+     * The state of the path
+     */
+    load_balance_map_path_flags_t lbmp_flags;
+} load_balance_map_path_t;
+
+/**
+ * The global pool of LB maps. A map's index is its offset in this pool.
+ */
+load_balance_map_t *load_balance_map_pool;
+
+/*
+ * Debug macro
+ */
+#ifdef FIB_DEBUG
+#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...)       \
+    {                                                   \
+        clib_warning("lbm: FIXME" _fmt,                 \
+                     ##_args);                          \
+    }
+#else
+#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...)
+#endif
+
+/**
+ * Translate a pointer to a map into its index, i.e. its offset from the
+ * base of the map pool.
+ */
+static index_t
+load_balance_map_get_index (load_balance_map_t *lbm)
+{
+    index_t lbmi;
+
+    lbmi = lbm - load_balance_map_pool;
+
+    return (lbmi);
+}
+
+/**
+ * Format a load-balance map: a header line, then a row of bucket
+ * positions and a row of the bucket each position is mapped to.
+ *
+ * Arguments consumed from ap: the map's index (index_t) and the indent
+ * (u32) to apply to the two rows.
+ *
+ * NOTE(review): this takes the va_list by value, whereas the other
+ * format callback in this change (format_load_balance_trace) takes a
+ * va_list pointer, and this function is invoked through "%U" by the
+ * "show load-balance-map" CLI. Confirm the by-value signature is what
+ * the "%U" machinery passes on every supported ABI; changing it requires
+ * updating the prototype in load_balance_map.h at the same time.
+ */
+u8*
+format_load_balance_map (u8 *s, va_list ap)
+{
+    index_t lbmi = va_arg(ap, index_t);
+    u32 indent = va_arg(ap, u32);
+    load_balance_map_t *lbm;
+    u32 n_buckets, ii;
+
+    lbm = load_balance_map_get(lbmi);
+    n_buckets = vec_len(lbm->lbm_buckets);
+
+    s = format(s, "load-balance-map: index:%d buckets:%d", lbmi, n_buckets);
+    /* first row: the bucket positions */
+    s = format(s, "\n%U index:", format_white_space, indent+2);
+    for (ii = 0; ii < n_buckets; ii++)
+    {
+        s = format(s, "%5d", ii);
+    }
+    /* second row: what each position translates to */
+    s = format(s, "\n%U   map:", format_white_space, indent+2);
+    for (ii = 0; ii < n_buckets; ii++)
+    {
+        s = format(s, "%5d", lbm->lbm_buckets[ii]);
+    }
+
+    return (s);
+}
+
+
+/**
+ * Hash a map over its set of paths.
+ *
+ * The hash is seeded with the number of paths and each path's index is
+ * mixed in, in order. Path weights do not contribute.
+ */
+static uword
+load_balance_map_hash (load_balance_map_t *lbm)
+{
+    u32 a, b, c;
+    u32 pi;
+
+    c = b = vec_len(lbm->lbm_paths);
+
+    for (pi = 0; pi < vec_len(lbm->lbm_paths); pi++)
+    {
+        a = lbm->lbm_paths[pi].lbmp_index;
+        hash_mix32(a, b, c);
+    }
+
+    return (c);
+}
+
+/**
+ * Encode a map index as a DB hash key. Index keys are always odd, which
+ * distinguishes them from keys that are pointers to a map.
+ */
+always_inline uword
+load_balance_map_db_hash_key_from_index (uword index)
+{
+    return ((index << 1) | 1);
+}
+
+/**
+ * Test whether a DB hash key encodes a map index (low bit set) rather
+ * than a pointer.
+ */
+always_inline uword
+load_balance_map_db_hash_key_is_index (uword key)
+{
+    uword low_bit = key & 1;
+
+    return low_bit;
+}
+
+/**
+ * Decode the map index from a DB hash key that encodes one.
+ */
+always_inline uword
+load_balance_map_db_hash_key_2_index (uword key)
+{
+    ASSERT (load_balance_map_db_hash_key_is_index (key));
+
+    return (key >> 1);
+}
+
+/**
+ * Resolve a DB hash key to the map it refers to. A key is either an
+ * encoded pool index or a direct pointer to a map.
+ */
+static load_balance_map_t*
+load_balance_map_db_get_from_hash_key (uword key)
+{
+    index_t lbm_index;
+
+    if (!load_balance_map_db_hash_key_is_index (key))
+    {
+        /* the key is the map's address */
+        return (uword_to_pointer (key, load_balance_map_t *));
+    }
+
+    lbm_index = load_balance_map_db_hash_key_2_index(key);
+
+    return (load_balance_map_get(lbm_index));
+}
+
+/**
+ * Hash-table callback: compute the hash of the map a key refers to.
+ */
+static uword
+load_balance_map_db_hash_key_sum (hash_t * h,
+                                  uword key)
+{
+    return (load_balance_map_hash(
+                load_balance_map_db_get_from_hash_key(key)));
+}
+
+/**
+ * Hash-table callback: test whether two keys refer to maps over the
+ * same set of paths.
+ *
+ * Two maps are equal when they have the same number of paths and the
+ * same path index at every position. These are exactly the inputs to
+ * load_balance_map_hash(), but comparing them directly (rather than
+ * comparing the two hash values) means a hash collision between
+ * different path sets is not mistaken for a match.
+ *
+ * @return non-zero if the maps are equal, 0 otherwise
+ */
+static uword
+load_balance_map_db_hash_key_equal (hash_t * h,
+                                    uword key1,
+                                    uword key2)
+{
+    load_balance_map_t *lbm1, *lbm2;
+    u32 n_paths, ii;
+
+    lbm1 = load_balance_map_db_get_from_hash_key(key1);
+    lbm2 = load_balance_map_db_get_from_hash_key(key2);
+
+    if (lbm1 == lbm2)
+    {
+        return (1);
+    }
+
+    n_paths = vec_len(lbm1->lbm_paths);
+
+    if (n_paths != vec_len(lbm2->lbm_paths))
+    {
+        return (0);
+    }
+
+    for (ii = 0; ii < n_paths; ii++)
+    {
+        if (lbm1->lbm_paths[ii].lbmp_index !=
+            lbm2->lbm_paths[ii].lbmp_index)
+        {
+            return (0);
+        }
+    }
+
+    return (1);
+}
+
+/**
+ * Look in the DB for a map over the same set of paths as lbm.
+ *
+ * @return the index of the matching map, or FIB_NODE_INDEX_INVALID if
+ *         there is none
+ */
+static index_t
+load_balance_map_db_find (load_balance_map_t *lbm)
+{
+    uword *entry;
+
+    entry = hash_get(load_balance_map_db, lbm);
+
+    if (NULL == entry)
+    {
+        return (FIB_NODE_INDEX_INVALID);
+    }
+
+    return entry[0];
+}
+
+/**
+ * Insert a map into the DB of maps keyed by path set, and into the
+ * per-path list of maps for each of its paths. The map must not already
+ * be in the DB.
+ */
+static void
+load_balance_map_db_insert (load_balance_map_t *lbm)
+{
+    load_balance_map_path_t *path;
+    fib_node_list_t siblings;
+    index_t lbmi;
+    uword *entry;
+
+    ASSERT(FIB_NODE_INDEX_INVALID == load_balance_map_db_find(lbm));
+
+    lbmi = load_balance_map_get_index(lbm);
+
+    /*
+     * the DB is keyed on the set of paths; the key stored is the map's
+     * encoded index.
+     */
+    hash_set (load_balance_map_db,
+              load_balance_map_db_hash_key_from_index(lbmi),
+              lbmi);
+
+    /*
+     * add the map to the list kept for each of its paths, creating the
+     * list on a path's first use.
+     */
+    vec_foreach(path, lbm->lbm_paths)
+    {
+        entry = hash_get(lb_maps_by_path_index, path->lbmp_index);
+
+        if (NULL != entry)
+        {
+            siblings = entry[0];
+        }
+        else
+        {
+            siblings = fib_node_list_create();
+            hash_set(lb_maps_by_path_index, path->lbmp_index, siblings);
+        }
+
+        path->lbmp_sibling =
+            fib_node_list_push_front(siblings,
+                                     0, FIB_NODE_TYPE_FIRST,
+                                     lbmi);
+    }
+
+    LOAD_BALANCE_MAP_DBG(lbm, "DB-inserted");
+}
+
+/**
+ * Remove a map from the DB of maps keyed by path set, and from the
+ * per-path list of each of its paths. The map must be in the DB.
+ */
+static void
+load_balance_map_db_remove (load_balance_map_t *lbm)
+{
+    load_balance_map_path_t *path;
+    uword *entry;
+
+    ASSERT(FIB_NODE_INDEX_INVALID != load_balance_map_db_find(lbm));
+
+    hash_unset(load_balance_map_db,
+               load_balance_map_db_hash_key_from_index(
+                   load_balance_map_get_index(lbm)));
+
+    /*
+     * unlink the map from the list kept for each of its paths, using the
+     * sibling index saved at insertion.
+     */
+    vec_foreach(path, lbm->lbm_paths)
+    {
+        entry = hash_get(lb_maps_by_path_index, path->lbmp_index);
+
+        ASSERT(NULL != entry);
+
+        fib_node_list_remove(entry[0], path->lbmp_sibling);
+    }
+
+    LOAD_BALANCE_MAP_DBG(lbm, "DB-removed");
+}
+
+/**
+ * @brief from the paths that are usable, fill the Map.
+ *
+ * Each path owns lbmp_weight consecutive buckets. Buckets of resolved
+ * paths map to themselves; buckets of unresolved paths are mapped, in
+ * round-robin order, onto the buckets of the resolved paths. If no path
+ * is resolved every bucket maps to itself.
+ *
+ * NOTE(review): assumes the map has at least one bucket and that the
+ * path weights sum to the number of buckets - confirm with callers.
+ */
+static void
+load_balance_map_fill (load_balance_map_t *lbm)
+{
+    load_balance_map_path_t *lbmp;
+    u32 n_buckets, bucket, ii, jj;
+    u16 *tmp_buckets;
+
+    tmp_buckets = NULL;
+    n_buckets = vec_len(lbm->lbm_buckets);
+
+    /*
+     * run through the set of paths once, and build a vector of the
+     * indices that are usable. we do this in a scratch space, since we
+     * need to refer to it multiple times as we build the real buckets.
+     */
+    vec_validate(tmp_buckets, n_buckets-1);
+
+    /* 'bucket' walks every bucket; 'jj' counts the usable ones recorded */
+    bucket = jj = 0;
+    vec_foreach (lbmp, lbm->lbm_paths)
+    {
+        if (fib_path_is_resolved(lbmp->lbmp_index))
+        {
+            for (ii = 0; ii < lbmp->lbmp_weight; ii++)
+            {
+                tmp_buckets[jj++] = bucket++;
+            }
+        }
+        else 
+        {
+            /* skip over the buckets owned by the unresolved path */
+            bucket += lbmp->lbmp_weight;
+        }
+    }
+    /* trim the scratch vector to the number of usable buckets found */
+    _vec_len(tmp_buckets) = jj;
+
+    /*
+     * If the number of temporaries written is as many as we need, implying
+     * all paths were up, then we can simply copy the scratch area over the
+     * actual buckets' memory
+     */
+    if (jj == n_buckets)
+    {
+        memcpy(lbm->lbm_buckets,
+               tmp_buckets,
+               sizeof(lbm->lbm_buckets[0]) * n_buckets);
+    }
+    else
+    {
+        /*
+         * one or more paths are down.
+         */
+        if (0 == vec_len(tmp_buckets))
+        {
+            /*
+             * if the scratch area is empty, then no paths are usable.
+             * they will all drop. so use them all, lest we account drops
+             * against only one.
+             */
+            for (bucket = 0; bucket < n_buckets; bucket++)
+            {
+                lbm->lbm_buckets[bucket] = bucket;
+            }
+        }
+        else
+        {
+            /* 'jj' is now the cursor cycling through the scratch space */
+            bucket = jj = 0;
+            vec_foreach (lbmp, lbm->lbm_paths)
+            {
+                if (fib_path_is_resolved(lbmp->lbmp_index))
+                {
+                    /* path is usable: its buckets map to themselves */
+                    for (ii = 0; ii < lbmp->lbmp_weight; ii++)
+                    {
+                        lbm->lbm_buckets[bucket] = bucket;
+                        bucket++;
+                    }
+                }
+                else
+                {
+                    /*
+                     * path is unusable
+                     * cycle through the scratch space selecting an index.
+                     * this means we load balance, in the intended ratio,
+                     * over the paths that are still usable.
+                     */
+                    for (ii = 0; ii < lbmp->lbmp_weight; ii++)
+                    {
+                        lbm->lbm_buckets[bucket] = tmp_buckets[jj];
+                        jj = (jj + 1) % vec_len(tmp_buckets);
+                        bucket++;
+                    }
+                }
+            }
+       }
+    }
+
+    vec_free(tmp_buckets);
+}
+
+/**
+ * Allocate a zeroed map from the pool and record in it the index and
+ * weight of each of the given paths. The map is neither inserted in the
+ * DB nor given any buckets.
+ */
+static load_balance_map_t*
+load_balance_map_alloc (const load_balance_path_t *paths)
+{
+    const load_balance_path_t *src;
+    load_balance_map_path_t *dst;
+    load_balance_map_t *lbm;
+
+    pool_get_aligned(load_balance_map_pool, lbm, CLIB_CACHE_LINE_BYTES);
+    memset(lbm, 0, sizeof(*lbm));
+
+    vec_validate(lbm->lbm_paths, vec_len(paths)-1);
+
+    /* copy path-by-path, walking both vectors in step */
+    dst = lbm->lbm_paths;
+    vec_foreach(src, paths)
+    {
+        dst->lbmp_index  = src->path_index;
+        dst->lbmp_weight = src->path_weight;
+        dst++;
+    }
+
+    return (lbm);
+}
+
+/**
+ * Complete the construction of a newly allocated map: record the sum of
+ * weights, size the bucket vector, insert the map into the DBs and then
+ * fill the buckets from the current state of the paths.
+ *
+ * @return the map passed in
+ */
+static load_balance_map_t *
+load_balance_map_init (load_balance_map_t *lbm,
+                       u32 n_buckets,
+                       u32 sum_of_weights)
+{
+    lbm->lbm_sum_of_norm_weights = sum_of_weights;
+    vec_validate(lbm->lbm_buckets, n_buckets-1);
+
+    load_balance_map_db_insert(lbm);
+
+    load_balance_map_fill(lbm);
+
+    return (lbm);
+}
+
+/**
+ * Find, or create if there is none, the map for a set of paths, and take
+ * a lock on it.
+ *
+ * A scratch map is built from the paths and used as the key for the DB
+ * lookup. If no equivalent map exists the scratch map is completed and
+ * becomes the new map; otherwise the scratch map is released and the
+ * existing map is shared.
+ *
+ * @param n_buckets      number of buckets a new map is created with
+ * @param sum_of_weights sum of the normalised path weights
+ * @param paths          vector of the normalised paths
+ * @return the index of the locked map
+ */
+index_t
+load_balance_map_add_or_lock (u32 n_buckets,
+                              u32 sum_of_weights,
+                              const load_balance_path_t *paths)
+{
+    load_balance_map_t *tmp, *lbm;
+    index_t lbmi;
+
+    tmp = load_balance_map_alloc(paths);
+
+    lbmi = load_balance_map_db_find(tmp);
+
+    if (INDEX_INVALID == lbmi)
+    {
+        lbm = load_balance_map_init(tmp, n_buckets, sum_of_weights);
+    }
+    else
+    {
+        /*
+         * an equivalent map already exists. The scratch map was never
+         * inserted into the DBs and has no buckets, so only its path
+         * vector and pool element need to be given back.
+         */
+        vec_free(tmp->lbm_paths);
+        pool_put(load_balance_map_pool, tmp);
+
+        lbm = load_balance_map_get(lbmi);
+    }
+
+    lbm->lbm_locks++;
+
+    return (load_balance_map_get_index(lbm));
+}
+
+/**
+ * Take an additional lock on an existing map.
+ */
+void
+load_balance_map_lock (index_t lbmi)
+{
+    load_balance_map_t *lbm = load_balance_map_get(lbmi);
+
+    lbm->lbm_locks += 1;
+}
+
+/**
+ * Release a lock on a map. INDEX_INVALID is accepted and ignored. When
+ * the last lock is released the map is removed from the DBs, its vectors
+ * are freed and it is returned to the pool.
+ */
+void
+load_balance_map_unlock (index_t lbmi)
+{
+    load_balance_map_t *lbm;
+
+    if (INDEX_INVALID != lbmi)
+    {
+        lbm = load_balance_map_get(lbmi);
+
+        lbm->lbm_locks -= 1;
+
+        if (0 == lbm->lbm_locks)
+        {
+            /* last user gone */
+            load_balance_map_db_remove(lbm);
+            vec_free(lbm->lbm_paths);
+            vec_free(lbm->lbm_buckets);
+            pool_put(load_balance_map_pool, lbm);
+        }
+    }
+}
+
+/**
+ * List-walk callback: re-fill the map the list element refers to.
+ *
+ * @return always non-zero (1)
+ */
+static int
+load_balance_map_path_state_change_walk (fib_node_ptr_t *fptr,
+                                         void *ctx)
+{
+    load_balance_map_fill(load_balance_map_get(fptr->fnp_index));
+
+    return (1);
+}
+
+/**
+ * @brief The state of a path has changed (most likely it has gone down).
+ * This triggers the PIC edge cutover: every map that includes the path
+ * is re-filled so that it reflects the path's new state.
+ */
+void
+load_balance_map_path_state_change (fib_node_index_t path_index)
+{
+    uword *entry;
+
+    entry = hash_get(lb_maps_by_path_index, path_index);
+
+    if (NULL != entry)
+    {
+        /*
+         * re-stripe the buckets of each affected map
+         */
+        fib_node_list_walk(entry[0],
+                           load_balance_map_path_state_change_walk,
+                           NULL);
+    }
+}
+
+/**
+ * @brief Initialise the load-balance map module: create the DB of maps
+ * keyed by path set (with its custom hash and equality callbacks) and
+ * the hash of per-path map lists.
+ */
+void
+load_balance_map_module_init (void)
+{
+    load_balance_map_db =
+        hash_create2 (/* elts */ 0,
+                      /* user */ 0,
+                      /* value_bytes */ sizeof (index_t),
+                      load_balance_map_db_hash_key_sum,
+                      load_balance_map_db_hash_key_equal,
+                      /* format pair/arg */
+                      0, 0);
+
+    lb_maps_by_path_index = hash_create(0, sizeof(fib_node_list_t));
+}
+
+/**
+ * Report the load-balance map pool usage: elements in use, pool length
+ * and per-object size.
+ */
+void
+load_balance_map_show_mem (void)
+{
+    fib_show_memory_usage("Load-Balance Map",
+			  pool_elts(load_balance_map_pool),
+			  pool_len(load_balance_map_pool),
+			  sizeof(load_balance_map_t));
+}
+
+/**
+ * CLI handler for "show load-balance-map [<index>]".
+ *
+ * With an index, the matching map is printed; an index that does not
+ * refer to an in-use pool element is reported rather than dereferenced.
+ * With no index, every map in the pool is printed.
+ *
+ * @param vm    vlib main, used for CLI output
+ * @param input the remaining CLI input to parse
+ * @param cmd   the CLI command (unused)
+ * @return always 0 (no error)
+ */
+static clib_error_t *
+load_balance_map_show (vlib_main_t * vm,
+                       unformat_input_t * input,
+                       vlib_cli_command_t * cmd)
+{
+    index_t lbmi = INDEX_INVALID;
+
+    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+        if (unformat (input, "%d", &lbmi))
+            ;
+        else
+            break;
+    }
+
+    if (INDEX_INVALID != lbmi)
+    {
+        /*
+         * the index comes straight from the user; make sure it names a
+         * live pool element before the formatter looks it up.
+         */
+        if (pool_is_free_index(load_balance_map_pool, lbmi))
+        {
+            vlib_cli_output (vm, "no such load-balance-map:%d", lbmi);
+        }
+        else
+        {
+            vlib_cli_output (vm, "%U", format_load_balance_map, lbmi, 0);
+        }
+    }
+    else
+    {
+        load_balance_map_t *lbm;
+
+        pool_foreach(lbm, load_balance_map_pool,
+        ({
+            vlib_cli_output (vm, "%U", format_load_balance_map,
+                             load_balance_map_get_index(lbm), 0);
+        }));
+    }
+
+    return 0;
+}
+
+/* Registers the "show load-balance-map" CLI command, handled by load_balance_map_show */
+VLIB_CLI_COMMAND (load_balance_map_show_command, static) = {
+    .path = "show load-balance-map",
+    .short_help = "show load-balance-map [<index>]",
+    .function = load_balance_map_show,
+};
diff --git a/src/vnet/dpo/load_balance_map.h b/src/vnet/dpo/load_balance_map.h
new file mode 100644
index 00000000..454bf4b3
--- /dev/null
+++ b/src/vnet/dpo/load_balance_map.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief Load-balance maps: shared, lock-counted bucket translation tables
+ */
+
+#ifndef __LOAD_BALANCE_MAP_H__
+#define __LOAD_BALANCE_MAP_H__
+
+#include <vlib/vlib.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/load_balance.h>
+
+struct load_balance_map_path_t_;
+
+/** A load-balance map: a lock-counted set of buckets representing a vector
+ *  of paths. */
+typedef struct load_balance_map_t_ {
+    /**
+     * The buckets of the map that provide the index to index translation.
+     * In the first cacheline.
+     */
+    u16 *lbm_buckets;
+
+    /**
+     * the vector of paths this MAP represents
+     */
+    struct load_balance_map_path_t_ *lbm_paths;
+
+    /**
+     * the sum of the normalised weights. cache for convenience
+     */
+    u32 lbm_sum_of_norm_weights;
+
+    /**
+     * Number of locks. Maps are shared by a large number of recursive fib_entry_ts
+     */
+    u32 lbm_locks;
+} load_balance_map_t;
+
+extern index_t load_balance_map_add_or_lock(u32 n_buckets,
+                                            u32 sum_of_weights,
+                                            const load_balance_path_t *norm_paths);
+/* add/remove one lock on the map at pool index lbmi */
+extern void load_balance_map_lock(index_t lbmi);
+extern void load_balance_map_unlock(index_t lbmi);
+/* to be called with the index of a path whose state has changed */
+extern void load_balance_map_path_state_change(fib_node_index_t path_index);
+/* format function for a map, and the memory-usage display hook */
+extern u8* format_load_balance_map(u8 *s, va_list ap);
+extern void load_balance_map_show_mem(void);
+
+/**
+ * The encapsulation breakages are for fast DP access
+ */
+extern load_balance_map_t *load_balance_map_pool;
+/** @return the map stored at pool index lbmi */
+static inline load_balance_map_t*
+load_balance_map_get (index_t lbmi)
+{
+    return (pool_elt_at_index(load_balance_map_pool, lbmi));
+}
+
+/** Creates the module's hash tables; defined in load_balance_map.c */
+extern void load_balance_map_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
new file mode 100644
index 00000000..96fedd27
--- /dev/null
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -0,0 +1,1185 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/mpls_fib.h>
+
+static const char *const lookup_input_names[] = LOOKUP_INPUTS;
+
+/**
+ * @brief Enumeration of the lookup subtypes
+ */
+typedef enum lookup_sub_type_t_
+{
+    LOOKUP_SUB_TYPE_SRC,                      /* source address lookup */
+    LOOKUP_SUB_TYPE_DST,                      /* destination, configured table */
+    LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE, /* destination, interface's table */
+} lookup_sub_type_t;
+#define LOOKUP_SUB_TYPE_NUM (LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE+1)
+/** Iterate _st over every lookup sub-type, starting at the first enumerator */
+#define FOR_EACH_LOOKUP_SUB_TYPE(_st)                                   \
+    for (_st = LOOKUP_SUB_TYPE_SRC; _st < LOOKUP_SUB_TYPE_NUM; _st++)
+
+/**
+ * @brief pool of all lookup DPOs
+ */
+lookup_dpo_t *lookup_dpo_pool;
+
+/**
+ * @brief An array of registered DPO type values for the sub-types
+ */
+static dpo_type_t lookup_dpo_sub_types[LOOKUP_SUB_TYPE_NUM];
+
+static lookup_dpo_t *
+lookup_dpo_alloc (void)
+{
+    lookup_dpo_t *new_lkd;
+
+    /* take a cache-line aligned element from the pool; it is not zeroed here */
+    pool_get_aligned(lookup_dpo_pool, new_lkd, CLIB_CACHE_LINE_BYTES);
+    return new_lkd;
+}
+
+static index_t
+lookup_dpo_get_index (lookup_dpo_t *lkd)
+{
+    return (lkd - lookup_dpo_pool); /* offset from the pool base == DPO index */
+}
+
+/**
+ * Create a lookup DPO and return it in 'dpo'; the DPO sub-type is chosen from
+ * 'input' and 'table_config'. A pair that selects no sub-type resets 'dpo'.
+ */
+static void
+lookup_dpo_add_or_lock_i (fib_node_index_t fib_index,
+                          dpo_proto_t proto,
+                          lookup_input_t input,
+                          lookup_table_t table_config,
+                          dpo_id_t *dpo)
+{
+    lookup_dpo_t *lkd;
+    dpo_type_t type = 0;
+
+    /* select the sub-type first so nothing is allocated for a bad request */
+    switch (input)
+    {
+    case LOOKUP_INPUT_SRC_ADDR:
+        type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_SRC];
+        break;
+    case LOOKUP_INPUT_DST_ADDR:
+        switch (table_config)
+        {
+        case LOOKUP_TABLE_FROM_INPUT_INTERFACE:
+            type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE];
+            break;
+        case LOOKUP_TABLE_FROM_CONFIG:
+            type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST];
+            break;
+        }
+    }
+
+    if (0 == type)
+    {
+        dpo_reset(dpo);
+        return;
+    }
+
+    lkd = lookup_dpo_alloc();
+    lkd->lkd_fib_index = fib_index;
+    lkd->lkd_proto = proto;
+    lkd->lkd_input = input;
+    lkd->lkd_table = table_config;
+    /* the pool does not zero elements; locks are counted up from here */
+    lkd->lkd_locks = 0;
+    dpo_set(dpo, type, proto, lookup_dpo_get_index(lkd));
+}
+
+void
+lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index,
+                                    dpo_proto_t proto,
+                                    lookup_input_t input,
+                                    lookup_table_t table_config,
+                                    dpo_id_t *dpo)
+{
+    /* a configured table is referenced by the DPO, so take a lock on it;
+     * lookup_dpo_unlock() releases it when the last DPO lock goes */
+    if (table_config == LOOKUP_TABLE_FROM_CONFIG)
+        fib_table_lock(fib_index, dpo_proto_to_fib(proto));
+    lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo);
+}
+
+void
+lookup_dpo_add_or_lock_w_table_id (u32 table_id,
+                                   dpo_proto_t proto,
+                                   lookup_input_t input,
+                                   lookup_table_t table_config,
+                                   dpo_id_t *dpo)
+{
+    fib_node_index_t fib_index;
+
+    /* only a configured table is resolved (and locked) from its table-id */
+    fib_index = ((LOOKUP_TABLE_FROM_CONFIG == table_config) ?
+                 fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto),
+                                                   table_id) :
+                 FIB_NODE_INDEX_INVALID);
+    /* NOTE(review): any other table_config leaves the index invalid and
+     * trips this ASSERT where ASSERTs are enabled - confirm it is intended */
+    ASSERT(FIB_NODE_INDEX_INVALID != fib_index);
+    lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo);
+}
+
+/**
+ * Format function for a lookup DPO: takes the DPO's pool index (index_t)
+ * from 'args' and appends its description to 's'.
+ */
+u8*
+format_lookup_dpo (u8 *s, va_list *args)
+{
+    const index_t lkdi = va_arg (*args, index_t);
+    const lookup_dpo_t *lkd = lookup_dpo_get(lkdi);
+    const char *input_name = lookup_input_names[lkd->lkd_input];
+
+    if (LOOKUP_TABLE_FROM_INPUT_INTERFACE != lkd->lkd_table)
+    {
+        return (format(s, "%s lookup in %U",
+                       input_name,
+                       format_fib_table_name, lkd->lkd_fib_index,
+                       dpo_proto_to_fib(lkd->lkd_proto)));
+    }
+
+    return (format(s, "%s lookup in interface's %U table",
+                   input_name,
+                   format_dpo_proto, lkd->lkd_proto));
+}
+
+static void
+lookup_dpo_lock (dpo_id_t *dpo)
+{
+    /* the DPO id carries the pool index of the lookup DPO it refers to */
+    lookup_dpo_t *locked = lookup_dpo_get(dpo->dpoi_index);
+
+    /* one more holder */
+    locked->lkd_locks += 1;
+}
+
+static void
+lookup_dpo_unlock (dpo_id_t *dpo)
+{
+    lookup_dpo_t *lkd = lookup_dpo_get(dpo->dpoi_index);
+
+    /* drop one lock; nothing more to do while other holders remain */
+    if (0 != --lkd->lkd_locks)
+    {
+        return;
+    }
+
+    /*
+     * last lock gone: release the lock held on a configured table,
+     * then return the object to the pool
+     */
+    if (LOOKUP_TABLE_FROM_CONFIG == lkd->lkd_table)
+        fib_table_unlock(lkd->lkd_fib_index, dpo_proto_to_fib(lkd->lkd_proto));
+    pool_put(lookup_dpo_pool, lkd);
+}
+
+/** Look up addr0 in the mtrie of IPv4 FIB src_fib_index0; the index carried
+ *  by the resulting leaf (the default leaf if empty) goes to src_adj_index0[0] */
+always_inline void
+ip4_src_fib_lookup_one (u32 src_fib_index0,
+                        const ip4_address_t * addr0,
+                        u32 * src_adj_index0)
+{
+    ip4_fib_mtrie_leaf_t leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
+    ip4_fib_mtrie_t * mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
+
+    /* four steps from the root leaf, with step index 0..3 */
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0);
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
+
+    /* Handle default route. */
+    leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+    src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+}
+
+always_inline void
+ip4_src_fib_lookup_two (u32 src_fib_index0,
+                        u32 src_fib_index1,
+                        const ip4_address_t * addr0,
+                        const ip4_address_t * addr1,
+                        u32 * src_adj_index0,
+                        u32 * src_adj_index1)
+{
+    ip4_fib_mtrie_leaf_t leaf0, leaf1;
+    ip4_fib_mtrie_t * mtrie0, * mtrie1;
+    /* two independent lookups (FIB 0/addr 0, FIB 1/addr 1), interleaved */
+    mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
+    mtrie1 = &ip4_fib_get (src_fib_index1)->mtrie;
+
+    leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0);
+    leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0);
+
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
+    leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1);
+
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
+    leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2);
+
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
+    leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3);
+
+    /* Handle default route. */
+    leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+    leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
+    src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+    src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+}
+
+/**
+ * @brief Lookup trace data
+ */
+typedef struct lookup_trace_t_
+{
+    union {
+	ip46_address_t addr;        /* IP nodes: the address that was looked up */
+	mpls_unicast_header_t hdr;  /* MPLS node: the header that was looked up */
+    };
+    fib_node_index_t fib_index;     /* FIB index used for the lookup */
+    index_t lbi;                    /* load-balance index the lookup returned */
+} lookup_trace_t;
+
+
+always_inline uword
+lookup_dpo_ip4_inline (vlib_main_t * vm,
+                       vlib_node_runtime_t * node,
+                       vlib_frame_t * from_frame,
+                       int input_src_addr,
+                       int table_from_interface)
+{
+    u32 n_left_from, next_index, * from, * to_next;
+    u32 cpu_index = os_get_cpu_number();
+    vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+    /* per packet: choose table and address, look up, follow the result */
+    from = vlib_frame_vector_args (from_frame);
+    n_left_from = from_frame->n_vectors;
+
+    next_index = node->cached_next_index;
+
+    while (n_left_from > 0)
+    {
+        u32 n_left_to_next;
+
+        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+	while (n_left_from >= 4 && n_left_to_next > 2)
+	{
+	    u32 bi0, lkdi0, lbi0, fib_index0, next0, hash_c0;
+	    flow_hash_config_t flow_hash_config0;
+	    const ip4_address_t *input_addr0;
+	    const load_balance_t *lb0;
+	    const lookup_dpo_t * lkd0;
+	    const ip4_header_t * ip0;
+	    const dpo_id_t *dpo0;
+	    vlib_buffer_t * b0;
+	    u32 bi1, lkdi1, lbi1, fib_index1, next1, hash_c1;
+	    flow_hash_config_t flow_hash_config1;
+	    const ip4_address_t *input_addr1;
+	    const load_balance_t *lb1;
+	    const lookup_dpo_t * lkd1;
+	    const ip4_header_t * ip1;
+	    const dpo_id_t *dpo1;
+	    vlib_buffer_t * b1;
+
+	    /* Prefetch next iteration. */
+	    {
+		vlib_buffer_t * p2, * p3;
+
+		p2 = vlib_get_buffer (vm, from[2]);
+		p3 = vlib_get_buffer (vm, from[3]);
+
+		vlib_prefetch_buffer_header (p2, LOAD);
+		vlib_prefetch_buffer_header (p3, LOAD);
+
+		CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+		CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    }
+
+	    bi0 = from[0];
+	    to_next[0] = bi0;
+	    bi1 = from[1];
+	    to_next[1] = bi1;
+	    from += 2;
+	    to_next += 2;
+	    n_left_from -= 2;
+	    n_left_to_next -= 2;
+
+	    b0 = vlib_get_buffer (vm, bi0);
+	    ip0 = vlib_buffer_get_current (b0);
+	    b1 = vlib_get_buffer (vm, bi1);
+	    ip1 = vlib_buffer_get_current (b1);
+
+	    /* the earlier ip4 lookup left the lookup DPO's index in adj_index[TX] */
+	    lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+	    lkdi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+	    lkd0 = lookup_dpo_get(lkdi0);
+	    lkd1 = lookup_dpo_get(lkdi1);
+
+	    /*
+	     * choose between a lookup using the fib index in the DPO
+	     * or getting the FIB index from the interface.
+	     */
+	    if (table_from_interface)
+	    {
+		fib_index0 =
+		    ip4_fib_table_get_index_for_sw_if_index(
+			vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+		fib_index1 =
+		    ip4_fib_table_get_index_for_sw_if_index(
+			vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+	    }
+	    else
+	    {
+		fib_index0 = lkd0->lkd_fib_index;
+		fib_index1 = lkd1->lkd_fib_index;
+	    }
+
+	    /*
+	     * choose between a source or destination address lookup in the table
+	     */
+	    if (input_src_addr)
+	    {
+		input_addr0 = &ip0->src_address;
+		input_addr1 = &ip1->src_address;
+	    }
+	    else
+	    {
+		input_addr0 = &ip0->dst_address;
+		input_addr1 = &ip1->dst_address;
+	    }
+
+	    /* do lookup */
+	    ip4_src_fib_lookup_two (fib_index0, fib_index1,
+                                    input_addr0, input_addr1,
+                                    &lbi0, &lbi1);
+	    lb0 = load_balance_get(lbi0);
+	    lb1 = load_balance_get(lbi1);
+
+            vnet_buffer(b0)->sw_if_index[VLIB_TX] = fib_index0;
+            vnet_buffer(b1)->sw_if_index[VLIB_TX] = fib_index1;
+
+	    /* Use flow hash to compute multipath adjacency. */
+	    hash_c0 = vnet_buffer (b0)->ip.flow_hash = 0;
+	    hash_c1 = vnet_buffer (b1)->ip.flow_hash = 0;
+
+	    if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+	    {
+		flow_hash_config0 = lb0->lb_hash_config;
+		hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+		    ip4_compute_flow_hash (ip0, flow_hash_config0);
+	    }
+
+	    if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+	    {
+		flow_hash_config1 = lb1->lb_hash_config;
+		hash_c1 = vnet_buffer (b1)->ip.flow_hash =
+		    ip4_compute_flow_hash (ip1, flow_hash_config1);
+	    }
+
+	    dpo0 = load_balance_get_bucket_i(lb0,
+					     (hash_c0 &
+					      (lb0->lb_n_buckets_minus_1)));
+	    dpo1 = load_balance_get_bucket_i(lb1,
+					     (hash_c1 &
+					      (lb1->lb_n_buckets_minus_1)));
+
+	    next0 = dpo0->dpoi_next_node;
+	    next1 = dpo1->dpoi_next_node;
+	    vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+	    vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+	    vlib_increment_combined_counter
+		(cm, cpu_index, lbi0, 1,
+		 vlib_buffer_length_in_chain (vm, b0));
+	    vlib_increment_combined_counter
+		(cm, cpu_index, lbi1, 1,
+		 vlib_buffer_length_in_chain (vm, b1));
+
+	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+		lookup_trace_t *tr = vlib_add_trace (vm, node,
+						     b0, sizeof (*tr));
+		tr->fib_index = fib_index0;
+		tr->lbi = lbi0;
+		tr->addr.ip4 = *input_addr0;
+	    }
+	    if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+		lookup_trace_t *tr = vlib_add_trace (vm, node,
+						     b1, sizeof (*tr));
+		tr->fib_index = fib_index1;
+		tr->lbi = lbi1;
+		tr->addr.ip4 = *input_addr1;
+	    }
+
+	    vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+					     to_next, n_left_to_next,
+					     bi0, bi1, next0, next1);
+	}
+
+	while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	    u32 bi0, lkdi0, lbi0, fib_index0, next0, hash_c0;
+	    flow_hash_config_t flow_hash_config0;
+	    const ip4_address_t *input_addr;
+	    const load_balance_t *lb0;
+	    const lookup_dpo_t * lkd0;
+	    const ip4_header_t * ip0;
+	    const dpo_id_t *dpo0;
+	    vlib_buffer_t * b0;
+
+	    bi0 = from[0];
+	    to_next[0] = bi0;
+	    from += 1;
+	    to_next += 1;
+	    n_left_from -= 1;
+	    n_left_to_next -= 1;
+
+	    b0 = vlib_get_buffer (vm, bi0);
+	    ip0 = vlib_buffer_get_current (b0);
+
+	    /* the earlier ip4 lookup left the lookup DPO's index in adj_index[TX] */
+	    lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+	    lkd0 = lookup_dpo_get(lkdi0);
+
+	    /*
+	     * choose between a lookup using the fib index in the DPO
+	     * or getting the FIB index from the interface.
+	     */
+	    if (table_from_interface)
+	    {
+		fib_index0 =
+		    ip4_fib_table_get_index_for_sw_if_index(
+			vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+	    }
+	    else
+	    {
+		fib_index0 = lkd0->lkd_fib_index;
+	    }
+
+	    /*
+	     * choose between a source or destination address lookup in the table
+	     */
+	    if (input_src_addr)
+	    {
+		input_addr = &ip0->src_address;
+	    }
+	    else
+	    {
+		input_addr = &ip0->dst_address;
+	    }
+
+	    /* do lookup */
+	    ip4_src_fib_lookup_one (fib_index0, input_addr, &lbi0);
+	    lb0 = load_balance_get(lbi0);
+
+            vnet_buffer(b0)->sw_if_index[VLIB_TX] = fib_index0;
+
+	    /* Use flow hash to compute multipath adjacency. */
+	    hash_c0 = vnet_buffer (b0)->ip.flow_hash = 0;
+
+	    if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+	    {
+		flow_hash_config0 = lb0->lb_hash_config;
+		hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+		    ip4_compute_flow_hash (ip0, flow_hash_config0);
+	    }
+
+	    dpo0 = load_balance_get_bucket_i(lb0,
+					     (hash_c0 &
+					      (lb0->lb_n_buckets_minus_1)));
+
+	    next0 = dpo0->dpoi_next_node;
+	    vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+	    vlib_increment_combined_counter
+		(cm, cpu_index, lbi0, 1,
+		 vlib_buffer_length_in_chain (vm, b0));
+
+	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+		lookup_trace_t *tr = vlib_add_trace (vm, node,
+						     b0, sizeof (*tr));
+		tr->fib_index = fib_index0;
+		tr->lbi = lbi0;
+		tr->addr.ip4 = *input_addr;
+	    }
+
+	    vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+					    n_left_to_next, bi0, next0);
+	}
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+    return from_frame->n_vectors;
+}
+
+static u8 *
+format_lookup_trace (u8 * s, va_list * args)
+{
+    /* vm and node are consumed from the argument list but not used */
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    const lookup_trace_t * trace = va_arg (*args, lookup_trace_t *);
+    const uword indent = format_get_indent (s);
+
+    return (format (s, "%U fib-index:%d addr:%U load-balance:%d",
+                    format_white_space, indent, trace->fib_index,
+                    format_ip46_address, &trace->addr, IP46_TYPE_ANY,
+                    trace->lbi));
+}
+
+always_inline uword
+lookup_ip4_dst (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_ip4_inline(vm, node, from_frame, 0, 0));
+}
+/* node: look up the IPv4 destination address in the lookup DPO's table */
+VLIB_REGISTER_NODE (lookup_ip4_dst_node) = {
+    .function = lookup_ip4_dst,
+    .name = "lookup-ip4-dst",
+    .vector_size = sizeof (u32),
+    .sibling_of = "ip4-lookup",
+    .format_trace = format_lookup_trace,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_node, lookup_ip4_dst)
+
+always_inline uword
+lookup_ip4_dst_itf (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_ip4_inline(vm, node, from_frame, 0, 1));
+}
+/* node: look up the IPv4 destination address in the RX interface's table */
+VLIB_REGISTER_NODE (lookup_ip4_dst_itf_node) = {
+    .function = lookup_ip4_dst_itf,
+    .name = "lookup-ip4-dst-itf",
+    .vector_size = sizeof (u32),
+    .sibling_of = "ip4-lookup",
+    .format_trace = format_lookup_trace,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_itf_node, lookup_ip4_dst_itf)
+
+always_inline uword
+lookup_ip4_src (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_ip4_inline(vm, node, from_frame, 1, 0));
+}
+/* node: look up the IPv4 source address in the lookup DPO's table */
+VLIB_REGISTER_NODE (lookup_ip4_src_node) = {
+    .function = lookup_ip4_src,
+    .name = "lookup-ip4-src",
+    .vector_size = sizeof (u32),
+    .format_trace = format_lookup_trace,
+    .sibling_of = "ip4-lookup",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_src_node, lookup_ip4_src)
+
+always_inline uword
+lookup_dpo_ip6_inline (vlib_main_t * vm,
+                       vlib_node_runtime_t * node,
+                       vlib_frame_t * from_frame,
+                       int input_src_addr,
+                       int table_from_interface)
+{
+    vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+    u32 n_left_from, next_index, * from, * to_next;
+    u32 cpu_index = os_get_cpu_number();
+    /* per packet: choose table and address, look up, follow the result */
+    from = vlib_frame_vector_args (from_frame);
+    n_left_from = from_frame->n_vectors;
+
+    next_index = node->cached_next_index;
+
+    while (n_left_from > 0)
+    {
+        u32 n_left_to_next;
+
+        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+	while (n_left_from >= 4 && n_left_to_next > 2)
+	{
+	    u32 bi0, lkdi0, lbi0, fib_index0, next0, hash_c0;
+	    flow_hash_config_t flow_hash_config0;
+	    const ip6_address_t *input_addr0;
+	    const load_balance_t *lb0;
+	    const lookup_dpo_t * lkd0;
+	    const ip6_header_t * ip0;
+	    const dpo_id_t *dpo0;
+	    vlib_buffer_t * b0;
+	    u32 bi1, lkdi1, lbi1, fib_index1, next1, hash_c1;
+	    flow_hash_config_t flow_hash_config1;
+	    const ip6_address_t *input_addr1;
+	    const load_balance_t *lb1;
+	    const lookup_dpo_t * lkd1;
+	    const ip6_header_t * ip1;
+	    const dpo_id_t *dpo1;
+	    vlib_buffer_t * b1;
+
+	    /* Prefetch next iteration. */
+	    {
+		vlib_buffer_t * p2, * p3;
+
+		p2 = vlib_get_buffer (vm, from[2]);
+		p3 = vlib_get_buffer (vm, from[3]);
+
+		vlib_prefetch_buffer_header (p2, LOAD);
+		vlib_prefetch_buffer_header (p3, LOAD);
+
+		CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+		CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    }
+
+	    bi0 = from[0];
+	    to_next[0] = bi0;
+	    bi1 = from[1];
+	    to_next[1] = bi1;
+	    from += 2;
+	    to_next += 2;
+	    n_left_from -= 2;
+	    n_left_to_next -= 2;
+
+	    b0 = vlib_get_buffer (vm, bi0);
+	    ip0 = vlib_buffer_get_current (b0);
+	    b1 = vlib_get_buffer (vm, bi1);
+	    ip1 = vlib_buffer_get_current (b1);
+
+	    /* the earlier ip6 lookup left the lookup DPO's index in adj_index[TX] */
+	    lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+	    lkdi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+	    lkd0 = lookup_dpo_get(lkdi0);
+	    lkd1 = lookup_dpo_get(lkdi1);
+
+	    /*
+	     * choose between a lookup using the fib index in the DPO
+	     * or getting the FIB index from the interface.
+	     */
+	    if (table_from_interface)
+	    {
+		fib_index0 =
+		    ip6_fib_table_get_index_for_sw_if_index(
+			vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+		fib_index1 =
+		    ip6_fib_table_get_index_for_sw_if_index(
+			vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+	    }
+	    else
+	    {
+		fib_index0 = lkd0->lkd_fib_index;
+		fib_index1 = lkd1->lkd_fib_index;
+	    }
+
+	    /*
+	     * choose between a source or destination address lookup in the table
+	     */
+	    if (input_src_addr)
+	    {
+		input_addr0 = &ip0->src_address;
+		input_addr1 = &ip1->src_address;
+	    }
+	    else
+	    {
+		input_addr0 = &ip0->dst_address;
+		input_addr1 = &ip1->dst_address;
+	    }
+
+	    /* do lookup (on the src or dst address, as chosen above) */
+	    lbi0 = ip6_fib_table_fwding_lookup(&ip6_main,
+					       fib_index0,
+					       input_addr0);
+	    lbi1 = ip6_fib_table_fwding_lookup(&ip6_main,
+					       fib_index1,
+					       input_addr1);
+	    lb0 = load_balance_get(lbi0);
+	    lb1 = load_balance_get(lbi1);
+
+            vnet_buffer(b0)->sw_if_index[VLIB_TX] = fib_index0;
+            vnet_buffer(b1)->sw_if_index[VLIB_TX] = fib_index1;
+
+	    /* Use flow hash to compute multipath adjacency. */
+	    hash_c0 = vnet_buffer (b0)->ip.flow_hash = 0;
+	    hash_c1 = vnet_buffer (b1)->ip.flow_hash = 0;
+
+	    if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+	    {
+		flow_hash_config0 = lb0->lb_hash_config;
+		hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+		    ip6_compute_flow_hash (ip0, flow_hash_config0);
+	    }
+
+	    if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+	    {
+		flow_hash_config1 = lb1->lb_hash_config;
+		hash_c1 = vnet_buffer (b1)->ip.flow_hash =
+		    ip6_compute_flow_hash (ip1, flow_hash_config1);
+	    }
+
+	    dpo0 = load_balance_get_bucket_i(lb0,
+					     (hash_c0 &
+					      (lb0->lb_n_buckets_minus_1)));
+	    dpo1 = load_balance_get_bucket_i(lb1,
+					     (hash_c1 &
+					      (lb1->lb_n_buckets_minus_1)));
+
+	    next0 = dpo0->dpoi_next_node;
+	    next1 = dpo1->dpoi_next_node;
+	    vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+	    vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+	    vlib_increment_combined_counter
+		(cm, cpu_index, lbi0, 1,
+		 vlib_buffer_length_in_chain (vm, b0));
+	    vlib_increment_combined_counter
+		(cm, cpu_index, lbi1, 1,
+		 vlib_buffer_length_in_chain (vm, b1));
+
+	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+		lookup_trace_t *tr = vlib_add_trace (vm, node,
+						     b0, sizeof (*tr));
+		tr->fib_index = fib_index0;
+		tr->lbi = lbi0;
+		tr->addr.ip6 = *input_addr0;
+	    }
+	    if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+		lookup_trace_t *tr = vlib_add_trace (vm, node,
+						     b1, sizeof (*tr));
+		tr->fib_index = fib_index1;
+		tr->lbi = lbi1;
+		tr->addr.ip6 = *input_addr1;
+	    }
+	    vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+					    n_left_to_next, bi0, bi1,
+					    next0, next1);
+	}
+	while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	    u32 bi0, lkdi0, lbi0, fib_index0, next0, hash_c0;
+	    flow_hash_config_t flow_hash_config0;
+	    const ip6_address_t *input_addr0;
+	    const load_balance_t *lb0;
+	    const lookup_dpo_t * lkd0;
+	    const ip6_header_t * ip0;
+	    const dpo_id_t *dpo0;
+	    vlib_buffer_t * b0;
+
+	    bi0 = from[0];
+	    to_next[0] = bi0;
+	    from += 1;
+	    to_next += 1;
+	    n_left_from -= 1;
+	    n_left_to_next -= 1;
+
+	    b0 = vlib_get_buffer (vm, bi0);
+	    ip0 = vlib_buffer_get_current (b0);
+
+	    /* the earlier ip6 lookup left the lookup DPO's index in adj_index[TX] */
+	    lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+	    lkd0 = lookup_dpo_get(lkdi0);
+
+	    /*
+	     * choose between a lookup using the fib index in the DPO
+	     * or getting the FIB index from the interface.
+	     */
+	    if (table_from_interface)
+	    {
+		fib_index0 =
+		    ip6_fib_table_get_index_for_sw_if_index(
+			vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+	    }
+	    else
+	    {
+		fib_index0 = lkd0->lkd_fib_index;
+	    }
+
+	    /*
+	     * choose between a source or destination address lookup in the table
+	     */
+	    if (input_src_addr)
+	    {
+		input_addr0 = &ip0->src_address;
+	    }
+	    else
+	    {
+		input_addr0 = &ip0->dst_address;
+	    }
+
+	    /* do lookup (on the src or dst address, as chosen above) */
+	    lbi0 = ip6_fib_table_fwding_lookup(&ip6_main,
+					       fib_index0,
+					       input_addr0);
+	    lb0 = load_balance_get(lbi0);
+
+            vnet_buffer(b0)->sw_if_index[VLIB_TX] = fib_index0;
+
+	    /* Use flow hash to compute multipath adjacency. */
+	    hash_c0 = vnet_buffer (b0)->ip.flow_hash = 0;
+
+	    if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+	    {
+		flow_hash_config0 = lb0->lb_hash_config;
+		hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+		    ip6_compute_flow_hash (ip0, flow_hash_config0);
+	    }
+
+	    dpo0 = load_balance_get_bucket_i(lb0,
+					     (hash_c0 &
+					      (lb0->lb_n_buckets_minus_1)));
+
+	    next0 = dpo0->dpoi_next_node;
+	    vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+	    vlib_increment_combined_counter
+		(cm, cpu_index, lbi0, 1,
+		 vlib_buffer_length_in_chain (vm, b0));
+
+	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+		lookup_trace_t *tr = vlib_add_trace (vm, node,
+						     b0, sizeof (*tr));
+		tr->fib_index = fib_index0;
+		tr->lbi = lbi0;
+		tr->addr.ip6 = *input_addr0;
+	    }
+	    vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+					    n_left_to_next, bi0, next0);
+	}
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+    return from_frame->n_vectors;
+}
+
+always_inline uword
+lookup_ip6_dst (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_ip6_inline(vm, node, from_frame, 0 /*use dst*/, 0));
+}
+/* node: look up the IPv6 destination address in the lookup DPO's table */
+VLIB_REGISTER_NODE (lookup_ip6_dst_node) = {
+    .function = lookup_ip6_dst,
+    .name = "lookup-ip6-dst",
+    .vector_size = sizeof (u32),
+    .format_trace = format_lookup_trace,
+    .sibling_of = "ip6-lookup",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_dst_node, lookup_ip6_dst)
+
+always_inline uword
+lookup_ip6_dst_itf (vlib_main_t * vm,
+		    vlib_node_runtime_t * node,
+		    vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_ip6_inline(vm, node, from_frame, 0 /*use dst*/, 1));
+}
+/* node: look up the IPv6 destination address in the RX interface's table */
+VLIB_REGISTER_NODE (lookup_ip6_dst_itf_node) = {
+    .function = lookup_ip6_dst_itf,
+    .name = "lookup-ip6-dst-itf",
+    .vector_size = sizeof (u32),
+    .format_trace = format_lookup_trace,
+    .sibling_of = "ip6-lookup",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_dst_itf_node, lookup_ip6_dst_itf)
+
+always_inline uword
+lookup_ip6_src (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_ip6_inline(vm, node, from_frame, 1, 0));
+}
+/* node: look up the IPv6 source address in the lookup DPO's table */
+VLIB_REGISTER_NODE (lookup_ip6_src_node) = {
+    .function = lookup_ip6_src,
+    .name = "lookup-ip6-src",
+    .vector_size = sizeof (u32),
+    .format_trace = format_lookup_trace,
+    .sibling_of = "ip6-lookup",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_src_node, lookup_ip6_src)
+
+always_inline uword
+lookup_dpo_mpls_inline (vlib_main_t * vm,
+                       vlib_node_runtime_t * node,
+                       vlib_frame_t * from_frame,
+                       int table_from_interface)
+{
+    u32 n_left_from, next_index, * from, * to_next;
+    u32 cpu_index = os_get_cpu_number();
+    vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+    /* per packet: choose the table, look up the MPLS header, follow the result */
+    from = vlib_frame_vector_args (from_frame);
+    n_left_from = from_frame->n_vectors;
+
+    next_index = node->cached_next_index;
+
+    while (n_left_from > 0)
+    {
+        u32 n_left_to_next;
+
+        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+        /* no dual-packet loop here: every packet takes the single loop, and */
+        /* only bucket 0 of the load-balance is used (no flow hash) */
+
+        while (n_left_from > 0 && n_left_to_next > 0)
+        {
+            u32 bi0, lkdi0, lbi0, fib_index0,  next0;
+            const mpls_unicast_header_t * hdr0;
+            const load_balance_t *lb0;
+            const lookup_dpo_t * lkd0;
+            const dpo_id_t *dpo0;
+            vlib_buffer_t * b0;
+
+            bi0 = from[0];
+            to_next[0] = bi0;
+            from += 1;
+            to_next += 1;
+            n_left_from -= 1;
+            n_left_to_next -= 1;
+
+            b0 = vlib_get_buffer (vm, bi0);
+            hdr0 = vlib_buffer_get_current (b0);
+
+            /* the earlier mpls lookup left the lookup DPO's index in adj_index[TX] */
+            lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            lkd0 = lookup_dpo_get(lkdi0);
+
+            /*
+             * choose between a lookup using the fib index in the DPO
+             * or getting the FIB index from the interface.
+             */
+            if (table_from_interface)
+            {
+                fib_index0 = 
+                    mpls_fib_table_get_index_for_sw_if_index(
+                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+            }
+            else
+            {
+                fib_index0 = lkd0->lkd_fib_index;
+            }
+
+            /* do lookup */
+            lbi0 = mpls_fib_table_forwarding_lookup (fib_index0, hdr0);
+            lb0  = load_balance_get(lbi0);
+            dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+            next0 = dpo0->dpoi_next_node;
+            vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+            vlib_increment_combined_counter
+                (cm, cpu_index, lbi0, 1,
+                 vlib_buffer_length_in_chain (vm, b0));
+
+	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) 
+            {
+                lookup_trace_t *tr = vlib_add_trace (vm, node, 
+                                                     b0, sizeof (*tr));
+                tr->fib_index = fib_index0;
+                tr->lbi = lbi0;
+                tr->hdr = *hdr0;
+            }
+
+           vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+                                            n_left_to_next, bi0, next0);
+        }
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+    return from_frame->n_vectors;
+}
+
+static u8 *
+format_lookup_mpls_trace (u8 * s, va_list * args)
+{
+    /* vm and node lead the argument list; they are consumed but not used */
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    lookup_trace_t * trace = va_arg (*args, lookup_trace_t *);
+    mpls_unicast_header_t host_hdr;
+    uword n_indent;
+
+    n_indent = format_get_indent (s);
+    host_hdr.label_exp_s_ttl = clib_net_to_host_u32(trace->hdr.label_exp_s_ttl);
+    return (format (s, "%U fib-index:%d hdr:%U load-balance:%d",
+                    format_white_space, n_indent,
+                    trace->fib_index,
+                    format_mpls_header, host_hdr,
+                    trace->lbi));
+}
+
+always_inline uword
+lookup_mpls_dst (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_mpls_inline(vm, node, from_frame, 0)); /* 0: use the FIB index held in the DPO */
+}
+
+VLIB_REGISTER_NODE (lookup_mpls_dst_node) = {
+    .function = lookup_mpls_dst,
+    .name = "lookup-mpls-dst",
+    .vector_size = sizeof (u32),
+    .sibling_of = "mpls-lookup", /* NOTE(review): presumably shares mpls-lookup's next nodes, hence none below - confirm */
+    .format_trace = format_lookup_mpls_trace,
+    .n_next_nodes = 0,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_node, lookup_mpls_dst)
+
+always_inline uword
+lookup_mpls_dst_itf (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_mpls_inline(vm, node, from_frame, 1)); /* 1: take the FIB from the packet's RX interface */
+}
+
+VLIB_REGISTER_NODE (lookup_mpls_dst_itf_node) = {
+    .function = lookup_mpls_dst_itf,
+    .name = "lookup-mpls-dst-itf",
+    .vector_size = sizeof (u32),
+    .sibling_of = "mpls-lookup", /* NOTE(review): presumably shares mpls-lookup's next nodes, hence none below - confirm */
+    .format_trace = format_lookup_mpls_trace,
+    .n_next_nodes = 0,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_itf_node, lookup_mpls_dst_itf)
+
+static void
+lookup_dpo_mem_show (void)
+{
+    fib_show_memory_usage("Lookup", /* reports the lookup DPO pool under this name */
+			  pool_elts(lookup_dpo_pool),
+			  pool_len(lookup_dpo_pool),
+			  sizeof(lookup_dpo_t)); /* size of one pool element */
+}
+
+const static dpo_vft_t lkd_vft = { /* registered for each of the lookup sub-types */
+    .dv_lock = lookup_dpo_lock,
+    .dv_unlock = lookup_dpo_unlock,
+    .dv_format = format_lookup_dpo,
+};
+const static dpo_vft_t lkd_vft_w_mem_show = { /* registered for DPO_LOOKUP itself */
+    .dv_lock = lookup_dpo_lock,
+    .dv_unlock = lookup_dpo_unlock,
+    .dv_format = format_lookup_dpo,
+    .dv_mem_show = lookup_dpo_mem_show, /* the only difference from lkd_vft */
+};
+
+const static char* const lookup_src_ip4_nodes[] = /* NULL terminated list of node names */
+{
+    "lookup-ip4-src",
+    NULL,
+};
+const static char* const lookup_src_ip6_nodes[] =
+{
+    "lookup-ip6-src",
+    NULL,
+};
+const static char* const * const lookup_src_nodes[DPO_PROTO_NUM] = /* source address lookup, per protocol */
+{
+    [DPO_PROTO_IP4]  = lookup_src_ip4_nodes,
+    [DPO_PROTO_IP6]  = lookup_src_ip6_nodes,
+    [DPO_PROTO_MPLS] = NULL, /* no source lookup node is listed for MPLS */
+};
+
+const static char* const lookup_dst_ip4_nodes[] =
+{
+    "lookup-ip4-dst",
+    NULL,
+};
+const static char* const lookup_dst_ip6_nodes[] =
+{
+    "lookup-ip6-dst",
+    NULL,
+};
+const static char* const lookup_dst_mpls_nodes[] =
+{
+    "lookup-mpls-dst",
+    NULL,
+};
+const static char* const * const lookup_dst_nodes[DPO_PROTO_NUM] = /* destination lookup in the DPO's FIB */
+{
+    [DPO_PROTO_IP4]  = lookup_dst_ip4_nodes,
+    [DPO_PROTO_IP6]  = lookup_dst_ip6_nodes,
+    [DPO_PROTO_MPLS] = lookup_dst_mpls_nodes,
+};
+
+const static char* const lookup_dst_from_interface_ip4_nodes[] =
+{
+    "lookup-ip4-dst-itf",
+    NULL,
+};
+const static char* const lookup_dst_from_interface_ip6_nodes[] =
+{
+    "lookup-ip6-dst-itf",
+    NULL,
+};
+const static char* const lookup_dst_from_interface_mpls_nodes[] =
+{
+    "lookup-mpls-dst-itf",
+    NULL,
+};
+const static char* const * const lookup_dst_from_interface_nodes[DPO_PROTO_NUM] = /* destination lookup, FIB from the interface */
+{
+    [DPO_PROTO_IP4]  = lookup_dst_from_interface_ip4_nodes,
+    [DPO_PROTO_IP6]  = lookup_dst_from_interface_ip6_nodes,
+    [DPO_PROTO_MPLS] = lookup_dst_from_interface_mpls_nodes,
+};
+
+
+void
+lookup_dpo_module_init (void)
+{
+    dpo_register(DPO_LOOKUP, &lkd_vft_w_mem_show, NULL); /* NULL: no node list for the base type */
+
+    /*
+     * There are various sorts of lookup; src or dst addr, v4/v6 etc.
+     * There isn't an object type for each (there is only the lookup_dpo_t),
+     * but, for performance reasons, there is a data plane function, and hence
+     * a VLIB node, for each. VLIB graph node construction is based on DPO
+     * types, so we create one sub-type per set of nodes and remember its ID.
+     */
+    lookup_dpo_sub_types[LOOKUP_SUB_TYPE_SRC] =
+        dpo_register_new_type(&lkd_vft, lookup_src_nodes);
+    lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST] =
+        dpo_register_new_type(&lkd_vft, lookup_dst_nodes);
+    lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE] =
+        dpo_register_new_type(&lkd_vft, lookup_dst_from_interface_nodes);
+}
diff --git a/src/vnet/dpo/lookup_dpo.h b/src/vnet/dpo/lookup_dpo.h
new file mode 100644
index 00000000..ff283388
--- /dev/null
+++ b/src/vnet/dpo/lookup_dpo.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOOKUP_DPO_H__
+#define __LOOKUP_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * Switch to use the packet's source or destination address for the lookup
+ */
+typedef enum lookup_input_t_ {
+    LOOKUP_INPUT_SRC_ADDR, /* named "src-address" in LOOKUP_INPUTS */
+    LOOKUP_INPUT_DST_ADDR, /* named "dst-address" in LOOKUP_INPUTS */
+} __attribute__ ((packed)) lookup_input_t;
+
+#define LOOKUP_INPUTS {                         \
+    [LOOKUP_INPUT_SRC_ADDR] = "src-address",    \
+    [LOOKUP_INPUT_DST_ADDR] = "dst-address",    \
+}
+
+/**
+ * Switch to use the table of the input interface or the configured table
+ */
+typedef enum lookup_table_t_ {
+    LOOKUP_TABLE_FROM_INPUT_INTERFACE,
+    LOOKUP_TABLE_FROM_CONFIG,
+} __attribute__ ((packed)) lookup_table_t;
+
+#define LOOKUP_TABLES {                                             \
+    [LOOKUP_TABLE_FROM_INPUT_INTERFACE] = "table-input-interface",  \
+    [LOOKUP_TABLE_FROM_CONFIG] = "table-configured",                \
+}
+
+/**
+ * A data-path object that sends the packet for a further lookup in a FIB
+ */
+typedef struct lookup_dpo_t
+{
+    /**
+     * The FIB, or interface from which to get a FIB, in which to perform
+     * the next lookup;
+     */
+    fib_node_index_t lkd_fib_index;
+
+    /**
+     * The protocol of the FIB for the lookup, and hence
+     * the protocol of the packet
+     */
+    dpo_proto_t lkd_proto;
+
+    /**
+     * Switch to use the packet's src or dst address as the lookup key
+     */
+    lookup_input_t lkd_input;
+
+    /**
+     * Switch to use the table index passed, or the table of the input interface
+     */
+    lookup_table_t lkd_table;
+
+    /**
+     * Number of locks held on this object
+     */
+    u16 lkd_locks;
+} lookup_dpo_t;
+
+extern void lookup_dpo_add_or_lock_w_fib_index(fib_node_index_t fib_index,
+                                               dpo_proto_t proto,
+                                               lookup_input_t input,
+                                               lookup_table_t table,
+                                               dpo_id_t *dpo);
+extern void lookup_dpo_add_or_lock_w_table_id(u32 table_id,
+                                              dpo_proto_t proto,
+                                              lookup_input_t input,
+                                              lookup_table_t table,
+                                              dpo_id_t *dpo);
+
+extern u8* format_lookup_dpo(u8 *s, va_list *args);
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern lookup_dpo_t *lookup_dpo_pool;
+
+static inline lookup_dpo_t *
+lookup_dpo_get (index_t index)
+{
+    return (pool_elt_at_index(lookup_dpo_pool, index)); /* the lookup DPO stored at 'index' in the global pool */
+}
+
+extern void lookup_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
new file mode 100644
index 00000000..bbdc9666
--- /dev/null
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -0,0 +1,570 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * pool of all MPLS Label DPOs
+ */
+mpls_label_dpo_t *mpls_label_dpo_pool;
+
+static mpls_label_dpo_t *
+mpls_label_dpo_alloc (void)
+{
+    mpls_label_dpo_t *new_mld;
+
+    /* take a cache-line aligned element from the pool and start it zeroed */
+    pool_get_aligned(mpls_label_dpo_pool, new_mld, CLIB_CACHE_LINE_BYTES);
+    memset(new_mld, 0, sizeof(new_mld[0]));
+    dpo_reset(&new_mld->mld_dpo);
+
+    return (new_mld);
+}
+
+static index_t
+mpls_label_dpo_get_index (mpls_label_dpo_t *mld)
+{
+    return (mld - mpls_label_dpo_pool); /* the index is the element's offset from the pool base */
+}
+
+index_t
+mpls_label_dpo_create (mpls_label_t *label_stack,
+                       mpls_eos_bit_t eos,
+                       u8 ttl,
+                       u8 exp,
+                       dpo_proto_t payload_proto,
+                       const dpo_id_t *dpo)
+{
+    mpls_label_dpo_t *mld;
+    u32 n_labels, ii;
+
+    /*
+     * an empty stack would wrap the 'last label' index, and a stack deeper
+     * than mld_hdr[] would write past the end of the object. State both
+     * preconditions before anything is allocated.
+     */
+    n_labels = vec_len(label_stack);
+    ASSERT(0 != n_labels);
+    ASSERT(n_labels <= sizeof(mld->mld_hdr) / sizeof(mld->mld_hdr[0]));
+
+    mld = mpls_label_dpo_alloc();
+    mld->mld_n_labels = n_labels;
+    mld->mld_n_hdr_bytes = n_labels * sizeof(mld->mld_hdr[0]);
+    mld->mld_payload_proto = payload_proto;
+
+    /*
+     * construct a label rewrite header for each value passed. Only the
+     * inner most label takes the caller's TTL, EXP and EOS; the labels
+     * outside it get TTL 255, EXP 0 and non-EOS.
+     */
+    for (ii = 0; ii < n_labels; ii++)
+    {
+        const int inner_most = (ii + 1 == n_labels);
+
+        vnet_mpls_uc_set_label(&mld->mld_hdr[ii].label_exp_s_ttl, label_stack[ii]);
+        vnet_mpls_uc_set_ttl(&mld->mld_hdr[ii].label_exp_s_ttl,
+                             inner_most ? ttl : 255);
+        vnet_mpls_uc_set_exp(&mld->mld_hdr[ii].label_exp_s_ttl,
+                             inner_most ? exp : 0);
+        vnet_mpls_uc_set_s(&mld->mld_hdr[ii].label_exp_s_ttl,
+                           inner_most ? eos : MPLS_NON_EOS);
+        /* network byte order, since the header is painted on packets as is */
+        mld->mld_hdr[ii].label_exp_s_ttl =
+            clib_host_to_net_u32(mld->mld_hdr[ii].label_exp_s_ttl);
+    }
+
+    /* stack this label object on its parent */
+    dpo_stack(DPO_MPLS_LABEL,
+              mld->mld_payload_proto,
+              &mld->mld_dpo,
+              dpo);
+
+    return (mpls_label_dpo_get_index(mld));
+}
+
+u8*
+format_mpls_label_dpo (u8 *s, va_list *args)
+{
+    index_t mldi = va_arg (*args, index_t);
+    u32 indent = va_arg (*args, u32);
+    mpls_label_dpo_t *mld = mpls_label_dpo_get(mldi);
+    mpls_unicast_header_t host_hdr;
+    u32 label_i;
+
+    s = format(s, "mpls-label:[%d]:", mldi);
+
+    /* the stored headers are in network byte order; swap each to print it */
+    for (label_i = 0; label_i < mld->mld_n_labels; label_i++)
+    {
+        host_hdr.label_exp_s_ttl =
+            clib_net_to_host_u32(mld->mld_hdr[label_i].label_exp_s_ttl);
+        s = format(s, "%U", format_mpls_header, host_hdr);
+    }
+
+    /* the child DPO follows on a new line and is given a deeper indent */
+    s = format(s, "\n%U", format_white_space, indent);
+    s = format(s, "%U", format_dpo_id, &mld->mld_dpo, indent+2);
+
+    return (s);
+}
+
+static void
+mpls_label_dpo_lock (dpo_id_t *dpo)
+{
+    /*
+     * take a reference on the label object the DPO ID refers to
+     */
+    mpls_label_dpo_t *mld = mpls_label_dpo_get(dpo->dpoi_index);
+    mld->mld_locks += 1;
+}
+
+static void
+mpls_label_dpo_unlock (dpo_id_t *dpo)
+{
+    mpls_label_dpo_t *mld = mpls_label_dpo_get(dpo->dpoi_index);
+
+    mld->mld_locks -= 1;
+    if (0 != mld->mld_locks)
+    {
+        return;
+    }
+
+    /* last user gone: let go of the child DPO, then free the element */
+    dpo_reset(&mld->mld_dpo);
+    pool_put(mpls_label_dpo_pool, mld);
+}
+
+/**
+ * @brief A struct to hold tracing information for the MPLS label imposition
+ * node.
+ */
+typedef struct mpls_label_imposition_trace_t_
+{
+    /**
+     * The inner most MPLS header imposed, copied as painted on the packet
+     */
+    mpls_unicast_header_t hdr;
+} mpls_label_imposition_trace_t;
+
+/**
+ * @brief Impose the label stack of an MPLS label DPO on each packet.
+ *
+ * For every buffer: work out the TTL for the inner most imposed label
+ * (the decremented IP TTL/hop-limit, the decremented stashed MPLS TTL on
+ * a swap, or 255 for a further stacked LSP), prepend the pre-built
+ * headers and send the packet on to the label DPO's child.
+ * payload_is_ip4/payload_is_ip6 select the payload type; both 0 is MPLS.
+ */
+always_inline uword
+mpls_label_imposition_inline (vlib_main_t * vm,
+                              vlib_node_runtime_t * node,
+                              vlib_frame_t * from_frame,
+                              u8 payload_is_ip4,
+                              u8 payload_is_ip6)
+{
+    u32 n_left_from, next_index, * from, * to_next;
+
+    from = vlib_frame_vector_args (from_frame);
+    n_left_from = from_frame->n_vectors;
+
+    next_index = node->cached_next_index;
+
+    while (n_left_from > 0)
+    {
+        u32 n_left_to_next;
+
+        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+        while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+            mpls_unicast_header_t *hdr0, *hdr1;
+            mpls_label_dpo_t *mld0, *mld1;
+            u32 bi0, mldi0, bi1, mldi1;
+            vlib_buffer_t * b0, *b1;
+            u32 next0, next1;
+            u8 ttl0, ttl1;
+
+            bi0 = to_next[0] = from[0];
+            bi1 = to_next[1] = from[1];
+
+            /* Prefetch next iteration. */
+            {
+                vlib_buffer_t * p2, * p3;
+
+                p2 = vlib_get_buffer (vm, from[2]);
+                p3 = vlib_get_buffer (vm, from[3]);
+
+                vlib_prefetch_buffer_header (p2, STORE);
+                vlib_prefetch_buffer_header (p3, STORE);
+
+                CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
+                CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE);
+            }
+
+            from += 2;
+            to_next += 2;
+            n_left_from -= 2;
+            n_left_to_next -= 2;
+
+            b0 = vlib_get_buffer (vm, bi0);
+            b1 = vlib_get_buffer (vm, bi1);
+
+            /* the previous node left the label DPO's index in adj_index[VLIB_TX] */
+            mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            mldi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+            mld0 = mpls_label_dpo_get(mldi0);
+            mld1 = mpls_label_dpo_get(mldi1);
+
+            if (payload_is_ip4)
+            {
+                /* decrement the TTL on ingress to the LSP, patching the checksum */
+                ip4_header_t * ip0 = vlib_buffer_get_current(b0);
+                ip4_header_t * ip1 = vlib_buffer_get_current(b1);
+                u32 checksum0;
+                u32 checksum1;
+
+                checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+                checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
+
+                checksum0 += checksum0 >= 0xffff;
+                checksum1 += checksum1 >= 0xffff;
+
+                ip0->checksum = checksum0;
+                ip1->checksum = checksum1;
+
+                ip0->ttl -= 1;
+                ip1->ttl -= 1;
+
+                ttl1 = ip1->ttl;
+                ttl0 = ip0->ttl;
+            }
+            else if (payload_is_ip6)
+            {
+                /* decrement the hop-limit on ingress to the LSP */
+                ip6_header_t * ip0 = vlib_buffer_get_current(b0);
+                ip6_header_t * ip1 = vlib_buffer_get_current(b1);
+
+                ip0->hop_limit -= 1;
+                ip1->hop_limit -= 1;
+
+                ttl0 = ip0->hop_limit;
+                ttl1 = ip1->hop_limit;
+            }
+            else
+            {
+                /*
+                 * else, the packet to be encapped is an MPLS packet
+                 */
+                if (PREDICT_TRUE(vnet_buffer(b0)->mpls.first))
+                {
+                    /*
+                     * The first label to be imposed on the packet. this is a label swap.
+                     * in which case we stashed the TTL and EXP bits in the
+                     * packet in the lookup node
+                     */
+                    ASSERT(0 != vnet_buffer (b0)->mpls.ttl);
+
+                    ttl0 = vnet_buffer(b0)->mpls.ttl - 1;
+                }
+                else
+                {
+                    /*
+                     * not the first label. implying we are recursing down a chain of
+                     * output labels.
+                     * Each layer is considered a new LSP - hence the TTL is reset.
+                     */
+                    ttl0 = 255;
+                }
+                if (PREDICT_TRUE(vnet_buffer(b1)->mpls.first))
+                {
+                    ASSERT(0 != vnet_buffer (b1)->mpls.ttl); /* as for b0: a TTL of 0 would wrap below */
+                    ttl1 = vnet_buffer(b1)->mpls.ttl - 1;
+                }
+                else
+                {
+                    ttl1 = 255;
+                }
+            }
+            vnet_buffer(b0)->mpls.first = 0;
+            vnet_buffer(b1)->mpls.first = 0;
+
+            /* Paint the MPLS header */
+            vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
+            vlib_buffer_advance(b1, -(mld1->mld_n_hdr_bytes));
+
+            hdr0 = vlib_buffer_get_current(b0);
+            hdr1 = vlib_buffer_get_current(b1);
+
+            clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
+            clib_memcpy(hdr1, mld1->mld_hdr, mld1->mld_n_hdr_bytes);
+
+            /* fixup the TTL for the inner most label */
+            hdr0 = hdr0 + (mld0->mld_n_labels - 1);
+            hdr1 = hdr1 + (mld1->mld_n_labels - 1);
+            ((char*)hdr0)[3] = ttl0;
+            ((char*)hdr1)[3] = ttl1;
+
+            next0 = mld0->mld_dpo.dpoi_next_node;
+            next1 = mld1->mld_dpo.dpoi_next_node;
+            vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
+            vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mld1->mld_dpo.dpoi_index;
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_label_imposition_trace_t *tr =
+                    vlib_add_trace (vm, node, b0, sizeof (*tr));
+                tr->hdr = *hdr0;
+            }
+            if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_label_imposition_trace_t *tr =
+                    vlib_add_trace (vm, node, b1, sizeof (*tr));
+                tr->hdr = *hdr1;
+            }
+
+            vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+                                            n_left_to_next,
+                                            bi0, bi1, next0, next1);
+        }
+
+        while (n_left_from > 0 && n_left_to_next > 0)
+        {
+            mpls_unicast_header_t *hdr0;
+            mpls_label_dpo_t *mld0;
+            vlib_buffer_t * b0;
+            u32 bi0, mldi0;
+            u32 next0;
+            u8 ttl;
+
+            bi0 = from[0];
+            to_next[0] = bi0;
+            from += 1;
+            to_next += 1;
+            n_left_from -= 1;
+            n_left_to_next -= 1;
+
+            b0 = vlib_get_buffer (vm, bi0);
+
+            /* the previous node left the label DPO's index in adj_index[VLIB_TX] */
+            mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            mld0 = mpls_label_dpo_get(mldi0);
+
+            if (payload_is_ip4)
+            {
+                /* decrement the TTL on ingress to the LSP, patching the checksum */
+                ip4_header_t * ip0 = vlib_buffer_get_current(b0);
+                u32 checksum0;
+
+                checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+                checksum0 += checksum0 >= 0xffff;
+
+                ip0->checksum = checksum0;
+                ip0->ttl -= 1;
+                ttl = ip0->ttl;
+            }
+            else if (payload_is_ip6)
+            {
+                /* decrement the hop-limit on ingress to the LSP */
+                ip6_header_t * ip0 = vlib_buffer_get_current(b0);
+
+                ip0->hop_limit -= 1;
+                ttl = ip0->hop_limit;
+            }
+            else
+            {
+                /*
+                 * else, the packet to be encapped is an MPLS packet
+                 */
+                if (vnet_buffer(b0)->mpls.first)
+                {
+                    /*
+                     * The first label to be imposed on the packet. this is a label swap.
+                     * in which case we stashed the TTL and EXP bits in the
+                     * packet in the lookup node
+                     */
+                    ASSERT(0 != vnet_buffer (b0)->mpls.ttl);
+
+                    ttl = vnet_buffer(b0)->mpls.ttl - 1;
+                }
+                else
+                {
+                    /*
+                     * not the first label. implying we are recursing down a chain of
+                     * output labels.
+                     * Each layer is considered a new LSP - hence the TTL is reset.
+                     */
+                    ttl = 255;
+                }
+            }
+            vnet_buffer(b0)->mpls.first = 0;
+
+            /* Paint the MPLS header */
+            vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
+            hdr0 = vlib_buffer_get_current(b0);
+            clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
+
+            /* fixup the TTL for the inner most label */
+            hdr0 = hdr0 + (mld0->mld_n_labels - 1);
+            ((char*)hdr0)[3] = ttl;
+
+            next0 = mld0->mld_dpo.dpoi_next_node;
+            vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_label_imposition_trace_t *tr =
+                    vlib_add_trace (vm, node, b0, sizeof (*tr));
+                tr->hdr = *hdr0;
+            }
+
+            vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+                                            n_left_to_next, bi0, next0);
+        }
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+    return from_frame->n_vectors;
+}
+
+static u8 *
+format_mpls_label_imposition_trace (u8 * s, va_list * args)
+{
+    /* vm and node lead the argument list; they are consumed but not used */
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    mpls_label_imposition_trace_t * trace =
+        va_arg (*args, mpls_label_imposition_trace_t *);
+    uword n_indent = format_get_indent (s);
+    mpls_unicast_header_t host_hdr;
+
+    /* the trace holds the header as painted, i.e. in network byte order */
+    host_hdr.label_exp_s_ttl = clib_net_to_host_u32(trace->hdr.label_exp_s_ttl);
+
+    return (format (s, "%Umpls-header:%U",
+                    format_white_space, n_indent,
+                    format_mpls_header, host_hdr));
+}
+
+static uword
+mpls_label_imposition (vlib_main_t * vm,
+                       vlib_node_runtime_t * node,
+                       vlib_frame_t * frame)
+{
+    return (mpls_label_imposition_inline(vm, node, frame, 0, 0)); /* neither ip4 nor ip6: MPLS payload */
+}
+
+VLIB_REGISTER_NODE (mpls_label_imposition_node) = {
+    .function = mpls_label_imposition,
+    .name = "mpls-label-imposition", /* the name listed for DPO_PROTO_MPLS in mpls_label_imp_nodes */
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_mpls_label_imposition_trace,
+    .n_next_nodes = 1,
+    .next_nodes = {
+        [0] = "error-drop",
+    }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_label_imposition_node,
+                              mpls_label_imposition)
+
+static uword
+ip4_mpls_label_imposition (vlib_main_t * vm,
+                           vlib_node_runtime_t * node,
+                           vlib_frame_t * frame)
+{
+    return (mpls_label_imposition_inline(vm, node, frame, 1, 0)); /* payload_is_ip4 set */
+}
+
+VLIB_REGISTER_NODE (ip4_mpls_label_imposition_node) = {
+    .function = ip4_mpls_label_imposition,
+    .name = "ip4-mpls-label-imposition", /* the name listed for DPO_PROTO_IP4 in mpls_label_imp_nodes */
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_mpls_label_imposition_trace,
+    .n_next_nodes = 1,
+    .next_nodes = {
+        [0] = "error-drop",
+    }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_mpls_label_imposition_node,
+                              ip4_mpls_label_imposition)
+
+static uword
+ip6_mpls_label_imposition (vlib_main_t * vm,
+                           vlib_node_runtime_t * node,
+                           vlib_frame_t * frame)
+{
+    return (mpls_label_imposition_inline(vm, node, frame, 0, 1)); /* payload_is_ip6 set */
+}
+
+VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = {
+    .function = ip6_mpls_label_imposition,
+    .name = "ip6-mpls-label-imposition", /* the name listed for DPO_PROTO_IP6 in mpls_label_imp_nodes */
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_mpls_label_imposition_trace,
+    .n_next_nodes = 1,
+    .next_nodes = {
+        [0] = "error-drop",
+    }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_imposition_node,
+                              ip6_mpls_label_imposition)
+
+static void
+mpls_label_dpo_mem_show (void)
+{
+    fib_show_memory_usage("MPLS label", /* reports the MPLS label DPO pool under this name */
+			  pool_elts(mpls_label_dpo_pool),
+			  pool_len(mpls_label_dpo_pool),
+			  sizeof(mpls_label_dpo_t)); /* size of one pool element */
+}
+
+const static dpo_vft_t mld_vft = { /* the functions registered for DPO_MPLS_LABEL */
+    .dv_lock = mpls_label_dpo_lock,
+    .dv_unlock = mpls_label_dpo_unlock,
+    .dv_format = format_mpls_label_dpo,
+    .dv_mem_show = mpls_label_dpo_mem_show,
+};
+
+const static char* const mpls_label_imp_ip4_nodes[] = /* NULL terminated list of node names */
+{
+    "ip4-mpls-label-imposition",
+    NULL,
+};
+const static char* const mpls_label_imp_ip6_nodes[] =
+{
+    "ip6-mpls-label-imposition",
+    NULL,
+};
+const static char* const mpls_label_imp_mpls_nodes[] =
+{
+    "mpls-label-imposition",
+    NULL,
+};
+const static char* const * const mpls_label_imp_nodes[DPO_PROTO_NUM] = /* imposition node per payload protocol */
+{
+    [DPO_PROTO_IP4]  = mpls_label_imp_ip4_nodes,
+    [DPO_PROTO_IP6]  = mpls_label_imp_ip6_nodes,
+    [DPO_PROTO_MPLS] = mpls_label_imp_mpls_nodes,
+};
+
+
+void
+mpls_label_dpo_module_init (void)
+{
+    dpo_register(DPO_MPLS_LABEL, &mld_vft, mpls_label_imp_nodes); /* type, its functions, its per-protocol nodes */
+}
diff --git a/src/vnet/dpo/mpls_label_dpo.h b/src/vnet/dpo/mpls_label_dpo.h
new file mode 100644
index 00000000..89bcb093
--- /dev/null
+++ b/src/vnet/dpo/mpls_label_dpo.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_LABEL_DPO_H__
+#define __MPLS_LABEL_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of an MPLS label stack for imposition in the data-path
+ */
+typedef struct mpls_label_dpo_t
+{
+    /**
+     * The MPLS label headers to impose, in network byte order. Outer most label first.
+     */
+    mpls_unicast_header_t mld_hdr[8];
+
+    /**
+     * Next DPO in the graph
+     */
+    dpo_id_t mld_dpo;
+
+    /**
+     * The protocol of the payload/packets that are being encapped
+     */
+    dpo_proto_t mld_payload_proto;
+
+    /**
+     * Size of the label stack, i.e. the number of mld_hdr entries in use
+     */
+    u16 mld_n_labels;
+
+    /**
+     * Cached amount of header bytes to paint (mld_n_labels headers)
+     */
+    u16 mld_n_hdr_bytes;
+
+    /**
+     * Number of locks/users of the label
+     */
+    u16 mld_locks;
+} mpls_label_dpo_t;
+
+/**
+ * @brief Assert that the MPLS label object is less than a cache line in size.
+ * Should this get any bigger then we will need to reconsider how many labels
+ * can be pushed in one object.
+ */
+_Static_assert((sizeof(mpls_label_dpo_t) <= CLIB_CACHE_LINE_BYTES),
+	       "MPLS label DPO is larger than one cache line.");
+
+/**
+ * @brief Create an MPLS label object
+ *
+ * @param label_stack The stack of labels to impose, outer most label first
+ * @param eos The inner most label's EOS bit
+ * @param ttl The inner most label's TTL value
+ * @param exp The inner most label's EXP bits
+ * @param payload_proto The protocol of the payload packets that will
+ *                      be imposed with this label header.
+ * @param dpo The parent of the created MPLS label object
+ */
+extern index_t mpls_label_dpo_create(mpls_label_t *label_stack,
+                                     mpls_eos_bit_t eos,
+                                     u8 ttl,
+                                     u8 exp,
+                                     dpo_proto_t payload_proto,
+				     const dpo_id_t *dpo);
+
+extern u8* format_mpls_label_dpo(u8 *s, va_list *args);
+
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern mpls_label_dpo_t *mpls_label_dpo_pool;
+
+static inline mpls_label_dpo_t *
+mpls_label_dpo_get (index_t index)
+{
+    return (pool_elt_at_index(mpls_label_dpo_pool, index)); /* the label DPO stored at 'index' in the global pool */
+}
+
+extern void mpls_label_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/punt_dpo.c b/src/vnet/dpo/punt_dpo.c
new file mode 100644
index 00000000..d1661dcc
--- /dev/null
+++ b/src/vnet/dpo/punt_dpo.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing punting the packet
+ */
+
+#include <vnet/dpo/dpo.h>
+
+static dpo_id_t punt_dpos[DPO_PROTO_NUM];
+
+const dpo_id_t *
+punt_dpo_get (dpo_proto_t proto)
+{
+    dpo_id_t *punt = &punt_dpos[proto]; /* the one static punt DPO kept for this protocol */
+    dpo_set(punt, DPO_PUNT, proto, 1);
+    return (punt);
+}
+
+int
+dpo_is_punt (const dpo_id_t *dpo)
+{
+    return (DPO_PUNT == dpo->dpoi_type); /* non-zero only for a DPO of the punt type */
+}
+
+static void
+punt_dpo_lock (dpo_id_t *dpo)
+{
+    /*
+     * not maintaining a lock count on the punt;
+     * it is more trouble than it's worth.
+     * There always needs to be one around, so no point in managing its lifetime
+     */
+}
+static void
+punt_dpo_unlock (dpo_id_t *dpo)
+{ /* intentionally empty: no lock count is kept for the punt DPO */
+}
+
+static u8*
+format_punt_dpo (u8 *s, va_list *ap)
+{
+    CLIB_UNUSED(index_t index) = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+    /* the index and indent arguments are consumed above but not printed */
+    return (format(s, "dpo-punt"));
+}
+
+const static dpo_vft_t punt_vft = { /* the functions registered for DPO_PUNT */
+    .dv_lock   = punt_dpo_lock,
+    .dv_unlock = punt_dpo_unlock,
+    .dv_format = format_punt_dpo,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a punt
+ *        object.
+ *
+ * This means that these graph nodes are ones from which a punt is the
+ * parent object in the DPO-graph.
+ */
+const static char* const punt_ip4_nodes[] = /* NULL terminated list of node names */
+{
+    "ip4-punt",
+    NULL,
+};
+const static char* const punt_ip6_nodes[] =
+{
+    "ip6-punt",
+    NULL,
+};
+const static char* const punt_mpls_nodes[] =
+{
+    "mpls-punt",
+    NULL,
+};
+const static char* const * const punt_nodes[DPO_PROTO_NUM] = /* indexed by DPO protocol */
+{
+    [DPO_PROTO_IP4]  = punt_ip4_nodes,
+    [DPO_PROTO_IP6]  = punt_ip6_nodes,
+    [DPO_PROTO_MPLS] = punt_mpls_nodes,
+};
+
+void
+punt_dpo_module_init (void)
+{
+    dpo_register(DPO_PUNT, &punt_vft, punt_nodes); /* type, its functions, its per-protocol nodes */
+}
diff --git a/src/vnet/dpo/punt_dpo.h b/src/vnet/dpo/punt_dpo.h
new file mode 100644
index 00000000..370547c1
--- /dev/null
+++ b/src/vnet/dpo/punt_dpo.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief A DPO to punt packets to the Control-plane
+ */
+
+#ifndef __PUNT_DPO_H__
+#define __PUNT_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+extern int dpo_is_punt(const dpo_id_t *dpo);
+
+extern const dpo_id_t *punt_dpo_get(dpo_proto_t proto);
+
+extern void punt_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/receive_dpo.c b/src/vnet/dpo/receive_dpo.c
new file mode 100644
index 00000000..2b2571c6
--- /dev/null
+++ b/src/vnet/dpo/receive_dpo.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing receiving the packet, i.e. it's for-us
+ */
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/receive_dpo.h>
+
+/**
+ * @brief pool of all receive DPOs
+ */
+receive_dpo_t *receive_dpo_pool;
+
+static receive_dpo_t *
+receive_dpo_alloc (void)
+{
+    receive_dpo_t *rd;
+
+    pool_get_aligned(receive_dpo_pool, rd, CLIB_CACHE_LINE_BYTES);
+    memset(rd, 0, sizeof(*rd));
+
+    return (rd);
+}
+
+static receive_dpo_t *
+receive_dpo_get_from_dpo (const dpo_id_t *dpo)
+{
+    ASSERT(DPO_RECEIVE == dpo->dpoi_type);
+
+    return (receive_dpo_get(dpo->dpoi_index));
+}
+
+
+/*
+ * receive_dpo_add_or_lock
+ *
+ * The next_hop address here is used for source address selection in the DP.
+ * The local adj is added to an interface's receive prefix, the next-hop
+ * passed here is the local prefix on the same interface.
+ */
+void
+receive_dpo_add_or_lock (dpo_proto_t proto,
+                         u32 sw_if_index,
+                         const ip46_address_t *nh_addr,
+                         dpo_id_t *dpo)
+{
+    receive_dpo_t *rd;
+
+    rd = receive_dpo_alloc();
+
+    rd->rd_sw_if_index = sw_if_index;
+    if (NULL != nh_addr)
+    {
+	rd->rd_addr = *nh_addr;
+    }
+
+    dpo_set(dpo, DPO_RECEIVE, proto, (rd - receive_dpo_pool));
+}
+
+static void
+receive_dpo_lock (dpo_id_t *dpo)
+{
+    receive_dpo_t *rd;
+
+    rd = receive_dpo_get_from_dpo(dpo);
+    rd->rd_locks++;
+}
+
+static void
+receive_dpo_unlock (dpo_id_t *dpo)
+{
+    receive_dpo_t *rd;
+
+    rd = receive_dpo_get_from_dpo(dpo);
+    rd->rd_locks--;
+
+    if (0 == rd->rd_locks)
+    {
+        pool_put(receive_dpo_pool, rd);
+    }
+}
+
+static u8*
+format_receive_dpo (u8 *s, va_list *ap)
+{
+    CLIB_UNUSED(index_t index) = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+    vnet_main_t * vnm = vnet_get_main();
+    receive_dpo_t *rd;
+
+    rd = receive_dpo_get(index);
+
+    if (~0 != rd->rd_sw_if_index)
+    {
+        return (format(s, "dpo-receive: %U on %U",
+                       format_ip46_address, &rd->rd_addr, IP46_TYPE_ANY,
+                       format_vnet_sw_interface_name, vnm,
+                       vnet_get_sw_interface(vnm, rd->rd_sw_if_index)));
+    }
+    else
+    {
+        return (format(s, "dpo-receive"));
+    }
+}
+
+static void
+receive_dpo_mem_show (void)
+{
+    fib_show_memory_usage("Receive",
+			  pool_elts(receive_dpo_pool),
+			  pool_len(receive_dpo_pool),
+			  sizeof(receive_dpo_t));
+}
+
+const static dpo_vft_t receive_vft = {
+    .dv_lock = receive_dpo_lock,
+    .dv_unlock = receive_dpo_unlock,
+    .dv_format = format_receive_dpo,
+    .dv_mem_show = receive_dpo_mem_show,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a receive
+ *        object.
+ *
+ * this means that these graph nodes are ones from which a receive is the
+ * parent object in the DPO-graph.
+ */
+const static char* const receive_ip4_nodes[] =
+{
+    "ip4-local",
+    NULL,
+};
+const static char* const receive_ip6_nodes[] =
+{
+    "ip6-local",
+    NULL,
+};
+
+const static char* const * const receive_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = receive_ip4_nodes,
+    [DPO_PROTO_IP6]  = receive_ip6_nodes,
+    [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+receive_dpo_module_init (void)
+{
+    dpo_register(DPO_RECEIVE, &receive_vft, receive_nodes);
+}
diff --git a/src/vnet/dpo/receive_dpo.h b/src/vnet/dpo/receive_dpo.h
new file mode 100644
index 00000000..2420fd78
--- /dev/null
+++ b/src/vnet/dpo/receive_dpo.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing receiving the packet, i.e. it's for-us
+ */
+
+#ifndef __RECEIVE_DPO_H__
+#define __RECEIVE_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/ip/ip6.h>
+
+typedef struct receive_dpo_t_
+{
+    /**
+     * The Software interface index on which traffic is received
+     */
+    u32 rd_sw_if_index;
+
+    /**
+     * The address on the receive interface. Packets are destined to this address
+     */
+    ip46_address_t rd_addr;
+
+    /**
+     * number of locks.
+     */
+    u16 rd_locks;
+} receive_dpo_t;
+
+extern void receive_dpo_add_or_lock (dpo_proto_t proto,
+                                     u32 sw_if_index,
+                                     const ip46_address_t *nh_addr,
+                                     dpo_id_t *dpo);
+
+extern void receive_dpo_module_init(void);
+
+/**
+ * @brief pool of all receive DPOs; declared here, defined in receive_dpo.c
+ */
+extern receive_dpo_t *receive_dpo_pool;
+
+static inline receive_dpo_t *
+receive_dpo_get (index_t index)
+{
+    return (pool_elt_at_index(receive_dpo_pool, index));
+}
+
+#endif
-- 
cgit 1.2.3-korg


From c0790cfef0bd1c56f4c75dc4f959584148386258 Mon Sep 17 00:00:00 2001
From: Neale Ranns <neale.ranns@cisco.com>
Date: Thu, 5 Jan 2017 01:01:47 -0800
Subject: FIB memory leaks (VPP-578)

1) vec_free the fe_srcs of a fib_entry_t when the fib_entry_t is itself freed
2) in the load-balance fixup if a drop path is required add this to a new vector of next-hops 'fixed_nhs'. This vector is managed by the load-balance function. The caller continues to manage its own set. The function is now const implying that the caller is safe to assume the next-hops do not change.

Change-Id: I0f29203ee16b9a270f40edf237488fa99ba65320
Signed-off-by: Neale Ranns <nranns@cisco.com>
Signed-off-by: Neale Ranns <neale.ranns@cisco.com>
---
 src/vnet/dpo/load_balance.c  | 33 ++++++++++++++++++---------------
 src/vnet/dpo/load_balance.h  |  2 +-
 src/vnet/fib/fib_entry.c     |  1 +
 src/vnet/fib/fib_entry_src.c |  8 ++++++++
 src/vnet/fib/fib_path_list.c | 26 ++++++++++++++++++--------
 src/vnet/fib/fib_path_list.h |  2 ++
 6 files changed, 48 insertions(+), 24 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index a244776f..e70a7a30 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -279,8 +279,8 @@ load_balance_get_bucket (index_t lbi,
 }
 
 static int
-next_hop_sort_by_weight (load_balance_path_t * n1,
-                         load_balance_path_t * n2)
+next_hop_sort_by_weight (const load_balance_path_t * n1,
+                         const load_balance_path_t * n2)
 {
     return ((int) n1->path_weight - (int) n2->path_weight);
 }
@@ -289,7 +289,7 @@ next_hop_sort_by_weight (load_balance_path_t * n1,
    with weights corresponding to the number of adjacencies for each next hop.
    Returns number of adjacencies in block. */
 u32
-ip_multipath_normalize_next_hops (load_balance_path_t * raw_next_hops,
+ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
                                   load_balance_path_t ** normalized_next_hops,
                                   u32 *sum_weight_in,
                                   f64 multipath_next_hop_error_tolerance)
@@ -409,23 +409,25 @@ done:
 }
 
 static load_balance_path_t *
-load_balance_multipath_next_hop_fixup (load_balance_path_t *nhs,
+load_balance_multipath_next_hop_fixup (const load_balance_path_t *nhs,
                                        dpo_proto_t drop_proto)
 {
     if (0 == vec_len(nhs))
     {
-        load_balance_path_t *nh;
+        load_balance_path_t *new_nhs = NULL, *nh;
 
         /*
          * we need something for the load-balance. so use the drop
          */
-        vec_add2(nhs, nh, 1);
+        vec_add2(new_nhs, nh, 1);
 
         nh->path_weight = 1;
         dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
+
+        return (new_nhs);
     }
 
-    return (nhs);
+    return (NULL);
 }
 
 /*
@@ -467,11 +469,11 @@ load_balance_set_n_buckets (load_balance_t *lb,
 
 void
 load_balance_multipath_update (const dpo_id_t *dpo,
-                               load_balance_path_t * raw_next_hops,
+                               const load_balance_path_t * raw_nhs,
                                load_balance_flags_t flags)
 {
-    u32 sum_of_weights,n_buckets, ii;
-    load_balance_path_t * nh, * nhs;
+    load_balance_path_t *nh, *nhs, *fixed_nhs;
+    u32 sum_of_weights, n_buckets, ii;
     index_t lbmi, old_lbmi;
     load_balance_t *lb;
     dpo_id_t *tmp_dpo;
@@ -480,16 +482,16 @@ load_balance_multipath_update (const dpo_id_t *dpo,
 
     ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
     lb = load_balance_get(dpo->dpoi_index);
-    raw_next_hops =
-        load_balance_multipath_next_hop_fixup(raw_next_hops,
-                                              lb->lb_proto);
+    fixed_nhs = load_balance_multipath_next_hop_fixup(raw_nhs, lb->lb_proto);
     n_buckets =
-        ip_multipath_normalize_next_hops(raw_next_hops,
+        ip_multipath_normalize_next_hops((NULL == fixed_nhs ?
+                                          raw_nhs :
+                                          fixed_nhs),
                                          &nhs,
                                          &sum_of_weights,
                                          multipath_next_hop_error_tolerance);
 
-    ASSERT (n_buckets >= vec_len (raw_next_hops));
+    ASSERT (n_buckets >= vec_len (raw_nhs));
 
     /*
      * Save the old load-balance map used, and get a new one if required.
@@ -694,6 +696,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
         dpo_reset(&nh->path_dpo);
     }
     vec_free(nhs);
+    vec_free(fixed_nhs);
 
     load_balance_map_unlock(old_lbmi);
 }
diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h
index dc6485e6..17996536 100644
--- a/src/vnet/dpo/load_balance.h
+++ b/src/vnet/dpo/load_balance.h
@@ -159,7 +159,7 @@ extern index_t load_balance_create(u32 num_buckets,
 				   flow_hash_config_t fhc);
 extern void load_balance_multipath_update(
     const dpo_id_t *dpo,
-    load_balance_path_t * raw_next_hops,
+    const load_balance_path_t * raw_next_hops,
     load_balance_flags_t flags);
 
 extern void load_balance_set_bucket(index_t lbi,
diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c
index 24b50637..3aa3632c 100644
--- a/src/vnet/fib/fib_entry.c
+++ b/src/vnet/fib/fib_entry.c
@@ -230,6 +230,7 @@ fib_entry_last_lock_gone (fib_node_t *node)
 
     ASSERT(0 == vec_len(fib_entry->fe_delegates));
     vec_free(fib_entry->fe_delegates);
+    vec_free(fib_entry->fe_srcs);
     pool_put(fib_entry_pool, fib_entry);
 }
 
diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c
index 060fac94..1fb04060 100644
--- a/src/vnet/fib/fib_entry_src.c
+++ b/src/vnet/fib/fib_entry_src.c
@@ -382,6 +382,14 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
         .fct = fct,
     };
 
+    /*
+     * As an optimisation we allocate the vector of next-hops to be sized
+     * equal to the maximum number of paths we will need, which is also the
+     * most likely number we will need, since in most cases the paths are 'up'.
+     */
+    vec_validate(ctx.next_hops, fib_path_list_get_n_paths(esrc->fes_pl));
+    vec_reset_length(ctx.next_hops);
+
     lb_proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto);
 
     fib_path_list_walk(esrc->fes_pl,
diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c
index 5b35e9b8..db9d1af9 100644
--- a/src/vnet/fib/fib_path_list.c
+++ b/src/vnet/fib/fib_path_list.c
@@ -365,10 +365,10 @@ fib_path_list_mk_lb (fib_path_list_t *path_list,
 		     fib_forward_chain_type_t fct,
 		     dpo_id_t *dpo)
 {
-    load_balance_path_t *hash_key;
+    load_balance_path_t *nhs;
     fib_node_index_t *path_index;
 
-    hash_key  = NULL;
+    nhs  = NULL;
 
     if (!dpo_id_is_valid(dpo))
     {
@@ -388,21 +388,20 @@ fib_path_list_mk_lb (fib_path_list_t *path_list,
      */
     vec_foreach (path_index, path_list->fpl_paths)
     {
-	hash_key = fib_path_append_nh_for_multipath_hash(
-	               *path_index,
-		       fct,
-		       hash_key);
+	nhs = fib_path_append_nh_for_multipath_hash(*path_index,
+                                                    fct,
+                                                    nhs);
     }
 
     /*
      * Path-list load-balances, which if used, would be shared and hence
      * never need a load-balance map.
      */
-    load_balance_multipath_update(dpo, hash_key, LOAD_BALANCE_FLAG_NONE);
+    load_balance_multipath_update(dpo, nhs, LOAD_BALANCE_FLAG_NONE);
 
     FIB_PATH_LIST_DBG(path_list, "mk lb: %d", dpo->dpoi_index);
 
-    vec_free(hash_key);
+    vec_free(nhs);
 }
 
 /**
@@ -591,6 +590,17 @@ fib_path_list_resolve (fib_path_list_t *path_list)
     return (path_list);
 }
 
+u32
+fib_path_list_get_n_paths (fib_node_index_t path_list_index)
+{
+    fib_path_list_t *path_list;
+
+    path_list = fib_path_list_get(path_list_index);
+
+    return (vec_len(path_list->fpl_paths));
+}
+
+
 u32
 fib_path_list_get_resolving_interface (fib_node_index_t path_list_index)
 {
diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h
index 8bc1b20b..f4f94a1b 100644
--- a/src/vnet/fib/fib_path_list.h
+++ b/src/vnet/fib/fib_path_list.h
@@ -104,6 +104,8 @@ extern fib_node_index_t fib_path_list_copy_and_path_remove(
     fib_node_index_t pl_index,
     fib_path_list_flags_t flags,
     const fib_route_path_t *path);
+extern u32 fib_path_list_get_n_paths(fib_node_index_t pl_index);
+
 extern void fib_path_list_contribute_forwarding(fib_node_index_t path_list_index,
 						fib_forward_chain_type_t type,
 						dpo_id_t *dpo);
-- 
cgit 1.2.3-korg


From 32e1c010b0c34fd0984f7fc45fae648a182025c5 Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Tue, 22 Nov 2016 17:07:28 +0000
Subject: IP Multicast FIB (mfib)

 - IPv[46] mfib tables with support for (*,G/m), (*,G) and (S,G) exact and longest prefix match
 - Replication represented via a new replicate DPO.
 - RPF configuration and data-plane checking
 - data-plane signals sent to listening control planes.

The functions of multicast forwarding entries differ from their unicast counterparts, so we introduce new mfib_table_t and mfib_entry_t objects. However, we re-use the fib_path_list to resolve and build the entry's output list. The fib_path_list provides the service to construct a replicate DPO for multicast.

'make tests' is extended with two new suites: TEST=mfib, an invocation of the CLI command 'test mfib' which deals with many path add/remove and flag set/unset scenarios; and TEST=ip-mcast, data-plane forwarding tests.

Updated applications to use the new MFIB functions;
  - IPv6 NS/RA.
  - DHCPv6
 unit tests for these are updated accordingly.

Change-Id: I49ec37b01f1b170335a5697541c8fd30e6d3a961
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/scripts/vnet/mcast/ip4         |   22 +
 src/vat/api_format.c               |  173 +++++
 src/vnet.am                        |   35 +-
 src/vnet/adj/adj.c                 |   11 +-
 src/vnet/adj/adj.h                 |    1 -
 src/vnet/adj/adj_internal.h        |    2 +
 src/vnet/adj/adj_mcast.c           |  346 ++++++++++
 src/vnet/adj/adj_mcast.h           |   78 +++
 src/vnet/adj/adj_nbr.c             |    2 +-
 src/vnet/adj/adj_rewrite.c         |   53 --
 src/vnet/adj/adj_rewrite.h         |   49 --
 src/vnet/dhcpv6/proxy_node.c       |   46 +-
 src/vnet/dpo/dpo.c                 |    2 +
 src/vnet/dpo/dpo.h                 |    8 +-
 src/vnet/dpo/load_balance.c        |   13 +-
 src/vnet/dpo/load_balance.h        |    8 +
 src/vnet/dpo/replicate_dpo.c       |  759 ++++++++++++++++++++++
 src/vnet/dpo/replicate_dpo.h       |  143 +++++
 src/vnet/ethernet/arp.c            |   84 ++-
 src/vnet/ethernet/ethernet.h       |    2 +
 src/vnet/ethernet/interface.c      |   20 +
 src/vnet/fib/fib_attached_export.c |    4 +-
 src/vnet/fib/fib_entry.h           |    2 +-
 src/vnet/fib/fib_entry_delegate.c  |    3 +
 src/vnet/fib/fib_entry_src.c       |    4 +
 src/vnet/fib/fib_node.h            |    2 +
 src/vnet/fib/fib_path.c            |   91 ++-
 src/vnet/fib/fib_path_list.c       |    9 +-
 src/vnet/fib/fib_path_list.h       |    6 +
 src/vnet/fib/fib_table.c           |   69 +-
 src/vnet/fib/fib_table.h           |   16 +
 src/vnet/fib/fib_test.c            |  207 +++---
 src/vnet/fib/fib_types.c           |    4 +
 src/vnet/fib/fib_types.h           |   14 +
 src/vnet/fib/fib_urpf_list.c       |   20 +-
 src/vnet/fib/ip4_fib.c             |   50 +-
 src/vnet/fib/ip4_fib.h             |    9 +
 src/vnet/fib/ip6_fib.c             |  117 ++--
 src/vnet/fib/ip6_fib.h             |   11 +-
 src/vnet/fib/mpls_fib.c            |   17 +-
 src/vnet/fib/mpls_fib.h            |    9 +
 src/vnet/ip/ip.api                 |   53 ++
 src/vnet/ip/ip4.h                  |   24 +
 src/vnet/ip/ip4_forward.c          |  498 +++++----------
 src/vnet/ip/ip4_input.c            |    4 +-
 src/vnet/ip/ip6.h                  |   27 +
 src/vnet/ip/ip6_forward.c          |   83 ++-
 src/vnet/ip/ip6_input.c            |   43 +-
 src/vnet/ip/ip6_neighbor.c         |  134 ++--
 src/vnet/ip/ip_api.c               |  210 +++++++
 src/vnet/ip/lookup.c               |  171 +++++
 src/vnet/ip/lookup.h               |   82 +--
 src/vnet/mcast/mcast.c             |  565 -----------------
 src/vnet/mcast/mcast.h             |   50 --
 src/vnet/mcast/mcast_test.c        |  149 -----
 src/vnet/mfib/ip4_mfib.c           |  465 ++++++++++++++
 src/vnet/mfib/ip4_mfib.h           |   95 +++
 src/vnet/mfib/ip6_mfib.c           |  663 +++++++++++++++++++
 src/vnet/mfib/ip6_mfib.h           |  109 ++++
 src/vnet/mfib/mfib_entry.c         | 1096 ++++++++++++++++++++++++++++++++
 src/vnet/mfib/mfib_entry.h         |  172 +++++
 src/vnet/mfib/mfib_forward.c       |  512 +++++++++++++++
 src/vnet/mfib/mfib_itf.c           |  119 ++++
 src/vnet/mfib/mfib_itf.h           |   63 ++
 src/vnet/mfib/mfib_signal.c        |  201 ++++++
 src/vnet/mfib/mfib_signal.h        |   59 ++
 src/vnet/mfib/mfib_table.c         |  489 ++++++++++++++
 src/vnet/mfib/mfib_table.h         |  331 ++++++++++
 src/vnet/mfib/mfib_test.c          | 1225 ++++++++++++++++++++++++++++++++++++
 src/vnet/mfib/mfib_types.c         |  213 +++++++
 src/vnet/mfib/mfib_types.h         |  185 ++++++
 src/vnet/misc.c                    |    3 +
 src/vnet/rewrite.h                 |   31 +
 src/vnet/sr/sr.c                   |    4 +-
 src/vnet/util/radix.c              | 1104 ++++++++++++++++++++++++++++++++
 src/vnet/util/radix.h              |  147 +++++
 src/vnet/vxlan/vxlan.c             |  112 +++-
 src/vpp/api/api.c                  |   14 +-
 src/vppinfra.am                    |    2 +-
 src/vppinfra/dlist.h               |    2 +-
 src/vppinfra/format.c              |    8 +-
 src/vppinfra/format.h              |    4 +-
 src/vppinfra/unformat.c            |   16 +-
 test/test_dhcp.py                  |   16 -
 test/test_ip6.py                   |  131 ++--
 test/test_ip_mcast.py              |  612 ++++++++++++++++++
 test/test_mfib.py                  |   23 +
 test/vpp_interface.py              |    3 +-
 test/vpp_ip_route.py               |  101 ++-
 test/vpp_papi_provider.py          |   34 +-
 90 files changed, 11211 insertions(+), 1767 deletions(-)
 create mode 100644 src/scripts/vnet/mcast/ip4
 create mode 100644 src/vnet/adj/adj_mcast.c
 create mode 100644 src/vnet/adj/adj_mcast.h
 delete mode 100644 src/vnet/adj/adj_rewrite.c
 delete mode 100644 src/vnet/adj/adj_rewrite.h
 create mode 100644 src/vnet/dpo/replicate_dpo.c
 create mode 100644 src/vnet/dpo/replicate_dpo.h
 delete mode 100644 src/vnet/mcast/mcast.c
 delete mode 100644 src/vnet/mcast/mcast.h
 delete mode 100644 src/vnet/mcast/mcast_test.c
 create mode 100644 src/vnet/mfib/ip4_mfib.c
 create mode 100644 src/vnet/mfib/ip4_mfib.h
 create mode 100644 src/vnet/mfib/ip6_mfib.c
 create mode 100644 src/vnet/mfib/ip6_mfib.h
 create mode 100644 src/vnet/mfib/mfib_entry.c
 create mode 100644 src/vnet/mfib/mfib_entry.h
 create mode 100644 src/vnet/mfib/mfib_forward.c
 create mode 100644 src/vnet/mfib/mfib_itf.c
 create mode 100644 src/vnet/mfib/mfib_itf.h
 create mode 100644 src/vnet/mfib/mfib_signal.c
 create mode 100644 src/vnet/mfib/mfib_signal.h
 create mode 100644 src/vnet/mfib/mfib_table.c
 create mode 100644 src/vnet/mfib/mfib_table.h
 create mode 100644 src/vnet/mfib/mfib_test.c
 create mode 100644 src/vnet/mfib/mfib_types.c
 create mode 100644 src/vnet/mfib/mfib_types.h
 create mode 100644 src/vnet/util/radix.c
 create mode 100644 src/vnet/util/radix.h
 create mode 100644 test/test_ip_mcast.py
 create mode 100644 test/test_mfib.py

(limited to 'src/vnet/dpo')

diff --git a/src/scripts/vnet/mcast/ip4 b/src/scripts/vnet/mcast/ip4
new file mode 100644
index 00000000..69f1ee00
--- /dev/null
+++ b/src/scripts/vnet/mcast/ip4
@@ -0,0 +1,22 @@
+packet-generator new {
+  name x
+  limit 1
+  node ip4-input
+  size 64-64
+  no-recycle
+  data {
+    ICMP: 1.0.0.2 -> 232.1.1.1
+    ICMP echo_request
+    incrementing 100
+  }
+}
+
+trace add pg-input 100
+loop create
+loop create
+set int state loop0 up
+set int state loop1 up
+
+ip mroute add 232.1.1.1 via pg0 Accept
+ip mroute add 232.1.1.1 via loop0 Forward
+ip mroute add 232.1.1.1 via loop1 Forward
diff --git a/src/vat/api_format.c b/src/vat/api_format.c
index b83313de..4cfe4a58 100644
--- a/src/vat/api_format.c
+++ b/src/vat/api_format.c
@@ -48,6 +48,7 @@
 #include <vnet/span/span.h>
 #include <vnet/policer/policer.h>
 #include <vnet/policer/police.h>
+#include <vnet/mfib/mfib_types.h>
 
 #include "vat/json_format.h"
 
@@ -505,6 +506,53 @@ unformat_flow_classify_table_type (unformat_input_t * input, va_list * va)
   return 1;
 }
 
+static const char *mfib_flag_names[] = MFIB_ENTRY_NAMES_SHORT;
+static const char *mfib_flag_long_names[] = MFIB_ENTRY_NAMES_LONG;
+static const char *mfib_itf_flag_long_names[] = MFIB_ITF_NAMES_LONG;
+static const char *mfib_itf_flag_names[] = MFIB_ITF_NAMES_SHORT;
+
+uword
+unformat_mfib_itf_flags (unformat_input_t * input, va_list * args)
+{
+  mfib_itf_flags_t old, *iflags = va_arg (*args, mfib_itf_flags_t *);
+  mfib_itf_attribute_t attr;
+
+  old = *iflags;
+  FOR_EACH_MFIB_ITF_ATTRIBUTE (attr)
+  {
+    if (unformat (input, mfib_itf_flag_long_names[attr]))
+      *iflags |= (1 << attr);
+  }
+  FOR_EACH_MFIB_ITF_ATTRIBUTE (attr)
+  {
+    if (unformat (input, mfib_itf_flag_names[attr]))
+      *iflags |= (1 << attr);
+  }
+
+  return (old == *iflags ? 0 : 1);
+}
+
+uword
+unformat_mfib_entry_flags (unformat_input_t * input, va_list * args)
+{
+  mfib_entry_flags_t old, *eflags = va_arg (*args, mfib_entry_flags_t *);
+  mfib_entry_attribute_t attr;
+
+  old = *eflags;
+  FOR_EACH_MFIB_ATTRIBUTE (attr)
+  {
+    if (unformat (input, mfib_flag_long_names[attr]))
+      *eflags |= (1 << attr);
+  }
+  FOR_EACH_MFIB_ATTRIBUTE (attr)
+  {
+    if (unformat (input, mfib_flag_names[attr]))
+      *eflags |= (1 << attr);
+  }
+
+  return (old == *eflags ? 0 : 1);
+}
+
 #if (VPP_API_TEST_BUILTIN==0)
 u8 *
 format_ip4_address (u8 * s, va_list * args)
@@ -3592,6 +3640,7 @@ _(bridge_domain_add_del_reply)                          \
 _(sw_interface_set_l2_xconnect_reply)                   \
 _(l2fib_add_del_reply)                                  \
 _(ip_add_del_route_reply)                               \
+_(ip_mroute_add_del_reply)                              \
 _(mpls_route_add_del_reply)                             \
 _(mpls_ip_bind_unbind_reply)                            \
 _(proxy_arp_add_del_reply)                              \
@@ -3792,6 +3841,7 @@ _(TAP_MODIFY_REPLY, tap_modify_reply)					\
 _(TAP_DELETE_REPLY, tap_delete_reply)					\
 _(SW_INTERFACE_TAP_DETAILS, sw_interface_tap_details)                   \
 _(IP_ADD_DEL_ROUTE_REPLY, ip_add_del_route_reply)			\
+_(IP_MROUTE_ADD_DEL_REPLY, ip_mroute_add_del_reply)			\
 _(MPLS_ROUTE_ADD_DEL_REPLY, mpls_route_add_del_reply)			\
 _(MPLS_IP_BIND_UNBIND_REPLY, mpls_ip_bind_unbind_reply)			\
 _(PROXY_ARP_ADD_DEL_REPLY, proxy_arp_add_del_reply)                     \
@@ -6383,6 +6433,126 @@ api_ip_add_del_route (vat_main_t * vam)
   return (vam->retval);
 }
 
+static int
+api_ip_mroute_add_del (vat_main_t * vam)
+{
+  unformat_input_t *i = vam->input;
+  vl_api_ip_mroute_add_del_t *mp;
+  f64 timeout;
+  u32 sw_if_index = ~0, vrf_id = 0;
+  u8 is_ipv6 = 0;
+  u8 is_local = 0;
+  u8 create_vrf_if_needed = 0;
+  u8 is_add = 1;
+  u8 address_set = 0;
+  u32 grp_address_length = 0;
+  ip4_address_t v4_grp_address, v4_src_address;
+  ip6_address_t v6_grp_address, v6_src_address;
+  mfib_itf_flags_t iflags = 0;
+  mfib_entry_flags_t eflags = 0;
+
+  /* Parse args required to build the message */
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "sw_if_index %d", &sw_if_index))
+	;
+      else if (unformat (i, "%U %U",
+			 unformat_ip4_address, &v4_src_address,
+			 unformat_ip4_address, &v4_grp_address))
+	{
+	  grp_address_length = 64;
+	  address_set = 1;
+	  is_ipv6 = 0;
+	}
+      else if (unformat (i, "%U %U",
+			 unformat_ip6_address, &v6_src_address,
+			 unformat_ip6_address, &v6_grp_address))
+	{
+	  grp_address_length = 256;
+	  address_set = 1;
+	  is_ipv6 = 1;
+	}
+      else if (unformat (i, "%U", unformat_ip4_address, &v4_grp_address))
+	{
+	  memset (&v4_src_address, 0, sizeof (v4_src_address));
+	  grp_address_length = 32;
+	  address_set = 1;
+	  is_ipv6 = 0;
+	}
+      else if (unformat (i, "%U", unformat_ip6_address, &v6_grp_address))
+	{
+	  memset (&v6_src_address, 0, sizeof (v6_src_address));
+	  grp_address_length = 128;
+	  address_set = 1;
+	  is_ipv6 = 1;
+	}
+      else if (unformat (i, "/%d", &grp_address_length))
+	;
+      else if (unformat (i, "local"))
+	{
+	  is_local = 1;
+	}
+      else if (unformat (i, "del"))
+	is_add = 0;
+      else if (unformat (i, "add"))
+	is_add = 1;
+      else if (unformat (i, "vrf %d", &vrf_id))
+	;
+      else if (unformat (i, "create-vrf"))
+	create_vrf_if_needed = 1;
+      else if (unformat (i, "%U", unformat_mfib_itf_flags, &iflags))
+	;
+      else if (unformat (i, "%U", unformat_mfib_entry_flags, &eflags))
+	;
+      else
+	{
+	  clib_warning ("parse error '%U'", format_unformat_error, i);
+	  return -99;
+	}
+    }
+
+  if (address_set == 0)
+    {
+      errmsg ("missing addresses\n");
+      return -99;
+    }
+
+  /* Construct the API message */
+  M (IP_MROUTE_ADD_DEL, ip_mroute_add_del);
+
+  mp->next_hop_sw_if_index = ntohl (sw_if_index);
+  mp->table_id = ntohl (vrf_id);
+  mp->create_vrf_if_needed = create_vrf_if_needed;
+
+  mp->is_add = is_add;
+  mp->is_ipv6 = is_ipv6;
+  mp->is_local = is_local;
+  mp->itf_flags = ntohl (iflags);
+  mp->entry_flags = ntohl (eflags);
+  mp->grp_address_length = grp_address_length;
+  mp->grp_address_length = ntohs (mp->grp_address_length);
+
+  if (is_ipv6)
+    {
+      clib_memcpy (mp->grp_address, &v6_grp_address, sizeof (v6_grp_address));
+      clib_memcpy (mp->src_address, &v6_src_address, sizeof (v6_src_address));
+    }
+  else
+    {
+      clib_memcpy (mp->grp_address, &v4_grp_address, sizeof (v4_grp_address));
+      clib_memcpy (mp->src_address, &v4_src_address, sizeof (v4_src_address));
+
+    }
+
+  /* send it... */
+  S;
+  /* Wait for a reply... */
+  W;
+
+  /* Return the good/bad news */
+  return (vam->retval);
+}
+
 static int
 api_mpls_route_add_del (vat_main_t * vam)
 {
@@ -17512,6 +17682,9 @@ _(ip_add_del_route,                                                     \
   "[<intfc> | sw_if_index <id>] [resolve-attempts <n>]\n"               \
   "[weight <n>] [drop] [local] [classify <n>] [del]\n"                  \
   "[multipath] [count <n>]")                                            \
+_(ip_mroute_add_del,                                                    \
+  "<src> <grp>/<mask> [table-id <n>]\n"                                 \
+  "[<intfc> | sw_if_index <id>] [local] [del]")                         \
 _(mpls_route_add_del,                                                   \
   "<label> <eos> via <addr> [table-id <n>]\n"                           \
   "[<intfc> | sw_if_index <id>] [resolve-attempts <n>]\n"               \
diff --git a/src/vnet.am b/src/vnet.am
index c6922493..6d043e32 100644
--- a/src/vnet.am
+++ b/src/vnet.am
@@ -946,16 +946,15 @@ nobase_include_HEADERS +=			\
 
 libvnet_la_SOURCES +=				\
   vnet/adj/adj_nbr.c				\
-  vnet/adj/adj_rewrite.c			\
   vnet/adj/adj_glean.c   			\
   vnet/adj/adj_midchain.c   			\
+  vnet/adj/adj_mcast.c   			\
   vnet/adj/adj_l2.c      			\
   vnet/adj/adj.c
 
 nobase_include_HEADERS +=			\
   vnet/adj/adj.h				\
   vnet/adj/adj_types.h				\
-  vnet/adj/adj_rewrite.h			\
   vnet/adj/adj_glean.h  			\
   vnet/adj/adj_nbr.h
 
@@ -971,8 +970,9 @@ libvnet_la_SOURCES +=				\
   vnet/dpo/receive_dpo.c			\
   vnet/dpo/load_balance.c			\
   vnet/dpo/load_balance_map.c			\
-  vnet/dpo/lookup_dpo.c				\
-  vnet/dpo/classify_dpo.c			\
+  vnet/dpo/lookup_dpo.c   			\
+  vnet/dpo/classify_dpo.c   			\
+  vnet/dpo/replicate_dpo.c   			\
   vnet/dpo/mpls_label_dpo.c
 
 nobase_include_HEADERS +=			\
@@ -985,6 +985,33 @@ nobase_include_HEADERS +=			\
   vnet/dpo/ip_null_dpo.h			\
   vnet/dpo/dpo.h
 
+########################################
+# Multicast FIB
+########################################
+
+libvnet_la_SOURCES +=				\
+  vnet/mfib/mfib_test.c                         \
+  vnet/mfib/mfib_forward.c                      \
+  vnet/mfib/ip4_mfib.c                          \
+  vnet/mfib/ip6_mfib.c                          \
+  vnet/mfib/mfib_types.c                        \
+  vnet/mfib/mfib_signal.c                       \
+  vnet/mfib/mfib_itf.c                          \
+  vnet/mfib/mfib_entry.c                        \
+  vnet/mfib/mfib_table.c
+
+nobase_include_HEADERS +=			\
+  vnet/mfib/ip4_mfib.h                          \
+  vnet/mfib/mfib_types.h                        \
+  vnet/mfib/mfib_table.h
+
+########################################
+# Utilities
+########################################
+
+libvnet_la_SOURCES +=                    \
+  vnet/util/radix.c
+
 ########################################
 # Plugin client library
 ########################################
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
index d0be0f0e..a99f173f 100644
--- a/src/vnet/adj/adj.c
+++ b/src/vnet/adj/adj.c
@@ -17,6 +17,7 @@
 #include <vnet/adj/adj_internal.h>
 #include <vnet/adj/adj_glean.h>
 #include <vnet/adj/adj_midchain.h>
+#include <vnet/adj/adj_mcast.h>
 #include <vnet/fib/fib_node_list.h>
 
 /*
@@ -58,8 +59,6 @@ adj_alloc (fib_protocol_t proto)
                                    adj_get_index(adj));
 
     adj->rewrite_header.sw_if_index = ~0;
-    adj->mcast_group_index = ~0;
-    adj->saved_lookup_next_index = 0;
     adj->n_adj = 1;
     adj->lookup_next_index = 0;
 
@@ -116,6 +115,9 @@ format_ip_adjacency (u8 * s, va_list * args)
     case IP_LOOKUP_NEXT_MIDCHAIN:
 	s = format (s, "%U", format_adj_midchain, adj_index, 2);
 	break;
+    case IP_LOOKUP_NEXT_MCAST:
+	s = format (s, "%U", format_adj_mcast, adj_index, 0);
+	break;
     default:
 	break;
     }
@@ -179,6 +181,10 @@ adj_last_lock_gone (ip_adjacency_t *adj)
 	adj_glean_remove(adj->ia_nh_proto,
 			 adj->rewrite_header.sw_if_index);
 	break;
+    case IP_LOOKUP_NEXT_MCAST:
+	adj_mcast_remove(adj->ia_nh_proto,
+			 adj->rewrite_header.sw_if_index);
+	break;
     default:
 	/*
 	 * type not stored in any DB from which we need to remove it
@@ -350,6 +356,7 @@ adj_module_init (vlib_main_t * vm)
     adj_nbr_module_init();
     adj_glean_module_init();
     adj_midchain_module_init();
+    adj_mcast_module_init();
 
     /*
      * one special adj to reserve index 0
diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h
index e85625db..29bae673 100644
--- a/src/vnet/adj/adj.h
+++ b/src/vnet/adj/adj.h
@@ -45,7 +45,6 @@
 #include <vnet/ip/lookup.h>
 #include <vnet/adj/adj_types.h>
 #include <vnet/adj/adj_nbr.h>
-#include <vnet/adj/adj_rewrite.h>
 #include <vnet/adj/adj_glean.h>
 
 /**
diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h
index 833bc7c9..ece59121 100644
--- a/src/vnet/adj/adj_internal.h
+++ b/src/vnet/adj/adj_internal.h
@@ -100,5 +100,7 @@ extern void adj_nbr_remove(adj_index_t ai,
 			   u32 sw_if_index);
 extern void adj_glean_remove(fib_protocol_t proto,
 			     u32 sw_if_index);
+extern void adj_mcast_remove(fib_protocol_t proto,
+			     u32 sw_if_index);
 
 #endif
diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c
new file mode 100644
index 00000000..1345aedb
--- /dev/null
+++ b/src/vnet/adj/adj_mcast.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_internal.h>
+#include <vnet/fib/fib_walk.h>
+#include <vnet/ip/ip.h>
+
+/*
+ * The 'DB' of all mcast adjs.
+ * There is only one mcast per-interface per-protocol, so this is a per-interface
+ * vector
+ */
+static adj_index_t *adj_mcasts[FIB_PROTOCOL_MAX];
+
+/* Map a FIB protocol to the index of its mcast rewrite graph node. */
+static u32
+adj_get_mcast_node (fib_protocol_t proto)
+{
+    if (FIB_PROTOCOL_IP4 == proto)
+        return (ip4_rewrite_mcast_node.index);
+
+    if (FIB_PROTOCOL_IP6 == proto)
+        return (ip6_rewrite_mcast_node.index);
+
+    /* MPLS (or anything else) has no mcast rewrite node */
+    ASSERT(0);
+    return (0);
+}
+
+/*
+ * adj_mcast_add_or_lock
+ *
+ * Return the mcast adj for this protocol on this interface, locked on
+ * behalf of the caller. If the per-interface DB has no entry yet, a new adj
+ * is allocated, recorded in the DB and has its rewrite set up.
+ */
+adj_index_t
+adj_mcast_add_or_lock (fib_protocol_t proto,
+                       vnet_link_t link_type,
+                       u32 sw_if_index)
+{
+    ip_adjacency_t *adj;
+    adj_index_t ai;
+
+    vec_validate_init_empty(adj_mcasts[proto], sw_if_index, ADJ_INDEX_INVALID);
+    ai = adj_mcasts[proto][sw_if_index];
+
+    if (ADJ_INDEX_INVALID != ai)
+    {
+        adj = adj_get(ai);
+        adj_lock(adj_get_index(adj));
+    }
+    else
+    {
+        vnet_main_t *vnm = vnet_get_main();
+        adj = adj_alloc(proto);
+        ai = adj_get_index(adj);
+
+        adj->lookup_next_index = IP_LOOKUP_NEXT_MCAST;
+        adj->ia_nh_proto = proto;
+        adj->ia_link = link_type;
+        adj_mcasts[proto][sw_if_index] = ai;
+        adj_lock(ai);
+
+        vnet_rewrite_init(vnm, sw_if_index,
+                          adj_get_mcast_node(proto),
+                          vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
+                          &adj->rewrite_header);
+
+        /*
+         * the rewrite must turn the destination IP address into the
+         * matching link-layer address. That is interface specific, so the
+         * interface is asked to do it.
+         */
+        vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, ai);
+    }
+
+    return (adj_get_index(adj));
+}
+
+/**
+ * adj_mcast_update_rewrite
+ *
+ * Replace the rewrite string of an existing mcast adjacency. A NULL string
+ * is meant to reset the rewrite (i.e. when the ARP/ND entry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_mcast_update_rewrite (adj_index_t adj_index,
+                          u8 *rewrite)
+{
+    u32 this_node, next_node;
+    ip_adjacency_t *adj;
+
+    ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+    adj = adj_get(adj_index);
+
+    /* the mcast rewrite node for the adj, and its interface's TX node */
+    this_node = adj_get_mcast_node(adj->ia_nh_proto);
+    next_node = vnet_tx_node_index_for_sw_interface(
+                    vnet_get_main(),
+                    adj->rewrite_header.sw_if_index);
+
+    adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MCAST,
+                                    this_node, next_node,
+                                    rewrite);
+}
+
+/* Forget the mcast adj recorded for this protocol on this interface. */
+void
+adj_mcast_remove (fib_protocol_t proto, u32 sw_if_index)
+{
+    ASSERT(vec_len(adj_mcasts[proto]) > sw_if_index);
+
+    adj_mcasts[proto][sw_if_index] = ADJ_INDEX_INVALID;
+}
+
+/*
+ * Registered as the SW interface admin up/down callback (and reused for HW
+ * link changes): for each IP protocol with an mcast adj on the interface,
+ * trigger a walk back to the adj's children.
+ */
+static clib_error_t *
+adj_mcast_interface_state_change (vnet_main_t * vnm,
+                                  u32 sw_if_index,
+                                  u32 flags)
+{
+    const int is_up = (0 != (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP));
+    fib_protocol_t proto;
+
+    for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+    {
+        if (sw_if_index < vec_len(adj_mcasts[proto]) &&
+            ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index])
+        {
+            fib_node_back_walk_ctx_t bw_ctx = {
+                .fnbw_reason = (is_up ?
+                                FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
+                                FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
+            };
+            ip_adjacency_t *adj;
+
+            adj = adj_get(adj_mcasts[proto][sw_if_index]);
+            fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx);
+        }
+    }
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_mcast_interface_state_change);
+
+/**
+ * @brief Invoked on each SW interface of a HW interface when the
+ * HW interface state changes. The SW flags to apply are carried in arg.
+ */
+static void
+adj_mcast_hw_sw_interface_state_change (vnet_main_t * vnm,
+                                        u32 sw_if_index,
+                                        void *arg)
+{
+    adj_mcast_interface_state_change(vnm, sw_if_index, (uword) arg);
+}
+
+/**
+ * @brief Registered callback for HW interface link up/down changes
+ */
+static clib_error_t *
+adj_mcast_hw_interface_state_change (vnet_main_t * vnm,
+                                     u32 hw_if_index,
+                                     u32 flags)
+{
+    /*
+     * map link-up to the SW admin-up flag, then walk SW interfaces on the HW
+     */
+    uword sw_flags;
+
+    sw_flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ?
+                VNET_SW_INTERFACE_FLAG_ADMIN_UP :
+                0);
+
+    vnet_hw_interface_walk_sw(vnm, hw_if_index,
+                              adj_mcast_hw_sw_interface_state_change,
+                              (void*) sw_flags);
+
+    return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(
+    adj_mcast_hw_interface_state_change);
+
+/*
+ * SW interface add/del callback. Only deletes are acted on: for each IP
+ * protocol that has an mcast adj on the interface, trigger a walk back to
+ * the adj's children.
+ */
+static clib_error_t *
+adj_mcast_interface_delete (vnet_main_t * vnm,
+                            u32 sw_if_index,
+                            u32 is_add)
+{
+    fib_protocol_t proto;
+
+    /*
+     * Interface additions are deliberately ignored: no back walk is done
+     * to resolve paths through a newly added interface. Why? The control
+     * plane should add interfaces first, then routes. So a path that
+     * matches an interface that has only just been created must have
+     * resolved through an interface that was since deleted, without the
+     * path having been removed. The new interface may well have the same
+     * sw_if_index, but that is NO GUARANTEE that it is the interface the
+     * path needs. So tough!
+     * If the control plane wants these routes to resolve it needs to
+     * remove and add them again.
+     */
+    if (is_add)
+    {
+        return (NULL);
+    }
+
+    for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+    {
+        /* nothing to walk unless an mcast adj is recorded here */
+        if (sw_if_index < vec_len(adj_mcasts[proto]) &&
+            ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index])
+        {
+            fib_node_back_walk_ctx_t bw_ctx = {
+                .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
+            };
+            ip_adjacency_t *adj;
+
+            adj = adj_get(adj_mcasts[proto][sw_if_index]);
+            fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx);
+        }
+    }
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_mcast_interface_delete);
+
+/* Format an mcast adjacency: its protocol followed by its rewrite. */
+u8*
+format_adj_mcast (u8* s, va_list *ap)
+{
+    index_t ai = va_arg(*ap, index_t);
+    ip_adjacency_t *adj;
+    vnet_main_t *vnm;
+
+    (void) va_arg(*ap, u32);     /* indent: consumed but not used */
+    vnm = vnet_get_main();
+    adj = adj_get(ai);
+
+    s = format(s, "%U-mcast: ", format_fib_protocol, adj->ia_nh_proto);
+    s = format(s, "%U", format_vnet_rewrite, vnm->vlib_main,
+               &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
+    return (s);
+}
+
+
+static void
+adj_dpo_lock (dpo_id_t *dpo)
+{   /* dv_lock hook of adj_mcast_dpo_vft */
+    adj_lock(dpo->dpoi_index);      /* the DPO's index is used as the adj index */
+}
+static void
+adj_dpo_unlock (dpo_id_t *dpo)
+{   /* dv_unlock hook of adj_mcast_dpo_vft */
+    adj_unlock(dpo->dpoi_index);    /* the DPO's index is used as the adj index */
+}
+
+const static dpo_vft_t adj_mcast_dpo_vft = {   /* registered for DPO_ADJACENCY_MCAST */
+    .dv_lock = adj_dpo_lock,
+    .dv_unlock = adj_dpo_unlock,
+    .dv_format = format_adj_mcast,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a mcast
+ *        object.
+ *
+ * That is, these are the graph nodes from which a mcast adj is the
+ * parent object in the DPO-graph.
+ */
+const static char* const adj_mcast_ip4_nodes[] =
+{
+    "ip4-rewrite-mcast",
+    NULL,    /* array is NULL-terminated */
+};
+const static char* const adj_mcast_ip6_nodes[] =
+{
+    "ip6-rewrite-mcast",
+    NULL,    /* array is NULL-terminated */
+};
+
+const static char* const * const adj_mcast_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = adj_mcast_ip4_nodes,
+    [DPO_PROTO_IP6]  = adj_mcast_ip6_nodes,
+    [DPO_PROTO_MPLS] = NULL,    /* no node list is provided for MPLS */
+};
+
+/**
+ * @brief Count the mcast adjs currently recorded in the per-interface DB.
+ * Test use only, so a plain linear scan of both IP protocols is good enough.
+ */
+u32
+adj_mcast_db_size (void)
+{
+    fib_protocol_t proto;
+    adj_index_t *slot;
+    u32 n_adjs;
+
+    n_adjs = 0;
+    for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+    {
+        /* every slot that holds a valid index is one adj */
+        vec_foreach (slot, adj_mcasts[proto])
+        {
+            if (ADJ_INDEX_INVALID == *slot)
+                continue;
+
+            n_adjs++;
+        }
+    }
+
+    return (n_adjs);
+}
+
+void
+adj_mcast_module_init (void)
+{   /* register the mcast adj DPO type with its vft and per-protocol nodes */
+    dpo_register(DPO_ADJACENCY_MCAST, &adj_mcast_dpo_vft, adj_mcast_nodes);
+}
diff --git a/src/vnet/adj/adj_mcast.h b/src/vnet/adj/adj_mcast.h
new file mode 100644
index 00000000..21c5a141
--- /dev/null
+++ b/src/vnet/adj/adj_mcast.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @brief Mcast Adjacency
+ *
+ * The multicast adjacency forwards IP traffic on an interface toward a multicast
+ * group address. This is a different type of adjacency to a unicast adjacency
+ * since the application of the MAC header is different, and so the VLIB node
+ * visited is also different. DPO types have different VLIB nodes.
+ */
+
+#ifndef __ADJ_MCAST_H__
+#define __ADJ_MCAST_H__
+
+#include <vnet/adj/adj_types.h>
+
+/**
+ * @brief
+ *  Add (and lock) a new or lock an existing mcast adjacency
+ *
+ * @param proto
+ *  The protocol for the neighbours that we wish to mcast
+ *
+ * @param link_type
+ *  A description of the protocol of the packets that will forward
+ *  through this adj. On an ethernet interface this is the MAC header's
+ *  ether-type
+ *
+ * @param sw_if_index
+ *  The interface on which to mcast
+ */
+extern adj_index_t adj_mcast_add_or_lock(fib_protocol_t proto,
+                                         vnet_link_t link_type,
+					 u32 sw_if_index);
+
+/**
+ * @brief
+ *  Update the rewrite string for an existing adjacency.
+ *
+ * @param adj_index
+ *  The index of the adj to update
+ *
+ * @param rewrite
+ *  The new rewrite
+ */
+extern void adj_mcast_update_rewrite(adj_index_t adj_index,
+                                     u8 *rewrite);
+
+/**
+ * @brief Format/display a mcast adjacency.
+ */
+extern u8* format_adj_mcast(u8* s, va_list *ap);
+
+/**
+ * @brief Get the size of the mcast adj DB. Test purposes only.
+ */
+extern u32 adj_mcast_db_size(void);
+
+/**
+ * @brief
+ *  Module initialisation
+ */
+extern void adj_mcast_module_init(void);
+
+#endif
diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c
index 1344bb67..9e8073d3 100644
--- a/src/vnet/adj/adj_nbr.c
+++ b/src/vnet/adj/adj_nbr.c
@@ -162,7 +162,7 @@ adj_nbr_alloc (fib_protocol_t nh_proto,
 }
 
 /*
- * adj_add_for_nbr
+ * adj_nbr_add_or_lock
  *
  * Add an adjacency for the neighbour requested.
  *
diff --git a/src/vnet/adj/adj_rewrite.c b/src/vnet/adj/adj_rewrite.c
deleted file mode 100644
index 7d792557..00000000
--- a/src/vnet/adj/adj_rewrite.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/adj/adj.h>
-#include <vnet/adj/adj_internal.h>
-
-/**
- * adj_rewrite_add_and_lock
- *
- * A rewrite sub-type has the rewrite string provided, but no key
- */
-adj_index_t
-adj_rewrite_add_and_lock (fib_protocol_t nh_proto,
-			  vnet_link_t link_type,
-			  u32 sw_if_index,
-			  u8 *rewrite)
-{
-    ip_adjacency_t *adj;
-
-    adj = adj_alloc(nh_proto);
-
-    adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
-    memset(&adj->sub_type.nbr.next_hop, 0, sizeof(adj->sub_type.nbr.next_hop));
-    adj->ia_link = link_type;
-    adj->ia_nh_proto = nh_proto;
-    adj->rewrite_header.sw_if_index = sw_if_index;
-
-    ASSERT(NULL != rewrite);
-
-    vnet_rewrite_for_sw_interface(vnet_get_main(),
-				  link_type,
-				  adj->rewrite_header.sw_if_index,
-				  adj_get_rewrite_node(link_type),
-				  rewrite,
-				  &adj->rewrite_header,
-				  sizeof (adj->rewrite_data));
-
-    adj_lock(adj_get_index(adj));
-
-    return (adj_get_index(adj));
-}
diff --git a/src/vnet/adj/adj_rewrite.h b/src/vnet/adj/adj_rewrite.h
deleted file mode 100644
index 25e6bba8..00000000
--- a/src/vnet/adj/adj_rewrite.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * @brief
- * A rewrite adjacency has no key, and thus cannot be 'found' from the
- * FIB resolution code. the client therefore needs to maange these adjacencies
- */
-
-#ifndef __ADJ_REWRITE_H__
-#define __ADJ_REWRITE_H__
-
-#include <vnet/adj/adj_types.h>
-
-/**
- * @brief
- *  Add (and lock) a new or lock an existing neighbour adjacency
- *
- * @param nh_proto
- *  The protocol for the next-hop address (v4 or v6)
- *
- * @param link_type
- *  A description of the protocol of the packets that will forward
- *  through this adj. On an ethernet interface this is the MAC header's
- *  ether-type
- *
- * @param sw_if_index
- *  The interface on which the peer resides
- *
- * @param rewrite
- *  The rewrite to prepend to packets
- */
-extern adj_index_t adj_rewrite_add_and_lock(fib_protocol_t nh_proto,
-					    vnet_link_t link_type,
-					    u32 sw_if_index,
-					    u8 *rewrite);
-
-#endif
diff --git a/src/vnet/dhcpv6/proxy_node.c b/src/vnet/dhcpv6/proxy_node.c
index 4dd2239f..77afef2a 100644
--- a/src/vnet/dhcpv6/proxy_node.c
+++ b/src/vnet/dhcpv6/proxy_node.c
@@ -19,6 +19,8 @@
 #include <vnet/pg/pg.h>
 #include <vnet/dhcpv6/proxy.h>
 #include <vnet/fib/ip6_fib.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/mfib/ip6_mfib.h>
 
 static char * dhcpv6_proxy_error_strings[] = {
 #define dhcpv6_proxy_error(n,s) s,
@@ -819,7 +821,7 @@ int dhcpv6_proxy_set_server_2 (ip6_address_t *addr, ip6_address_t *src_address,
   u32 server_fib_index = 0;
   u32 rx_fib_index = 0;
 
-  rx_fib_index = ip6_fib_table_find_or_create_and_lock(rx_fib_id);
+  rx_fib_index = ip6_mfib_table_find_or_create_and_lock(rx_fib_id);
   server_fib_index = ip6_fib_table_find_or_create_and_lock(server_fib_id);
 
   if (is_del)
@@ -848,8 +850,10 @@ int dhcpv6_proxy_set_server_2 (ip6_address_t *addr, ip6_address_t *src_address,
   if (rx_fib_id == 0)
     {
       server = pool_elt_at_index (dm->dhcp6_servers, 0);
-
-      goto initialize_it;
+      if (server->valid)
+          goto reconfigure_it;
+      else
+          goto initialize_it;
     }
 
   if (rx_fib_index < vec_len(dm->dhcp6_server_index_by_rx_fib_index))
@@ -866,6 +870,42 @@ int dhcpv6_proxy_set_server_2 (ip6_address_t *addr, ip6_address_t *src_address,
   pool_get (dm->dhcp6_servers, server);
 
   initialize_it:
+  {
+      const mfib_prefix_t all_dhcp_servers = {
+          .fp_len = 128,
+          .fp_proto = FIB_PROTOCOL_IP6,
+          .fp_grp_addr = {
+              .ip6 = dm->all_dhcpv6_server_relay_agent_address,
+          }
+      };
+      const fib_route_path_t path_for_us = {
+          .frp_proto = FIB_PROTOCOL_IP6,
+          .frp_addr = zero_addr,
+          .frp_sw_if_index = 0xffffffff,
+          .frp_fib_index = ~0,
+          .frp_weight = 0,
+          .frp_flags = FIB_ROUTE_PATH_LOCAL,
+      };
+      mfib_table_entry_path_update(rx_fib_index,
+                                   &all_dhcp_servers,
+                                   MFIB_SOURCE_DHCP,
+                                   &path_for_us,
+                                   MFIB_ITF_FLAG_FORWARD);
+      /*
+       * Each interface that is enabled in this table, needs to be added
+       * as an accepting interface, but this is not easily doable in VPP.
+       * So we cheat. Add a flag to the entry that indicates accept from
+       * any interface.
+       * We will still only accept on v6 enabled interfaces, since the input
+       * feature ensures this.
+       */
+      mfib_table_entry_update(rx_fib_index,
+                              &all_dhcp_servers,
+                              MFIB_SOURCE_DHCP,
+                              MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF);
+  }
+
+reconfigure_it:
 
   copy_ip6_address(&server->dhcp6_server, addr);
   copy_ip6_address(&server->dhcp6_src_address, src_address);
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
index 688d2892..cc2fa0eb 100644
--- a/src/vnet/dpo/dpo.c
+++ b/src/vnet/dpo/dpo.c
@@ -36,6 +36,7 @@
 #include <vnet/dpo/punt_dpo.h>
 #include <vnet/dpo/classify_dpo.h>
 #include <vnet/dpo/ip_null_dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
 
 /**
  * Array of char* names for the DPO types and protos
@@ -449,6 +450,7 @@ dpo_module_init (vlib_main_t * vm)
     classify_dpo_module_init();
     lookup_dpo_module_init();
     ip_null_dpo_module_init();
+    replicate_module_init();
 
     return (NULL);
 }
diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h
index 1efcbc88..aff4e1b8 100644
--- a/src/vnet/dpo/dpo.h
+++ b/src/vnet/dpo/dpo.h
@@ -100,15 +100,18 @@ typedef enum dpo_type_t_ {
      * @brief load-balancing over a choice of [un]equal cost paths
      */
     DPO_LOAD_BALANCE,
+    DPO_REPLICATE,
     DPO_ADJACENCY,
     DPO_ADJACENCY_INCOMPLETE,
     DPO_ADJACENCY_MIDCHAIN,
     DPO_ADJACENCY_GLEAN,
+    DPO_ADJACENCY_MCAST,
     DPO_RECEIVE,
     DPO_LOOKUP,
     DPO_LISP_CP,
     DPO_CLASSIFY,
     DPO_MPLS_LABEL,
+    DPO_MFIB_ENTRY,
     DPO_LAST,
 } __attribute__((packed)) dpo_type_t;
 
@@ -123,12 +126,15 @@ typedef enum dpo_type_t_ {
     [DPO_ADJACENCY_INCOMPLETE] = "dpo-adjacency-incomplete",	\
     [DPO_ADJACENCY_MIDCHAIN] = "dpo-adjacency-midcahin",	\
     [DPO_ADJACENCY_GLEAN] = "dpo-glean",	\
+    [DPO_ADJACENCY_MCAST] = "dpo-adj-mcast",	\
     [DPO_RECEIVE] = "dpo-receive",	\
     [DPO_LOOKUP] = "dpo-lookup",	\
     [DPO_LOAD_BALANCE] = "dpo-load-balance",	\
+    [DPO_REPLICATE] = "dpo-replicate",	\
     [DPO_LISP_CP] = "dpo-lisp-cp",	\
     [DPO_CLASSIFY] = "dpo-classify",	\
-    [DPO_MPLS_LABEL] = "dpo-mpls-label"	\
+    [DPO_MPLS_LABEL] = "dpo-mpls-label", \
+    [DPO_MFIB_ENTRY] = "dpo-mfib_entry"	\
 }
 
 /**
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index e70a7a30..f11b4e4d 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -238,6 +238,17 @@ load_balance_is_drop (const dpo_id_t *dpo)
     return (0);
 }
 
+/* Record the flags of the fib_entry_t associated with this load-balance. */
+void
+load_balance_set_fib_entry_flags (index_t lbi,
+                                  fib_entry_flag_t flags)
+{
+    load_balance_t *lb = load_balance_get(lbi);
+
+    lb->lb_fib_entry_flags = flags;
+}
+
+
 void
 load_balance_set_urpf (index_t lbi,
 		       index_t urpf)
@@ -683,7 +694,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                                           buckets,
                                           n_buckets);
 
-                for (ii = old_n_buckets-n_buckets; ii < old_n_buckets; ii++)
+                for (ii = n_buckets; ii < old_n_buckets; ii++)
                 {
                     dpo_reset(&buckets[ii]);
                 }
diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h
index 17996536..b901c5be 100644
--- a/src/vnet/dpo/load_balance.h
+++ b/src/vnet/dpo/load_balance.h
@@ -36,6 +36,7 @@
 #include <vnet/ip/lookup.h>
 #include <vnet/dpo/dpo.h>
 #include <vnet/fib/fib_types.h>
+#include <vnet/fib/fib_entry.h>
 
 /**
  * Load-balance main
@@ -98,6 +99,11 @@ typedef struct load_balance_t_ {
      */
     dpo_proto_t lb_proto;
 
+    /**
+     * Flags from the load-balance's associated fib_entry_t
+     */
+    fib_entry_flag_t lb_fib_entry_flags;
+
     /**
      * The number of locks, which is approximately the number of users,
      * of this load-balance.
@@ -167,6 +173,8 @@ extern void load_balance_set_bucket(index_t lbi,
 				    const dpo_id_t *next);
 extern void load_balance_set_urpf(index_t lbi,
 				  index_t urpf);
+extern void load_balance_set_fib_entry_flags(index_t lbi,
+                                             fib_entry_flag_t flags);
 extern index_t load_balance_get_urpf(index_t lbi);
 
 extern u8* format_load_balance(u8 * s, va_list * args);
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
new file mode 100644
index 00000000..a2d5fdb6
--- /dev/null
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -0,0 +1,759 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/adj/adj.h>
+
+#undef REP_DEBUG
+
+#ifdef REP_DEBUG
+/* Debug trace prefixed with the formatted replicate; off unless REP_DEBUG */
+#define REP_DBG(_rep, _fmt, _args...)                                   \
+{                                                                       \
+    u8* _tmp =NULL;                                                     \
+    _tmp = replicate_format(replicate_get_index((_rep)),                \
+                            0, 0, _tmp);                                \
+    clib_warning("rep:[%v]:" _fmt, _tmp, ##_args);                      \
+    vec_free(_tmp);                                                     \
+}
+#else
+#define REP_DBG(_p, _fmt, _args...)
+#endif
+
+
+/**
+ * Pool of all DPOs. It's not static so the DP can have fast access
+ */
+replicate_t *replicate_pool;
+
+/**
+ * The one instance of replicate main
+ */
+replicate_main_t replicate_main;
+
+static inline index_t
+replicate_get_index (const replicate_t *rep)
+{
+    return (rep - replicate_pool);  /* element offset of rep from the pool base */
+}
+
+/* Return the bucket array in use: the inline array when
+ * REP_HAS_INLINE_BUCKETS() holds, otherwise the rep_buckets vector. */
+static inline dpo_id_t*
+replicate_get_buckets (replicate_t *rep)
+{
+    dpo_id_t *buckets;
+
+    buckets = (REP_HAS_INLINE_BUCKETS(rep) ?
+               rep->rep_buckets_inline :
+               rep->rep_buckets);
+    return (buckets);
+}
+
+/* Get a zeroed replicate from the pool and reset its combined counter. */
+static replicate_t *
+replicate_alloc_i (void)
+{
+    replicate_t *rep;
+    index_t repi;
+
+    pool_get_aligned(replicate_pool, rep, CLIB_CACHE_LINE_BYTES);
+    memset(rep, 0, sizeof(*rep));
+    repi = replicate_get_index(rep);
+
+    vlib_validate_combined_counter(&replicate_main.repm_counters, repi);
+    vlib_zero_combined_counter(&replicate_main.repm_counters, repi);
+    return (rep);
+}
+
+/* Format a replicate: header with index, bucket count and 'to' counters,
+ * then one indented line per bucket. 'flags' is not consulted here. */
+static u8*
+replicate_format (index_t repi,
+                  replicate_format_flags_t flags,
+                  u32 indent,
+                  u8 *s)
+{
+    replicate_t *rep = replicate_get(repi);
+    dpo_id_t *bkts = replicate_get_buckets(rep);
+    vlib_counter_t to;
+    u32 bi;
+
+    vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to);
+
+    s = format(s, "%U: ", format_dpo_type, DPO_REPLICATE);
+    s = format(s, "[index:%d buckets:%d ", repi, rep->rep_n_buckets);
+    s = format(s, "to:[%Ld:%Ld]]", to.packets, to.bytes);
+
+    for (bi = 0; bi < rep->rep_n_buckets; bi++)
+    {
+        s = format(s, "\n%U", format_white_space, indent+2);
+        s = format(s, "[%d]", bi);
+        s = format(s, " %U", format_dpo_id, &bkts[bi], indent+6);
+    }
+    return (s);
+}
+
+u8*
+format_replicate (u8 * s, va_list * args)
+{
+    index_t repi = va_arg(*args, index_t);     /* 1st vararg: replicate index */
+    replicate_format_flags_t flags = va_arg(*args, replicate_format_flags_t);
+
+    return (replicate_format(repi, flags, 0, s));      /* indent fixed at 0 */
+}
+static u8*
+format_replicate_dpo (u8 * s, va_list * args)
+{
+    index_t repi = va_arg(*args, index_t);     /* 1st vararg: replicate index */
+    u32 indent = va_arg(*args, u32);           /* 2nd vararg: indentation */
+
+    return (replicate_format(repi, REPLICATE_FORMAT_DETAIL, indent, s));
+}
+
+
+/* Allocate a replicate for rep_proto that reports num_buckets buckets. */
+static replicate_t *
+replicate_create_i (u32 num_buckets,
+                    dpo_proto_t rep_proto)
+{
+    replicate_t *rep = replicate_alloc_i();
+
+    rep->rep_proto = rep_proto;
+    rep->rep_n_buckets = num_buckets;
+
+    /* when the buckets are not held inline, size the bucket vector */
+    if (!REP_HAS_INLINE_BUCKETS(rep))
+    {
+        vec_validate_aligned(rep->rep_buckets,
+                             rep->rep_n_buckets - 1,
+                             CLIB_CACHE_LINE_BYTES);
+    }
+
+    REP_DBG(rep, "create");
+    return (rep);
+}
+
+index_t
+replicate_create (u32 n_buckets, dpo_proto_t rep_proto)
+{
+    replicate_t *rep = replicate_create_i(n_buckets, rep_proto);
+    return (replicate_get_index(rep));   /* callers get the pool index */
+}
+
+/* Stack entry 'bucket' of the given bucket array on the DPO 'next'. */
+static inline void
+replicate_set_bucket_i (replicate_t *rep, u32 bucket,
+                        dpo_id_t *buckets, const dpo_id_t *next)
+{
+    dpo_id_t *slot = &buckets[bucket];
+    dpo_stack(DPO_REPLICATE, rep->rep_proto, slot, next);
+}
+
+/* Point one bucket of an existing replicate at a new next DPO. */
+void
+replicate_set_bucket (index_t repi,
+                      u32 bucket,
+                      const dpo_id_t *next)
+{
+    replicate_t *rep = replicate_get(repi);
+
+    /* the bucket must be one the replicate already reports */
+    ASSERT(rep->rep_n_buckets > bucket);
+
+    replicate_set_bucket_i(rep, bucket,
+                           replicate_get_buckets(rep),
+                           next);
+}
+
+/* Non-zero only for a replicate DPO whose single bucket is a drop. */
+int
+replicate_is_drop (const dpo_id_t *dpo)
+{
+    replicate_t *rep;
+
+    if (DPO_REPLICATE == dpo->dpoi_type)
+    {
+        rep = replicate_get(dpo->dpoi_index);
+
+        if (1 == rep->rep_n_buckets)
+            return (dpo_is_drop(replicate_get_bucket_i(rep, 0)));
+    }
+
+    return (0);
+}
+
+/*
+ * Look up the replicate by index and return the DPO held in the
+ * requested bucket.
+ */
+const dpo_id_t *
+replicate_get_bucket (index_t repi,
+                      u32 bucket)
+{
+    return (replicate_get_bucket_i(replicate_get(repi), bucket));
+}
+
+
+/* Ensure the path vector is never empty: an empty one gets a drop path. */
+static load_balance_path_t *
+replicate_multipath_next_hop_fixup (load_balance_path_t *nhs,
+                                    dpo_proto_t drop_proto)
+{
+    load_balance_path_t *drop_path;
+
+    if (vec_len(nhs) > 0)
+        return (nhs);
+
+    /*
+     * the replicate needs something to fill a bucket with; use the drop
+     */
+    vec_add2(nhs, drop_path, 1);
+    drop_path->path_weight = 1;
+    dpo_copy(&drop_path->path_dpo, drop_dpo_get(drop_proto));
+
+    return (nhs);
+}
+
+/*
+ * Fill the replicate's buckets from the paths, one bucket per path.
+ * replicate_multipath_update sizes the bucket array as vec_len(nhs), so a
+ * path is never repeated according to its path_weight.
+ */
+static void
+replicate_fill_buckets (replicate_t *rep,
+                        load_balance_path_t *nhs,
+                        dpo_id_t *buckets,
+                        u32 n_buckets)
+{
+    load_balance_path_t * nh;
+    u32 bucket;
+
+    bucket = 0;
+
+    vec_foreach (nh, nhs)
+    {
+        /*
+         * stay inside the caller's array even when ASSERT is compiled out
+         */
+        ASSERT(bucket < n_buckets);
+        if (bucket >= n_buckets)
+            break;
+
+        replicate_set_bucket_i(rep, bucket++, buckets, &nh->path_dpo);
+    }
+}
+
+static inline void
+replicate_set_n_buckets (replicate_t *rep,
+                         u32 n_buckets)
+{
+    rep->rep_n_buckets = n_buckets; /* the bucket count the replicate reports */
+}
+
+void
+replicate_multipath_update (const dpo_id_t *dpo,
+                            load_balance_path_t * next_hops)
+{
+    load_balance_path_t * nh, * nhs;
+    dpo_id_t *tmp_dpo;
+    u32 ii, n_buckets;
+    replicate_t *rep;
+    /* NOTE: next_hops is consumed - its path DPOs are reset and it is freed */
+    ASSERT(DPO_REPLICATE == dpo->dpoi_type);
+    rep = replicate_get(dpo->dpoi_index);
+    nhs = replicate_multipath_next_hop_fixup(next_hops,
+                                             rep->rep_proto);
+    n_buckets = vec_len(nhs);
+
+    if (0 == rep->rep_n_buckets)
+    {
+        /*
+         * first time initialisation. no packets inflight, so we can write
+         * at leisure.
+         */
+        replicate_set_n_buckets(rep, n_buckets);
+
+        if (!REP_HAS_INLINE_BUCKETS(rep))
+            vec_validate_aligned(rep->rep_buckets,
+                                 rep->rep_n_buckets - 1,
+                                 CLIB_CACHE_LINE_BYTES);
+
+        replicate_fill_buckets(rep, nhs,
+                               replicate_get_buckets(rep),
+                               n_buckets);
+    }
+    else
+    {
+        /*
+         * This is a modification of an existing replicate.
+         * We need to ensure that packets in flight see a consistent state, that
+         * is the number of reported buckets the REP has
+         * is not more than it actually has. So if the
+         * number of buckets is increasing, we must update the bucket array first,
+         * then the reported number. vice-versa if the number of buckets goes down.
+         */
+        if (n_buckets == rep->rep_n_buckets)
+        {
+            /*
+             * no change in the number of buckets. we can simply fill what
+             * is new over what is old.
+             */
+            replicate_fill_buckets(rep, nhs,
+                                   replicate_get_buckets(rep),
+                                   n_buckets);
+        }
+        else if (n_buckets > rep->rep_n_buckets)
+        {
+            /*
+             * we have more buckets. the currently reported bucket count stays
+             * within the range of valid buckets while we write, so we
+             * update it last.
+             */
+            if (n_buckets > REP_NUM_INLINE_BUCKETS &&
+                rep->rep_n_buckets <= REP_NUM_INLINE_BUCKETS)
+            {
+                /*
+                 * the new increased number of buckets is crossing the threshold
+                 * from the inline storage to out-line. Alloc the outline buckets
+                 * first, then fixup the number. then reset the inlines.
+                 */
+                ASSERT(NULL == rep->rep_buckets);
+                vec_validate_aligned(rep->rep_buckets,
+                                     n_buckets - 1,
+                                     CLIB_CACHE_LINE_BYTES);
+
+                replicate_fill_buckets(rep, nhs,
+                                       rep->rep_buckets,
+                                       n_buckets);
+                CLIB_MEMORY_BARRIER();
+                replicate_set_n_buckets(rep, n_buckets);
+
+                CLIB_MEMORY_BARRIER();
+
+                for (ii = 0; ii < REP_NUM_INLINE_BUCKETS; ii++)
+                {
+                    dpo_reset(&rep->rep_buckets_inline[ii]);
+                }
+            }
+            else
+            {
+                if (n_buckets <= REP_NUM_INLINE_BUCKETS)
+                {
+                    /*
+                     * we are not crossing the threshold and it's still inline buckets.
+                     * we can write the new on the old..
+                     */
+                    replicate_fill_buckets(rep, nhs,
+                                           replicate_get_buckets(rep),
+                                           n_buckets);
+                    CLIB_MEMORY_BARRIER();
+                    replicate_set_n_buckets(rep, n_buckets);
+                }
+                else
+                {
+                    /*
+                     * the buckets are already out-line. We need a new bucket array
+                     * to hold the increased number of choices.
+                     */
+                    dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
+
+                    new_buckets = NULL;
+                    old_buckets = replicate_get_buckets(rep);
+
+                    vec_validate_aligned(new_buckets,
+                                         n_buckets - 1,
+                                         CLIB_CACHE_LINE_BYTES);
+
+                    replicate_fill_buckets(rep, nhs, new_buckets, n_buckets);
+                    CLIB_MEMORY_BARRIER();
+                    rep->rep_buckets = new_buckets;
+                    CLIB_MEMORY_BARRIER();
+                    replicate_set_n_buckets(rep, n_buckets);
+
+                    vec_foreach(tmp_dpo, old_buckets)
+                    {
+                        dpo_reset(tmp_dpo);
+                    }
+                    vec_free(old_buckets);
+                }
+            }
+        }
+        else
+        {
+            /*
+             * bucket size shrinkage.
+             */
+            if (n_buckets <= REP_NUM_INLINE_BUCKETS &&
+                rep->rep_n_buckets > REP_NUM_INLINE_BUCKETS)
+            {
+                /*
+                 * the new decreased number of buckets is crossing the threshold
+                 * from out-line storage to inline:
+                 *   1 - Fill the inline buckets,
+                 *   2 - fixup the number (at this point the inline buckets are
+                 *       used).
+                 *   3 - free the outline buckets
+                 */
+                replicate_fill_buckets(rep, nhs,
+                                       rep->rep_buckets_inline,
+                                       n_buckets);
+                CLIB_MEMORY_BARRIER();
+                replicate_set_n_buckets(rep, n_buckets);
+                CLIB_MEMORY_BARRIER();
+
+                vec_foreach(tmp_dpo, rep->rep_buckets)
+                {
+                    dpo_reset(tmp_dpo);
+                }
+                vec_free(rep->rep_buckets);
+            }
+            else
+            {
+                /*
+                 * not crossing the threshold.
+                 *  1 - update the number to the smaller size
+                 *  2 - write the new buckets
+                 *  3 - reset those no longer used.
+                 */
+                dpo_id_t *buckets;
+                u32 old_n_buckets;
+
+                old_n_buckets = rep->rep_n_buckets;
+                buckets = replicate_get_buckets(rep);
+
+                replicate_set_n_buckets(rep, n_buckets);
+                CLIB_MEMORY_BARRIER();
+
+                replicate_fill_buckets(rep, nhs,
+                                       buckets,
+                                       n_buckets);
+
+                for (ii = n_buckets; ii < old_n_buckets; ii++)
+                {
+                    dpo_reset(&buckets[ii]);
+                }
+            }
+        }
+    }
+
+    vec_foreach (nh, nhs)
+    {
+        dpo_reset(&nh->path_dpo);
+    }
+    vec_free(nhs);
+}
+
+/* DPO vft lock hook: take one more reference on the replicate. */
+static void
+replicate_lock (dpo_id_t *dpo)
+{
+    replicate_t *rep;
+
+    rep = replicate_get(dpo->dpoi_index);
+    rep->rep_locks += 1;
+}
+
+/* Tear down a replicate: reset its buckets, free any vector, pool_put. */
+static void
+replicate_destroy (replicate_t *rep)
+{
+    dpo_id_t *bucket, *end;
+
+    bucket = replicate_get_buckets(rep);
+    end = bucket + rep->rep_n_buckets;
+
+    for (; bucket < end; bucket++)
+        dpo_reset(bucket);
+
+    REP_DBG(rep, "destroy");
+    /* inline buckets are part of the object; only a vector needs freeing */
+    if (!REP_HAS_INLINE_BUCKETS(rep))
+    {
+        vec_free(rep->rep_buckets);
+    }
+
+    pool_put(replicate_pool, rep);
+}
+
+/*
+ * DPO vft unlock hook: drop one reference and destroy the replicate
+ * when the count reaches zero.
+ */
+static void
+replicate_unlock (dpo_id_t *dpo)
+{
+    replicate_t *rep = replicate_get(dpo->dpoi_index);
+
+    if (0 != --rep->rep_locks)
+        return;
+
+    replicate_destroy(rep);
+}
+
+/* DPO vft memory hook: report replicate pool usage via the FIB helper. */
+static void
+replicate_mem_show (void)
+{
+    fib_show_memory_usage("replicate",
+                          pool_elts(replicate_pool), pool_len(replicate_pool),
+                          sizeof(replicate_t));
+}
+
+const static dpo_vft_t rep_vft = {
+    .dv_lock = replicate_lock, /* rep_locks++ */
+    .dv_unlock = replicate_unlock, /* rep_locks--, destroy at zero */
+    .dv_format = format_replicate_dpo,
+    .dv_mem_show = replicate_mem_show, /* reports replicate_pool usage */
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a replicate
+ *        object. Each list is NULL terminated.
+ *
+ * this means that these graph nodes are the ones from which a replicate is
+ * the parent object in the DPO-graph.
+ */
+const static char* const replicate_ip4_nodes[] =
+{
+    "ip4-replicate",
+    NULL,
+};
+const static char* const replicate_ip6_nodes[] =
+{
+    "ip6-replicate",
+    NULL,
+};
+const static char* const replicate_mpls_nodes[] =
+{
+    "mpls-replicate", /* NOTE(review): node not registered below - confirm */
+    NULL,
+};
+
+const static char* const * const replicate_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = replicate_ip4_nodes,
+    [DPO_PROTO_IP6]  = replicate_ip6_nodes,
+    [DPO_PROTO_MPLS] = replicate_mpls_nodes,
+};
+
+void
+replicate_module_init (void)
+{
+    dpo_register(DPO_REPLICATE, &rep_vft, replicate_nodes); /* vft + nodes */
+}
+
+/*
+ * CLI handler for "show replicate [<index>]": with an index print that
+ * replicate in detail, otherwise print a summary of every pool entry.
+ */
+static clib_error_t *
+replicate_show (vlib_main_t * vm,
+                unformat_input_t * input,
+                vlib_cli_command_t * cmd)
+{
+    index_t repi = INDEX_INVALID;
+    replicate_t *rep;
+
+    /* consume leading indices (the last one wins); stop at anything else */
+    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT &&
+           unformat (input, "%d", &repi))
+        ;
+
+    if (INDEX_INVALID == repi)
+    {
+        pool_foreach(rep, replicate_pool,
+        ({
+            vlib_cli_output (vm, "%U", format_replicate,
+                             replicate_get_index(rep),
+                             REPLICATE_FORMAT_NONE);
+        }));
+        return 0;
+    }
+
+    /* a specific replicate was requested */
+    vlib_cli_output (vm, "%U", format_replicate, repi,
+                     REPLICATE_FORMAT_DETAIL);
+
+    return 0;
+}
+
+VLIB_CLI_COMMAND (replicate_show_command, static) = {
+    .path = "show replicate",
+    .short_help = "show replicate [<index>]", /* no index: list them all */
+    .function = replicate_show,
+};
+
+typedef struct replicate_trace_t_
+{
+    index_t rep_index; /* index of the replicate the packet arrived on */
+    index_t dpo_index; /* dpoi_index of the bucket DPO the buffer was sent to */
+    dpo_type_t dpo_type; /* dpoi_type of that bucket DPO */
+} replicate_trace_t;
+
+/*
+ * Replicate data-path: the original buffer goes to bucket 0, a copy of it
+ * to every other bucket. Copies are traced on the original's trace.
+ */
+static uword
+replicate_inline (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_frame_t * frame)
+{
+    vlib_combined_counter_main_t * cm = &replicate_main.repm_counters;
+    u32 n_left_from, * from, * to_next, next_index;
+    u32 cpu_index = os_get_cpu_number();
+
+    from = vlib_frame_vector_args (frame);
+    n_left_from = frame->n_vectors;
+    next_index = node->cached_next_index;
+
+    while (n_left_from > 0)
+    {
+        u32 n_left_to_next;
+        vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+        while (n_left_from > 0 && n_left_to_next > 0)
+        {
+            u32 next0, ci0, bi0, bucket, repi0;
+            const replicate_t *rep0;
+            vlib_buffer_t * b0, *c0;
+            const dpo_id_t *dpo0;
+
+            bi0 = from[0];
+            to_next[0] = bi0;
+            from += 1;
+            to_next += 1;
+            n_left_from -= 1;
+            n_left_to_next -= 1;
+
+            b0 = vlib_get_buffer (vm, bi0);
+            repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+            rep0 = replicate_get(repi0);
+            vlib_increment_combined_counter(cm, cpu_index, repi0, 1,
+                vlib_buffer_length_in_chain(vm, b0));
+
+            /* ship the original to the first bucket */
+            dpo0 = replicate_get_bucket_i(rep0, 0);
+            next0 = dpo0->dpoi_next_node;
+            vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+                t->rep_index = repi0;
+                t->dpo_index = dpo0->dpoi_index;
+                t->dpo_type = dpo0->dpoi_type;
+            }
+            vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                             n_left_to_next, bi0, next0);
+
+            /* ship copies to the rest of the buckets */
+            for (bucket = 1; bucket < rep0->rep_n_buckets; bucket++)
+            {
+                /* a full frame must be handed over before we write to it */
+                if (PREDICT_FALSE(0 == n_left_to_next))
+                {
+                    vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+                    vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+                }
+                c0 = vlib_buffer_copy(vm, b0);
+                if (PREDICT_FALSE(NULL == c0))
+                    break; /* NOTE(review): assumes NULL on buffer exhaustion */
+                ci0 = vlib_get_buffer_index(vm, c0);
+                to_next[0] = ci0;
+                to_next += 1;
+                n_left_to_next -= 1;
+                dpo0 = replicate_get_bucket_i(rep0, bucket);
+                next0 = dpo0->dpoi_next_node;
+                vnet_buffer (c0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+                if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+                {
+                    replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+                    t->rep_index = repi0;
+                    t->dpo_index = dpo0->dpoi_index;
+                    t->dpo_type = dpo0->dpoi_type;
+                }
+                vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                                 n_left_to_next, ci0, next0);
+            }
+        }
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+    return frame->n_vectors;
+}
+
+/* Trace formatter: "replicate: <rep-index> via <dpo-type>:<dpo-index>". */
+static u8 *
+format_replicate_trace (u8 * s, va_list * args)
+{
+  /* vm and node are unused but must be consumed to reach the trace */
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  replicate_trace_t *trace = va_arg (*args, replicate_trace_t *);
+
+  return format (s, "replicate: %d via %U:%d",
+                 trace->rep_index,
+                 format_dpo_type, trace->dpo_type, trace->dpo_index);
+}
+
+/* IPv4 node function: all the work is done by replicate_inline(). */
+static uword
+ip4_replicate (vlib_main_t * vm,
+               vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+    return replicate_inline (vm, node, frame);
+}
+
+/**
+ * @brief The IPv4 replicate graph node. "error-drop" is its only static next.
+ */
+VLIB_REGISTER_NODE (ip4_replicate_node) = {
+  .function = ip4_replicate,
+  .name = "ip4-replicate",
+  .vector_size = sizeof (u32),
+
+  .format_trace = format_replicate_trace,
+  .n_next_nodes = 1,
+  .next_nodes = {
+      [0] = "error-drop",
+  },
+};
+
+/* IPv6 node function: all the work is done by replicate_inline(). */
+static uword
+ip6_replicate (vlib_main_t * vm,
+               vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+    return replicate_inline (vm, node, frame);
+}
+
+/**
+ * @brief The IPv6 replicate graph node. "error-drop" is its only static next.
+ */
+VLIB_REGISTER_NODE (ip6_replicate_node) = {
+  .function = ip6_replicate,
+  .name = "ip6-replicate",
+  .vector_size = sizeof (u32),
+
+  .format_trace = format_replicate_trace,
+  .n_next_nodes = 1,
+  .next_nodes = {
+      [0] = "error-drop",
+  },
+};
diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h
new file mode 100644
index 00000000..a564739c
--- /dev/null
+++ b/src/vnet/dpo/replicate_dpo.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The replicate DPO: sends a copy of each packet to every one of
+ *        the next DPOs held in its buckets.
+ */
+
+#ifndef __REPLICATE_DPO_H__
+#define __REPLICATE_DPO_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/fib/fib_types.h>
+
+/**
+ * replicate main: global state shared by all replicate objects
+ */
+typedef struct replicate_main_t_
+{
+    vlib_combined_counter_main_t repm_counters; /* bumped per replicate index */
+} replicate_main_t;
+
+extern replicate_main_t replicate_main;
+
+/**
+ * The number of buckets that a replicate object can have and still
+ * fit in one cache-line
+ */
+#define REP_NUM_INLINE_BUCKETS 4
+
+/**
+ * The replicate DPO provides;
+ *  - replication of each packet to every next DPO (bucket) it holds
+ *  - per-replicate counters
+ */
+typedef struct replicate_t_ {
+    /**
+     * number of buckets in the replicate; set to the number of paths.
+     */
+    u16 rep_n_buckets;
+
+   /**
+     * The protocol of packets that traverse this REP.
+     * used to select the drop DPO installed when there are no paths.
+     * u8.
+     */
+    dpo_proto_t rep_proto;
+
+    /**
+     * The number of locks, which is approximately the number of users,
+     * of this replicate.
+     * The object is destroyed when the count drops to zero.
+     * The lock count is a u32.
+     */
+    u32 rep_locks;
+
+    /**
+     * Vector of next DPOs, used when rep_n_buckets > REP_NUM_INLINE_BUCKETS
+     */
+    dpo_id_t *rep_buckets;
+
+    /**
+     * The rest of the cache line is used for buckets. In the common case
+     * where there are no more than 4 buckets, the buckets are
+     * on the same cacheline and we save ourselves a pointer dereference in
+     * the data-path.
+     */
+    dpo_id_t rep_buckets_inline[REP_NUM_INLINE_BUCKETS];
+} replicate_t;
+
+STATIC_ASSERT(sizeof(replicate_t) <= CLIB_CACHE_LINE_BYTES,
+	      "A replicate object size exceeds one cachline");
+
+/**
+ * Flags controlling replicate formatting/display
+ */
+typedef enum replicate_format_flags_t_ {
+    REPLICATE_FORMAT_NONE,
+    REPLICATE_FORMAT_DETAIL = (1 << 0),
+} replicate_format_flags_t;
+
+extern index_t replicate_create(u32 num_buckets,
+                                dpo_proto_t rep_proto);
+extern void replicate_multipath_update(
+    const dpo_id_t *dpo,
+    load_balance_path_t *next_hops); /* next_hops is reset and freed */
+
+extern void replicate_set_bucket(index_t repi,
+				    u32 bucket,
+				    const dpo_id_t *next);
+
+extern u8* format_replicate(u8 * s, va_list * args);
+
+extern const dpo_id_t *replicate_get_bucket(index_t repi,
+					       u32 bucket);
+extern int replicate_is_drop(const dpo_id_t *dpo); /* 1 bucket, a drop */
+
+/**
+ * The pool is exposed (an encapsulation breakage) for fast DP access
+ */
+extern replicate_t *replicate_pool;
+static inline replicate_t*
+replicate_get (index_t repi)
+{
+    return (pool_elt_at_index(replicate_pool, repi)); /* index -> object */
+}
+
+#define REP_HAS_INLINE_BUCKETS(_rep)		\
+    ((_rep)->rep_n_buckets <= REP_NUM_INLINE_BUCKETS)
+
+/*
+ * Data-path bucket accessor: buckets live inline in the replicate while
+ * their number is within REP_NUM_INLINE_BUCKETS, else in rep_buckets.
+ */
+static inline const dpo_id_t *
+replicate_get_bucket_i (const replicate_t *rep,
+                        u32 bucket)
+{
+    ASSERT(bucket < rep->rep_n_buckets);
+
+    if (PREDICT_TRUE(REP_HAS_INLINE_BUCKETS(rep)))
+        return (&rep->rep_buckets_inline[bucket]);
+
+    return (&rep->rep_buckets[bucket]);
+}
+
+extern void replicate_module_init(void);
+
+#endif
diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c
index 0298541b..8f07fa55 100644
--- a/src/vnet/ethernet/arp.c
+++ b/src/vnet/ethernet/arp.c
@@ -23,6 +23,7 @@
 #include <vppinfra/mhash.h>
 #include <vnet/fib/ip4_fib.h>
 #include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_mcast.h>
 #include <vnet/mpls/mpls.h>
 
 /**
@@ -438,33 +439,74 @@ arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
   e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4);
 
-  if (NULL != e)
-    {
-      adj_nbr_walk_nh4 (sw_if_index,
-			&e->ip4_address, arp_mk_complete_walk, e);
-    }
-  else
+  switch (adj->lookup_next_index)
     {
+    case IP_LOOKUP_NEXT_ARP:
+    case IP_LOOKUP_NEXT_GLEAN:
+      if (NULL != e)
+	{
+	  adj_nbr_walk_nh4 (sw_if_index,
+			    &e->ip4_address, arp_mk_complete_walk, e);
+	}
+      else
+	{
+	  /*
+	   * no matching ARP entry.
+	   * construct the rewrite required for an ARP packet, and stick
+	   * that in the adj's pipe to smoke.
+	   */
+	  adj_nbr_update_rewrite
+	    (ai,
+	     ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+	     ethernet_build_rewrite
+	     (vnm,
+	      sw_if_index,
+	      VNET_LINK_ARP,
+	      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+
+	  /*
+	   * since the FIB has added this adj for a route, it makes sense it
+	   * may want to forward traffic sometime soon. Let's send a
+	   * speculative ARP. just one. If we were to do periodically that
+	   * wouldn't be bad either, but that's more code than i'm prepared to
+	   * write at this time for relatively little reward.
+	   */
+	  arp_nbr_probe (adj);
+	}
+      break;
+    case IP_LOOKUP_NEXT_MCAST:
       /*
-       * no matching ARP entry.
-       * construct the rewire required to for an ARP packet, and stick
-       * that in the adj's pipe to smoke.
+       * Construct a partial rewrite from the known ethernet mcast dest MAC
        */
-      adj_nbr_update_rewrite (ai,
-			      ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
-			      ethernet_build_rewrite (vnm,
-						      sw_if_index,
-						      VNET_LINK_ARP,
-						      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+      adj_mcast_update_rewrite
+	(ai,
+	 ethernet_build_rewrite (vnm,
+				 sw_if_index,
+				 adj->ia_link,
+				 ethernet_ip4_mcast_dst_addr ()));
 
       /*
-       * since the FIB has added this adj for a route, it makes sense it may
-       * want to forward traffic sometime soon. Let's send a speculative ARP.
-       * just one. If we were to do periodically that wouldn't be bad either,
-       * but that's more code than i'm prepared to write at this time for
-       * relatively little reward.
+       * Complete the remaining fields of the adj's rewrite to direct the
+       * completion of the rewrite at switch time by copying in the IP
+       * dst address's bytes.
+       * Offset is 11 bytes from the end of the MAC header - which is three
+       * bytes into the destination address. And we write 3 bytes.
        */
-      arp_nbr_probe (adj);
+      adj->rewrite_header.dst_mcast_offset = 11;
+      adj->rewrite_header.dst_mcast_n_bytes = 3;
+
+      break;
+
+    case IP_LOOKUP_NEXT_DROP:
+    case IP_LOOKUP_NEXT_PUNT:
+    case IP_LOOKUP_NEXT_LOCAL:
+    case IP_LOOKUP_NEXT_REWRITE:
+    case IP_LOOKUP_NEXT_LOAD_BALANCE:
+    case IP_LOOKUP_NEXT_MIDCHAIN:
+    case IP_LOOKUP_NEXT_ICMP_ERROR:
+    case IP_LOOKUP_N_NEXT:
+      ASSERT (0);
+      break;
     }
 }
 
diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h
index f88b0cf3..3acde421 100644
--- a/src/vnet/ethernet/ethernet.h
+++ b/src/vnet/ethernet/ethernet.h
@@ -547,6 +547,8 @@ void ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai);
 u8 *ethernet_build_rewrite (vnet_main_t * vnm,
 			    u32 sw_if_index,
 			    vnet_link_t link_type, const void *dst_address);
+const u8 *ethernet_ip4_mcast_dst_addr (void);
+const u8 *ethernet_ip6_mcast_dst_addr (void);
 
 extern vlib_node_registration_t ethernet_input_node;
 
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index 1c1f4353..95700309 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -51,6 +51,26 @@
  * This file contains code to manage loopback interfaces.
  */
 
+/* Ethernet dst MAC template for IPv4 multicast: 01:00:5e:00:00:00. */
+const u8 *
+ethernet_ip4_mcast_dst_addr (void)
+{
+  static const u8 ip4_mcast_mac[] = {
+    0x01, 0x00, 0x5e, 0x00, 0x00, 0x00,
+  };
+  return ip4_mcast_mac;
+}
+
+/* Ethernet dst MAC template for IPv6 multicast: 33:33:00:00:00:00. */
+const u8 *
+ethernet_ip6_mcast_dst_addr (void)
+{
+  static const u8 ip6_mcast_mac[] = {
+    0x33, 0x33, 0x00, 0x00, 0x00, 0x00,
+  };
+  return ip6_mcast_mac;
+}
+
 /**
  * @brief build a rewrite string to use for sending packets of type 'link_type'
  * to 'dst_address'
diff --git a/src/vnet/fib/fib_attached_export.c b/src/vnet/fib/fib_attached_export.c
index fd597e3e..574131de 100644
--- a/src/vnet/fib/fib_attached_export.c
+++ b/src/vnet/fib/fib_attached_export.c
@@ -303,8 +303,8 @@ fib_attached_export_import (fib_entry_t *fib_entry,
      * may have realloc'd.
      */
     fib_entry = fib_entry_get(fei);
-        import->faei_export_sibling =
-	  fib_entry_cover_track(fib_entry_get(import->faei_export_entry), fei);
+    import->faei_export_sibling =
+	fib_entry_cover_track(fib_entry_get(import->faei_export_entry), fei);
 
     fed = fib_entry_delegate_find_or_add(fib_entry,
                                          FIB_ENTRY_DELEGATE_ATTACHED_IMPORT);
diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h
index 44a5f2e6..f258b755 100644
--- a/src/vnet/fib/fib_entry.h
+++ b/src/vnet/fib/fib_entry.h
@@ -220,7 +220,7 @@ typedef enum fib_entry_flag_t_ {
     FIB_ENTRY_FLAG_EXCLUSIVE = (1 << FIB_ENTRY_ATTRIBUTE_EXCLUSIVE),
     FIB_ENTRY_FLAG_LOCAL     = (1 << FIB_ENTRY_ATTRIBUTE_LOCAL),
     FIB_ENTRY_FLAG_IMPORT    = (1 << FIB_ENTRY_ATTRIBUTE_IMPORT),
-} fib_entry_flag_t;
+} __attribute__((packed)) fib_entry_flag_t;
 
 /**
  * Flags for the source data
diff --git a/src/vnet/fib/fib_entry_delegate.c b/src/vnet/fib/fib_entry_delegate.c
index a0d45f97..efe402d1 100644
--- a/src/vnet/fib/fib_entry_delegate.c
+++ b/src/vnet/fib/fib_entry_delegate.c
@@ -119,6 +119,9 @@ fib_entry_chain_type_to_delegate_type (fib_forward_chain_type_t fct)
         return (FIB_ENTRY_DELEGATE_CHAIN_MPLS_NON_EOS);
     case FIB_FORW_CHAIN_TYPE_ETHERNET:
         return (FIB_ENTRY_DELEGATE_CHAIN_ETHERNET);
+    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+        break;
     }
     ASSERT(0);
     return (FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4);
diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c
index 1fb04060..d54787cd 100644
--- a/src/vnet/fib/fib_entry_src.c
+++ b/src/vnet/fib/fib_entry_src.c
@@ -313,6 +313,8 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index,
         {
         case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
         case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+        case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+        case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
             /*
              * EOS traffic with no label to stack, we need the IP Adj
              */
@@ -458,6 +460,8 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
     {
 	load_balance_set_urpf(dpo_lb->dpoi_index, ui);
     }
+    load_balance_set_fib_entry_flags(dpo_lb->dpoi_index,
+                                     fib_entry_get_flags_i(fib_entry));
 }
 
 void
diff --git a/src/vnet/fib/fib_node.h b/src/vnet/fib/fib_node.h
index 3ad8ee95..457dfb7a 100644
--- a/src/vnet/fib/fib_node.h
+++ b/src/vnet/fib/fib_node.h
@@ -31,6 +31,7 @@ typedef enum fib_node_type_t_ {
      */
     FIB_NODE_TYPE_WALK,
     FIB_NODE_TYPE_ENTRY,
+    FIB_NODE_TYPE_MFIB_ENTRY,
     FIB_NODE_TYPE_PATH_LIST,
     FIB_NODE_TYPE_PATH,
     FIB_NODE_TYPE_ADJ,
@@ -51,6 +52,7 @@ typedef enum fib_node_type_t_ {
 
 #define FIB_NODE_TYPES {                          \
     [FIB_NODE_TYPE_ENTRY]     = "entry",          \
+    [FIB_NODE_TYPE_MFIB_ENTRY] = "mfib-entry",    \
     [FIB_NODE_TYPE_WALK]      = "walk",           \
     [FIB_NODE_TYPE_PATH_LIST] = "path-list",      \
     [FIB_NODE_TYPE_PATH]      = "path",           \
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 809e3e16..080057f3 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -23,6 +23,7 @@
 #include <vnet/dpo/lookup_dpo.h>
 
 #include <vnet/adj/adj.h>
+#include <vnet/adj/adj_mcast.h>
 
 #include <vnet/fib/fib_path.h>
 #include <vnet/fib/fib_node.h>
@@ -960,6 +961,8 @@ fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath)
 	cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_HOST;
     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED)
 	cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED;
+    if (rpath->frp_flags & FIB_ROUTE_PATH_LOCAL)
+	cfg_flags |= FIB_PATH_CFG_FLAG_LOCAL;
 
     return (cfg_flags);
 }
@@ -1003,28 +1006,25 @@ fib_path_create (fib_node_index_t pl_index,
     /*
      * deduce the path's tpye from the parementers and save what is needed.
      */
-    if (~0 != rpath->frp_sw_if_index)
+    if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_LOCAL)
     {
-	if (flags & FIB_PATH_CFG_FLAG_LOCAL)
-	{
-	    path->fp_type = FIB_PATH_TYPE_RECEIVE;
-	    path->receive.fp_interface = rpath->frp_sw_if_index;
-            path->receive.fp_addr = rpath->frp_addr;
-	}
-	else
-	{
-	    if (ip46_address_is_zero(&rpath->frp_addr))
-	    {
-		path->fp_type = FIB_PATH_TYPE_ATTACHED;
-		path->attached.fp_interface = rpath->frp_sw_if_index;
-	    }
-	    else
-	    {
-		path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP;
-		path->attached_next_hop.fp_interface = rpath->frp_sw_if_index;
-		path->attached_next_hop.fp_nh = rpath->frp_addr;
-	    }
-	}
+        path->fp_type = FIB_PATH_TYPE_RECEIVE;
+        path->receive.fp_interface = rpath->frp_sw_if_index;
+        path->receive.fp_addr = rpath->frp_addr;
+    }
+    else if (~0 != rpath->frp_sw_if_index)
+    {
+        if (ip46_address_is_zero(&rpath->frp_addr))
+        {
+            path->fp_type = FIB_PATH_TYPE_ATTACHED;
+            path->attached.fp_interface = rpath->frp_sw_if_index;
+        }
+        else
+        {
+            path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP;
+            path->attached_next_hop.fp_interface = rpath->frp_sw_if_index;
+            path->attached_next_hop.fp_nh = rpath->frp_addr;
+        }
     }
     else
     {
@@ -1199,7 +1199,7 @@ fib_path_cmp_i (const fib_path_t *path1,
     {
 	res = (path1->fp_type - path2->fp_type);
     }
-    if (path1->fp_nh_proto != path2->fp_nh_proto)
+    else if (path1->fp_nh_proto != path2->fp_nh_proto)
     {
 	res = (path1->fp_nh_proto - path2->fp_nh_proto);
     }
@@ -1770,8 +1770,11 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
 
 		break;
 	    }
-	    }
+	    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+	    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
 	    break;
+            }
+            break;
 	case FIB_PATH_TYPE_RECURSIVE:
 	    switch (fct)
 	    {
@@ -1781,13 +1784,15 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
 	    case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
 		fib_path_recursive_adj_update(path, fct, dpo);
 		break;
+	    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+	    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
 	    case FIB_FORW_CHAIN_TYPE_ETHERNET:
 		ASSERT(0);
 		break;
 	    }
 	    break;
 	case FIB_PATH_TYPE_DEAG:
-	    switch (fct)
+            switch (fct)
 	    {
 	    case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
                 lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID,
@@ -1800,7 +1805,9 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
 	    case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
 	    case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
 		dpo_copy(dpo, &path->fp_dpo);
-		break;		
+		break;
+	    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+	    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
 	    case FIB_FORW_CHAIN_TYPE_ETHERNET:
 		ASSERT(0);
 		break;
@@ -1810,12 +1817,38 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
 	    dpo_copy(dpo, &path->exclusive.fp_ex_dpo);
 	    break;
         case FIB_PATH_TYPE_ATTACHED:
-	case FIB_PATH_TYPE_RECEIVE:
-	case FIB_PATH_TYPE_SPECIAL:
-	    ASSERT(0);
+	    switch (fct)
+	    {
+	    case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+	    case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+	    case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+	    case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+	    case FIB_FORW_CHAIN_TYPE_ETHERNET:
+                break;
+	    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+	    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+                {
+                    adj_index_t ai;
+
+                    /*
+                     * Create the adj needed for sending IP multicast traffic
+                     */
+                    ai = adj_mcast_add_or_lock(path->fp_nh_proto,
+                                               fib_forw_chain_type_to_link_type(fct),
+                                               path->attached.fp_interface);
+                    dpo_set(dpo, DPO_ADJACENCY_MCAST,
+                            fib_forw_chain_type_to_dpo_proto(fct),
+                            ai);
+                    adj_unlock(ai);
+                }
+                break;
+            }
+            break;
+        case FIB_PATH_TYPE_RECEIVE:
+        case FIB_PATH_TYPE_SPECIAL:
+            dpo_copy(dpo, &path->fp_dpo);
             break;
 	}
-
     }
 }
 
diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c
index db9d1af9..ce11cf45 100644
--- a/src/vnet/fib/fib_path_list.c
+++ b/src/vnet/fib/fib_path_list.c
@@ -585,8 +585,11 @@ fib_path_list_resolve (fib_path_list_t *path_list)
     path_list = fib_path_list_get(path_list_index);
 
     FIB_PATH_LIST_DBG(path_list, "resovled");
-    fib_path_list_mk_urpf(path_list);
 
+    if (!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_NO_URPF))
+    {
+        fib_path_list_mk_urpf(path_list);
+    }
     return (path_list);
 }
 
@@ -1025,14 +1028,14 @@ fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index,
  */
 void
 fib_path_list_contribute_forwarding (fib_node_index_t path_list_index,
-				     fib_forward_chain_type_t type,
+				     fib_forward_chain_type_t fct,
 				     dpo_id_t *dpo)
 {
     fib_path_list_t *path_list;
 
     path_list = fib_path_list_get(path_list_index);
 
-    fib_path_list_mk_lb(path_list, type, dpo);
+    fib_path_list_mk_lb(path_list, fct, dpo);
 }
 
 /*
diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h
index f4f94a1b..b4971add 100644
--- a/src/vnet/fib/fib_path_list.h
+++ b/src/vnet/fib/fib_path_list.h
@@ -60,6 +60,10 @@ typedef enum fib_path_list_attribute_t_ {
      * looped path-list. one path looped implies the whole list is
      */
     FIB_PATH_LIST_ATTRIBUTE_LOOPED,
+    /**
+     * no uRPF - do not generate unicast RPF list for this path-list
+     */
+    FIB_PATH_LIST_ATTRIBUTE_NO_URPF,
     /**
      * Marher. Add new flags before this one, and then update it.
      */
@@ -74,6 +78,7 @@ typedef enum fib_path_list_flags_t_ {
     FIB_PATH_LIST_FLAG_EXCLUSIVE = (1 << FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE),
     FIB_PATH_LIST_FLAG_RESOLVED  = (1 << FIB_PATH_LIST_ATTRIBUTE_RESOLVED),
     FIB_PATH_LIST_FLAG_LOOPED    = (1 << FIB_PATH_LIST_ATTRIBUTE_LOOPED),
+    FIB_PATH_LIST_FLAG_NO_URPF   = (1 << FIB_PATH_LIST_ATTRIBUTE_NO_URPF),
 } fib_path_list_flags_t;
 
 #define FIB_PATH_LIST_ATTRIBUTES {       		 \
@@ -83,6 +88,7 @@ typedef enum fib_path_list_flags_t_ {
     [FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE] = "exclusive",   \
     [FIB_PATH_LIST_ATTRIBUTE_LOCAL]     = "local",	 \
     [FIB_PATH_LIST_ATTRIBUTE_LOOPED]     = "looped",	 \
+    [FIB_PATH_LIST_ATTRIBUTE_NO_URPF]     = "no-uRPF",	 \
 }
 
 #define FOR_EACH_PATH_LIST_ATTRIBUTE(_item)		\
diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c
index 76db42d0..57eb0ae8 100644
--- a/src/vnet/fib/fib_table.c
+++ b/src/vnet/fib/fib_table.c
@@ -1043,6 +1043,26 @@ fib_table_destroy (fib_table_t *fib_table)
 	break;
     }
 }
+void
+fib_table_walk (u32 fib_index,
+                fib_protocol_t proto,
+                fib_table_walk_fn_t fn,
+                void *ctx)
+{
+    switch (proto)
+    {
+    case FIB_PROTOCOL_IP4:
+	ip4_fib_table_walk(ip4_fib_get(fib_index), fn, ctx);
+	break;
+    case FIB_PROTOCOL_IP6:
+	ip6_fib_table_walk(fib_index, fn, ctx);
+	break;
+    case FIB_PROTOCOL_MPLS:
+	mpls_fib_table_walk(mpls_fib_get(fib_index), fn, ctx);
+	break;
+    }
+}
+
 
 void
 fib_table_unlock (u32 fib_index,
@@ -1094,11 +1114,56 @@ format_fib_table_name (u8* s, va_list ap)
     return (s);
 }
 
+/**
+ * @brief Table flush context. Store the indices of matching FIB entries
+ * that need to be removed.
+ */
+typedef struct fib_table_flush_ctx_t_
+{
+    /**
+     * The list of entries to flush
+     */
+    fib_node_index_t *ftf_entries;
+
+    /**
+     * The source we are flushing
+     */
+    fib_source_t ftf_source;
+} fib_table_flush_ctx_t;
+
+static int
+fib_table_flush_cb (fib_node_index_t fib_entry_index,
+                    void *arg)
+{
+    fib_table_flush_ctx_t *ctx = arg;
+
+    if (fib_entry_is_sourced(fib_entry_index, ctx->ftf_source))
+    {
+        vec_add1(ctx->ftf_entries, fib_entry_index);
+    }
+    return (1);
+}
+
+
 void
 fib_table_flush (u32 fib_index,
 		 fib_protocol_t proto,
 		 fib_source_t source)
 {
-    // FIXME
-    ASSERT(0);
+    fib_node_index_t *fib_entry_index;
+    fib_table_flush_ctx_t ctx = {
+        .ftf_entries = NULL,
+        .ftf_source = source,
+    };
+
+    fib_table_walk(fib_index, proto,
+                   fib_table_flush_cb,
+                   &ctx);
+
+    vec_foreach(fib_entry_index, ctx.ftf_entries)
+    {
+        fib_entry_delete(*fib_entry_index, source);
+    }
+
+    vec_free(ctx.ftf_entries);
 }
diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h
index cfec516d..e7e66acb 100644
--- a/src/vnet/fib/fib_table.h
+++ b/src/vnet/fib/fib_table.h
@@ -729,4 +729,20 @@ extern u32 fib_table_get_num_entries(u32 fib_index,
 extern fib_table_t *fib_table_get(fib_node_index_t index,
 				  fib_protocol_t proto);
 
+/**
+ * @brief Call back function when walking entries in a FIB table
+ */
+typedef int (*fib_table_walk_fn_t)(fib_node_index_t fei,
+                                   void *ctx);
+
+/**
+ * @brief Walk all entries in a FIB table
+ * N.B: This walk is NOT safe against deletes. If you need to delete, walk
+ * the whole table, store the elements in a vector, then delete them afterwards
+ */
+extern void fib_table_walk(u32 fib_index,
+                           fib_protocol_t proto,
+                           fib_table_walk_fn_t fn,
+                           void *ctx);
+
 #endif
diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c
index 5083db26..1c4a63a2 100644
--- a/src/vnet/fib/fib_test.c
+++ b/src/vnet/fib/fib_test.c
@@ -663,14 +663,15 @@ fib_test_v4 (void)
     /*
      * at this stage there are 5 entries in the test FIB (plus 5 in the default),
      * all of which are special sourced and so none of which share path-lists.
-     * There are also 6 entries, and 6 non-shared path-lists, in the v6 default
-     * table
+     * There are also 2 entries, and 2 non-shared path-lists, in the v6 default
+     * table, and 4 path-lists in the v6 MFIB table
      */
-#define NBR (5+5+6)
+#define ENBR (5+5+2)
+#define PNBR (5+5+6)
     FIB_TEST((0 == fib_path_list_db_size()),   "path list DB is empty");
-    FIB_TEST((NBR == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -754,9 +755,9 @@ fib_test_v4 (void)
      * +2 interface routes +2 non-shared path-lists
      */
     FIB_TEST((0 == fib_path_list_db_size()),   "path list DB is empty");
-    FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNBR+2 == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+2 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -808,9 +809,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((1 == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+3 == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNBR+3 == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+2 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -838,9 +839,9 @@ fib_test_v4 (void)
      * -1 shared-path-list
      */
     FIB_TEST((0 == fib_path_list_db_size()),   "path list DB is empty");
-    FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNBR+2 == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+2 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -1018,9 +1019,9 @@ fib_test_v4 (void)
      * +2 adj-fibs, and their non-shared path-lists
      */
     FIB_TEST((0 == fib_path_list_db_size()),   "path list DB is empty");
-    FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+4 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+4 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -1054,9 +1055,9 @@ fib_test_v4 (void)
      * +1 entry and a shared path-list
      */
     FIB_TEST((1 == fib_path_list_db_size()),   "path list DB is empty");
-    FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+5 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+5 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /* 1.1.2.0/24 */
@@ -1087,9 +1088,9 @@ fib_test_v4 (void)
      * +1 entry only
      */
     FIB_TEST((1 == fib_path_list_db_size()),   "path list DB is empty");
-    FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+6 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -1127,9 +1128,9 @@ fib_test_v4 (void)
      * +1 shared-pathlist
      */
     FIB_TEST((2 == fib_path_list_db_size()),   "path list DB is empty");
-    FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+6 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -1158,9 +1159,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((1 == fib_path_list_db_size()),   "path list DB is %d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+6 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -1203,9 +1204,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((2  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     fib_prefix_t bgp_101_pfx = {
@@ -1239,9 +1240,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((2  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+8 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+8 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -1368,9 +1369,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((3  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+7 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+7 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+10 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -1983,9 +1984,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((4  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+12 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -2030,9 +2031,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((4  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+13 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+13 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -2080,9 +2081,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((5  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+9 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+9 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+14 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+14 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -2118,9 +2119,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((4  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+13 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+13 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -2154,9 +2155,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((4  == fib_path_list_db_size()),   "path list DB population:%d",
 	fib_path_list_db_size());
-    FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
 	fib_path_list_pool_size());
-    FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+12 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -2185,9 +2186,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((4  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+12 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -2215,9 +2216,9 @@ fib_test_v4 (void)
 
     FIB_TEST((3  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+7 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+7 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+10 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -2245,9 +2246,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((2  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+9 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+9 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -2355,9 +2356,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((1  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -2380,9 +2381,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((2  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+8 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+8 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
@@ -2428,9 +2429,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((3  == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+10 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     ai_03 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
@@ -2492,9 +2493,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((1  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
 
@@ -2562,9 +2563,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((4  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+10 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
     /*
@@ -2753,9 +2754,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((1  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
     /*
@@ -2830,9 +2831,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((1  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
     /*
@@ -3180,9 +3181,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((1  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
     /*
@@ -3247,9 +3248,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((1  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
     /*
@@ -3297,9 +3298,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((1  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
     /*
@@ -3342,9 +3343,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((1  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
     /*
@@ -3438,9 +3439,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((0  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+4 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+4 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
     /*
@@ -3482,9 +3483,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((1  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR+5 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+5 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
     fib_table_entry_delete(fib_index,
@@ -3493,9 +3494,9 @@ fib_test_v4 (void)
 
     FIB_TEST((0  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+4 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+4 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
     /*
@@ -3577,9 +3578,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((0  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR+2 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR+2 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
     /*
@@ -3619,9 +3620,9 @@ fib_test_v4 (void)
      */
     FIB_TEST((0  == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
 
     /*
@@ -3644,11 +3645,11 @@ fib_test_v4 (void)
 
     FIB_TEST((0  == fib_path_list_db_size()), "path list DB population:%d",
     	     fib_path_list_db_size());
-    FIB_TEST((NBR-5 == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNBR-5 == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
-    FIB_TEST((NBR-5 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENBR-5 == fib_entry_pool_size()), "entry pool size is %d",
     	     fib_entry_pool_size());
-    FIB_TEST((NBR-5 == pool_elts(fib_urpf_list_pool)), "uRPF pool size is %d",
+    FIB_TEST((ENBR-5 == pool_elts(fib_urpf_list_pool)), "uRPF pool size is %d",
     	     pool_elts(fib_urpf_list_pool));
 
     return 0;
@@ -3720,13 +3721,15 @@ fib_test_v6 (void)
 
     /*
      * At this stage there is one v4 FIB with 5 routes and two v6 FIBs
-     * each with 6 entries. All entries are special so no path-list sharing.
+     * each with 2 entries and a v6 mfib with 4 path-lists.
+     * All entries are special so no path-list sharing.
      */
-#define NPS (5+6+6)
+#define ENPS (5+4)
+#define PNPS (5+4+4)
     FIB_TEST((0 == fib_path_list_db_size()),   "path list DB is empty");
-    FIB_TEST((NPS == fib_path_list_pool_size()), "path list pool size is %d",
+    FIB_TEST((PNPS == fib_path_list_pool_size()), "path list pool size is %d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NPS == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENPS == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -3816,9 +3819,9 @@ fib_test_v6 (void)
      * +2 entries. +2 unshared path-lists
      */
     FIB_TEST((0 == fib_path_list_db_size()),   "path list DB is empty");
-    FIB_TEST((NPS+2 == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNPS+2 == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENPS+2 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -3862,9 +3865,9 @@ fib_test_v6 (void)
      */
     FIB_TEST((1 == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NPS+3 == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNPS+3 == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENPS+2 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -3890,9 +3893,9 @@ fib_test_v6 (void)
      */
     FIB_TEST((0 == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NPS+2 == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNPS+2 == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENPS+2 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -4006,9 +4009,9 @@ fib_test_v6 (void)
      */
     FIB_TEST((0 == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NPS+4 == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNPS+4 == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NPS+4 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENPS+4 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -4073,9 +4076,9 @@ fib_test_v6 (void)
      */
     FIB_TEST((1 == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NPS+5 == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNPS+5 == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NPS+6 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENPS+6 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -4201,9 +4204,9 @@ fib_test_v6 (void)
      */
     FIB_TEST((1 == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NPS+5 == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNPS+5 == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NPS+6 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENPS+6 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -4275,9 +4278,9 @@ fib_test_v6 (void)
      */
     FIB_TEST((1 == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NPS+7 == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNPS+7 == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NPS+8 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENPS+8 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
 
@@ -4401,9 +4404,9 @@ fib_test_v6 (void)
      */
     FIB_TEST((1 == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NPS+7 == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNPS+7 == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NPS+8 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENPS+8 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -4501,9 +4504,9 @@ fib_test_v6 (void)
      */
     FIB_TEST((0 == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NPS == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNPS == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NPS == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENPS == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     /*
@@ -4513,9 +4516,9 @@ fib_test_v6 (void)
 
     FIB_TEST((0 == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
-    FIB_TEST((NPS-6 == fib_path_list_pool_size()), "path list pool size is%d",
+    FIB_TEST((PNPS-2 == fib_path_list_pool_size()), "path list pool size is%d",
 	     fib_path_list_pool_size());
-    FIB_TEST((NPS-6 == fib_entry_pool_size()), "entry pool size is %d",
+    FIB_TEST((ENPS-2 == fib_entry_pool_size()), "entry pool size is %d",
 	     fib_entry_pool_size());
 
     adj_unlock(ai_02);
diff --git a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c
index b66e7194..3ecb38e8 100644
--- a/src/vnet/fib/fib_types.c
+++ b/src/vnet/fib/fib_types.c
@@ -290,8 +290,10 @@ fib_forw_chain_type_to_link_type (fib_forward_chain_type_t fct)
     switch (fct)
     {
     case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
 	return (VNET_LINK_IP4);
     case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
 	return (VNET_LINK_IP6);
     case FIB_FORW_CHAIN_TYPE_ETHERNET:
 	return (VNET_LINK_ETHERNET);
@@ -313,8 +315,10 @@ fib_forw_chain_type_to_dpo_proto (fib_forward_chain_type_t fct)
     switch (fct)
     {
     case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
 	return (DPO_PROTO_IP4);
     case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
 	return (DPO_PROTO_IP6);
     case FIB_FORW_CHAIN_TYPE_ETHERNET:
 	return (DPO_PROTO_ETHERNET);
diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h
index 0a15fef1..c51bc9c0 100644
--- a/src/vnet/fib/fib_types.h
+++ b/src/vnet/fib/fib_types.h
@@ -95,6 +95,14 @@ typedef enum fib_forward_chain_type_t_ {
      * option is converted into one of the other three internally.
      */
     FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+    /**
+     * Contribute an object that is to be used to forward IP4 multicast packets
+     */
+    FIB_FORW_CHAIN_TYPE_MCAST_IP4,
+    /**
+     * Contribute an object that is to be used to forward IP6 multicast packets
+     */
+    FIB_FORW_CHAIN_TYPE_MCAST_IP6,
     /**
      * Contribute an object that is to be used to forward Ethernet packets.
      * This is last in the list since it is not valid for many FIB objects,
@@ -107,6 +115,8 @@ typedef enum fib_forward_chain_type_t_ {
     [FIB_FORW_CHAIN_TYPE_ETHERNET]      = "ethernet",     	\
     [FIB_FORW_CHAIN_TYPE_UNICAST_IP4]   = "unicast-ip4",	\
     [FIB_FORW_CHAIN_TYPE_UNICAST_IP6]   = "unicast-ip6",	\
+    [FIB_FORW_CHAIN_TYPE_MCAST_IP4]     = "multicast-ip4",	\
+    [FIB_FORW_CHAIN_TYPE_MCAST_IP6]     = "multicast-ip6",	\
     [FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS]  = "mpls-neos",	        \
     [FIB_FORW_CHAIN_TYPE_MPLS_EOS]      = "mpls-eos",	        \
 }
@@ -263,6 +273,10 @@ typedef enum fib_route_path_flags_t_
      * Recursion constraint of via an attahced prefix
      */
     FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED = (1 << 1),
+    /**
+     * A for-us/local path
+     */
+    FIB_ROUTE_PATH_LOCAL = (1 << 2),
 } fib_route_path_flags_t;
 
 /**
diff --git a/src/vnet/fib/fib_urpf_list.c b/src/vnet/fib/fib_urpf_list.c
index 263812ad..b4844420 100644
--- a/src/vnet/fib/fib_urpf_list.c
+++ b/src/vnet/fib/fib_urpf_list.c
@@ -29,16 +29,24 @@ format_fib_urpf_list (u8 *s, va_list args)
     u32 *swi;
 
     ui = va_arg(args, index_t);
-    urpf = fib_urpf_list_get(ui);
 
-    s = format(s, "uPRF-list:%d len:%d itfs:[",
-	       ui, vec_len(urpf->furpf_itfs));
+    if (INDEX_INVALID != ui)
+    {
+        urpf = fib_urpf_list_get(ui);
+
+        s = format(s, "uPRF-list:%d len:%d itfs:[",
+                   ui, vec_len(urpf->furpf_itfs));
 
-    vec_foreach(swi, urpf->furpf_itfs)
+        vec_foreach(swi, urpf->furpf_itfs)
+        {
+            s = format(s, "%d, ", *swi);
+        }
+        s = format(s, "]");
+    }
+    else
     {
-	s = format(s, "%d, ", *swi);
+        s = format(s, "uRPF-list: None");
     }
-    s = format(s, "]");
 
     return (s);
 }
diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c
index f6ebce00..e8211c80 100644
--- a/src/vnet/fib/ip4_fib.c
+++ b/src/vnet/fib/ip4_fib.c
@@ -378,16 +378,13 @@ ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib,
     ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 1); // DELETE
 }
 
-static void
-ip4_fib_table_show_all (ip4_fib_t *fib,
-			vlib_main_t * vm)
+void
+ip4_fib_table_walk (ip4_fib_t *fib,
+                    fib_table_walk_fn_t fn,
+                    void *ctx)
 {
-    fib_node_index_t *fib_entry_indicies;
-    fib_node_index_t *fib_entry_index;
     int i;
 
-    fib_entry_indicies = NULL;
-
     for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++)
     {
 	uword * hash = fib->fib_entry_by_dst_address[i];
@@ -398,14 +395,45 @@ ip4_fib_table_show_all (ip4_fib_t *fib,
 
 	    hash_foreach_pair (p, hash,
 	    ({
-		vec_add1(fib_entry_indicies, p->value[0]);
+		fn(p->value[0], ctx);
 	    }));
 	}
     }
+}
+
+/**
+ * Context for the 'show' walk; accumulates the index of each entry visited
+ */
+typedef struct ip4_fib_show_walk_ctx_t_
+{
+    fib_node_index_t *ifsw_indicies;
+} ip4_fib_show_walk_ctx_t;
+
+static int
+ip4_fib_show_walk_cb (fib_node_index_t fib_entry_index,
+                      void *arg)
+{
+    ip4_fib_show_walk_ctx_t *ctx = arg;
+
+    vec_add1(ctx->ifsw_indicies, fib_entry_index);
+
+    return (1);
+}
+
+static void
+ip4_fib_table_show_all (ip4_fib_t *fib,
+			vlib_main_t * vm)
+{
+    ip4_fib_show_walk_ctx_t ctx = {
+        .ifsw_indicies = NULL,
+    };
+    fib_node_index_t *fib_entry_index;
 
-    vec_sort_with_function(fib_entry_indicies, fib_entry_cmp_for_sort);
+    ip4_fib_table_walk(fib, ip4_fib_show_walk_cb, &ctx);
+    vec_sort_with_function(ctx.ifsw_indicies,
+                           fib_entry_cmp_for_sort);
 
-    vec_foreach(fib_entry_index, fib_entry_indicies)
+    vec_foreach(fib_entry_index, ctx.ifsw_indicies)
     {
 	vlib_cli_output(vm, "%U",
                         format_fib_entry,
@@ -413,7 +441,7 @@ ip4_fib_table_show_all (ip4_fib_t *fib,
                         FIB_ENTRY_FORMAT_BRIEF);
     }
 
-    vec_free(fib_entry_indicies);
+    vec_free(ctx.ifsw_indicies);
 }
 
 static void
diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h
index cf312cdc..a8dc68b5 100644
--- a/src/vnet/fib/ip4_fib.h
+++ b/src/vnet/fib/ip4_fib.h
@@ -64,6 +64,15 @@ extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib,
 extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib,
 				    const ip4_address_t * dst);
 
+/**
+ * @brief Walk all entries in a FIB table
+ * N.B: This is NOT safe for deletes. If you need to delete, walk the whole
+ * table and store elements in a vector, then delete the elements
+ */
+extern void ip4_fib_table_walk(ip4_fib_t *fib,
+                               fib_table_walk_fn_t fn,
+                               void *ctx);
+
 /**
  * @brief Get the FIB at the given index
  */
diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c
index d5b9bdcb..343ff55e 100644
--- a/src/vnet/fib/ip6_fib.c
+++ b/src/vnet/fib/ip6_fib.c
@@ -38,57 +38,6 @@ vnet_ip6_fib_init (u32 fib_index)
 				FIB_ENTRY_FLAG_DROP,
 				ADJ_INDEX_INVALID);
 
-    /*
-     * Add ff02::1:ff00:0/104 via local route for all tables.
-     *  This is required for neighbor discovery to work.
-     */
-    ip6_set_solicited_node_multicast_address(&pfx.fp_addr.ip6, 0);
-    pfx.fp_len = 104;
-    fib_table_entry_special_add(fib_index,
-				&pfx,
-				FIB_SOURCE_SPECIAL,
-				FIB_ENTRY_FLAG_LOCAL,
-				ADJ_INDEX_INVALID);
-
-    /*
-     * Add all-routers multicast address via local route for all tables
-     */
-    ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6,
-					IP6_MULTICAST_SCOPE_link_local,
-					IP6_MULTICAST_GROUP_ID_all_routers);
-    pfx.fp_len = 128;
-    fib_table_entry_special_add(fib_index,
-				&pfx,
-				FIB_SOURCE_SPECIAL,
-				FIB_ENTRY_FLAG_LOCAL,
-				ADJ_INDEX_INVALID);
-
-    /*
-     * Add all-nodes multicast address via local route for all tables
-     */
-    ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6,
-					IP6_MULTICAST_SCOPE_link_local,
-					IP6_MULTICAST_GROUP_ID_all_hosts);
-    pfx.fp_len = 128;
-    fib_table_entry_special_add(fib_index,
-				&pfx,
-				FIB_SOURCE_SPECIAL,
-				FIB_ENTRY_FLAG_LOCAL,
-				ADJ_INDEX_INVALID);
-
-    /*
-     *  Add all-mldv2  multicast address via local route for all tables
-     */
-    ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6,
-					IP6_MULTICAST_SCOPE_link_local,
-					IP6_MULTICAST_GROUP_ID_mldv2_routers);
-    pfx.fp_len = 128;
-    fib_table_entry_special_add(fib_index,
-				&pfx,
-				FIB_SOURCE_SPECIAL,
-				FIB_ENTRY_FLAG_LOCAL,
-				ADJ_INDEX_INVALID);
-
     /*
      * all link local for us
      */
@@ -512,27 +461,68 @@ ip6_fib_table_fwding_dpo_remove (u32 fib_index,
     if (--table->dst_address_length_refcounts[len] == 0)
     {
 	table->non_empty_dst_address_length_bitmap =
-            clib_bitmap_set (table->non_empty_dst_address_length_bitmap, 
+            clib_bitmap_set (table->non_empty_dst_address_length_bitmap,
                              128 - len, 0);
 	compute_prefix_lengths_in_search_order (table);
     }
 }
 
+/**
+ * @brief Context when walking the IPv6 table. Since all VRFs are in the
+ * same hash table, we need to filter only those we need as we walk
+ */
+typedef struct ip6_fib_walk_ctx_t_
+{
+    u32 i6w_fib_index;
+    fib_table_walk_fn_t i6w_fn;
+    void *i6w_ctx;
+} ip6_fib_walk_ctx_t;
+
+static int
+ip6_fib_walk_cb (clib_bihash_kv_24_8_t * kvp,
+                 void *arg)
+{
+    ip6_fib_walk_ctx_t *ctx = arg;
+
+    if ((kvp->key[2] >> 32) == ctx->i6w_fib_index)
+    {
+        ctx->i6w_fn(kvp->value, ctx->i6w_ctx);
+    }
+
+    return (1);
+}
+
+void
+ip6_fib_table_walk (u32 fib_index,
+                    fib_table_walk_fn_t fn,
+                    void *arg)
+{
+    ip6_fib_walk_ctx_t ctx = {
+        .i6w_fib_index = fib_index,
+        .i6w_fn = fn,
+        .i6w_ctx = arg,
+    };
+    ip6_main_t *im = &ip6_main;
+
+    BV(clib_bihash_foreach_key_value_pair)(&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
+					   ip6_fib_walk_cb,
+					   &ctx);
+
+}
+
 typedef struct ip6_fib_show_ctx_t_ {
-    u32 fib_index;
     fib_node_index_t *entries;
 } ip6_fib_show_ctx_t;
 
-static void
-ip6_fib_table_collect_entries (clib_bihash_kv_24_8_t * kvp,
-			       void *arg)
+static int
+ip6_fib_table_show_walk (fib_node_index_t fib_entry_index,
+                         void *arg)
 {
     ip6_fib_show_ctx_t *ctx = arg;
 
-    if ((kvp->key[2] >> 32) == ctx->fib_index)
-    {
-	vec_add1(ctx->entries, kvp->value);
-    }
+    vec_add1(ctx->entries, fib_entry_index);
+
+    return (1);
 }
 
 static void
@@ -541,15 +531,10 @@ ip6_fib_table_show_all (ip6_fib_t *fib,
 {
     fib_node_index_t *fib_entry_index;
     ip6_fib_show_ctx_t ctx = {
-	.fib_index = fib->index,
 	.entries = NULL,
     };
-    ip6_main_t *im = &ip6_main;
-
-    BV(clib_bihash_foreach_key_value_pair)(&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
-					   ip6_fib_table_collect_entries,
-					   &ctx);
 
+    ip6_fib_table_walk(fib->index, ip6_fib_table_show_walk, &ctx);
     vec_sort_with_function(ctx.entries, fib_entry_cmp_for_sort);
 
     vec_foreach(fib_entry_index, ctx.entries)
diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h
index f6af993a..78da3746 100644
--- a/src/vnet/fib/ip6_fib.h
+++ b/src/vnet/fib/ip6_fib.h
@@ -54,9 +54,18 @@ u32 ip6_fib_table_fwding_lookup_with_if_index(ip6_main_t * im,
 					      u32 sw_if_index,
 					      const ip6_address_t * dst);
 u32 ip6_fib_table_fwding_lookup(ip6_main_t * im,
-				u32 fib_index, 
+				u32 fib_index,
 				const ip6_address_t * dst);
 
+/**
+ * @brief Walk all entries in a FIB table
+ * N.B: This is NOT safe for deletes. If you need to delete, walk the whole
+ * table and store elements in a vector, then delete the elements
+ */
+extern void ip6_fib_table_walk(u32 fib_index,
+                               fib_table_walk_fn_t fn,
+                               void *ctx);
+
 /**
  * @biref return the DPO that the LB stacks on.
  */
diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c
index 6a9b1ac2..cc657975 100644
--- a/src/vnet/fib/mpls_fib.c
+++ b/src/vnet/fib/mpls_fib.c
@@ -44,10 +44,11 @@
  * Switching between schemes based on observed/measured action similarity is not
  * considered on the grounds of complexity and flip-flopping.
  *
- * VPP mantra - favour performance over memory. We choose a 21 bit key.  
+ * VPP mantra - favour performance over memory. We choose a 21 bit key.
  */
 
 #include <vnet/fib/fib_table.h>
+#include <vnet/fib/mpls_fib.h>
 #include <vnet/dpo/load_balance.h>
 #include <vnet/dpo/drop_dpo.h>
 #include <vnet/dpo/punt_dpo.h>
@@ -342,6 +343,20 @@ mpls_fib_table_get_flow_hash_config (u32 fib_index)
     return (0);
 }
 
+void
+mpls_fib_table_walk (mpls_fib_t *mpls_fib,
+                     fib_table_walk_fn_t fn,
+                     void *ctx)
+{
+    fib_node_index_t lfei;
+    mpls_label_t key;
+
+    hash_foreach(key, lfei, mpls_fib->mf_entries,
+    ({
+	fn(lfei, ctx);
+    }));
+}
+
 static void
 mpls_fib_table_show_all (const mpls_fib_t *mpls_fib,
 			 vlib_main_t * vm)
diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h
index 93ae4623..e2ef9253 100644
--- a/src/vnet/fib/mpls_fib.h
+++ b/src/vnet/fib/mpls_fib.h
@@ -70,6 +70,15 @@ extern void mpls_fib_forwarding_table_reset(mpls_fib_t *mf,
 					    mpls_label_t label,
 					    mpls_eos_bit_t eos);
 
+/**
+ * @brief Walk all entries in a FIB table
+ * N.B: This is NOT safe for deletes. If you need to delete, walk the whole
+ * table and store elements in a vector, then delete the elements
+ */
+extern void mpls_fib_table_walk(mpls_fib_t *fib,
+                                fib_table_walk_fn_t fn,
+                                void *ctx);
+
 /**
  * @brief
  *  Lookup a label and EOS bit in the MPLS_FIB table to retrieve the
diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api
index 5371696c..65f6e7a7 100644
--- a/src/vnet/ip/ip.api
+++ b/src/vnet/ip/ip.api
@@ -395,6 +395,41 @@ define ip_add_del_route_reply
   i32 retval;
 };
 
+/** \brief Add / del mroute request
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param next_hop_sw_if_index - software interface index of the next-hop
+    @param table_id - fib table /vrf associated with the route
+
+    FIXME - the remaining fields are not yet documented
+*/
+define ip_mroute_add_del
+{
+  u32 client_index;
+  u32 context;
+  u32 next_hop_sw_if_index;
+  u32 table_id;
+  u32 entry_flags;
+  u32 itf_flags;
+  u16 grp_address_length;
+  u8 create_vrf_if_needed;
+  u8 is_add;
+  u8 is_ipv6;
+  u8 is_local;
+  u8 grp_address[16];
+  u8 src_address[16];
+};
+
+/** \brief Reply for add / del mroute request
+    @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+*/
+define ip_mroute_add_del_reply
+{
+  u32 context;
+  i32 retval;
+};
+
 define ip_address_details
 {
   u32 client_index;
@@ -424,6 +459,24 @@ define ip_dump
   u8 is_ipv6;
 };
 
+define mfib_signal_dump
+{
+  u32 client_index;
+  u32 context;
+};
+
+define mfib_signal_details
+{
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index;
+  u32 table_id;
+  u16 grp_address_len;
+  u8 grp_address[16];
+  u8 src_address[16];
+  u16 ip_packet_len;
+  u8 ip_packet_data[256];
+};
 
 /*
  * Local Variables:
diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
index cc33dff4..b184fbae 100644
--- a/src/vnet/ip/ip4.h
+++ b/src/vnet/ip/ip4.h
@@ -68,6 +68,18 @@ typedef struct ip4_fib_t
 
 } ip4_fib_t;
 
+typedef struct ip4_mfib_t
+{
+  /* Hash table for each prefix length mapping. */
+  uword *fib_entry_by_dst_address[65];
+
+  /* Table ID (hash key) for this FIB. */
+  u32 table_id;
+
+  /* Index into FIB vector. */
+  u32 index;
+} ip4_mfib_t;
+
 struct ip4_main_t;
 
 typedef void (ip4_add_del_interface_address_function_t)
@@ -99,11 +111,17 @@ typedef struct ip4_main_t
   /** Vector of FIBs. */
   struct fib_table_t_ *fibs;
 
+  /** Vector of MFIBs. */
+  struct mfib_table_t_ *mfibs;
+
   u32 fib_masks[33];
 
   /** Table index indexed by software interface. */
   u32 *fib_index_by_sw_if_index;
 
+  /** Multicast table index indexed by software interface. */
+  u32 *mfib_index_by_sw_if_index;
+
   /* IP4 enabled count by software interface */
   u8 *ip_enabled_by_sw_if_index;
 
@@ -111,6 +129,10 @@ typedef struct ip4_main_t
      ID space is not necessarily dense; index space is dense. */
   uword *fib_index_by_table_id;
 
+  /** Hash table mapping table id to multicast fib index.
+     ID space is not necessarily dense; index space is dense. */
+  uword *mfib_index_by_table_id;
+
   /** Functions to call when interface address changes. */
     ip4_add_del_interface_address_callback_t
     * add_del_interface_address_callbacks;
@@ -140,7 +162,9 @@ extern ip4_main_t ip4_main;
 /** Global ip4 input node.  Errors get attached to ip4 input node. */
 extern vlib_node_registration_t ip4_input_node;
 extern vlib_node_registration_t ip4_lookup_node;
+extern vlib_node_registration_t ip4_local_node;
 extern vlib_node_registration_t ip4_rewrite_node;
+extern vlib_node_registration_t ip4_rewrite_mcast_node;
 extern vlib_node_registration_t ip4_rewrite_local_node;
 extern vlib_node_registration_t ip4_arp_node;
 extern vlib_node_registration_t ip4_glean_node;
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 87b345bd..8081b34b 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -50,6 +50,7 @@
 #include <vnet/fib/ip4_fib.h>
 #include <vnet/dpo/load_balance.h>
 #include <vnet/dpo/classify_dpo.h>
+#include <vnet/mfib/mfib_table.h>	/* for mFIB table and entry creation */
 
 /**
  * @file
@@ -714,16 +715,17 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
   ip_interface_address_t *ia = 0;
   ip4_address_t *result = 0;
 
-  foreach_ip_interface_address (lm, ia, sw_if_index,
-				1 /* honor unnumbered */ ,
-				(
-				  {
-				  ip4_address_t * a =
-				  ip_interface_address_get_address (lm, ia);
-				  result = a;
-				  break;
-				  }
-				));
+  /* *INDENT-OFF* */
+  foreach_ip_interface_address
+    (lm, ia, sw_if_index,
+     1 /* honor unnumbered */ ,
+     ({
+       ip4_address_t * a =
+         ip_interface_address_get_address (lm, ia);
+       result = a;
+       break;
+     }));
+  /* *INDENT-ON* */
   if (result_ia)
     *result_ia = result ? ia : 0;
   return result;
@@ -748,9 +750,19 @@ ip4_add_interface_routes (u32 sw_if_index,
     {
       fib_node_index_t fei;
 
-      fei = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), FIB_PROTOCOL_IP4, NULL,	/* No next-hop address */
-					     sw_if_index, ~0,	// invalid FIB index
-					     1, NULL,	// no out-label stack
+      fei = fib_table_entry_update_one_path (fib_index, &pfx,
+                                             FIB_SOURCE_INTERFACE,
+                                             (FIB_ENTRY_FLAG_CONNECTED |
+                                              FIB_ENTRY_FLAG_ATTACHED),
+                                             FIB_PROTOCOL_IP4,
+                                             /* No next-hop address */
+                                             NULL,
+					     sw_if_index,
+                                             // invalid FIB index
+                                             ~0,
+					     1,
+                                             // no out-label stack
+                                             NULL,
 					     FIB_ROUTE_PATH_FLAG_NONE);
       a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
     }
@@ -778,8 +790,16 @@ ip4_add_interface_routes (u32 sw_if_index,
 	}
     }
 
-  fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), FIB_PROTOCOL_IP4, &pfx.fp_addr, sw_if_index, ~0,	// invalid FIB index
-				   1, NULL,	// no out-label stack
+  fib_table_entry_update_one_path (fib_index, &pfx,
+                                   FIB_SOURCE_INTERFACE,
+                                   (FIB_ENTRY_FLAG_CONNECTED |
+                                    FIB_ENTRY_FLAG_LOCAL),
+                                   FIB_PROTOCOL_IP4,
+                                   &pfx.fp_addr,
+                                   sw_if_index,
+                                   // invalid FIB index
+                                   ~0,
+				   1, NULL,
 				   FIB_ROUTE_PATH_FLAG_NONE);
 }
 
@@ -827,9 +847,10 @@ ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
 			       !is_enable, 0, 0);
 
-  vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
-			       !is_enable, 0, 0);
 
+  vnet_feature_enable_disable ("ip4-multicast",
+			       "ip4-mfib-forward-lookup",
+			       sw_if_index, is_enable, 0, 0);
 }
 
 static clib_error_t *
@@ -855,36 +876,37 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
    * most routers do.
    */
+  /* *INDENT-OFF* */
   if (!is_del)
     {
       /* When adding an address check that it does not conflict
          with an existing address. */
       ip_interface_address_t *ia;
-      foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
-				    0 /* honor unnumbered */ ,
-				    (
-				      {
-				      ip4_address_t * x =
-				      ip_interface_address_get_address
-				      (&im->lookup_main, ia);
-				      if (ip4_destination_matches_route
-					  (im, address, x, ia->address_length)
-					  ||
-					  ip4_destination_matches_route (im,
-									 x,
-									 address,
-									 address_length))
-				      return
-				      clib_error_create
-				      ("failed to add %U which conflicts with %U for interface %U",
-				       format_ip4_address_and_length, address,
-				       address_length,
-				       format_ip4_address_and_length, x,
-				       ia->address_length,
-				       format_vnet_sw_if_index_name, vnm,
-				       sw_if_index);}
-				    ));
+      foreach_ip_interface_address
+        (&im->lookup_main, ia, sw_if_index,
+         0 /* honor unnumbered */ ,
+         ({
+           ip4_address_t * x =
+             ip_interface_address_get_address
+             (&im->lookup_main, ia);
+           if (ip4_destination_matches_route
+               (im, address, x, ia->address_length) ||
+               ip4_destination_matches_route (im,
+                                              x,
+                                              address,
+                                              address_length))
+             return
+               clib_error_create
+               ("failed to add %U which conflicts with %U for interface %U",
+                format_ip4_address_and_length, address,
+                address_length,
+                format_ip4_address_and_length, x,
+                ia->address_length,
+                format_vnet_sw_if_index_name, vnm,
+                sw_if_index);
+         }));
     }
+  /* *INDENT-ON* */
 
   elts_before = pool_elts (lm->if_address_pool);
 
@@ -918,9 +940,10 @@ done:
 }
 
 clib_error_t *
-ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
-			       ip4_address_t * address, u32 address_length,
-			       u32 is_del)
+ip4_add_del_interface_address (vlib_main_t * vm,
+			       u32 sw_if_index,
+			       ip4_address_t * address,
+			       u32 address_length, u32 is_del)
 {
   return ip4_add_del_interface_address_internal
     (vm, sw_if_index, address, address_length, is_del);
@@ -1027,13 +1050,13 @@ VNET_FEATURE_INIT (ip4_vpath_mc, static) =
 {
   .arc_name = "ip4-multicast",
   .node_name = "vpath-input-ip4",
-  .runs_before = VNET_FEATURES ("ip4-lookup-multicast"),
+  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
 };
 
 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
 {
   .arc_name = "ip4-multicast",
-  .node_name = "ip4-lookup-multicast",
+  .node_name = "ip4-mfib-forward-lookup",
   .runs_before = VNET_FEATURES ("ip4-drop"),
 };
 
@@ -1083,6 +1106,7 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
 
   /* Fill in lookup tables with default table (0). */
   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+  vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
 
   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
 			       is_add, 0, 0);
@@ -1123,6 +1147,7 @@ ip4_lookup_init (vlib_main_t * vm)
 
   /* Create FIB with index 0 and table id of 0. */
   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
+  mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
 
   {
     pg_node_t *pn;
@@ -1341,27 +1366,35 @@ ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
   return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
 }
 
+/* *INDENT-OFF* */
 VLIB_REGISTER_NODE (ip4_drop_node, static) =
 {
-  .function = ip4_drop,.name = "ip4-drop",.vector_size =
-    sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
-    1,.next_nodes =
-  {
-  [0] = "error-drop",}
-,};
+  .function = ip4_drop,
+  .name = "ip4-drop",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip4_forward_next_trace,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
 
 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
 
 VLIB_REGISTER_NODE (ip4_punt_node, static) =
 {
-  .function = ip4_punt,.name = "ip4-punt",.vector_size =
-    sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
-    1,.next_nodes =
-  {
-  [0] = "error-punt",}
-,};
+  .function = ip4_punt,
+  .name = "ip4-punt",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip4_forward_next_trace,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-punt",
+  },
+};
 
 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
+/* *INDENT-ON* */
 
 /* Compute TCP/UDP/ICMP4 checksum in software. */
 u16
@@ -1500,13 +1533,15 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 
 	  fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
 				vnet_buffer (p0)->sw_if_index[VLIB_RX]);
-	  fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
-	    fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+	  fib_index0 =
+	    (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+	     (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
 
 	  fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
 				vnet_buffer (p1)->sw_if_index[VLIB_RX]);
-	  fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
-	    fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
+	  fib_index1 =
+	    (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
+	     (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
 
 	  mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
 	  mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
@@ -1726,8 +1761,9 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 
 	  fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
 				vnet_buffer (p0)->sw_if_index[VLIB_RX]);
-	  fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
-	    fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+	  fib_index0 =
+	    (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+	     (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
 
 	  mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
 
@@ -1838,10 +1874,11 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
   return frame->n_vectors;
 }
 
-VLIB_REGISTER_NODE (ip4_local_node, static) =
+VLIB_REGISTER_NODE (ip4_local_node) =
 {
   .function = ip4_local,.name = "ip4-local",.vector_size =
-    sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
+    sizeof (u32),.format_trace =
+    format_ip4_forward_next_trace,.n_next_nodes =
     IP_LOCAL_N_NEXT,.next_nodes =
   {
   [IP_LOCAL_NEXT_DROP] = "error-drop",
@@ -2022,8 +2059,8 @@ ip4_arp_inline (vlib_main_t * vm,
 	   * Can happen if the control-plane is programming tables
 	   * with traffic flowing; at least that's today's lame excuse.
 	   */
-	  if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
-	      (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
+	  if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
+	      || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
 	    {
 	      p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
 	    }
@@ -2196,15 +2233,17 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
     {
       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
       return clib_error_return
-	(0, "no matching interface address for destination %U (interface %U)",
-	 format_ip4_address, dst,
-	 format_vnet_sw_if_index_name, vnm, sw_if_index);
+	(0,
+	 "no matching interface address for destination %U (interface %U)",
+	 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
+	 sw_if_index);
     }
 
   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
 
   h =
-    vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
+    vlib_packet_template_get_packet (vm,
+				     &im->ip4_arp_request_packet_template,
 				     &bi);
 
   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
@@ -2243,7 +2282,7 @@ typedef enum
 always_inline uword
 ip4_rewrite_inline (vlib_main_t * vm,
 		    vlib_node_runtime_t * node,
-		    vlib_frame_t * frame, int is_midchain)
+		    vlib_frame_t * frame, int is_midchain, int is_mcast)
 {
   ip_lookup_main_t *lm = &ip4_main.lookup_main;
   u32 *from = vlib_frame_vector_args (frame);
@@ -2457,6 +2496,14 @@ ip4_rewrite_inline (vlib_main_t * vm,
 	      adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
 	      adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
 	    }
+	  if (is_mcast)
+	    {
+	      /*
+	       * copy bytes from the IP address into the MAC rewrite
+	       */
+	      vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 1);
+	      vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1, 1);
+	    }
 
 	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
 					   to_next, n_left_to_next,
@@ -2530,6 +2577,13 @@ ip4_rewrite_inline (vlib_main_t * vm,
 
 	  /* Guess we are only writing on simple Ethernet header. */
 	  vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+	  if (is_mcast)
+	    {
+	      /*
+	       * copy bytes from the IP address into the MAC rewrite
+	       */
+	      vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 1);
+	    }
 
 	  /* Update packet buffer attributes/set output interface. */
 	  rw_len0 = adj0[0].rewrite_header.data_bytes;
@@ -2624,36 +2678,58 @@ static uword
 ip4_rewrite (vlib_main_t * vm,
 	     vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
-  return ip4_rewrite_inline (vm, node, frame, 0);
+  return ip4_rewrite_inline (vm, node, frame, 0, 0);
 }
 
 static uword
 ip4_midchain (vlib_main_t * vm,
 	      vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
-  return ip4_rewrite_inline (vm, node, frame, 1);
+  return ip4_rewrite_inline (vm, node, frame, 1, 0);
 }
 
-
-VLIB_REGISTER_NODE (ip4_rewrite_node) =
+static uword
+ip4_rewrite_mcast (vlib_main_t * vm,
+		   vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
-  .function = ip4_rewrite,.name = "ip4-rewrite",.vector_size =
-    sizeof (u32),.format_trace = format_ip4_rewrite_trace,.n_next_nodes =
-    2,.next_nodes =
-  {
-  [IP4_REWRITE_NEXT_DROP] = "error-drop",
-      [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",}
-,};
+  return ip4_rewrite_inline (vm, node, frame, 0, 1);
+}
 
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite);
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_rewrite_node) = {
+  .function = ip4_rewrite,
+  .name = "ip4-rewrite",
+  .vector_size = sizeof (u32),
+
+  .format_trace = format_ip4_rewrite_trace,
+
+  .n_next_nodes = 2,
+  .next_nodes = {
+    [IP4_REWRITE_NEXT_DROP] = "error-drop",
+    [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+  },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
 
-VLIB_REGISTER_NODE (ip4_midchain_node) =
-{
-.function = ip4_midchain,.name = "ip4-midchain",.vector_size =
-    sizeof (u32),.format_trace = format_ip4_forward_next_trace,.sibling_of =
-    "ip4-rewrite",};
+VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
+  .function = ip4_rewrite_mcast,
+  .name = "ip4-rewrite-mcast",
+  .vector_size = sizeof (u32),
 
+  .format_trace = format_ip4_rewrite_trace,
+  .sibling_of = "ip4-rewrite",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
+
+VLIB_REGISTER_NODE (ip4_midchain_node) = {
+  .function = ip4_midchain,
+  .name = "ip4-midchain",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip4_forward_next_trace,
+  .sibling_of =  "ip4-rewrite",
+};
 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
+/* *INDENT-ON* */
 
 static clib_error_t *
 add_del_interface_table (vlib_main_t * vm,
@@ -2695,6 +2771,11 @@ add_del_interface_table (vlib_main_t * vm,
     //
     vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
     im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
+
+    fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+                                                   table_id);
+    vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
+    im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
   }
 
 done:
@@ -2730,243 +2811,6 @@ VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
 };
 /* *INDENT-ON* */
 
-
-static uword
-ip4_lookup_multicast (vlib_main_t * vm,
-		      vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
-  ip4_main_t *im = &ip4_main;
-  vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
-  u32 n_left_from, n_left_to_next, *from, *to_next;
-  ip_lookup_next_t next;
-  u32 cpu_index = os_get_cpu_number ();
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
-
-      while (n_left_from >= 4 && n_left_to_next >= 2)
-	{
-	  vlib_buffer_t *p0, *p1;
-	  u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
-	  ip_lookup_next_t next0, next1;
-	  ip4_header_t *ip0, *ip1;
-	  u32 fib_index0, fib_index1;
-	  const dpo_id_t *dpo0, *dpo1;
-	  const load_balance_t *lb0, *lb1;
-
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t *p2, *p3;
-
-	    p2 = vlib_get_buffer (vm, from[2]);
-	    p3 = vlib_get_buffer (vm, from[3]);
-
-	    vlib_prefetch_buffer_header (p2, LOAD);
-	    vlib_prefetch_buffer_header (p3, LOAD);
-
-	    CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
-	    CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
-	  }
-
-	  pi0 = to_next[0] = from[0];
-	  pi1 = to_next[1] = from[1];
-
-	  p0 = vlib_get_buffer (vm, pi0);
-	  p1 = vlib_get_buffer (vm, pi1);
-
-	  ip0 = vlib_buffer_get_current (p0);
-	  ip1 = vlib_buffer_get_current (p1);
-
-	  fib_index0 =
-	    vec_elt (im->fib_index_by_sw_if_index,
-		     vnet_buffer (p0)->sw_if_index[VLIB_RX]);
-	  fib_index1 =
-	    vec_elt (im->fib_index_by_sw_if_index,
-		     vnet_buffer (p1)->sw_if_index[VLIB_RX]);
-	  fib_index0 =
-	    (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
-	     (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
-	  fib_index1 =
-	    (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
-	     (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
-
-	  lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0),
-					       &ip0->dst_address);
-	  lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index1),
-					       &ip1->dst_address);
-
-	  lb0 = load_balance_get (lb_index0);
-	  lb1 = load_balance_get (lb_index1);
-
-	  ASSERT (lb0->lb_n_buckets > 0);
-	  ASSERT (is_pow2 (lb0->lb_n_buckets));
-	  ASSERT (lb1->lb_n_buckets > 0);
-	  ASSERT (is_pow2 (lb1->lb_n_buckets));
-
-	  vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
-	    (ip0, lb0->lb_hash_config);
-
-	  vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
-	    (ip1, lb1->lb_hash_config);
-
-	  dpo0 = load_balance_get_bucket_i (lb0,
-					    (vnet_buffer (p0)->ip.flow_hash &
-					     (lb0->lb_n_buckets_minus_1)));
-	  dpo1 = load_balance_get_bucket_i (lb1,
-					    (vnet_buffer (p1)->ip.flow_hash &
-					     (lb1->lb_n_buckets_minus_1)));
-
-	  next0 = dpo0->dpoi_next_node;
-	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-	  next1 = dpo1->dpoi_next_node;
-	  vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
-
-	  if (1)		/* $$$$$$ HACK FIXME */
-	    vlib_increment_combined_counter
-	      (cm, cpu_index, lb_index0, 1,
-	       vlib_buffer_length_in_chain (vm, p0));
-	  if (1)		/* $$$$$$ HACK FIXME */
-	    vlib_increment_combined_counter
-	      (cm, cpu_index, lb_index1, 1,
-	       vlib_buffer_length_in_chain (vm, p1));
-
-	  from += 2;
-	  to_next += 2;
-	  n_left_to_next -= 2;
-	  n_left_from -= 2;
-
-	  wrong_next = (next0 != next) + 2 * (next1 != next);
-	  if (PREDICT_FALSE (wrong_next != 0))
-	    {
-	      switch (wrong_next)
-		{
-		case 1:
-		  /* A B A */
-		  to_next[-2] = pi1;
-		  to_next -= 1;
-		  n_left_to_next += 1;
-		  vlib_set_next_frame_buffer (vm, node, next0, pi0);
-		  break;
-
-		case 2:
-		  /* A A B */
-		  to_next -= 1;
-		  n_left_to_next += 1;
-		  vlib_set_next_frame_buffer (vm, node, next1, pi1);
-		  break;
-
-		case 3:
-		  /* A B C */
-		  to_next -= 2;
-		  n_left_to_next += 2;
-		  vlib_set_next_frame_buffer (vm, node, next0, pi0);
-		  vlib_set_next_frame_buffer (vm, node, next1, pi1);
-		  if (next0 == next1)
-		    {
-		      /* A B B */
-		      vlib_put_next_frame (vm, node, next, n_left_to_next);
-		      next = next1;
-		      vlib_get_next_frame (vm, node, next, to_next,
-					   n_left_to_next);
-		    }
-		}
-	    }
-	}
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  vlib_buffer_t *p0;
-	  ip4_header_t *ip0;
-	  u32 pi0, lb_index0;
-	  ip_lookup_next_t next0;
-	  u32 fib_index0;
-	  const dpo_id_t *dpo0;
-	  const load_balance_t *lb0;
-
-	  pi0 = from[0];
-	  to_next[0] = pi0;
-
-	  p0 = vlib_get_buffer (vm, pi0);
-
-	  ip0 = vlib_buffer_get_current (p0);
-
-	  fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
-				vnet_buffer (p0)->sw_if_index[VLIB_RX]);
-	  fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
-	    fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
-
-	  lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0),
-					       &ip0->dst_address);
-
-	  lb0 = load_balance_get (lb_index0);
-
-	  ASSERT (lb0->lb_n_buckets > 0);
-	  ASSERT (is_pow2 (lb0->lb_n_buckets));
-
-	  vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
-	    (ip0, lb0->lb_hash_config);
-
-	  dpo0 = load_balance_get_bucket_i (lb0,
-					    (vnet_buffer (p0)->ip.flow_hash &
-					     (lb0->lb_n_buckets_minus_1)));
-
-	  next0 = dpo0->dpoi_next_node;
-	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-
-	  if (1)		/* $$$$$$ HACK FIXME */
-	    vlib_increment_combined_counter
-	      (cm, cpu_index, lb_index0, 1,
-	       vlib_buffer_length_in_chain (vm, p0));
-
-	  from += 1;
-	  to_next += 1;
-	  n_left_to_next -= 1;
-	  n_left_from -= 1;
-
-	  if (PREDICT_FALSE (next0 != next))
-	    {
-	      n_left_to_next += 1;
-	      vlib_put_next_frame (vm, node, next, n_left_to_next);
-	      next = next0;
-	      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
-	      to_next[0] = pi0;
-	      to_next += 1;
-	      n_left_to_next -= 1;
-	    }
-	}
-
-      vlib_put_next_frame (vm, node, next, n_left_to_next);
-    }
-
-  if (node->flags & VLIB_NODE_FLAG_TRACE)
-    ip4_forward_next_trace (vm, node, frame, VLIB_TX);
-
-  return frame->n_vectors;
-}
-
-VLIB_REGISTER_NODE (ip4_lookup_multicast_node, static) =
-{
-.function = ip4_lookup_multicast,.name =
-    "ip4-lookup-multicast",.vector_size = sizeof (u32),.sibling_of =
-    "ip4-lookup",.format_trace = format_ip4_lookup_trace,.n_next_nodes = 0,};
-
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node,
-			      ip4_lookup_multicast);
-
-VLIB_REGISTER_NODE (ip4_multicast_node, static) =
-{
-  .function = ip4_drop,.name = "ip4-multicast",.vector_size =
-    sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
-    1,.next_nodes =
-  {
-  [0] = "error-drop",}
-,};
-
 int
 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
 {
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c
index 1cf5e0b8..ba200a9f 100644
--- a/src/vnet/ip/ip4_input.c
+++ b/src/vnet/ip/ip4_input.c
@@ -426,7 +426,7 @@ VLIB_REGISTER_NODE (ip4_input_node) = {
     [IP4_INPUT_NEXT_DROP] = "error-drop",
     [IP4_INPUT_NEXT_PUNT] = "error-punt",
     [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
-    [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-lookup-multicast",
+    [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup",
     [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
   },
 
@@ -448,7 +448,7 @@ VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = {
     [IP4_INPUT_NEXT_DROP] = "error-drop",
     [IP4_INPUT_NEXT_PUNT] = "error-punt",
     [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
-    [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-lookup-multicast",
+    [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup",
     [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
   },
 
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
index f493db01..6fecd42d 100644
--- a/src/vnet/ip/ip6.h
+++ b/src/vnet/ip/ip6.h
@@ -49,6 +49,7 @@
 #include <stdbool.h>
 #include <vppinfra/bihash_24_8.h>
 #include <vppinfra/bihash_template.h>
+#include <vnet/util/radix.h>
 
 /*
  * Default size of the ip6 fib hash table
@@ -75,6 +76,21 @@ typedef struct
   flow_hash_config_t flow_hash_config;
 } ip6_fib_t;
 
+typedef struct ip6_mfib_t
+{
+  /* Table ID (hash key) for this FIB. */
+  u32 table_id;
+
+  /* Index into FIB vector. */
+  u32 index;
+
+  /*
+   *  Pointer to the top of a radix tree.
+   * This cannot be realloc'd, hence it cannot be inlined with this table
+   */
+  struct radix_node_head *rhead;
+} ip6_mfib_t;
+
 struct ip6_main_t;
 
 typedef void (ip6_add_del_interface_address_function_t)
@@ -137,12 +153,18 @@ typedef struct ip6_main_t
   /* Pool of FIBs. */
   struct fib_table_t_ *fibs;
 
+  /** Vector of MFIBs. */
+  struct mfib_table_t_ *mfibs;
+
   /* Network byte orders subnet mask for each prefix length */
   ip6_address_t fib_masks[129];
 
   /* Table index indexed by software interface. */
   u32 *fib_index_by_sw_if_index;
 
+  /** Multicast FIB table index indexed by software interface. */
+  u32 *mfib_index_by_sw_if_index;
+
   /* IP6 enabled count by software interface */
   u8 *ip_enabled_by_sw_if_index;
 
@@ -150,6 +172,10 @@ typedef struct ip6_main_t
      ID space is not necessarily dense; index space is dense. */
   uword *fib_index_by_table_id;
 
+  /** Hash table mapping table id to multicast fib index.
+     ID space is not necessarily dense; index space is dense. */
+  uword *mfib_index_by_table_id;
+
   /* Hash table mapping interface rewrite adjacency index by sw if index. */
   uword *interface_route_adj_index_by_sw_if_index;
 
@@ -185,6 +211,7 @@ extern ip6_main_t ip6_main;
 /* Global ip6 input node.  Errors get attached to ip6 input node. */
 extern vlib_node_registration_t ip6_input_node;
 extern vlib_node_registration_t ip6_rewrite_node;
+extern vlib_node_registration_t ip6_rewrite_mcast_node;
 extern vlib_node_registration_t ip6_rewrite_local_node;
 extern vlib_node_registration_t ip6_discover_neighbor_node;
 extern vlib_node_registration_t ip6_glean_node;
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 232f7283..ac47b3ad 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -42,8 +42,8 @@
 #include <vnet/ethernet/ethernet.h>	/* for ethernet_header_t */
 #include <vnet/srp/srp.h>	/* for srp_hw_interface_class */
 #include <vppinfra/cache.h>
-#include <vnet/fib/fib_table.h>
 #include <vnet/fib/ip6_fib.h>
+#include <vnet/mfib/ip6_mfib.h>
 #include <vnet/dpo/load_balance.h>
 #include <vnet/dpo/classify_dpo.h>
 
@@ -411,11 +411,14 @@ ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
 	return;
     }
 
+  if (sw_if_index != 0)
+    ip6_mfib_interface_enable_disable (sw_if_index, is_enable);
+
   vnet_feature_enable_disable ("ip6-unicast", "ip6-lookup", sw_if_index,
 			       is_enable, 0, 0);
 
-  vnet_feature_enable_disable ("ip6-multicast", "ip6-lookup", sw_if_index,
-			       is_enable, 0, 0);
+  vnet_feature_enable_disable ("ip6-multicast", "ip6-mfib-forward-lookup",
+			       sw_if_index, is_enable, 0, 0);
 
 }
 
@@ -457,6 +460,8 @@ ip6_add_del_interface_address (vlib_main_t * vm,
   ip6_address_fib_t ip6_af, *addr_fib = 0;
 
   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+  vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
+
   ip6_addr_fib_init (&ip6_af, address,
 		     vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
   vec_add1 (addr_fib, ip6_af);
@@ -611,12 +616,12 @@ VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
 VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
   .arc_name = "ip6-multicast",
   .node_name = "vpath-input-ip6",
-  .runs_before = VNET_FEATURES ("ip6-lookup"),
+  .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
 };
 
 VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
   .arc_name = "ip6-multicast",
-  .node_name = "ip6-lookup",
+  .node_name = "ip6-mfib-forward-lookup",
   .runs_before = VNET_FEATURES ("ip6-drop"),
 };
 
@@ -1122,22 +1127,6 @@ VLIB_REGISTER_NODE (ip6_punt_node, static) =
 
 VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt);
 
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_multicast_node, static) =
-{
-  .function = ip6_drop,
-  .name = "ip6-multicast",
-  .vector_size = sizeof (u32),
-  .format_trace = format_ip6_forward_next_trace,
-  .n_next_nodes = 1,
-  .next_nodes =
-  {
-    [0] = "error-drop",
-  },
-};
-
-/* *INDENT-ON* */
-
 /* Compute TCP/UDP/ICMP6 checksum in software. */
 u16
 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
@@ -1977,7 +1966,7 @@ typedef enum
 always_inline uword
 ip6_rewrite_inline (vlib_main_t * vm,
 		    vlib_node_runtime_t * node,
-		    vlib_frame_t * frame, int is_midchain)
+		    vlib_frame_t * frame, int is_midchain, int is_mcast)
 {
   ip_lookup_main_t *lm = &ip6_main.lookup_main;
   u32 *from = vlib_frame_vector_args (frame);
@@ -2165,6 +2154,14 @@ ip6_rewrite_inline (vlib_main_t * vm,
 	      adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
 	      adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
 	    }
+	  if (is_mcast)
+	    {
+	      /*
+	       * copy bytes from the IP address into the MAC rewrite
+	       */
+	      vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 0);
+	      vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1, 0);
+	    }
 
 	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
 					   to_next, n_left_to_next,
@@ -2265,6 +2262,10 @@ ip6_rewrite_inline (vlib_main_t * vm,
 	    {
 	      adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
 	    }
+	  if (is_mcast)
+	    {
+	      vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 0);
+	    }
 
 	  p0->error = error_node->errors[error0];
 
@@ -2292,16 +2293,21 @@ static uword
 ip6_rewrite (vlib_main_t * vm,
 	     vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
-  return ip6_rewrite_inline (vm, node, frame,
-			     /* midchain */ 0);
+  return ip6_rewrite_inline (vm, node, frame, 0, 0);
+}
+
+static uword
+ip6_rewrite_mcast (vlib_main_t * vm,
+		   vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  return ip6_rewrite_inline (vm, node, frame, 0, 1);
 }
 
 static uword
 ip6_midchain (vlib_main_t * vm,
 	      vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
-  return ip6_rewrite_inline (vm, node, frame,
-			     /* midchain */ 1);
+  return ip6_rewrite_inline (vm, node, frame, 1, 0);
 }
 
 /* *INDENT-OFF* */
@@ -2335,10 +2341,22 @@ VLIB_REGISTER_NODE (ip6_rewrite_node) =
 
 VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite);
 
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
+{
+  .function = ip6_rewrite_mcast,
+  .name = "ip6-rewrite-mcast",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip6_rewrite_trace,
+  .sibling_of = "ip6-rewrite",
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_mcast_node, ip6_rewrite_mcast);
+
 /*
  * Hop-by-Hop handling
  */
-
 ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
 
 #define foreach_ip6_hop_by_hop_error \
@@ -2346,13 +2364,15 @@ _(PROCESSED, "pkts with ip6 hop-by-hop options") \
 _(FORMAT, "incorrectly formatted hop-by-hop options") \
 _(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
 
+/* *INDENT-OFF* */
 typedef enum
 {
 #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
   foreach_ip6_hop_by_hop_error
 #undef _
-    IP6_HOP_BY_HOP_N_ERROR,
+  IP6_HOP_BY_HOP_N_ERROR,
 } ip6_hop_by_hop_error_t;
+/* *INDENT-ON* */
 
 /*
  * Primary h-b-h handler trace support
@@ -2878,6 +2898,7 @@ ip6_lookup_init (vlib_main_t * vm)
 
   /* Create FIB with index 0 and table id of 0. */
   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0);
+  mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0);
 
   {
     pg_node_t *pn;
@@ -2955,6 +2976,12 @@ add_del_ip6_interface_table (vlib_main_t * vm,
 
     vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
     ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+
+    fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6,
+						    table_id);
+
+    vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index);
+    ip6_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index;
   }
 
 
diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c
index bbc2ceba..20306088 100644
--- a/src/vnet/ip/ip6_input.c
+++ b/src/vnet/ip/ip6_input.c
@@ -64,6 +64,7 @@ typedef enum
 {
   IP6_INPUT_NEXT_DROP,
   IP6_INPUT_NEXT_LOOKUP,
+  IP6_INPUT_NEXT_LOOKUP_MULTICAST,
   IP6_INPUT_NEXT_ICMP_ERROR,
   IP6_INPUT_N_NEXT,
 } ip6_input_next_t;
@@ -142,12 +143,27 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 	  sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
 	  sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
 
-	  arc0 =
-	    ip6_address_is_multicast (&ip0->dst_address) ?
-	    lm->mcast_feature_arc_index : lm->ucast_feature_arc_index;
-	  arc1 =
-	    ip6_address_is_multicast (&ip1->dst_address) ?
-	    lm->mcast_feature_arc_index : lm->ucast_feature_arc_index;
+	  if (PREDICT_FALSE (ip6_address_is_multicast (&ip0->dst_address)))
+	    {
+	      arc0 = lm->mcast_feature_arc_index;
+	      next0 = IP6_INPUT_NEXT_LOOKUP_MULTICAST;
+	    }
+	  else
+	    {
+	      arc0 = lm->ucast_feature_arc_index;
+	      next0 = IP6_INPUT_NEXT_LOOKUP;
+	    }
+
+	  if (PREDICT_FALSE (ip6_address_is_multicast (&ip1->dst_address)))
+	    {
+	      arc1 = lm->mcast_feature_arc_index;
+	      next1 = IP6_INPUT_NEXT_LOOKUP_MULTICAST;
+	    }
+	  else
+	    {
+	      arc1 = lm->ucast_feature_arc_index;
+	      next1 = IP6_INPUT_NEXT_LOOKUP;
+	    }
 
 	  vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
 	  vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0;
@@ -240,9 +256,17 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 	  ip0 = vlib_buffer_get_current (p0);
 
 	  sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
-	  arc0 =
-	    ip6_address_is_multicast (&ip0->dst_address) ?
-	    lm->mcast_feature_arc_index : lm->ucast_feature_arc_index;
+	  if (PREDICT_FALSE (ip6_address_is_multicast (&ip0->dst_address)))
+	    {
+	      arc0 = lm->mcast_feature_arc_index;
+	      next0 = IP6_INPUT_NEXT_LOOKUP_MULTICAST;
+	    }
+	  else
+	    {
+	      arc0 = lm->ucast_feature_arc_index;
+	      next0 = IP6_INPUT_NEXT_LOOKUP;
+	    }
+
 	  vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
 	  vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
 
@@ -313,6 +337,7 @@ VLIB_REGISTER_NODE (ip6_input_node) = {
     [IP6_INPUT_NEXT_DROP] = "error-drop",
     [IP6_INPUT_NEXT_LOOKUP] = "ip6-lookup",
     [IP6_INPUT_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+    [IP6_INPUT_NEXT_LOOKUP_MULTICAST] = "ip6-mfib-forward-lookup",
   },
 
   .format_buffer = format_ip6_header,
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
index 46c0e316..46d04769 100644
--- a/src/vnet/ip/ip6_neighbor.c
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -21,6 +21,7 @@
 #include <vppinfra/mhash.h>
 #include <vppinfra/md5.h>
 #include <vnet/adj/adj.h>
+#include <vnet/adj/adj_mcast.h>
 #include <vnet/fib/fib_table.h>
 #include <vnet/fib/ip6_fib.h>
 
@@ -116,9 +117,7 @@ typedef struct
   u32 seed;
   u64 randomizer;
   int ref_count;
-  adj_index_t all_nodes_adj_index;
-  adj_index_t all_routers_adj_index;
-  adj_index_t all_mldv2_routers_adj_index;
+  adj_index_t mcast_adj_index;
 
   /* timing information */
 #define DEF_MAX_RADV_INTERVAL 200
@@ -474,33 +473,72 @@ ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
 
   nbr = ip6_nd_find (sw_if_index, &adj->sub_type.nbr.next_hop.ip6);
 
-  if (NULL != nbr)
-    {
-      adj_nbr_walk_nh6 (sw_if_index, &nbr->key.ip6_address,
-			ip6_nd_mk_complete_walk, nbr);
-    }
-  else
+  switch (adj->lookup_next_index)
     {
+    case IP_LOOKUP_NEXT_ARP:
+    case IP_LOOKUP_NEXT_GLEAN:
+      if (NULL != nbr)
+	{
+	  adj_nbr_walk_nh6 (sw_if_index, &nbr->key.ip6_address,
+			    ip6_nd_mk_complete_walk, nbr);
+	}
+      else
+	{
+	  /*
+	   * no matching ND entry.
+	   * construct the rewrite required for an ND packet, and stick
+	   * that in the adj's pipe to smoke.
+	   */
+	  adj_nbr_update_rewrite (ai,
+				  ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+				  ethernet_build_rewrite (vnm,
+							  sw_if_index,
+							  VNET_LINK_IP6,
+							  VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+
+	  /*
+	   * since the FIB has added this adj for a route, it makes sense it may
+	   * want to forward traffic sometime soon. Let's send a speculative ND.
+	   * just one. If we were to do periodically that wouldn't be bad either,
+	   * but that's more code than i'm prepared to write at this time for
+	   * relatively little reward.
+	   */
+	  ip6_nbr_probe (adj);
+	}
+      break;
+    case IP_LOOKUP_NEXT_MCAST:
       /*
-       * no matching ND entry.
-       * construct the rewrite required to for an ND packet, and stick
-       * that in the adj's pipe to smoke.
+       * Construct a partial rewrite from the known ethernet mcast dest MAC
        */
-      adj_nbr_update_rewrite (ai,
-			      ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
-			      ethernet_build_rewrite (vnm,
-						      sw_if_index,
-						      VNET_LINK_IP6,
-						      VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+      adj_mcast_update_rewrite
+	(ai,
+	 ethernet_build_rewrite (vnm,
+				 sw_if_index,
+				 adj->ia_link,
+				 ethernet_ip6_mcast_dst_addr ()));
 
       /*
-       * since the FIB has added this adj for a route, it makes sense it may
-       * want to forward traffic sometime soon. Let's send a speculative ND.
-       * just one. If we were to do periodically that wouldn't be bad either,
-       * but that's more code than i'm prepared to write at this time for
-       * relatively little reward.
+       * Complete the remaining fields of the adj's rewrite to direct the
+       * completion of the rewrite at switch time by copying in the IP
+       * dst address's bytes.
+       * Offset is 12 bytes from the end of the MAC header - which is 2
+       * bytes into the destination address. And we write 4 bytes.
        */
-      ip6_nbr_probe (adj);
+      adj->rewrite_header.dst_mcast_offset = 12;
+      adj->rewrite_header.dst_mcast_n_bytes = 4;
+
+      break;
+
+    case IP_LOOKUP_NEXT_DROP:
+    case IP_LOOKUP_NEXT_PUNT:
+    case IP_LOOKUP_NEXT_LOCAL:
+    case IP_LOOKUP_NEXT_REWRITE:
+    case IP_LOOKUP_NEXT_LOAD_BALANCE:
+    case IP_LOOKUP_NEXT_MIDCHAIN:
+    case IP_LOOKUP_NEXT_ICMP_ERROR:
+    case IP_LOOKUP_N_NEXT:
+      ASSERT (0);
+      break;
     }
 }
 
@@ -1517,7 +1555,7 @@ icmp6_router_solicitation (vlib_main_t * vm,
 			}
 		      else
 			{
-			  adj_index0 = radv_info->all_nodes_adj_index;
+			  adj_index0 = radv_info->mcast_adj_index;
 			  if (adj_index0 == 0)
 			    error0 = ICMP6_ERROR_DST_LOOKUP_MISS;
 			  else
@@ -1918,10 +1956,8 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
 	  ip6_radv_prefix_t *p;
 	  ip6_mldp_group_t *m;
 
-	  /* remove adjacencies */
-	  adj_unlock (a->all_nodes_adj_index);
-	  adj_unlock (a->all_routers_adj_index);
-	  adj_unlock (a->all_mldv2_routers_adj_index);
+	  /* release the lock on the interface's mcast adj */
+	  adj_unlock (a->mcast_adj_index);
 
 	  /* clean up prefix_pool */
 	  /* *INDENT-OFF* */
@@ -2017,36 +2053,9 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
 	  mhash_init (&a->address_to_mldp_index, sizeof (uword),
 		      sizeof (ip6_address_t));
 
-	  {
-	    u8 link_layer_address[6] = { 0x33, 0x33, 0x00, 0x00, 0x00,
-	      IP6_MULTICAST_GROUP_ID_all_hosts
-	    };
-
-	    a->all_nodes_adj_index =
-	      adj_rewrite_add_and_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6,
-					sw_if_index, link_layer_address);
-	  }
-
-	  {
-	    u8 link_layer_address[6] = { 0x33, 0x33, 0x00, 0x00, 0x00,
-	      IP6_MULTICAST_GROUP_ID_all_routers
-	    };
-
-	    a->all_routers_adj_index =
-	      adj_rewrite_add_and_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6,
-					sw_if_index, link_layer_address);
-	  }
-
-	  {
-	    u8 link_layer_address[6] = { 0x33, 0x33, 0x00, 0x00, 0x00,
-	      IP6_MULTICAST_GROUP_ID_mldv2_routers
-	    };
-
-	    a->all_mldv2_routers_adj_index =
-	      adj_rewrite_add_and_lock (FIB_PROTOCOL_IP6,
-					VNET_LINK_IP6,
-					sw_if_index, link_layer_address);
-	  }
+	  a->mcast_adj_index = adj_mcast_add_or_lock (FIB_PROTOCOL_IP6,
+						      VNET_LINK_IP6,
+						      sw_if_index);
 
 	  /* add multicast groups we will always be reporting  */
 	  ip6_address_t addr;
@@ -2273,11 +2282,10 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index)
   vnet_buffer (b0)->sw_if_index[VLIB_RX] =
     vnet_main.local_interface_sw_if_index;
 
-  vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
-    radv_info->all_mldv2_routers_adj_index;
+  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = radv_info->mcast_adj_index;
   b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
 
-  vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite");
+  vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite-mcast");
 
   f = vlib_get_frame_to_node (vm, node->index);
   to_next = vlib_frame_vector_args (f);
@@ -2301,7 +2309,7 @@ VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) =
   .n_next_nodes = ICMP6_ROUTER_SOLICITATION_N_NEXT,
   .next_nodes = {
     [ICMP6_ROUTER_SOLICITATION_NEXT_DROP] = "error-drop",
-    [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite",
+    [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite-mcast",
     [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output",
   },
 };
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index aafde464..437d2674 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -33,6 +33,9 @@
 #include <vnet/dpo/classify_dpo.h>
 #include <vnet/dpo/ip_null_dpo.h>
 #include <vnet/ethernet/arp_packet.h>
+//#include <vnet/mfib/ip6_mfib.h>
+#include <vnet/mfib/ip4_mfib.h>
+#include <vnet/mfib/mfib_signal.h>
 
 #include <vnet/vnet_msg_enum.h>
 
@@ -58,6 +61,8 @@ _(IP_FIB_DETAILS, ip_fib_details)                                       \
 _(IP6_FIB_DUMP, ip6_fib_dump)                                           \
 _(IP6_FIB_DETAILS, ip6_fib_details)                                     \
 _(IP_NEIGHBOR_DUMP, ip_neighbor_dump)                                   \
+_(IP_MROUTE_ADD_DEL, ip_mroute_add_del)                                 \
+_(MFIB_SIGNAL_DUMP, mfib_signal_dump)                                    \
 _(IP_NEIGHBOR_DETAILS, ip_neighbor_details)                             \
 _(IP_ADDRESS_DUMP, ip_address_dump)                                     \
 _(IP_DUMP, ip_dump)                                                     \
@@ -845,6 +850,144 @@ vl_api_ip_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
   REPLY_MACRO (VL_API_IP_ADD_DEL_ROUTE_REPLY);
 }
 
+static int
+add_del_mroute_check (fib_protocol_t table_proto,
+		      u32 table_id,
+		      u32 next_hop_sw_if_index,
+		      u8 is_local, u8 create_missing_tables, u32 * fib_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+
+  *fib_index = mfib_table_find (table_proto, ntohl (table_id));
+  if (~0 == *fib_index)
+    {
+      if (create_missing_tables)
+	{
+	  *fib_index = mfib_table_find_or_create_and_lock (table_proto,
+							   ntohl (table_id));
+	}
+      else
+	{
+	  /* No such VRF, and we weren't asked to create one */
+	  return VNET_API_ERROR_NO_SUCH_FIB;
+	}
+    }
+
+  if (~0 != ntohl (next_hop_sw_if_index))
+    {
+      if (pool_is_free_index (vnm->interface_main.sw_interfaces,
+			      ntohl (next_hop_sw_if_index)))
+	{
+	  return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+	}
+    }
+
+  return (0);
+}
+
+static int
+mroute_add_del_handler (u8 is_add,
+			u8 is_local,
+			u32 fib_index,
+			const mfib_prefix_t * prefix,
+			u32 entry_flags,
+			u32 next_hop_sw_if_index, u32 itf_flags)
+{
+  stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ );
+
+  fib_route_path_t path = {
+    .frp_sw_if_index = next_hop_sw_if_index,
+    .frp_proto = prefix->fp_proto,
+  };
+
+  if (is_local)
+    path.frp_flags |= FIB_ROUTE_PATH_LOCAL;
+
+
+  if (!is_local && ~0 == next_hop_sw_if_index)
+    {
+      mfib_table_entry_update (fib_index, prefix,
+			       MFIB_SOURCE_API, entry_flags);
+    }
+  else
+    {
+      if (is_add)
+	{
+	  mfib_table_entry_path_update (fib_index, prefix,
+					MFIB_SOURCE_API, &path, itf_flags);
+	}
+      else
+	{
+	  mfib_table_entry_path_remove (fib_index, prefix,
+					MFIB_SOURCE_API, &path);
+	}
+    }
+
+  stats_dsunlock ();
+  return (0);
+}
+
+static int
+api_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp)
+{
+  fib_protocol_t fproto;
+  u32 fib_index;
+  int rv;
+
+  fproto = (mp->is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4);
+  rv = add_del_mroute_check (fproto,
+			     mp->table_id,
+			     mp->next_hop_sw_if_index,
+			     mp->is_local,
+			     mp->create_vrf_if_needed, &fib_index);
+
+  if (0 != rv)
+    return (rv);
+
+  mfib_prefix_t pfx = {
+    .fp_len = ntohs (mp->grp_address_length),
+    .fp_proto = fproto,
+  };
+
+  if (FIB_PROTOCOL_IP4 == fproto)
+    {
+      clib_memcpy (&pfx.fp_grp_addr.ip4, mp->grp_address,
+		   sizeof (pfx.fp_grp_addr.ip4));
+      clib_memcpy (&pfx.fp_src_addr.ip4, mp->src_address,
+		   sizeof (pfx.fp_src_addr.ip4));
+    }
+  else
+    {
+      clib_memcpy (&pfx.fp_grp_addr.ip6, mp->grp_address,
+		   sizeof (pfx.fp_grp_addr.ip6));
+      clib_memcpy (&pfx.fp_src_addr.ip6, mp->src_address,
+		   sizeof (pfx.fp_src_addr.ip6));
+    }
+
+  return (mroute_add_del_handler (mp->is_add,
+				  mp->is_local,
+				  fib_index, &pfx,
+				  ntohl (mp->entry_flags),
+				  ntohl (mp->next_hop_sw_if_index),
+				  ntohl (mp->itf_flags)));
+}
+
+void
+vl_api_ip_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp)
+{
+  vl_api_ip_mroute_add_del_reply_t *rmp;
+  int rv;
+  vnet_main_t *vnm = vnet_get_main ();
+
+  vnm->api_errno = 0;
+
+  rv = api_mroute_add_del_t_handler (mp);
+
+  rv = (rv == 0) ? vnm->api_errno : rv;
+
+  REPLY_MACRO (VL_API_IP_MROUTE_ADD_DEL_REPLY);
+}
+
 static void
 send_ip_details (vpe_api_main_t * am,
 		 unix_shared_memory_queue_t * q, u32 sw_if_index, u32 context)
@@ -1148,6 +1291,73 @@ static void
   REPLY_MACRO (VL_API_SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS_REPLY);
 }
 
+void
+vl_mfib_signal_send_one (unix_shared_memory_queue_t * q,
+			 u32 context, const mfib_signal_t * mfs)
+{
+  vl_api_mfib_signal_details_t *mp;
+  mfib_prefix_t prefix;
+  mfib_table_t *mfib;
+  mfib_itf_t *mfi;
+  /* Build one mfib_signal_details message from mfs and send it to q. */
+  mp = vl_msg_api_alloc (sizeof (*mp));
+
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = ntohs (VL_API_MFIB_SIGNAL_DETAILS);
+  mp->context = context;
+
+  mfi = mfib_itf_get (mfs->mfs_itf);
+  mfib_entry_get_prefix (mfs->mfs_entry, &prefix);
+  mfib = mfib_table_get (mfib_entry_get_fib_index (mfs->mfs_entry),
+			 prefix.fp_proto);
+  mp->table_id = ntohl (mfib->mft_table_id);
+  mp->sw_if_index = ntohl (mfi->mfi_sw_if_index);
+  /* NOTE(review): only IPv4 is encoded; the IPv6 branch just asserts. */
+  if (FIB_PROTOCOL_IP4 == prefix.fp_proto)
+    {
+      mp->grp_address_len = ntohs (prefix.fp_len);
+      /* src is copied only when the length extends past the 32-bit group */
+      memcpy (mp->grp_address, &prefix.fp_grp_addr.ip4, 4);
+      if (prefix.fp_len > 32)
+	{
+	  memcpy (mp->src_address, &prefix.fp_src_addr.ip4, 4);
+	}
+    }
+  else
+    {
+      mp->grp_address_len = ntohs (prefix.fp_len);
+
+      ASSERT (0);
+    }
+  /* Copy mfs_buffer into the message when its length is non-zero. */
+  if (0 != mfs->mfs_buffer_len)
+    {
+      mp->ip_packet_len = ntohs (mfs->mfs_buffer_len);
+      /* NOTE(review): the length is not checked against ip_packet_data here */
+      memcpy (mp->ip_packet_data, mfs->mfs_buffer, mfs->mfs_buffer_len);
+    }
+  else
+    {
+      mp->ip_packet_len = 0;
+    }
+
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_mfib_signal_dump_t_handler (vl_api_mfib_signal_dump_t * mp)
+{
+  unix_shared_memory_queue_t *q;
+  /* Handle mfib_signal_dump: look up the client's input queue first. */
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    {
+      return;
+    }
+  /* Loop while q has room and mfib_signal_send_one returns non-zero. */
+  while (q->cursize < q->maxsize && mfib_signal_send_one (q, mp->context))
+    ;
+}
 
 #define vl_msg_name_crc_list
 #include <vnet/ip/ip.api.h>
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
index 734a4cd7..6c5611d3 100644
--- a/src/vnet/ip/lookup.c
+++ b/src/vnet/ip/lookup.c
@@ -43,6 +43,7 @@
 #include <vnet/fib/ip4_fib.h>
 #include <vnet/fib/ip6_fib.h>
 #include <vnet/mpls/mpls.h>
+#include <vnet/mfib/mfib_table.h>
 #include <vnet/dpo/drop_dpo.h>
 #include <vnet/dpo/classify_dpo.h>
 #include <vnet/dpo/punt_dpo.h>
@@ -258,6 +259,9 @@ format_ip_lookup_next (u8 * s, va_list * args)
     case IP_LOOKUP_NEXT_GLEAN:
       t = "glean";
       break;
+    case IP_LOOKUP_NEXT_MCAST:
+      t = "mcast";
+      break;
     case IP_LOOKUP_NEXT_REWRITE:
       break;
     }
@@ -767,6 +771,173 @@ VLIB_CLI_COMMAND (ip_route_command, static) = {
 };
 /* *INDENT-ON* */
 
+/*
+ * "ip mroute" CLI handler: parses a prefix, an optional "via <interface>"
+ * path and flags, then deletes/updates the entry or adds/removes the path.
+ */
+clib_error_t *
+vnet_ip_mroute_cmd (vlib_main_t * vm,
+		    unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  fib_route_path_t rpath;
+  u32 table_id, is_del;
+  vnet_main_t *vnm;
+  mfib_prefix_t pfx;
+  u32 fib_index;
+  mfib_itf_flags_t iflags = 0;
+  mfib_entry_flags_t eflags = 0;
+
+  vnm = vnet_get_main ();
+  is_del = 0;
+  table_id = 0;
+  memset (&pfx, 0, sizeof (pfx));
+  memset (&rpath, 0, sizeof (rpath));
+  rpath.frp_sw_if_index = ~0;
+
+  /* Get a line of input. */
+  if (!unformat_user (main_input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "table %d", &table_id))
+	;
+      else if (unformat (line_input, "del"))
+	is_del = 1;
+      else if (unformat (line_input, "add"))
+	is_del = 0;
+      else if (unformat (line_input, "%U %U",
+			 unformat_ip4_address,
+			 &pfx.fp_src_addr.ip4,
+			 unformat_ip4_address, &pfx.fp_grp_addr.ip4))
+	{
+	  pfx.fp_proto = FIB_PROTOCOL_IP4;
+	  pfx.fp_len = 64;	/* source + group, 32 bits each */
+	}
+      else if (unformat (line_input, "%U %U",
+			 unformat_ip6_address,
+			 &pfx.fp_src_addr.ip6,
+			 unformat_ip6_address, &pfx.fp_grp_addr.ip6))
+	{
+	  pfx.fp_proto = FIB_PROTOCOL_IP6;
+	  pfx.fp_len = 256;	/* source + group, 128 bits each */
+	}
+      else if (unformat (line_input, "%U/%d",
+			 unformat_ip4_address,
+			 &pfx.fp_grp_addr.ip4, &pfx.fp_len))
+	{
+	  pfx.fp_proto = FIB_PROTOCOL_IP4;
+	}
+      else if (unformat (line_input, "%U/%d",
+			 unformat_ip6_address,
+			 &pfx.fp_grp_addr.ip6, &pfx.fp_len))
+	{
+	  pfx.fp_proto = FIB_PROTOCOL_IP6;
+	}
+      else if (unformat (line_input, "%U",
+			 unformat_ip4_address, &pfx.fp_grp_addr.ip4))
+	{
+	  memset (&pfx.fp_src_addr.ip4, 0, sizeof (pfx.fp_src_addr.ip4));
+	  pfx.fp_proto = FIB_PROTOCOL_IP4;
+	  pfx.fp_len = 32;
+	}
+      else if (unformat (line_input, "%U",
+			 unformat_ip6_address, &pfx.fp_grp_addr.ip6))
+	{
+	  memset (&pfx.fp_src_addr.ip6, 0, sizeof (pfx.fp_src_addr.ip6));
+	  pfx.fp_proto = FIB_PROTOCOL_IP6;
+	  pfx.fp_len = 128;
+	}
+      else if (unformat (line_input, "via %U",
+			 unformat_vnet_sw_interface, vnm,
+			 &rpath.frp_sw_if_index))
+	{
+	  rpath.frp_weight = 1;
+	  rpath.frp_proto = FIB_PROTOCOL_IP4;
+	}
+      else if (unformat (line_input, "%U", unformat_mfib_itf_flags, &iflags))
+	;
+      else if (unformat (line_input, "%U",
+			 unformat_mfib_entry_flags, &eflags))
+	;
+      else
+	{
+	  error = unformat_parse_error (line_input);
+	  goto done;
+	}
+    }
+
+  if (~0 == table_id)
+    {
+      /* table_id starts at 0, so only an explicit ~0 takes this branch */
+      fib_index = 0;
+    }
+  else
+    {
+      fib_index = mfib_table_find (pfx.fp_proto, table_id);
+      if (~0 == fib_index)
+	{
+	  error = clib_error_return (0, "Nonexistent table id %d", table_id);
+	  goto done;
+	}
+    }
+
+  if (is_del && 0 == rpath.frp_weight)
+    {
+      mfib_table_entry_delete (fib_index, &pfx, MFIB_SOURCE_CLI);
+    }
+  else if (eflags)
+    {
+      mfib_table_entry_update (fib_index, &pfx, MFIB_SOURCE_CLI, eflags);
+    }
+  else
+    {
+      if (is_del)
+	mfib_table_entry_path_remove (fib_index,
+				      &pfx, MFIB_SOURCE_CLI, &rpath);
+      else
+	mfib_table_entry_path_update (fib_index,
+				      &pfx, MFIB_SOURCE_CLI, &rpath, iflags);
+    }
+
+done:
+  unformat_free (line_input);	/* reached from the error gotos too */
+  return error;
+}
+
+/*?
+ * This command is used to add or delete IPv4 or IPv6 multicast routes. The
+ * group is given as '<em><dst-ip-addr>/<width></em>', as a bare group
+ * address, or as a '<em><src-ip-addr> <dst-ip-addr></em>' pair; the
+ * addresses can be IPv4 or IPv6, but all must be of the same form in a
+ * single command. To display the current set of routes, use the commands
+ * '<em>show ip mfib</em>' and '<em>show ip6 mfib</em>'.
+ * The full set of supported flags for routes and interfaces is shown via
+ * '<em>show mfib route flags</em>' and '<em>show mfib itf flags</em>'
+ * respectively.
+ * @cliexpar
+ * Example of how to add a forwarding interface to a route (and create the
+ * route if it does not exist)
+ * @cliexcmd{ip mroute add 232.1.1.1 via GigabitEthernet2/0/0 Forward}
+ * Example of how to add an accepting interface to a route (and create the
+ * route if it does not exist)
+ * @cliexcmd{ip mroute add 232.1.1.1 via GigabitEthernet2/0/1 Accept}
+ * Example of changing the route's flags to send signals via the API
+ * @cliexcmd{ip mroute add 232.1.1.1 Signal}
+
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_mroute_command, static) =
+{
+  .path = "ip mroute",
+  .short_help = "ip mroute [add|del] <dst-ip-addr>/<width> [table <table-id>] [via <interface> [<itf-flags>]] [<entry-flags>]",
+  .function = vnet_ip_mroute_cmd,
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
 /*
  * The next two routines address a longstanding script hemorrhoid.
  * Probing a v4 or v6 neighbor needs to appear to be synchronous,
diff --git a/src/vnet/ip/lookup.h b/src/vnet/ip/lookup.h
index 3dbd7b3b..27c70943 100644
--- a/src/vnet/ip/lookup.h
+++ b/src/vnet/ip/lookup.h
@@ -91,6 +91,9 @@ typedef enum
   /** This packets needs to go to ICMP error */
   IP_LOOKUP_NEXT_ICMP_ERROR,
 
+  /** Multicast Adjacency. */
+  IP_LOOKUP_NEXT_MCAST,
+
   IP_LOOKUP_N_NEXT,
 } ip_lookup_next_t;
 
@@ -115,6 +118,7 @@ typedef enum
     [IP_LOOKUP_NEXT_ARP] = "ip4-arp",				\
     [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean",			\
     [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite",    		\
+    [IP_LOOKUP_NEXT_MCAST] = "ip4-rewrite-mcast",	        \
     [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain",		        \
     [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip4-load-balance",		\
     [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error",		\
@@ -127,6 +131,7 @@ typedef enum
     [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",		\
     [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean",			\
     [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",			\
+    [IP_LOOKUP_NEXT_MCAST] = "ip6-rewrite-mcast",		\
     [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain",			\
     [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip6-load-balance",		\
     [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error",		\
@@ -203,12 +208,6 @@ typedef struct ip_adjacency_t_
   /** Interface address index for this local/arp adjacency. */
   u32 if_address_index;
 
-  /** Force re-lookup in a different FIB. ~0 => normal behavior */
-  u16 mcast_group_index;
-
-  /** Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */
-  u16 saved_lookup_next_index;
-
   /*
    * link/ether-type
    */
@@ -236,28 +235,28 @@ typedef struct ip_adjacency_t_
        */
     struct
     {
-	  /**
-	   * The recursive next-hop
-	   */
+      /**
+       * The recursive next-hop
+       */
       ip46_address_t next_hop;
-	  /**
-	   * The node index of the tunnel's post rewrite/TX function.
-	   */
+      /**
+       * The node index of the tunnel's post rewrite/TX function.
+       */
       u32 tx_function_node;
-	  /**
-	   * The next DPO to use
-	   */
+      /**
+       * The next DPO to use
+       */
       dpo_id_t next_dpo;
-	  /**
-	   * A function to perform the post-rewrite fixup
-	   */
-      adj_midchain_fixup_t fixup_func;
-    } midchain;
       /**
-       * IP_LOOKUP_NEXT_GLEAN
-       *
-       * Glean the address to ARP for from the packet's destination
+       * A function to perform the post-rewrite fixup
        */
+      adj_midchain_fixup_t fixup_func;
+    } midchain;
+    /**
+     * IP_LOOKUP_NEXT_GLEAN
+     *
+     * Glean the address to ARP for from the packet's destination
+     */
     struct
     {
       ip46_address_t receive_addr;
@@ -291,43 +290,6 @@ STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline1) ==
 /* An all zeros address */
 extern const ip46_address_t zero_addr;
 
-/* IP multicast adjacency. */
-typedef struct
-{
-  /* Handle for this adjacency in adjacency heap. */
-  u32 heap_handle;
-
-  /* Number of adjecencies in block. */
-  u32 n_adj;
-
-  /* Rewrite string. */
-    vnet_declare_rewrite (64 - 2 * sizeof (u32));
-}
-ip_multicast_rewrite_t;
-
-typedef struct
-{
-  /* ip4-multicast-rewrite next index. */
-  u32 next_index;
-
-  u8 n_rewrite_bytes;
-
-  u8 rewrite_string[64 - 1 * sizeof (u32) - 1 * sizeof (u8)];
-}
-ip_multicast_rewrite_string_t;
-
-typedef struct
-{
-  ip_multicast_rewrite_t *rewrite_heap;
-
-  ip_multicast_rewrite_string_t *rewrite_strings;
-
-  /* Negative rewrite string index; >= 0 sw_if_index.
-     Sorted.  Used to hash. */
-  i32 **adjacency_id_vector;
-
-  uword *adjacency_by_id_vector;
-} ip_multicast_lookup_main_t;
 
 typedef struct
 {
diff --git a/src/vnet/mcast/mcast.c b/src/vnet/mcast/mcast.c
deleted file mode 100644
index 55be89ae..00000000
--- a/src/vnet/mcast/mcast.c
+++ /dev/null
@@ -1,565 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vnet/mcast/mcast.h>
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/pg/pg.h>
-#include <vppinfra/error.h>
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/icmp46_packet.h>
-#include <vnet/ip/ip4.h>
-
-typedef struct {
-  u32 sw_if_index;
-  u32 next_index;
-  u32 group_index;
-} mcast_prep_trace_t;
-
-/* packet trace format function */
-static u8 * format_mcast_prep_trace (u8 * s, va_list * args)
-{
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  mcast_prep_trace_t * t = va_arg (*args, mcast_prep_trace_t *);
-  
-  s = format (s, "MCAST_PREP: group %d, next index %d, tx_sw_if_index %d",
-              t->group_index, t->next_index, t->sw_if_index);
-  return s;
-}
-
-mcast_main_t mcast_main;
-vlib_node_registration_t mcast_prep_node;
-vlib_node_registration_t mcast_recycle_node;
-
-#define foreach_mcast_prep_error \
-_(MCASTS, "Multicast Packets")
-
-typedef enum {
-#define _(sym,str) MCAST_PREP_ERROR_##sym,
-  foreach_mcast_prep_error
-#undef _
-  MCAST_PREP_N_ERROR,
-} mcast_prep_error_t;
-
-static char * mcast_prep_error_strings[] = {
-#define _(sym,string) string,
-  foreach_mcast_prep_error
-#undef _
-};
-
-typedef enum {
-  MCAST_PREP_NEXT_DROP,
-  MCAST_PREP_N_NEXT,
-} mcast_prep_next_t;
-
-static uword
-mcast_prep_node_fn (vlib_main_t * vm,
-		  vlib_node_runtime_t * node,
-		  vlib_frame_t * frame)
-{
-  u32 n_left_from, * from, * to_next;
-  mcast_prep_next_t next_index;
-  mcast_main_t * mcm = &mcast_main;
-  vlib_node_t *n = vlib_get_node (vm, mcast_prep_node.index);
-  u32 node_counter_base_index = n->error_heap_index;
-  vlib_error_main_t * em = &vm->error_main;
-  ip4_main_t * im = &ip4_main;
-  ip_lookup_main_t * lm = &im->lookup_main;
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index,
-			   to_next, n_left_to_next);
-
-      while (0 && n_left_from >= 4 && n_left_to_next >= 2)
-	{
-          u32 bi0, bi1;
-	  vlib_buffer_t * b0, * b1;
-          u32 next0, next1;
-          u32 sw_if_index0, sw_if_index1;
-          
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t * p2, * p3;
-            
-	    p2 = vlib_get_buffer (vm, from[2]);
-	    p3 = vlib_get_buffer (vm, from[3]);
-            
-	    vlib_prefetch_buffer_header (p2, LOAD);
-	    vlib_prefetch_buffer_header (p3, LOAD);
-
-	    CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
-	  }
-
-          /* speculatively enqueue b0 and b1 to the current next frame */
-	  to_next[0] = bi0 = from[0];
-	  to_next[1] = bi1 = from[1];
-	  from += 2;
-	  to_next += 2;
-	  n_left_from -= 2;
-	  n_left_to_next -= 2;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-
-          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
-          next0 = 0;
-          sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
-          next1 = 0;
-
-          /* $$$$ your message in this space. Process 2 x pkts */
-
-          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
-            {
-              if (b0->flags & VLIB_BUFFER_IS_TRACED) 
-                {
-                    mcast_prep_trace_t *t = 
-                      vlib_add_trace (vm, node, b0, sizeof (*t));
-                    t->sw_if_index = sw_if_index0;
-                    t->next_index = next0;
-                  }
-                if (b1->flags & VLIB_BUFFER_IS_TRACED) 
-                  {
-                    mcast_prep_trace_t *t = 
-                      vlib_add_trace (vm, node, b1, sizeof (*t));
-                    t->sw_if_index = sw_if_index1;
-                    t->next_index = next1;
-                  }
-              }
-            
-            /* verify speculative enqueues, maybe switch current next frame */
-            vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
-                                             to_next, n_left_to_next,
-                                             bi0, bi1, next0, next1);
-        }
-      
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-          u32 bi0;
-	  vlib_buffer_t * b0;
-          u32 next0, adj_index0;
-          mcast_group_t * g0;
-          ip_adjacency_t * adj0;
-          
-          /* speculatively enqueue b0 to the current next frame */
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-
-          adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
-          adj0 = ip_get_adjacency (lm, adj_index0);
-          vnet_buffer(b0)->mcast.mcast_group_index = adj0->mcast_group_index;
-          g0 = pool_elt_at_index (mcm->groups, adj0->mcast_group_index);
-
-          /* 
-           * Handle the degenerate single-copy case 
-           * If we don't change the freelist, the packet will never
-           * make it to the recycle node...
-           */
-          if (PREDICT_TRUE(vec_len (g0->members) > 1))
-            {
-              /* Save the original free list index */
-              vnet_buffer(b0)->mcast.original_free_list_index =
-                b0->free_list_index;
-              
-              /* Swap in the multicast recycle list */
-              b0->free_list_index = mcm->mcast_recycle_list_index;
-              
-              /* 
-               * Make sure that intermediate "frees" don't screw up 
-               */
-              b0->recycle_count = vec_len (g0->members);
-              b0->flags |= VLIB_BUFFER_RECYCLE;
-
-              /* Set up for the recycle node */
-              vnet_buffer(b0)->mcast.mcast_current_index = 1;
-            }
-
-          /* Transmit the pkt on the first interface */
-          next0 = g0->members[0].prep_and_recycle_node_next_index;
-          vnet_buffer(b0)->sw_if_index[VLIB_TX] = 
-            g0->members[0].tx_sw_if_index;
-
-          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
-                            && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
-            mcast_prep_trace_t *t = 
-               vlib_add_trace (vm, node, b0, sizeof (*t));
-            t->next_index = next0;
-            t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_TX];
-            t->group_index = vnet_buffer(b0)->mcast.mcast_group_index;
-            }
-            
-          /* verify speculative enqueue, maybe switch current next frame */
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-					   to_next, n_left_to_next,
-					   bi0, next0);
-	}
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  em->counters[node_counter_base_index + MCAST_PREP_ERROR_MCASTS] += 
-      frame->n_vectors;
-
-  return frame->n_vectors;
-}
-
-VLIB_REGISTER_NODE (mcast_prep_node) = {
-  .function = mcast_prep_node_fn,
-  .name = "mcast_prep",
-  .vector_size = sizeof (u32),
-  .format_trace = format_mcast_prep_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  
-  .n_errors = ARRAY_LEN(mcast_prep_error_strings),
-  .error_strings = mcast_prep_error_strings,
-
-  .n_next_nodes = MCAST_PREP_N_NEXT,
-
-  /* edit / add dispositions here */
-  .next_nodes = {
-        [MCAST_PREP_NEXT_DROP] = "error-drop",
-  },
-};
-
-typedef struct {
-  u32 sw_if_index;
-  u32 next_index;
-  u32 current_member;
-  u32 group_index;
-} mcast_recycle_trace_t;
-
-static u8 * format_mcast_recycle_trace (u8 * s, va_list * args)
-{
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  mcast_recycle_trace_t * t = va_arg (*args, mcast_recycle_trace_t *);
-  
-  s = format (s, 
-"MCAST_R: group %d, current member %d next (node) index %d, tx_sw_if_index %d",
-              t->group_index, t->current_member, t->next_index, t->sw_if_index);
-  return s;
-}
-
-#define foreach_mcast_recycle_error \
-_(RECYCLES, "Multicast Recycles")
-
-typedef enum {
-#define _(sym,str) MCAST_RECYCLE_ERROR_##sym,
-  foreach_mcast_recycle_error
-#undef _
-  MCAST_RECYCLE_N_ERROR,
-} mcast_recycle_error_t;
-
-static char * mcast_recycle_error_strings[] = {
-#define _(sym,string) string,
-  foreach_mcast_recycle_error
-#undef _
-};
-
-typedef enum {
-  MCAST_RECYCLE_NEXT_DROP,
-  MCAST_RECYCLE_N_NEXT,
-} mcast_recycle_next_t;
-
-static uword
-mcast_recycle_node_fn (vlib_main_t * vm,
-		  vlib_node_runtime_t * node,
-		  vlib_frame_t * frame)
-{
-  u32 n_left_from, * from, * to_next;
-  mcast_recycle_next_t next_index;
-  mcast_main_t * mcm = &mcast_main;
-  vlib_node_t *n = vlib_get_node (vm, mcast_recycle_node.index);
-  u32 node_counter_base_index = n->error_heap_index;
-  vlib_error_main_t * em = &vm->error_main;
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index,
-			   to_next, n_left_to_next);
-
-      while (0 && n_left_from >= 4 && n_left_to_next >= 2)
-	{
-          u32 bi0, bi1;
-	  vlib_buffer_t * b0, * b1;
-          u32 next0, next1;
-          u32 sw_if_index0, sw_if_index1;
-          
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t * p2, * p3;
-            
-	    p2 = vlib_get_buffer (vm, from[2]);
-	    p3 = vlib_get_buffer (vm, from[3]);
-            
-	    vlib_prefetch_buffer_header (p2, LOAD);
-	    vlib_prefetch_buffer_header (p3, LOAD);
-
-	    CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
-	  }
-
-          /* speculatively enqueue b0 and b1 to the current next frame */
-	  to_next[0] = bi0 = from[0];
-	  to_next[1] = bi1 = from[1];
-	  from += 2;
-	  to_next += 2;
-	  n_left_from -= 2;
-	  n_left_to_next -= 2;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-
-          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
-          next0 = 0;
-          sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
-          next1 = 0;
-
-          /* $$$$ your message in this space. Process 2 x pkts */
-
-          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
-            {
-              if (b0->flags & VLIB_BUFFER_IS_TRACED) 
-                {
-                    mcast_recycle_trace_t *t = 
-                      vlib_add_trace (vm, node, b0, sizeof (*t));
-                    t->sw_if_index = sw_if_index0;
-                    t->next_index = next0;
-                  }
-                if (b1->flags & VLIB_BUFFER_IS_TRACED) 
-                  {
-                    mcast_recycle_trace_t *t = 
-                      vlib_add_trace (vm, node, b1, sizeof (*t));
-                    t->sw_if_index = sw_if_index1;
-                    t->next_index = next1;
-                  }
-              }
-            
-            /* verify speculative enqueues, maybe switch current next frame */
-            vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
-                                             to_next, n_left_to_next,
-                                             bi0, bi1, next0, next1);
-        }
-      
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-          u32 bi0;
-	  vlib_buffer_t * b0;
-          u32 next0;
-          u32 current_member0;
-          mcast_group_t * g0;
-          
-          /* speculatively enqueue b0 to the current next frame */
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-
-          g0 = pool_elt_at_index (mcm->groups, 
-                                  vnet_buffer(b0)->mcast.mcast_group_index);
-
-          /* No more replicas? */
-          if (b0->recycle_count == 1)
-            {
-              /* Restore the original free list index */
-              b0->free_list_index = 
-                vnet_buffer(b0)->mcast.original_free_list_index;
-              b0->flags &= ~(VLIB_BUFFER_RECYCLE);
-            }
-          current_member0 = vnet_buffer(b0)->mcast.mcast_current_index;
-          
-          next0 = 
-            g0->members[current_member0].prep_and_recycle_node_next_index;
-          vnet_buffer(b0)->sw_if_index[VLIB_TX] = 
-            g0->members[current_member0].tx_sw_if_index;
-          
-          vnet_buffer(b0)->mcast.mcast_current_index = 
-            current_member0 + 1;
-          
-          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
-                            && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
-            mcast_recycle_trace_t *t = 
-               vlib_add_trace (vm, node, b0, sizeof (*t));
-            t->next_index = next0;
-            t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_TX];
-            t->group_index = vnet_buffer(b0)->mcast.mcast_group_index;
-            t->current_member = current_member0;
-            }
-            
-          /* verify speculative enqueue, maybe switch current next frame */
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-					   to_next, n_left_to_next,
-					   bi0, next0);
-	}
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  em->counters[node_counter_base_index + MCAST_RECYCLE_ERROR_RECYCLES] += 
-      frame->n_vectors;
-
-  return frame->n_vectors;
-}
-
-VLIB_REGISTER_NODE (mcast_recycle_node) = {
-  .function = mcast_recycle_node_fn,
-  .name = "mcast-recycle",
-  .vector_size = sizeof (u32),
-  .format_trace = format_mcast_recycle_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  
-  .n_errors = ARRAY_LEN(mcast_recycle_error_strings),
-  .error_strings = mcast_recycle_error_strings,
-
-  .n_next_nodes = MCAST_RECYCLE_N_NEXT,
-
-  /* edit / add dispositions here */
-  .next_nodes = {
-        [MCAST_RECYCLE_NEXT_DROP] = "error-drop",
-  },
-};
-
-/*
- * fish pkts back from the recycle queue/freelist
- * un-flatten the context chains
- */
-static void mcast_recycle_callback (vlib_main_t *vm, 
-                                    vlib_buffer_free_list_t * fl)
-{
-  vlib_frame_t * f = 0;
-  u32 n_left_from;
-  u32 n_left_to_next = 0;
-  u32 n_this_frame = 0;
-  u32 * from;
-  u32 * to_next;
-  u32 bi0, pi0;
-  vlib_buffer_t *b0;
-  vlib_buffer_t *bnext0;
-  int i;
-
-  /* aligned, unaligned buffers */
-  for (i = 0; i < 2; i++) 
-    {
-      if (i == 0)
-        {
-          from = fl->aligned_buffers;
-          n_left_from = vec_len (from);
-        }
-      else
-        {
-          from = fl->unaligned_buffers;
-          n_left_from = vec_len (from);
-        }
-    
-      while (n_left_from > 0)
-        {
-          if (PREDICT_FALSE(n_left_to_next == 0)) 
-            {
-              if (f)
-                {
-                  f->n_vectors = n_this_frame;
-                  vlib_put_frame_to_node (vm, mcast_recycle_node.index, f);
-                }
-              
-              f = vlib_get_frame_to_node (vm, mcast_recycle_node.index);
-              to_next = vlib_frame_vector_args (f);
-              n_left_to_next = VLIB_FRAME_SIZE;
-              n_this_frame = 0;
-            }
-          
-          bi0 = from[0];
-          if (PREDICT_TRUE(n_left_from > 1))
-            {
-              pi0 = from[1];
-              vlib_prefetch_buffer_with_index(vm,pi0,LOAD);
-            }
-        
-          bnext0 = b0 = vlib_get_buffer (vm, bi0);
-          
-          while (bnext0->flags & VLIB_BUFFER_NEXT_PRESENT)
-            {
-              from += 1;
-              n_left_from -= 1;
-              bnext0 = vlib_get_buffer (vm, bnext0->next_buffer);
-            }
-          to_next[0] = bi0;
-
-          if (CLIB_DEBUG > 0)
-            vlib_buffer_set_known_state (vm, bi0, VLIB_BUFFER_KNOWN_ALLOCATED);
-
-          from++;
-          to_next++;
-          n_this_frame++;
-          n_left_to_next--;
-          n_left_from--;
-        }
-    }
-  
-  vec_reset_length (fl->aligned_buffers);
-  vec_reset_length (fl->unaligned_buffers);
-
-  if (f)
-    {
-      ASSERT(n_this_frame);
-      f->n_vectors = n_this_frame;
-      vlib_put_frame_to_node (vm, mcast_recycle_node.index, f);
-    }
-}
-
-clib_error_t *mcast_init (vlib_main_t *vm)
-{
-  mcast_main_t * mcm = &mcast_main;
-  vlib_buffer_main_t * bm = vm->buffer_main;
-  vlib_buffer_free_list_t * fl;
-    
-  mcm->vlib_main = vm;
-  mcm->vnet_main = vnet_get_main();
-  mcm->mcast_recycle_list_index = 
-    vlib_buffer_create_free_list (vm, 1024 /* fictional */, "mcast-recycle");
-
-  fl = pool_elt_at_index (bm->buffer_free_list_pool, 
-                          mcm->mcast_recycle_list_index);
-
-  fl->buffers_added_to_freelist_function = mcast_recycle_callback;
-
-  return 0;
-}
-
-VLIB_INIT_FUNCTION (mcast_init);
-
-
diff --git a/src/vnet/mcast/mcast.h b/src/vnet/mcast/mcast.h
deleted file mode 100644
index 96e51442..00000000
--- a/src/vnet/mcast/mcast.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __included_vnet_mcast_h__
-#define __included_vnet_mcast_h__
-
-#include <vnet/vnet.h>
-#include <vlib/buffer.h>
-#include <vlib/buffer_funcs.h>
-
-typedef struct {
-  /* Arrange for both prep and recycle nodes to have identical
-     next indices for a given output interface */
-  u32 prep_and_recycle_node_next_index;
-
-  /* Show command, etc. */
-  u32 tx_sw_if_index;
-} mcast_group_member_t;
-
-typedef struct {
-  /* vector of group members */
-  mcast_group_member_t * members;
-} mcast_group_t;
-
-typedef struct {
-  /* pool of multicast (interface) groups */
-  mcast_group_t * groups;
-
-  /* multicast "free" list, aka recycle list */
-  u32 mcast_recycle_list_index;
-
-  /* convenience */
-  vlib_main_t * vlib_main;
-  vnet_main_t * vnet_main;
-} mcast_main_t;
-
-mcast_main_t mcast_main;
-
-#endif /* __included_vnet_mcast_h__ */
diff --git a/src/vnet/mcast/mcast_test.c b/src/vnet/mcast/mcast_test.c
deleted file mode 100644
index be80c9fc..00000000
--- a/src/vnet/mcast/mcast_test.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vnet/mcast/mcast.h>
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/pg/pg.h>
-#include <vppinfra/error.h>
-#include <vnet/ip/lookup.h>
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/icmp46_packet.h>
-#include <vnet/ip/ip4.h>
-#include <vnet/mcast/mcast.h>
-
-typedef struct {
-  /* convenience */
-  vlib_main_t * vlib_main;
-  vnet_main_t * vnet_main;
-  mcast_main_t * mcast_main;
-} mcast_test_main_t;
-
-mcast_test_main_t mcast_test_main;
-vlib_node_registration_t mcast_prep_node;
-vlib_node_registration_t mcast_recycle_node;
-
-static clib_error_t *
-mcast_test_command_fn (vlib_main_t * vm,
-		 unformat_input_t * input,
-		 vlib_cli_command_t * cmd)
-{
-  /* u8 *rewrite_data; */
-  /* mcast_test_main_t * mtm = &mcast_test_main; */
-  /* mcast_main_t * mcm = mtm->mcast_main; */
-  /* ip_adjacency_t adj; */
-  /* u32 adj_index; */
-  /* mcast_group_t * g; */
-  /* mcast_group_member_t * member; */
-  /* unformat_input_t _line_input, * line_input = &_line_input; */
-  /* ip4_address_t dst_addr, zero; */
-  /* ip4_main_t * im = &ip4_main; */
-  /* ip_lookup_main_t * lm = &im->lookup_main; */
-
-  /* /\* Get a line of input. *\/ */
-  /* if (! unformat_user (input, unformat_line_input, line_input)) */
-  /*   return 0; */
-
-  /* pool_get (mcm->groups, g); */
-  /* memset (g, 0, sizeof (*g)); */
-
-  /* while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) */
-  /*   { */
-  /*     vnet_hw_interface_t *hw; */
-  /*     u32 next, sw_if_index; */
-
-  /*     if (unformat (line_input, "%U", unformat_vnet_sw_interface,  */
-  /*                   mtm->vnet_main, &sw_if_index))  */
-  /*       { */
-  /*         vec_add2 (g->members, member, 1); */
-  /*         member->tx_sw_if_index = sw_if_index; */
-          
-  /*         hw = vnet_get_sup_hw_interface (mtm->vnet_main,  */
-  /*                                         sw_if_index); */
-          
-  /*         next = vlib_node_add_next (mtm->vlib_main,  */
-  /*                                    mcast_prep_node.index, */
-  /*                                    hw->output_node_index); */
-          
-  /*         /\* Required to be the same next index... *\/ */
-  /*         vlib_node_add_next_with_slot (mtm->vlib_main, */
-  /*                                       mcast_recycle_node.index, */
-  /*                                       hw->output_node_index, next); */
-  /*         member->prep_and_recycle_node_next_index = next; */
-  /*       } */
-  /*     else */
-  /*       { */
-  /*         return unformat_parse_error (line_input); */
-  /*       } */
-  /*   } */
-
-  /* if (vec_len (g->members) == 0) */
-  /*   { */
-  /*     pool_put (mcm->groups, g); */
-  /*     vlib_cli_output (vm, "no group members specified"); */
-  /*     return 0; */
-  /*   } */
-
-
-  /* adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; */
-  /* adj.mcast_group_index = g - mcm->groups; */
-  /* rewrite_data = format (0, "abcdefg"); */
-
-  /* vnet_rewrite_for_tunnel */
-  /*   (mtm->vnet_main, */
-  /*    (u32)~0, /\* tx_sw_if_index, we dont know yet *\/ */
-  /*    ip4_rewrite_node.index, */
-  /*    mcast_prep_node.index, */
-  /*    &adj.rewrite_header, */
-  /*    rewrite_data, vec_len(rewrite_data)); */
-
-  /* ip_add_adjacency (lm, &adj, 1 /\* one adj *\/, */
-  /*                   &adj_index); */
-  
-  /* dst_addr.as_u32 = clib_host_to_net_u32 (0x0a000002); */
-  /* zero.as_u32 = 0; */
-
-  /* ip4_add_del_route_next_hop (im, */
-  /*                             IP4_ROUTE_FLAG_ADD, */
-  /*                             &dst_addr, */
-  /*                             24 /\* mask width *\/, */
-  /*                             &zero /\* no next hop *\/, */
-                          
-  /*                             0, // next hop sw if index */
-  /*                             1, // weight */
-  /*                             adj_index, */
-  /*                             0 /\* explicit fib 0 *\/); */
-
-  return 0;
-}
-
-static VLIB_CLI_COMMAND (mcast_test_command) = {
-  .path = "test mc",
-  .short_help = "test mc",
-  .function = mcast_test_command_fn,
-};
-
-clib_error_t *mcast_test_init (vlib_main_t *vm)
-{
-  mcast_test_main_t * mtm = &mcast_test_main;
-    
-  mtm->vlib_main = vm;
-  mtm->vnet_main = vnet_get_main();
-  mtm->mcast_main = &mcast_main;
-
-  return 0;
-}
-
-VLIB_INIT_FUNCTION (mcast_test_init);
diff --git a/src/vnet/mfib/ip4_mfib.c b/src/vnet/mfib/ip4_mfib.c
new file mode 100644
index 00000000..08001c3f
--- /dev/null
+++ b/src/vnet/mfib/ip4_mfib.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mfib/ip4_mfib.h>
+
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/mfib/mfib_entry.h>
+
+static const mfib_prefix_t ip4_specials[] = {
+    {
+        /* (*,*)/0 */
+        .fp_src_addr = {
+            .ip4.data_u32 = 0,
+        },
+        .fp_grp_addr = {
+            .ip4.data_u32 = 0,
+        },
+        .fp_len  = 0,
+        .fp_proto = FIB_PROTOCOL_IP4,
+    },
+};
+
+static u32
+ip4_create_mfib_with_table_id (u32 table_id)
+{
+    mfib_table_t *mfib_table;
+
+    pool_get_aligned(ip4_main.mfibs, mfib_table, CLIB_CACHE_LINE_BYTES);
+    memset(mfib_table, 0, sizeof(*mfib_table));
+
+    mfib_table->mft_proto = FIB_PROTOCOL_IP4;
+    mfib_table->mft_index =
+        mfib_table->v4.index =
+            (mfib_table - ip4_main.mfibs);
+
+    hash_set (ip4_main.mfib_index_by_table_id,
+              table_id,
+              mfib_table->mft_index);
+
+    mfib_table->mft_table_id =
+        mfib_table->v4.table_id =
+            table_id;
+
+    mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP4);
+
+    /*
+     * add the special entries into the new FIB
+     */
+    int ii;
+
+    for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
+    {
+        mfib_prefix_t prefix = ip4_specials[ii];
+
+        prefix.fp_src_addr.ip4.data_u32 =
+            clib_host_to_net_u32(prefix.fp_src_addr.ip4.data_u32);
+        prefix.fp_grp_addr.ip4.data_u32 =
+            clib_host_to_net_u32(prefix.fp_grp_addr.ip4.data_u32);
+
+        mfib_table_entry_update(mfib_table->mft_index,
+                                &prefix,
+                                MFIB_SOURCE_DEFAULT_ROUTE,
+                                MFIB_ENTRY_FLAG_DROP);
+    }
+
+    return (mfib_table->mft_index);
+}
+
+void
+ip4_mfib_table_destroy (ip4_mfib_t *mfib)
+{
+    mfib_table_t *mfib_table = (mfib_table_t*)mfib;
+    int ii;
+
+    /*
+     * remove all the specials we added when the table was created.
+     */
+    for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
+    {
+        fib_node_index_t mfei;
+        mfib_prefix_t prefix = ip4_specials[ii];
+
+        prefix.fp_src_addr.ip4.data_u32 =
+            clib_host_to_net_u32(prefix.fp_src_addr.ip4.data_u32);
+        prefix.fp_grp_addr.ip4.data_u32 =
+            clib_host_to_net_u32(prefix.fp_grp_addr.ip4.data_u32);
+
+        mfei = mfib_table_lookup(mfib_table->mft_index, &prefix);
+        mfib_table_entry_delete_index(mfei, MFIB_SOURCE_DEFAULT_ROUTE);
+    }
+
+    /*
+     * validate no more routes.
+     */
+    ASSERT(0 == mfib_table->mft_total_route_counts);
+    ASSERT(~0 != mfib_table->mft_table_id);
+
+    hash_unset (ip4_main.mfib_index_by_table_id, mfib_table->mft_table_id);
+    pool_put(ip4_main.mfibs, mfib_table);
+}
+
+u32
+ip4_mfib_table_find_or_create_and_lock (u32 table_id)
+{
+    u32 index;
+
+    index = ip4_mfib_index_from_table_id(table_id);
+    if (~0 == index)
+        return ip4_create_mfib_with_table_id(table_id);
+    mfib_table_lock(index, FIB_PROTOCOL_IP4);
+
+    return (index);
+}
+
+u32
+ip4_mfib_table_get_index_for_sw_if_index (u32 sw_if_index)
+{
+    if (sw_if_index >= vec_len(ip4_main.mfib_index_by_sw_if_index))
+    {
+        /*
+         * This is the case for interfaces that are not yet mapped to
+         * a IP table
+         */
+        return (~0);
+    }
+    return (ip4_main.mfib_index_by_sw_if_index[sw_if_index]);
+}
+
+/* Clamp a prefix length to 32, the largest index used into ip4_main.fib_masks */
+#define IPV4_MFIB_GRP_LEN(_len) ((_len) > 32 ? 32 : (_len))
+
+/* Build a u64 hash key: masked group in the upper 32 bits, source below */
+#define IP4_MFIB_MK_GRP_KEY(_grp, _len, _key)                           \
+do {                                                                    \
+    (_key) = ((u64)((_grp)->data_u32 &                                  \
+                  ip4_main.fib_masks[IPV4_MFIB_GRP_LEN(_len)])) << 32;  \
+} while (0)
+#define IP4_MFIB_MK_KEY(_grp, _src, _len, _key)                         \
+do {                                                                    \
+    IP4_MFIB_MK_GRP_KEY(_grp, _len, _key);                              \
+    (_key) |= (_src)->data_u32;                                         \
+} while (0)
+
+/*
+ * ip4_mfib_table_lookup_exact_match
+ *
+ * Exact match prefix lookup
+ */
+fib_node_index_t
+ip4_mfib_table_lookup_exact_match (const ip4_mfib_t *mfib,
+                                   const ip4_address_t *grp,
+                                   const ip4_address_t *src,
+                                   u32 len)
+{
+    uword * hash, * result;
+    u64 key;
+
+    hash = mfib->fib_entry_by_dst_address[len];
+    IP4_MFIB_MK_KEY(grp, src, len, key);
+
+    result = hash_get(hash, key);
+
+    if (NULL != result) {
+        return (result[0]);
+    }
+    return (FIB_NODE_INDEX_INVALID);
+}
+
+/*
+ * ip4_mfib_table_lookup
+ *
+ * Longest prefix match
+ */
+fib_node_index_t
+ip4_mfib_table_lookup (const ip4_mfib_t *mfib,
+                       const ip4_address_t *src,
+                       const ip4_address_t *grp,
+                       u32 len)
+{
+    uword * hash, * result;
+    i32 mask_len;
+    u64 key;
+
+    mask_len = len;
+
+    if (PREDICT_TRUE(64 == mask_len))
+    {
+        hash = mfib->fib_entry_by_dst_address[mask_len];
+        IP4_MFIB_MK_KEY(grp, src, mask_len, key);
+
+        result = hash_get (hash, key);
+
+        if (NULL != result) {
+            return (result[0]);
+        }
+    }
+
+    for (mask_len = 32; mask_len >= 0; mask_len--)
+    {
+        hash = mfib->fib_entry_by_dst_address[mask_len];
+        IP4_MFIB_MK_GRP_KEY(grp, mask_len, key);
+
+        result = hash_get (hash, key);
+
+        if (NULL != result) {
+            return (result[0]);
+        }
+    }
+    return (FIB_NODE_INDEX_INVALID);
+}
+
+void
+ip4_mfib_table_entry_insert (ip4_mfib_t *mfib,
+                             const ip4_address_t *grp,
+                             const ip4_address_t *src,
+                             u32 len,
+                             fib_node_index_t fib_entry_index)
+{
+    uword * hash, * result;
+    u64 key;
+
+    IP4_MFIB_MK_KEY(grp, src, len, key);
+    hash = mfib->fib_entry_by_dst_address[len];
+    result = hash_get (hash, key);
+
+    if (NULL == result) {
+        /*
+         * adding a new entry
+         */
+        if (NULL == hash) {
+            hash = hash_create (32 /* elts */, sizeof (uword));
+            hash_set_flags (hash, HASH_FLAG_NO_AUTO_SHRINK);
+        }
+        hash = hash_set(hash, key, fib_entry_index);
+        mfib->fib_entry_by_dst_address[len] = hash;
+    }
+    else
+    {
+        ASSERT(0);
+    }
+}
+
+void
+ip4_mfib_table_entry_remove (ip4_mfib_t *mfib,
+                             const ip4_address_t *grp,
+                             const ip4_address_t *src,
+                             u32 len)
+{
+    uword * hash, * result;
+    u64 key;
+
+    IP4_MFIB_MK_KEY(grp, src, len, key);
+    hash = mfib->fib_entry_by_dst_address[len];
+    result = hash_get (hash, key);
+
+    if (NULL == result)
+    {
+        /*
+         * removing a non-existent entry. i'll allow it.
+         */
+    }
+    else
+    {
+        hash_unset(hash, key);
+    }
+
+    mfib->fib_entry_by_dst_address[len] = hash;
+}
+
+static void
+ip4_mfib_table_show_all (ip4_mfib_t *mfib,
+                         vlib_main_t * vm)
+{
+    fib_node_index_t *mfib_entry_indicies;
+    fib_node_index_t *mfib_entry_index;
+    int i;
+
+    mfib_entry_indicies = NULL;
+
+    for (i = 0; i < ARRAY_LEN (mfib->fib_entry_by_dst_address); i++)
+    {
+        uword * hash = mfib->fib_entry_by_dst_address[i];
+
+        if (NULL != hash)
+        {
+            hash_pair_t * p;
+
+            hash_foreach_pair (p, hash,
+            ({
+                vec_add1(mfib_entry_indicies, p->value[0]);
+            }));
+        }
+    }
+
+    vec_sort_with_function(mfib_entry_indicies, mfib_entry_cmp_for_sort);
+
+    vec_foreach(mfib_entry_index, mfib_entry_indicies)
+    {
+        vlib_cli_output(vm, "%U",
+                        format_mfib_entry,
+                        *mfib_entry_index,
+                        MFIB_ENTRY_FORMAT_BRIEF);
+    }
+
+    vec_free(mfib_entry_indicies);
+}
+
+static void
+ip4_mfib_table_show_one (ip4_mfib_t *mfib,
+                         vlib_main_t * vm,
+                         ip4_address_t *src,
+                         ip4_address_t *grp,
+                         u32 mask_len)
+{
+    vlib_cli_output(vm, "%U",
+                    format_mfib_entry,
+                    ip4_mfib_table_lookup(mfib, src, grp, mask_len),
+                    MFIB_ENTRY_FORMAT_DETAIL);
+}
+
+/*
+ * CLI handler for "show ip mfib". Walks every IPv4 mfib table, optionally
+ * filtered by table ID or fib index, and prints either a per-prefix-length
+ * summary, all entries, or the entry matching a group (and optional source).
+ */
+static clib_error_t *
+ip4_show_mfib (vlib_main_t * vm,
+               unformat_input_t * input,
+               vlib_cli_command_t * cmd)
+{
+    ip4_main_t * im4 = &ip4_main;
+    mfib_table_t *mfib_table;
+    int verbose = 1, matching = 0;
+    ip4_address_t grp, src = {{0}};
+    u32 mask = 32;
+    int i, table_id = -1, fib_index = ~0;
+
+    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+        if (unformat (input, "brief") || unformat (input, "summary")
+            || unformat (input, "sum"))
+            verbose = 0;
+
+        else if (unformat (input, "%U %U",
+                           unformat_ip4_address, &src,
+                           unformat_ip4_address, &grp))
+        {
+            matching = 1;
+            mask = 64;
+        }
+        /* try <grp>/<len> before the bare <grp>; the bare form would
+         * otherwise consume the address and leave "/<len>" unparsed */
+        else if (unformat (input, "%U/%d",
+                           unformat_ip4_address, &grp, &mask))
+            matching = 1;
+        else if (unformat (input, "%U", unformat_ip4_address, &grp))
+        {
+            matching = 1;
+            mask = 32;
+        }
+        else if (unformat (input, "table %d", &table_id))
+            ;
+        else if (unformat (input, "index %d", &fib_index))
+            ;
+        else
+            break;
+    }
+
+    pool_foreach (mfib_table, im4->mfibs,
+    ({
+        ip4_mfib_t *mfib = &mfib_table->v4;
+
+        if (table_id >= 0 && table_id != (int)mfib->table_id)
+            continue;
+        if (fib_index != ~0 && fib_index != (int)mfib->index)
+            continue;
+
+        vlib_cli_output (vm, "%U, fib_index %d",
+                         format_mfib_table_name, mfib->index, FIB_PROTOCOL_IP4,
+                         mfib->index);
+
+        /* Summary mode: one line per prefix length that has entries */
+        if (! verbose)
+        {
+            vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
+            for (i = 0; i < ARRAY_LEN (mfib->fib_entry_by_dst_address); i++)
+            {
+                uword * hash = mfib->fib_entry_by_dst_address[i];
+                uword n_elts = hash_elts (hash);
+                if (n_elts > 0)
+                    vlib_cli_output (vm, "%20d%16d", i, n_elts);
+            }
+            continue;
+        }
+
+        if (!matching)
+            ip4_mfib_table_show_all(mfib, vm);
+        else
+            ip4_mfib_table_show_one(mfib, vm, &src, &grp, mask);
+    }));
+
+    return 0;
+}
+
+/*?
+ * This command displays the IPv4 Multicast FIB Tables (VRF Tables) and
+ * the route entries for each table.
+ *
+ * @note This command will run for a long time when the FIB tables are
+ * comprised of millions of entries. For those scenarios, consider displaying
+ * a single table or summary mode.
+ *
+ * @cliexpar
+ * Example of how to display all the IPv4 Multicast FIB tables:
+ * @cliexstart{show ip mfib}
+ * ipv4-VRF:0, fib_index 0
+ * (*, 0.0.0.0/0):  flags:D,
+ *  Interfaces:
+ *  multicast-ip4-chain
+ *   [@1]: dpo-drop ip4
+ * (*, 232.1.1.1/32):
+ * Interfaces:
+ *  test-eth1: Forward,
+ *  test-eth2: Forward,
+ *  test-eth0: Accept,
+ * multicast-ip4-chain
+ * [@2]: dpo-replicate: [index:1 buckets:2 to:[0:0]]
+ *   [0] [@1]: ipv4-mcast: test-eth1: IP4: d0:d1:d2:d3:d4:01 -> 01:00:05:00:00:00
+ *   [1] [@1]: ipv4-mcast: test-eth2: IP4: d0:d1:d2:d3:d4:02 -> 01:00:05:00:00:00
+ *
+ * @cliexend
+ * Example of how to display a summary of all IPv4 Multicast FIB tables:
+ * @cliexstart{show ip mfib summary}
+ * ipv4-VRF:0, fib_index 0
+ *     Prefix length         Count
+ *                    0               1
+ *                    8               2
+ *                   32               4
+ * ipv4-VRF:7, fib_index 1
+ *     Prefix length         Count
+ *                    0               1
+ *                    8               2
+ *                   24               2
+ *                   32               4
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
+    .path = "show ip mfib",
+    .short_help = "show ip mfib [summary] [table <table-id>] [index <fib-id>] [<grp-addr>[/<mask>]] [<grp-addr>] [<src-addr> <grp-addr>]",
+    .function = ip4_show_mfib,
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/mfib/ip4_mfib.h b/src/vnet/mfib/ip4_mfib.h
new file mode 100644
index 00000000..6fc74a36
--- /dev/null
+++ b/src/vnet/mfib/ip4_mfib.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The IPv4 Multicast-FIB
+ *
+ * FIXME
+ *
+ * This IPv4 FIB is used by the protocol independent FIB. So directly using
+ * this APIs in client code is not encouraged. However, this IPv4 FIB can be
+ * used if all the client wants is an IPv4 prefix data-base
+ */
+
+#ifndef __IP4_MFIB_H__
+#define __IP4_MFIB_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/mfib/mfib_table.h>
+
+extern fib_node_index_t ip4_mfib_table_lookup(const ip4_mfib_t *fib,
+                                              const ip4_address_t *src,
+                                              const ip4_address_t *grp,
+                                              u32 len);
+extern fib_node_index_t ip4_mfib_table_lookup_exact_match(const ip4_mfib_t *fib,
+                                                          const ip4_address_t *grp,
+                                                          const ip4_address_t *src,
+                                                          u32 len);
+
+extern void ip4_mfib_table_entry_remove(ip4_mfib_t *fib,
+                                        const ip4_address_t *grp,
+                                        const ip4_address_t *src,
+                                        u32 len);
+
+extern void ip4_mfib_table_entry_insert(ip4_mfib_t *fib,
+                                        const ip4_address_t *grp,
+                                        const ip4_address_t *src,
+                                        u32 len,
+                                        fib_node_index_t fib_entry_index);
+extern void ip4_mfib_table_destroy(ip4_mfib_t *fib);
+
+/**
+ * @brief Get the FIB at the given index
+ */
+static inline ip4_mfib_t *
+ip4_mfib_get (u32 index)
+{
+    return (&(pool_elt_at_index(ip4_main.mfibs, index)->v4));
+}
+
+/**
+ * @brief Get or create an IPv4 fib.
+ *
+ * Get or create an IPv4 fib with the provided table ID.
+ *
+ * @param table_id
+ *      When set to \c ~0, an arbitrary and unused fib ID is picked
+ *      and can be retrieved with \c ret->table_id.
+ *      Otherwise, the fib ID to be used to retrieve or create the desired fib.
+ * @returns The index of the retrieved or created fib.
+ *
+ */
+extern u32 ip4_mfib_table_find_or_create_and_lock(u32 table_id);
+extern u32 ip4_mfib_table_create_and_lock(void);
+
+static inline
+u32 ip4_mfib_index_from_table_id (u32 table_id)
+{
+  ip4_main_t * im = &ip4_main;
+  uword * p;
+
+  p = hash_get (im->mfib_index_by_table_id, table_id);
+  if (!p)
+    return ~0;
+
+  return p[0];
+}
+
+extern u32 ip4_mfib_table_get_index_for_sw_if_index(u32 sw_if_index);
+
+
+#endif
+
diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c
new file mode 100644
index 00000000..0c2e4c7b
--- /dev/null
+++ b/src/vnet/mfib/ip6_mfib.c
@@ -0,0 +1,663 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mfib/ip6_mfib.h>
+
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/mfib/mfib_entry.h>
+#include <vnet/fib/ip6_fib.h>
+
+/**
+ * The number of bytes in an address/mask key in the radix tree
+ * First byte is the length in bytes.
+ */
+#define IP6_MFIB_KEY_LEN 33
+
+/**
+ * Key and mask for radix
+ */
+typedef struct ip6_mfib_key_t_
+{
+    u8 key[IP6_MFIB_KEY_LEN];
+    u8 mask[IP6_MFIB_KEY_LEN];
+} ip6_mfib_key_t;
+
+/**
+ * An object that is inserted into the radix tree.
+ * Since it's in the tree and has pointers, it cannot realloc and so cannot
+ * come from a vlib pool.
+ */
+typedef struct ip6_mfib_node_t_
+{
+    struct radix_node i6mn_nodes[2];
+    ip6_mfib_key_t i6mn_key;
+    index_t i6mn_entry;
+} ip6_mfib_node_t;
+
+static const mfib_prefix_t all_zeros = {
+    /* (*,*) */
+    .fp_src_addr = {
+        .ip6.as_u64 = {0, 0},
+    },
+    .fp_grp_addr = {
+        .ip6.as_u64 = {0, 0},
+    },
+    .fp_len  = 0,
+    .fp_proto = FIB_PROTOCOL_IP6,
+};
+
+typedef enum ip6_mfib_special_type_t_ {
+    IP6_MFIB_SPECIAL_TYPE_NONE,
+    IP6_MFIB_SPECIAL_TYPE_SOLICITED,
+} ip6_mfib_special_type_t;
+
+typedef struct ip6_mfib_special_t_ {
+    /**
+     * @brief solicited or not
+     */
+    ip6_mfib_special_type_t ims_type;
+
+    /**
+     * @brief the Prefix length
+     */
+    u8 ims_len;
+
+    /**
+     * @brief The last byte of the mcast address
+     */
+    u8 ims_byte;
+    /**
+     * @brief The scope of the address
+     */
+    u8 ims_scope;
+} ip6_mfib_special_t;
+
+static const ip6_mfib_special_t ip6_mfib_specials[] =
+{
+    {
+        /*
+         * Add ff02::1:ff00:0/104 via local route for all tables.
+         *  This is required for neighbor discovery to work.
+         */
+        .ims_type = IP6_MFIB_SPECIAL_TYPE_SOLICITED,
+        .ims_len = 104,
+    },
+    {
+        /*
+         * all-routers multicast address
+         */
+        .ims_type = IP6_MFIB_SPECIAL_TYPE_NONE,
+        .ims_scope = IP6_MULTICAST_SCOPE_link_local,
+        .ims_byte = IP6_MULTICAST_GROUP_ID_all_routers,
+        .ims_len = 128,
+    },
+    {
+        /*
+         * all-nodes multicast address
+         */
+        .ims_type = IP6_MFIB_SPECIAL_TYPE_NONE,
+        .ims_scope = IP6_MULTICAST_SCOPE_link_local,
+        .ims_byte = IP6_MULTICAST_GROUP_ID_all_hosts,
+        .ims_len = 128,
+    },
+    {
+        /*
+         *  Add all-mldv2  multicast address via local route for all tables
+         */
+        .ims_type = IP6_MFIB_SPECIAL_TYPE_NONE,
+        .ims_len = 128,
+        .ims_scope = IP6_MULTICAST_SCOPE_link_local,
+        .ims_byte = IP6_MULTICAST_GROUP_ID_mldv2_routers,
+    }
+};
+
+#define FOR_EACH_IP6_SPECIAL(_pfx, _body)                               \
+{                                                                       \
+    const ip6_mfib_special_t *_spec;                                    \
+    u8 _ii;                                                             \
+    for (_ii = 0;                                                       \
+         _ii < ARRAY_LEN(ip6_mfib_specials);                            \
+         _ii++)                                                         \
+    {                                                                   \
+        _spec = &ip6_mfib_specials[_ii];                                \
+        if (IP6_MFIB_SPECIAL_TYPE_SOLICITED == _spec->ims_type)         \
+        {                                                               \
+            ip6_set_solicited_node_multicast_address(                   \
+                &(_pfx)->fp_grp_addr.ip6, 0);                           \
+        }                                                               \
+        else                                                            \
+        {                                                               \
+            ip6_set_reserved_multicast_address (                        \
+                &(_pfx)->fp_grp_addr.ip6,                               \
+                _spec->ims_scope,                                       \
+                _spec->ims_byte);                                       \
+        }                                                               \
+        (_pfx)->fp_len = _spec->ims_len;                                \
+        do { _body; } while (0);                                        \
+    }                                                                   \
+}
+
+
+static u32
+ip6_create_mfib_with_table_id (u32 table_id)
+{
+    mfib_table_t *mfib_table;
+    mfib_prefix_t pfx = {
+        .fp_proto = FIB_PROTOCOL_IP6,
+    };
+    const fib_route_path_t path_for_us = {
+        .frp_proto = FIB_PROTOCOL_IP6,
+        .frp_addr = zero_addr,
+        .frp_sw_if_index = 0xffffffff,
+        .frp_fib_index = ~0,
+        .frp_weight = 0,
+        .frp_flags = FIB_ROUTE_PATH_LOCAL,
+    };
+
+    pool_get_aligned(ip6_main.mfibs, mfib_table, CLIB_CACHE_LINE_BYTES);
+    memset(mfib_table, 0, sizeof(*mfib_table));
+
+    mfib_table->mft_proto = FIB_PROTOCOL_IP6;
+    mfib_table->mft_index =
+        mfib_table->v6.index =
+            (mfib_table - ip6_main.mfibs);
+
+    hash_set (ip6_main.mfib_index_by_table_id,
+              table_id,
+              mfib_table->mft_index);
+
+    mfib_table->mft_table_id =
+        mfib_table->v6.table_id =
+            table_id;
+
+    mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP6);
+
+    mfib_table->v6.rhead =
+        clib_mem_alloc_aligned (sizeof(*mfib_table->v6.rhead),
+                                CLIB_CACHE_LINE_BYTES);
+    rn_inithead0(mfib_table->v6.rhead, 8);
+
+    /*
+     * add the special entries into the new FIB
+     */
+    mfib_table_entry_update(mfib_table->mft_index,
+                            &all_zeros,
+                            MFIB_SOURCE_DEFAULT_ROUTE,
+                            MFIB_ENTRY_FLAG_DROP);
+
+    /*
+     * Add each of the specials
+     */
+    FOR_EACH_IP6_SPECIAL(&pfx,
+    ({
+        mfib_table_entry_path_update(mfib_table->mft_index,
+                                     &pfx,
+                                     MFIB_SOURCE_SPECIAL,
+                                     &path_for_us,
+                                     MFIB_ITF_FLAG_FORWARD);
+    }));
+
+    return (mfib_table->mft_index);
+}
+
+void
+ip6_mfib_table_destroy (ip6_mfib_t *mfib)
+{
+    mfib_table_t *mfib_table = (mfib_table_t*)mfib;
+    fib_node_index_t mfei;
+    mfib_prefix_t pfx = {
+        .fp_proto = FIB_PROTOCOL_IP6,
+    };
+    const fib_route_path_t path_for_us = {
+        .frp_proto = FIB_PROTOCOL_IP6,
+        .frp_addr = zero_addr,
+        .frp_sw_if_index = 0xffffffff,
+        .frp_fib_index = ~0,
+        .frp_weight = 0,
+        .frp_flags = FIB_ROUTE_PATH_LOCAL,
+    };
+
+    /*
+     * remove all the specials we added when the table was created.
+     */
+    FOR_EACH_IP6_SPECIAL(&pfx,
+    {
+        mfib_table_entry_path_remove(mfib_table->mft_index,
+                                     &pfx,
+                                     MFIB_SOURCE_SPECIAL,
+                                     &path_for_us);
+    });
+
+    mfei = mfib_table_lookup_exact_match(mfib_table->mft_index, &all_zeros);
+    mfib_table_entry_delete_index(mfei, MFIB_SOURCE_DEFAULT_ROUTE);
+
+    /*
+     * validate no more routes.
+     */
+    ASSERT(0 == mfib_table->mft_total_route_counts);
+    ASSERT(~0 != mfib_table->mft_table_id);
+
+    hash_unset (ip6_main.mfib_index_by_table_id, mfib_table->mft_table_id);
+    clib_mem_free(mfib_table->v6.rhead);
+    pool_put(ip6_main.mfibs, mfib_table);
+}
+
+void
+ip6_mfib_interface_enable_disable (u32 sw_if_index, int is_enable)
+{
+    const fib_route_path_t path = {
+        .frp_proto = FIB_PROTOCOL_IP6,
+        .frp_addr = zero_addr,
+        .frp_sw_if_index = sw_if_index,
+        .frp_fib_index = ~0,
+        .frp_weight = 0,
+    };
+    mfib_prefix_t pfx = {
+        .fp_proto = FIB_PROTOCOL_IP6,
+    };
+    u32 mfib_index;
+
+    vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index);
+    mfib_index = ip6_mfib_table_get_index_for_sw_if_index(sw_if_index);
+
+    if (is_enable)
+    {
+        FOR_EACH_IP6_SPECIAL(&pfx,
+        {
+            mfib_table_entry_path_update(mfib_index,
+                                         &pfx,
+                                         MFIB_SOURCE_SPECIAL,
+                                         &path,
+                                         MFIB_ITF_FLAG_ACCEPT);
+        });
+    }
+    else
+    {
+        FOR_EACH_IP6_SPECIAL(&pfx,
+        {
+            mfib_table_entry_path_remove(mfib_index,
+                                         &pfx,
+                                         MFIB_SOURCE_SPECIAL,
+                                         &path);
+        });
+    }
+}
+
+u32
+ip6_mfib_table_find_or_create_and_lock (u32 table_id)
+{
+    /* an existing table gains a lock; a missing one is created instead */
+    u32 mfib_index = ip6_mfib_index_from_table_id(table_id);
+    if (~0 != mfib_index)
+    {
+        mfib_table_lock(mfib_index, FIB_PROTOCOL_IP6);
+        return (mfib_index);
+    }
+    return ip6_create_mfib_with_table_id(table_id);
+}
+
+u32
+ip6_mfib_table_get_index_for_sw_if_index (u32 sw_if_index)
+{
+    /*
+     * Interfaces beyond the end of the vector have not been mapped to
+     * an IP table yet, so there is no index to report for them.
+     */
+    if (sw_if_index < vec_len(ip6_main.mfib_index_by_sw_if_index))
+    {
+        return (ip6_main.mfib_index_by_sw_if_index[sw_if_index]);
+    }
+    return (~0);
+}
+
+#define IP6_MFIB_MK_KEY(_grp, _src, _key)                           \
+{                                                                   \
+    (_key)->key[0] = 33; /* 1 + 16 + 16; presumably the key length - confirm */ \
+    memcpy((_key)->key+1, _grp, 16);  /* group address first */     \
+    memcpy((_key)->key+17, _src, 16); /* then the source address */ \
+}
+
+#define IP6_MFIB_MK_KEY_MASK(_grp, _src, _len, _key)                \
+{                                                                   \
+    IP6_MFIB_MK_KEY(_grp, _src, _key);                              \
+    /* _len <= 128: group masked by fib_masks[_len], source zeroed */ \
+    (_key)->mask[0] = 33;                                           \
+    if (_len <= 128)                                                \
+    {                                                               \
+        memcpy((_key)->mask+1, &ip6_main.fib_masks[_len], 16);      \
+        memset((_key)->mask+17, 0, 16);                             \
+    }                                                               \
+    else /* otherwise only _len == 256, a full (S,G), is expected */ \
+    {                                                               \
+        ASSERT(_len == 256);                                        \
+        memcpy((_key)->mask+1, &ip6_main.fib_masks[128], 16);       \
+        memcpy((_key)->mask+17, &ip6_main.fib_masks[128], 16);      \
+    }                                                               \
+}
+
+/*
+ * ip6_mfib_table_lookup_exact_match
+ *
+ * Exact match (grp, src)/len lookup; INDEX_INVALID when nothing matches
+ */
+fib_node_index_t
+ip6_mfib_table_lookup_exact_match (const ip6_mfib_t *mfib,
+                                   const ip6_address_t *grp,
+                                   const ip6_address_t *src,
+                                   u32 len)
+{
+    ip6_mfib_node_t *i6mn;
+    ip6_mfib_key_t key;
+
+    IP6_MFIB_MK_KEY_MASK(grp, src, len, &key);
+
+    i6mn = (ip6_mfib_node_t*) rn_lookup(key.key, key.mask,
+                                        (struct radix_node_head *)mfib->rhead);
+
+    if (NULL == i6mn)
+    {
+        return (INDEX_INVALID);
+    }
+
+    return (i6mn->i6mn_entry);
+}
+
+/*
+ * ip6_mfib_table_lookup
+ *
+ * Longest prefix match with the mask built from len (NB: src before grp)
+ */
+fib_node_index_t
+ip6_mfib_table_lookup (const ip6_mfib_t *mfib,
+                       const ip6_address_t *src,
+                       const ip6_address_t *grp,
+                       u32 len)
+{
+    ip6_mfib_node_t *i6mn;
+    ip6_mfib_key_t key;
+
+    IP6_MFIB_MK_KEY_MASK(grp, src, len, &key);
+
+    i6mn = (ip6_mfib_node_t*) rn_search_m(key.key,
+                                          mfib->rhead->rnh_treetop,
+                                          key.mask);
+
+    ASSERT(NULL != i6mn); /* a miss is not handled: the result is dereferenced */
+
+    return (i6mn->i6mn_entry);
+}
+
+/*
+ * ip6_mfib_table_lookup2
+ *
+ * Longest prefix match on the key alone (no mask is built)
+ */
+fib_node_index_t
+ip6_mfib_table_lookup2 (const ip6_mfib_t *mfib,
+                        const ip6_address_t *src,
+                        const ip6_address_t *grp)
+{
+    ip6_mfib_node_t *i6mn;
+    ip6_mfib_key_t key;
+
+    IP6_MFIB_MK_KEY(grp, src, &key);
+
+    i6mn = (ip6_mfib_node_t*) rn_match(key.key,
+                                       (struct radix_node_head *)mfib->rhead); // const cast
+
+    ASSERT(NULL != i6mn); /* a miss is not handled: the result is dereferenced */
+
+    return (i6mn->i6mn_entry);
+}
+
+void
+ip6_mfib_table_entry_insert (ip6_mfib_t *mfib,
+                             const ip6_address_t *grp,
+                             const ip6_address_t *src,
+                             u32 len,
+                             fib_node_index_t mfib_entry_index)
+{
+    /* Add a (grp, src)/len node to the radix tree for mfib_entry_index. */
+    ip6_mfib_node_t *i6mn = clib_mem_alloc(sizeof(*i6mn));
+    memset(i6mn, 0, sizeof(*i6mn));
+
+    IP6_MFIB_MK_KEY_MASK(grp, src, len, &i6mn->i6mn_key);
+    i6mn->i6mn_entry = mfib_entry_index;
+
+    if (NULL == rn_addroute(i6mn->i6mn_key.key,
+                            i6mn->i6mn_key.mask,
+                            mfib->rhead,
+                            i6mn->i6mn_nodes))
+    {
+        ASSERT(0);
+        clib_mem_free(i6mn); /* not inserted: do not leak it when ASSERT is off */
+    }
+}
+
+void
+ip6_mfib_table_entry_remove (ip6_mfib_t *mfib,
+                             const ip6_address_t *grp,
+                             const ip6_address_t *src,
+                             u32 len)
+{
+    ip6_mfib_node_t *i6mn;
+    ip6_mfib_key_t key;
+
+    IP6_MFIB_MK_KEY_MASK(grp, src, len, &key);
+    i6mn = (ip6_mfib_node_t*) rn_delete(key.key, key.mask, mfib->rhead);
+    /* no node comes back when the entry was not in the tree: nothing to free */
+    if (NULL != i6mn)
+        clib_mem_free(i6mn);
+}
+
+static clib_error_t *
+ip6_mfib_module_init (vlib_main_t * vm)
+{
+    return (NULL); /* no work is done here; a NULL error is returned */
+}
+
+VLIB_INIT_FUNCTION(ip6_mfib_module_init);
+
+static void
+ip6_mfib_table_show_one (ip6_mfib_t *mfib,
+                         vlib_main_t * vm,
+                         ip6_address_t *src,
+                         ip6_address_t *grp,
+                         u32 mask_len)
+{
+    vlib_cli_output(vm, "%U", /* print the single entry the lookup returns */
+                    format_mfib_entry,
+                    ip6_mfib_table_lookup(mfib, src, grp, mask_len), /* LPM */
+                    MFIB_ENTRY_FORMAT_DETAIL);
+}
+
+typedef struct ip6_mfib_show_ctx_t_ {
+    u32 fib_index;              /* index of the table being walked */
+    fib_node_index_t *entries;  /* vector of entry indices gathered by the walk */
+} ip6_mfib_show_ctx_t;
+
+
+static int
+ip6_mfib_table_collect_entries (struct radix_node *rn, void *arg)
+{
+    ip6_mfib_show_ctx_t *ctx = arg;
+    ip6_mfib_node_t *i6mn;
+
+    i6mn = (ip6_mfib_node_t*) rn; /* tree-walk callback: rn is cast to the mfib node */
+
+    vec_add1(ctx->entries, i6mn->i6mn_entry); /* remember its entry index */
+
+    return (0); /* presumably 0 lets rn_walktree carry on - confirm */
+}
+
+static void
+ip6_mfib_table_show_all (ip6_mfib_t *mfib,
+                         vlib_main_t * vm)
+{
+    fib_node_index_t *mfib_entry_index;
+    ip6_mfib_show_ctx_t ctx = {
+        .fib_index = mfib->index,
+        .entries = NULL,
+    };
+    /* gather the entry index of every node in the table's tree ... */
+    rn_walktree(mfib->rhead,
+                ip6_mfib_table_collect_entries,
+                &ctx);
+    /* ... sort them ... */
+    vec_sort_with_function(ctx.entries, mfib_entry_cmp_for_sort);
+    /* ... and print each one in brief form */
+    vec_foreach(mfib_entry_index, ctx.entries)
+    {
+        vlib_cli_output(vm, "%U",
+                        format_mfib_entry,
+                        *mfib_entry_index,
+                        MFIB_ENTRY_FORMAT_BRIEF);
+    }
+
+    vec_free(ctx.entries);
+}
+
+static clib_error_t *
+ip6_show_mfib (vlib_main_t * vm,
+               unformat_input_t * input,
+               vlib_cli_command_t * cmd)
+{
+    ip6_main_t * im6 = &ip6_main;
+    mfib_table_t *mfib_table;
+    int verbose = 1, matching = 0;
+    ip6_address_t grp, src = {{0}};
+    u32 mask = 128;
+    int table_id = -1, fib_index = ~0;
+
+    /* IPv6 key widths: 128 = group only, 256 = full (S,G).  "<grp>/<len>"
+     * is tried before the bare "<grp>" so the prefix length is not lost. */
+
+    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+        if (unformat (input, "brief") || unformat (input, "summary")
+            || unformat (input, "sum"))
+            verbose = 0;
+
+        else if (unformat (input, "%U %U",
+                           unformat_ip6_address, &src,
+                           unformat_ip6_address, &grp))
+        {
+            matching = 1;
+            mask = 256;
+        }
+        else if (unformat (input, "%U/%d",
+                           unformat_ip6_address, &grp, &mask))
+            matching = 1;
+        else if (unformat (input, "%U", unformat_ip6_address, &grp))
+        {
+            matching = 1;
+            mask = 128;
+        }
+        else if (unformat (input, "table %d", &table_id))
+            ;
+        else if (unformat (input, "index %d", &fib_index))
+            ;
+        else
+            break;
+    }
+
+    pool_foreach (mfib_table, im6->mfibs,
+    ({
+        ip6_mfib_t *mfib = &mfib_table->v6;
+
+        if (table_id >= 0 && table_id != (int)mfib->table_id)
+            continue;
+        if (fib_index != ~0 && fib_index != (int)mfib->index)
+            continue;
+
+        vlib_cli_output (vm, "%U, fib_index %d",
+                         format_mfib_table_name, mfib->index, FIB_PROTOCOL_IP6,
+                         mfib->index);
+
+        /* Show summary? */
+        if (! verbose)
+        {
+            /* vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); */
+            /* for (i = 0; i < ARRAY_LEN (mfib->fib_entry_by_dst_address); i++) */
+            /* { */
+            /*     uword * hash = mfib->fib_entry_by_dst_address[i]; */
+            /*     uword n_elts = hash_elts (hash); */
+            /*     if (n_elts > 0) */
+            /*         vlib_cli_output (vm, "%20d%16d", i, n_elts); */
+            /* } */
+            continue;
+        }
+
+        if (!matching)
+        {
+            ip6_mfib_table_show_all(mfib, vm);
+        }
+        else
+        {
+            ip6_mfib_table_show_one(mfib, vm, &src, &grp, mask);
+        }
+    }));
+
+    return 0;
+}
+
+/*
+ * This command displays the IPv6 Multicast FIB Tables (VRF Tables) and
+ * the route entries for each table.
+ *
+ * @note This command will run for a long time when the FIB tables are
+ * comprised of millions of entries. For those scenarios, consider displaying
+ * a single table or summary mode.
+ *
+ * @cliexpar
+ * Example of how to display all the IPv4 Multicast FIB tables:
+ * @cliexstart{show ip fib}
+ * ipv4-VRF:0, fib_index 0
+ * (*, 0.0.0.0/0):  flags:D,
+ *  Interfaces:
+ *  multicast-ip6-chain
+ *   [@1]: dpo-drop ip6
+ * (*, 232.1.1.1/32):
+ * Interfaces:
+ *  test-eth1: Forward,
+ *  test-eth2: Forward,
+ *  test-eth0: Accept,
+ * multicast-ip6-chain
+ * [@2]: dpo-replicate: [index:1 buckets:2 to:[0:0]]
+ *   [0] [@1]: ipv4-mcast: test-eth1: IP6: d0:d1:d2:d3:d4:01 -> 01:00:05:00:00:00
+ *   [1] [@1]: ipv4-mcast: test-eth2: IP6: d0:d1:d2:d3:d4:02 -> 01:00:05:00:00:00
+ *
+ * @cliexend
+ * Example of how to display a summary of all IPv4 FIB tables:
+ * @cliexstart{show ip fib summary}
+ * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ *     Prefix length         Count
+ *                    0               1
+ *                    8               2
+ *                   32               4
+ * ipv4-VRF:7, fib_index 1, flow hash: src dst sport dport proto
+ *     Prefix length         Count
+ *                    0               1
+ *                    8               2
+ *                   24               2
+ *                   32               4
+ * @cliexend
+ */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
+    .path = "show ip6 mfib", /* the help text below must name the same command */
+    .short_help = "show ip6 mfib [summary] [table <table-id>] [index <fib-id>] [<grp-addr>[/<mask>]] [<grp-addr>] [<src-addr> <grp-addr>]",
+    .function = ip6_show_mfib,
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/mfib/ip6_mfib.h b/src/vnet/mfib/ip6_mfib.h
new file mode 100644
index 00000000..d91af46d
--- /dev/null
+++ b/src/vnet/mfib/ip6_mfib.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The IPv6 Multicast-FIB
+ *
+ * FIXME
+ *
+ * This IPv6 MFIB is used by the protocol independent MFIB. So directly using
+ * these APIs in client code is not encouraged. However, this IPv6 MFIB can be
+ * used if all the client wants is an IPv6 multicast prefix data-base
+ */
+
+#ifndef __IP6_MFIB_H__
+#define __IP6_MFIB_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/mfib/mfib_table.h>
+
+extern fib_node_index_t ip6_mfib_table_lookup(const ip6_mfib_t *fib,
+                                              const ip6_address_t *src, /* NB: src first */
+                                              const ip6_address_t *grp,
+                                              u32 len);
+extern fib_node_index_t ip6_mfib_table_lookup_exact_match(const ip6_mfib_t *fib,
+                                                          const ip6_address_t *grp, /* NB: grp first */
+                                                          const ip6_address_t *src,
+                                                          u32 len);
+
+extern void ip6_mfib_table_entry_remove(ip6_mfib_t *fib,
+                                        const ip6_address_t *grp,
+                                        const ip6_address_t *src,
+                                        u32 len);
+
+extern void ip6_mfib_table_entry_insert(ip6_mfib_t *fib,
+                                        const ip6_address_t *grp,
+                                        const ip6_address_t *src,
+                                        u32 len,
+                                        fib_node_index_t fib_entry_index);
+extern void ip6_mfib_table_destroy(ip6_mfib_t *fib);
+
+/**
+ * @brief
+ *  Add/remove the interface from the accepting list of the special MFIB entries
+ */
+extern void ip6_mfib_interface_enable_disable(u32 sw_if_index,
+                                              int is_enable);
+
+/**
+ * @brief Get the IPv6 MFIB (the v6 member) of the table at the given pool index
+ */
+static inline ip6_mfib_t *
+ip6_mfib_get (u32 index)
+{
+    return (&(pool_elt_at_index(ip6_main.mfibs, index)->v6));
+}
+
+/**
+ * @brief Get or create an IPv6 mfib.
+ *
+ * Get or create an IPv6 mfib with the provided table ID.
+ *
+ * @param table_id
+ *      When set to \c ~0, an arbitrary and unused fib ID is picked
+ *      and can be retrieved with \c ret->table_id.
+ *      Otherwise, the fib ID to be used to retrieve or create the desired fib.
+ * @returns The index of the retrieved or created mfib.
+ *
+ */
+extern u32 ip6_mfib_table_find_or_create_and_lock(u32 table_id);
+extern u32 ip6_mfib_table_create_and_lock(void);
+
+
+static inline
+u32 ip6_mfib_index_from_table_id (u32 table_id)
+{
+  /*
+   * Translate a table ID into an mfib index using ip6_main's hash;
+   * ~0 is returned when the ID is not in the hash.
+   */
+  uword * slot = hash_get (ip6_main.mfib_index_by_table_id, table_id);
+  if (NULL != slot)
+    return slot[0];
+  return ~0;
+}
+
+extern u32 ip6_mfib_table_get_index_for_sw_if_index(u32 sw_if_index);
+
+/**
+ * @brief Data-plane lookup function
+ */
+extern fib_node_index_t ip6_mfib_table_lookup2(const ip6_mfib_t *mfib,
+                                               const ip6_address_t *src,
+                                               const ip6_address_t *grp);
+
+#endif
+
diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c
new file mode 100644
index 00000000..479ce5f1
--- /dev/null
+++ b/src/vnet/mfib/mfib_entry.c
@@ -0,0 +1,1096 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+
+#include <vnet/mfib/mfib_entry.h>
+#include <vnet/fib/fib_path_list.h>
+
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+/**
+ * Debug macro: logs "e:[<index>:<grp>/<len>,<src>]" followed by the caller's
+ * formatted message. Expands to nothing unless MFIB_DEBUG is defined.
+ */
+#ifdef MFIB_DEBUG
+#define MFIB_ENTRY_DBG(_e, _fmt, _args...)              \
+{                                                       \
+    u8*__tmp = NULL;                                    \
+    __tmp = format(__tmp, "e:[%d:%U",                   \
+                   mfib_entry_get_index(_e),            \
+                   format_ip46_address,                 \
+                   &(_e)->mfe_prefix.fp_grp_addr,       \
+                   IP46_TYPE_ANY);                      \
+    __tmp = format(__tmp, "/%d,",                       \
+                   (_e)->mfe_prefix.fp_len);            \
+    __tmp = format(__tmp, "%U]",                        \
+                   format_ip46_address,                 \
+                   &(_e)->mfe_prefix.fp_src_addr,       \
+                   IP46_TYPE_ANY);                      \
+    __tmp = format(__tmp, _fmt, ##_args);               \
+    clib_warning("%v", __tmp);                          \
+    vec_free(__tmp);                                    \
+}
+#else
+#define MFIB_ENTRY_DBG(_e, _fmt, _args...)
+#endif
+
+/**
+ * The source of an MFIB entry
+ */
+typedef struct mfib_entry_src_t_
+{
+    /**
+     * Which source this is (the sort key for an entry's source vector)
+     */
+    mfib_source_t mfes_src;
+
+    /**
+     * The path-list of forwarding interfaces (FIB_NODE_INDEX_INVALID if none)
+     */
+    fib_node_index_t mfes_pl;
+
+    /**
+     * Route flags supplied by this source
+     */
+    mfib_entry_flags_t mfes_flags;
+
+    /**
+     * The hash table of all interfaces: sw_if_index -> mfib_itf index
+     */
+    mfib_itf_t *mfes_itfs;
+} mfib_entry_src_t;
+
+/**
+ * String names for each source, indexed by mfib_source_t value
+ */
+static const char *mfib_source_names[] = MFIB_SOURCE_NAMES;
+
+/*
+ * Pool for all mfib entries
+ */
+mfib_entry_t *mfib_entry_pool;
+
+static fib_node_t *
+mfib_entry_get_node (fib_node_index_t index)
+{
+    return ((fib_node_t*)mfib_entry_get(index)); /* vft hook: entry index -> node, by cast */
+}
+
+static fib_protocol_t
+mfib_entry_get_proto (const mfib_entry_t * mfib_entry)
+{
+    return (mfib_entry->mfe_prefix.fp_proto); /* protocol recorded in the entry's prefix */
+}
+
+fib_forward_chain_type_t
+mfib_entry_get_default_chain_type (const mfib_entry_t *mfib_entry)
+{
+    switch (mfib_entry->mfe_prefix.fp_proto) /* chain type follows the prefix protocol */
+    {
+    case FIB_PROTOCOL_IP4:
+        return (FIB_FORW_CHAIN_TYPE_MCAST_IP4);
+    case FIB_PROTOCOL_IP6:
+        return (FIB_FORW_CHAIN_TYPE_MCAST_IP6);
+    case FIB_PROTOCOL_MPLS:
+        ASSERT(0); /* MPLS is not expected for an MFIB entry */
+        break;
+    }
+    return (FIB_FORW_CHAIN_TYPE_MCAST_IP4); /* fallback once the switch is left */
+}
+
+static u8 *
+format_mfib_entry_dpo (u8 * s, va_list * args)
+{
+    index_t fei = va_arg(*args, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*args, u32); /* consumed but not used */
+
+    return (format(s, "%U", /* delegates to format_mfib_entry, brief level */
+                   format_mfib_entry, fei,
+                   MFIB_ENTRY_FORMAT_BRIEF));
+}
+
+u8 *
+format_mfib_entry (u8 * s, va_list * args)
+{
+    fib_node_index_t fei, mfi;
+    mfib_entry_t *mfib_entry;
+    mfib_entry_src_t *msrc;
+    u32 sw_if_index;
+    int level;
+
+    fei = va_arg (*args, fib_node_index_t); /* 1st arg: the entry's index */
+    level = va_arg (*args, int);            /* 2nd arg: MFIB_ENTRY_FORMAT_* level */
+    mfib_entry = mfib_entry_get(fei);
+
+    s = format (s, "%U", format_mfib_prefix, &mfib_entry->mfe_prefix);
+    s = format (s, ": %U", format_mfib_entry_flags, mfib_entry->mfe_flags);
+
+    if (level >= MFIB_ENTRY_FORMAT_DETAIL) /* per-source breakdown */
+    {
+        s = format (s, "\n");
+        s = format (s, " fib:%d", mfib_entry->mfe_fib_index);
+        s = format (s, " index:%d", mfib_entry_get_index(mfib_entry));
+        s = format (s, " locks:%d\n", mfib_entry->mfe_node.fn_locks);
+        vec_foreach(msrc, mfib_entry->mfe_srcs)
+        {
+            s = format (s, "  src:%s", mfib_source_names[msrc->mfes_src]);
+            s = format (s, ": %U\n", format_mfib_entry_flags, msrc->mfes_flags);
+            if (FIB_NODE_INDEX_INVALID != msrc->mfes_pl)
+            {
+                s = fib_path_list_format(msrc->mfes_pl, s);
+            }
+            hash_foreach(sw_if_index, mfi, msrc->mfes_itfs,
+            ({
+                s = format(s, "    %U\n", format_mfib_itf, mfi);
+            }));
+        }
+    }
+
+    s = format(s, "\n  Interfaces:"); /* the interface set copied onto the entry */
+    hash_foreach(sw_if_index, mfi, mfib_entry->mfe_itfs,
+    ({
+        s = format(s, "\n  %U", format_mfib_itf, mfi);
+    }));
+
+    s = format(s, "\n  %U-chain\n  %U", /* chain type, then the entry's DPO */
+               format_fib_forw_chain_type,
+               mfib_entry_get_default_chain_type(mfib_entry),
+               format_dpo_id,
+               &mfib_entry->mfe_rep,
+               2);
+    s = format(s, "\n");
+
+    if (level >= MFIB_ENTRY_FORMAT_DETAIL2) /* also list the node's children */
+    {
+        s = format(s, "\nchildren:");
+        s = fib_node_children_format(mfib_entry->mfe_node.fn_children, s);
+    }
+
+    return (s);
+}
+
+static mfib_entry_t*
+mfib_entry_from_fib_node (fib_node_t *node)
+{
+#if CLIB_DEBUG > 0
+    ASSERT(FIB_NODE_TYPE_MFIB_ENTRY == node->fn_type); /* debug-only type check */
+#endif
+    return ((mfib_entry_t*)node); /* plain cast back to the entry type */
+}
+
+static int
+mfib_entry_src_cmp_for_sort (void * v1,
+                             void * v2)
+{
+    mfib_entry_src_t *esrc1 = v1, *esrc2 = v2;
+
+    return (esrc1->mfes_src - esrc2->mfes_src); /* sort comparator on the source value */
+}
+
+static void
+mfib_entry_src_init (mfib_entry_t *mfib_entry,
+                     mfib_source_t source)
+
+{
+    mfib_entry_src_t esrc = {
+        .mfes_pl = FIB_NODE_INDEX_INVALID,
+        .mfes_flags = MFIB_ENTRY_FLAG_NONE,
+        .mfes_src = source,
+    };
+    /* append a source with no path-list and no flags, then re-sort the vector */
+    vec_add1(mfib_entry->mfe_srcs, esrc);
+    vec_sort_with_function(mfib_entry->mfe_srcs,
+                           mfib_entry_src_cmp_for_sort);
+}
+
+static mfib_entry_src_t *
+mfib_entry_src_find (const mfib_entry_t *mfib_entry,
+                    mfib_source_t source,
+                    u32 *index)
+
+{
+    /*
+     * Linear scan of the entry's sources for the one tagged 'source'.
+     * On a hit the source is returned and, when the caller supplied
+     * 'index', its position in the vector is stored there.
+     * On a miss NULL is returned and 'index' is left untouched.
+     */
+    mfib_entry_src_t *msrc;
+    int pos = 0;
+
+    vec_foreach(msrc, mfib_entry->mfe_srcs)
+    {
+        if (msrc->mfes_src != source)
+        {
+            pos++;
+            continue;
+        }
+        if (NULL != index)
+            *index = pos;
+        return (msrc);
+    }
+    return (NULL);
+}
+
+static mfib_entry_src_t *
+mfib_entry_src_find_or_create (mfib_entry_t *mfib_entry,
+                              mfib_source_t source)
+{
+    /*
+     * Creating a source appends to and re-sorts the vector, so the
+     * result is always looked up afresh rather than cached.
+     */
+    if (NULL == mfib_entry_src_find(mfib_entry, source, NULL))
+    {
+        mfib_entry_src_init(mfib_entry, source);
+    }
+
+    return (mfib_entry_src_find(mfib_entry, source, NULL));
+}
+
+static mfib_entry_src_t*
+mfib_entry_get_best_src (const mfib_entry_t *mfib_entry)
+{
+    /*
+     * The source enum is deliberately arranged in priority order and
+     * sources are sorted by it when added, so the first element - when
+     * there is one - is taken as the best source.
+     */
+
+    if (0 != vec_len(mfib_entry->mfe_srcs))
+    {
+        return (vec_elt_at_index(mfib_entry->mfe_srcs, 0));
+    }
+
+    /*
+     * no sources at all
+     */
+    return (NULL);
+}
+
+static void
+mfib_entry_src_flush (mfib_entry_src_t *msrc)
+{
+    u32 sw_if_index;
+    index_t mfii;
+
+    hash_foreach(sw_if_index, mfii, msrc->mfes_itfs, /* every itf this source holds */
+    ({
+        mfib_itf_delete(mfib_itf_get(mfii)); /* the hash itself is not emptied here */
+    }));
+}
+
+static void
+mfib_entry_src_remove (mfib_entry_t *mfib_entry,
+                       mfib_source_t source)
+{
+    mfib_entry_src_t *msrc;
+    u32 index = ~0;
+    msrc = mfib_entry_src_find(mfib_entry, source, &index);
+    if (NULL != msrc)
+    {
+        mfib_entry_src_flush(msrc);
+        /* vec_del1 back-fills the hole with the last element: restore order */
+        vec_del1(mfib_entry->mfe_srcs, index);
+        if (vec_len(mfib_entry->mfe_srcs) > 1)
+            vec_sort_with_function(mfib_entry->mfe_srcs, mfib_entry_src_cmp_for_sort);
+    }
+}
+
+static int
+mfib_entry_src_n_itfs (const mfib_entry_src_t *msrc)
+{
+    return (hash_elts(msrc->mfes_itfs)); /* number of interfaces in this source's hash */
+}
+
+
+static void
+mfib_entry_last_lock_gone (fib_node_t *node)
+{
+    mfib_entry_t *mfib_entry;
+    mfib_entry_src_t *msrc;
+
+    mfib_entry = mfib_entry_from_fib_node(node); /* vft hook for the last unlock */
+
+    dpo_reset(&mfib_entry->mfe_rep); /* drop the entry's forwarding DPO */
+
+    MFIB_ENTRY_DBG(mfib_entry, "last-lock");
+
+    vec_foreach(msrc, mfib_entry->mfe_srcs) /* delete every source's interfaces */
+    {
+        mfib_entry_src_flush(msrc);
+    }
+
+    fib_path_list_unlock(mfib_entry->mfe_parent); /* release the parent path-list */
+    vec_free(mfib_entry->mfe_srcs);
+
+    fib_node_deinit(&mfib_entry->mfe_node);
+    pool_put(mfib_entry_pool, mfib_entry); /* finally return the entry to the pool */
+}
+
+/*
+ * mfib_entry_back_walk_notify
+ *
+ * A back walk has reached this entry.
+ */
+static fib_node_back_walk_rc_t
+mfib_entry_back_walk_notify (fib_node_t *node,
+                            fib_node_back_walk_ctx_t *ctx)
+{
+    // FIXME - re-evaluate
+
+    return (FIB_NODE_BACK_WALK_CONTINUE); /* nothing is done yet; the walk continues */
+}
+
+static void
+mfib_entry_show_memory (void)
+{
+    fib_show_memory_usage("multicast-Entry", /* vft hook: report entry pool usage */
+                          pool_elts(mfib_entry_pool),
+                          pool_len(mfib_entry_pool),
+                          sizeof(mfib_entry_t));
+}
+
+/*
+ * The MFIB entry's graph node virtual function table
+ */
+static const fib_node_vft_t mfib_entry_vft = {
+    .fnv_get = mfib_entry_get_node,               /* index -> node */
+    .fnv_last_lock = mfib_entry_last_lock_gone,   /* tear-down on last unlock */
+    .fnv_back_walk = mfib_entry_back_walk_notify, /* back-walk notification */
+    .fnv_mem_show = mfib_entry_show_memory,       /* memory usage report */
+};
+
+u32
+mfib_entry_child_add (fib_node_index_t mfib_entry_index,
+                      fib_node_type_t child_type,
+                      fib_node_index_t child_index)
+{
+    return (fib_node_child_add(FIB_NODE_TYPE_MFIB_ENTRY, /* parent is always an MFIB entry */
+                               mfib_entry_index,
+                               child_type,
+                               child_index)); /* result is passed straight back to the caller */
+}
+
+void
+mfib_entry_child_remove (fib_node_index_t mfib_entry_index,
+                         u32 sibling_index)
+{
+    fib_node_child_remove(FIB_NODE_TYPE_MFIB_ENTRY, /* parent is always an MFIB entry */
+                          mfib_entry_index,
+                          sibling_index); /* presumably the value child_add returned - confirm */
+}
+
+static mfib_entry_t *
+mfib_entry_alloc (u32 fib_index,
+                  const mfib_prefix_t *prefix,
+                  fib_node_index_t *mfib_entry_index)
+{
+    mfib_entry_t *mfib_entry;
+
+    pool_get(mfib_entry_pool, mfib_entry); /* take a slot from the pool and zero it */
+    memset(mfib_entry, 0, sizeof(*mfib_entry));
+
+    fib_node_init(&mfib_entry->mfe_node,
+                  FIB_NODE_TYPE_MFIB_ENTRY);
+
+    mfib_entry->mfe_fib_index = fib_index;
+    mfib_entry->mfe_prefix = *prefix; /* the prefix is copied by value */
+    mfib_entry->mfe_parent = FIB_NODE_INDEX_INVALID; /* no path-list yet */
+
+    dpo_reset(&mfib_entry->mfe_rep);
+
+    *mfib_entry_index = mfib_entry_get_index(mfib_entry); /* out-param: the new index */
+
+    MFIB_ENTRY_DBG(mfib_entry, "alloc");
+
+    return (mfib_entry);
+}
+
+typedef struct mfib_entry_collect_forwarding_ctx_t_
+{
+    load_balance_path_t * next_hops; /* vector the path walk appends to */
+    fib_forward_chain_type_t fct;    /* chain type each path contributes for */
+} mfib_entry_collect_forwarding_ctx_t;
+
+static int
+mfib_entry_src_collect_forwarding (fib_node_index_t pl_index,
+                                   fib_node_index_t path_index,
+                                   void *arg)
+{
+    mfib_entry_collect_forwarding_ctx_t *ctx;
+    load_balance_path_t *nh;
+
+    ctx = arg;
+
+    /*
+     * if the path is not resolved, don't include it.
+     */
+    if (!fib_path_is_resolved(path_index))
+    {
+        return (!0); /* presumably non-zero keeps the path-list walk going - confirm */
+    }
+
+    switch (ctx->fct)
+    {
+    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+        /*
+         * multicast chains: append this path to the collected next-hops
+         */
+        vec_add2(ctx->next_hops, nh, 1);
+
+        nh->path_index = path_index;
+        nh->path_weight = fib_path_get_weight(path_index);
+        fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo);
+        break;
+
+    case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+    case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+    case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+    case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+    case FIB_FORW_CHAIN_TYPE_ETHERNET:
+        ASSERT(0); /* only the multicast chain types are handled here */
+        break;
+    }
+
+    return (!0);
+}
+
+static void
+mfib_entry_stack (mfib_entry_t *mfib_entry)
+{
+    mfib_entry_collect_forwarding_ctx_t ctx = {
+        .next_hops = NULL,
+        .fct = mfib_entry_get_default_chain_type(mfib_entry),
+    };
+    dpo_proto_t dp;
+
+    dp = fib_proto_to_dpo(mfib_entry_get_proto(mfib_entry));
+
+    if (FIB_NODE_INDEX_INVALID != mfib_entry->mfe_parent) /* there is a path-list */
+    {
+        fib_path_list_walk(mfib_entry->mfe_parent, /* gather the resolved paths */
+                           mfib_entry_src_collect_forwarding,
+                           &ctx);
+
+        if (!dpo_id_is_valid(&mfib_entry->mfe_rep) ||
+            dpo_is_drop(&mfib_entry->mfe_rep)) /* no replicate stacked yet */
+        {
+            dpo_id_t tmp_dpo = DPO_INVALID;
+
+            dpo_set(&tmp_dpo,
+                    DPO_REPLICATE, dp,
+                    replicate_create(0, dp));
+
+            dpo_stack(DPO_MFIB_ENTRY, dp,
+                      &mfib_entry->mfe_rep,
+                      &tmp_dpo);
+
+            dpo_reset(&tmp_dpo);
+        }
+        replicate_multipath_update(&mfib_entry->mfe_rep, /* NOTE(review): next_hops is not freed here - confirm the callee owns it */
+                                   ctx.next_hops);
+    }
+    else /* no path-list: the entry stacks on drop */
+    {
+        dpo_stack(DPO_MFIB_ENTRY, dp,
+                  &mfib_entry->mfe_rep,
+                  drop_dpo_get(dp));
+    }
+}
+
+static void
+mfib_entry_forwarding_path_add (mfib_entry_src_t *msrc,
+                                const fib_route_path_t *rpath)
+{
+    fib_node_index_t old_pl_index;
+    fib_route_path_t *rpaths;
+
+    /*
+     * path-lists require a vector of paths
+     */
+    rpaths = NULL;
+    vec_add1(rpaths, rpath[0]);
+
+    old_pl_index = msrc->mfes_pl;
+
+    if (FIB_NODE_INDEX_INVALID == msrc->mfes_pl) /* first path: make a new list */
+    {
+        msrc->mfes_pl =
+            fib_path_list_create(FIB_PATH_LIST_FLAG_NO_URPF,
+                                 rpaths);
+    }
+    else /* otherwise the current list is copied with the path added */
+    {
+        msrc->mfes_pl =
+            fib_path_list_copy_and_path_add(msrc->mfes_pl,
+                                            FIB_PATH_LIST_FLAG_NO_URPF,
+                                            rpaths);
+    }
+    fib_path_list_lock(msrc->mfes_pl); /* lock the new list before unlocking the old */
+    fib_path_list_unlock(old_pl_index);
+
+    vec_free(rpaths);
+}
+
+static int
+mfib_entry_forwarding_path_remove (mfib_entry_src_t *msrc,
+                                   const fib_route_path_t *rpath)
+{
+    fib_node_index_t old_pl_index;
+    fib_route_path_t *rpaths;
+
+    /*
+     * path-lists require a vector of paths
+     */
+    rpaths = NULL;
+    vec_add1(rpaths, rpath[0]);
+
+    old_pl_index = msrc->mfes_pl;
+
+    msrc->mfes_pl =
+        fib_path_list_copy_and_path_remove(msrc->mfes_pl,
+                                           FIB_PATH_LIST_FLAG_NONE, /* NOTE(review): the add path uses NO_URPF - confirm */
+                                           rpaths);
+
+    fib_path_list_lock(msrc->mfes_pl); /* lock the new list before unlocking the old */
+    fib_path_list_unlock(old_pl_index);
+
+    vec_free(rpaths);
+
+    return (FIB_NODE_INDEX_INVALID != msrc->mfes_pl); /* non-zero while a path-list remains */
+}
+
+static void
+mfib_entry_recalculate_forwarding (mfib_entry_t *mfib_entry)
+{
+    fib_node_index_t old_pl_index;
+    mfib_entry_src_t *bsrc;
+
+    old_pl_index = mfib_entry->mfe_parent;
+
+    /*
+     * copy the forwarding data from the best source
+     */
+    bsrc = mfib_entry_get_best_src(mfib_entry);
+
+    if (NULL == bsrc)
+    {
+        mfib_entry->mfe_parent = FIB_NODE_INDEX_INVALID; /* NOTE(review): flags and itfs keep their old values here */
+    }
+    else
+    {
+        mfib_entry->mfe_parent = bsrc->mfes_pl;
+        mfib_entry->mfe_flags = bsrc->mfes_flags;
+        mfib_entry->mfe_itfs = bsrc->mfes_itfs; /* shares the source's hash, no copy */
+    }
+
+    /*
+     * re-stack the entry on the best forwarding info.
+     */
+    if (old_pl_index != mfib_entry->mfe_parent ||
+        FIB_NODE_INDEX_INVALID == old_pl_index)
+    {
+        mfib_entry_stack(mfib_entry);
+
+        fib_path_list_lock(mfib_entry->mfe_parent); /* lock new before unlocking old */
+        fib_path_list_unlock(old_pl_index);
+    }
+}
+
+
+fib_node_index_t
+mfib_entry_create (u32 fib_index,
+                   mfib_source_t source,
+                   const mfib_prefix_t *prefix,
+                   mfib_entry_flags_t entry_flags)
+{
+    fib_node_index_t mfib_entry_index;
+    mfib_entry_t *mfib_entry;
+    mfib_entry_src_t *msrc;
+
+    mfib_entry = mfib_entry_alloc(fib_index, prefix, /* new entry for this prefix */
+                                  &mfib_entry_index);
+    msrc = mfib_entry_src_find_or_create(mfib_entry, source);
+    msrc->mfes_flags = entry_flags; /* the creating source supplies the flags */
+
+    mfib_entry_recalculate_forwarding(mfib_entry);
+
+    return (mfib_entry_index); /* the index, not a pointer, is returned */
+}
+
+static int
+mfib_entry_ok_for_delete (mfib_entry_t *mfib_entry)
+{
+    return (0 == vec_len(mfib_entry->mfe_srcs)); /* true once no source remains */
+}
+
+static int
+mfib_entry_src_ok_for_delete (const mfib_entry_src_t *msrc)
+{
+    return ((MFIB_ENTRY_FLAG_NONE == msrc->mfes_flags && /* no flags ... */
+             0 == mfib_entry_src_n_itfs(msrc)));         /* ... and no interfaces */
+}
+
+int
+mfib_entry_update (fib_node_index_t mfib_entry_index,
+                   mfib_source_t source,
+                   mfib_entry_flags_t entry_flags)
+{
+    mfib_entry_t *mfib_entry;
+    mfib_entry_src_t *msrc;
+
+    mfib_entry = mfib_entry_get(mfib_entry_index);
+    msrc = mfib_entry_src_find_or_create(mfib_entry, source);
+    msrc->mfes_flags = entry_flags; /* replaces, rather than merges, the flags */
+
+    if (mfib_entry_src_ok_for_delete(msrc))
+    {
+        /*
+         * this source has no interfaces and no flags.
+         * it has nothing left to give - remove it
+         */
+        mfib_entry_src_remove(mfib_entry, source);
+    }
+
+    mfib_entry_recalculate_forwarding(mfib_entry);
+
+    return (mfib_entry_ok_for_delete(mfib_entry)); /* non-zero: no sources are left */
+}
+
+static void
+mfib_entry_itf_add (mfib_entry_src_t *msrc,
+                    u32 sw_if_index,
+                    index_t mi)
+{
+    hash_set(msrc->mfes_itfs, sw_if_index, mi);
+}
+
+/* Delete the interface object for sw_if_index and drop its hash entry. */
+static void
+mfib_entry_itf_remove (mfib_entry_src_t *msrc,
+                       u32 sw_if_index)
+{
+    mfib_itf_t *itf = mfib_entry_itf_find(msrc->mfes_itfs, sw_if_index);
+
+    /* no NULL check: the visible caller gets here only after a find */
+    mfib_itf_delete(itf);
+
+    hash_unset(msrc->mfes_itfs, sw_if_index);
+}
+
+/*
+ * Add the path's interface to 'source', or update the flags of an
+ * interface the source already has, then recalculate forwarding.
+ */
+void
+mfib_entry_path_update (fib_node_index_t mfib_entry_index,
+                        mfib_source_t source,
+                        const fib_route_path_t *rpath,
+                        mfib_itf_flags_t itf_flags)
+{
+    mfib_entry_t *entry;
+    mfib_entry_src_t *src;
+    mfib_itf_t *itf;
+    u32 sw_if_index;
+    int want_fwd;
+
+    entry = mfib_entry_get(mfib_entry_index);
+    ASSERT(NULL != entry);
+    src = mfib_entry_src_find_or_create(entry, source);
+
+    sw_if_index = rpath[0].frp_sw_if_index;
+    want_fwd = !!(itf_flags & MFIB_ITF_FLAG_FORWARD);
+    itf = mfib_entry_itf_find(src->mfes_itfs, sw_if_index);
+
+    if (NULL != itf)
+    {
+        /* known interface: only a change in forwarding state matters */
+        int had_fwd = !!(itf->mfi_flags & MFIB_ITF_FLAG_FORWARD);
+
+        if (want_fwd && !had_fwd)
+        {
+            mfib_entry_forwarding_path_add(src, rpath);
+        }
+        else if (had_fwd && !want_fwd)
+        {
+            mfib_entry_forwarding_path_remove(src, rpath);
+        }
+        /*
+         * packets in flight see these updates.
+         */
+        itf->mfi_flags = itf_flags;
+    }
+    else
+    {
+        /*
+         * this is a path we do not yet have. If it is forwarding then we
+         * add it to the replication set
+         */
+        if (want_fwd)
+        {
+            mfib_entry_forwarding_path_add(src, rpath);
+        }
+        /* construct a new ITF for this source's interface hash */
+        mfib_entry_itf_add(src, sw_if_index,
+                           mfib_itf_create(sw_if_index, itf_flags));
+    }
+
+    mfib_entry_recalculate_forwarding(entry);
+}
+
+/*
+ * mfib_entry_path_remove
+ *
+ * remove a path from the entry.
+ * return non-zero if the entry has no sources left (ok to delete), else 0.
+ */
+int
+mfib_entry_path_remove (fib_node_index_t mfib_entry_index,
+                        mfib_source_t source,
+                        const fib_route_path_t *rpath)
+{
+    mfib_entry_t *mfib_entry;
+    mfib_entry_src_t *msrc;
+    mfib_itf_t *mfib_itf;
+
+    mfib_entry = mfib_entry_get(mfib_entry_index);
+    ASSERT(NULL != mfib_entry);
+    msrc = mfib_entry_src_find(mfib_entry, source, NULL);
+
+    if (NULL == msrc)
+    {
+        /*
+         * the source was not found on this entry - nothing to remove
+         */
+        return (mfib_entry_ok_for_delete(mfib_entry));
+    }
+
+    /*
+     * search for the interface in the current set
+     */
+    mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs,
+                                   rpath[0].frp_sw_if_index);
+
+    if (NULL == mfib_itf)
+    {
+        /*
+         * removing a path that does not exist
+         */
+        return (mfib_entry_ok_for_delete(mfib_entry));
+    }
+
+    /*
+     * we have this path. If it is forwarding then we
+     * remove it from the replication set
+     */
+    if (mfib_itf->mfi_flags & MFIB_ITF_FLAG_FORWARD)
+    {
+        mfib_entry_forwarding_path_remove(msrc, rpath);
+    }
+
+    /*
+     * remove the interface/path from this entry's list
+     */
+    mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index);
+
+    if (mfib_entry_src_ok_for_delete(msrc))
+    {
+        /*
+         * this source has no interfaces and no flags.
+         * it has nothing left to give - remove it
+         */
+        mfib_entry_src_remove(mfib_entry, source);
+    }
+
+    mfib_entry_recalculate_forwarding(mfib_entry);
+
+    return (mfib_entry_ok_for_delete(mfib_entry));
+}
+
+/**
+ * mfib_entry_delete
+ *
+ * Remove 'source' from the entry and recalculate its forwarding.
+ * Returns non-zero when no sources remain on the entry.
+ */
+int
+mfib_entry_delete (fib_node_index_t mfib_entry_index,
+                   mfib_source_t source)
+{
+    mfib_entry_t *entry = mfib_entry_get(mfib_entry_index);
+
+    mfib_entry_src_remove(entry, source);
+
+    mfib_entry_recalculate_forwarding(entry);
+
+    return (mfib_entry_ok_for_delete(entry));
+}
+
+static int
+fib_ip4_address_compare (ip4_address_t * a1,
+                         ip4_address_t * a2)
+{
+    /*
+     * IP addresses are unsigned ints, so a simple subtraction cannot give a
+     * signed result. Compare in host order and return 0 for equal addresses
+     * so that callers can fall through to their next sort key.
+     */
+    u32 h1 = clib_net_to_host_u32(a1->data_u32);
+    u32 h2 = clib_net_to_host_u32(a2->data_u32);
+    return ((h1 > h2) - (h1 < h2));
+}
+
+/* Compare two IPv6 addresses 16 bits at a time, in array order. */
+static int
+fib_ip6_address_compare (ip6_address_t * a1,
+                         ip6_address_t * a2)
+{
+    int word, diff = 0;
+    /* stop at the first pair of words that differ */
+    for (word = 0; 0 == diff && word < ARRAY_LEN (a1->as_u16); word++)
+    {
+        diff = (clib_net_to_host_u16 (a1->as_u16[word]) -
+                clib_net_to_host_u16 (a2->as_u16[word]));
+    }
+    return diff;
+}
+
+/*
+ * Order two entries: group address, then (when the groups compare
+ * equal) source address, then (when still equal) prefix length.
+ */
+static int
+mfib_entry_cmp (fib_node_index_t mfib_entry_index1,
+                fib_node_index_t mfib_entry_index2)
+{
+    mfib_prefix_t *pfx1, *pfx2;
+    int cmp = 0;
+
+    pfx1 = &mfib_entry_get(mfib_entry_index1)->mfe_prefix;
+    pfx2 = &mfib_entry_get(mfib_entry_index2)->mfe_prefix;
+
+    switch (pfx1->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        cmp = fib_ip4_address_compare(&pfx1->fp_grp_addr.ip4,
+                                      &pfx2->fp_grp_addr.ip4);
+        if (0 == cmp)
+        {
+            cmp = fib_ip4_address_compare(&pfx1->fp_src_addr.ip4,
+                                          &pfx2->fp_src_addr.ip4);
+        }
+        break;
+    case FIB_PROTOCOL_IP6:
+        cmp = fib_ip6_address_compare(&pfx1->fp_grp_addr.ip6,
+                                      &pfx2->fp_grp_addr.ip6);
+        if (0 == cmp)
+        {
+            cmp = fib_ip6_address_compare(&pfx1->fp_src_addr.ip6,
+                                          &pfx2->fp_src_addr.ip6);
+        }
+        break;
+    case FIB_PROTOCOL_MPLS:
+        /* asserted as unexpected for an mfib entry */
+        ASSERT(0);
+        cmp = 0;
+        break;
+    }
+
+    return (0 != cmp ? cmp : (pfx1->fp_len - pfx2->fp_len));
+}
+
+/* Sort callback: i1 and i2 each point at an mfib entry index. */
+int
+mfib_entry_cmp_for_sort (void *i1, void *i2)
+{
+    const fib_node_index_t *lhs = i1;
+    const fib_node_index_t *rhs = i2;
+    return (mfib_entry_cmp(*lhs, *rhs));
+}
+
+/*
+ * Lock the fib_node_t embedded in the entry.
+ */
+void
+mfib_entry_lock (fib_node_index_t mfib_entry_index)
+{
+    mfib_entry_t *entry = mfib_entry_get(mfib_entry_index);
+    fib_node_lock(&entry->mfe_node);
+}
+
+/*
+ * Unlock the fib_node_t embedded in the entry.
+ */
+void
+mfib_entry_unlock (fib_node_index_t mfib_entry_index)
+{
+    mfib_entry_t *entry = mfib_entry_get(mfib_entry_index);
+    fib_node_unlock(&entry->mfe_node);
+}
+
+static void
+mfib_entry_dpo_lock (dpo_id_t *dpo)
+{
+}
+static void
+mfib_entry_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+const static dpo_vft_t mfib_entry_dpo_vft = {
+    .dv_lock = mfib_entry_dpo_lock,
+    .dv_unlock = mfib_entry_dpo_unlock,
+    .dv_format = format_mfib_entry_dpo,
+    .dv_mem_show = mfib_entry_show_memory,
+};
+
+const static char* const mfib_entry_ip4_nodes[] =
+{
+    "ip4-mfib-forward-rpf",
+    NULL,
+};
+const static char* const mfib_entry_ip6_nodes[] =
+{
+    "ip6-mfib-forward-rpf",
+    NULL,
+};
+
+const static char* const * const mfib_entry_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = mfib_entry_ip4_nodes,
+    [DPO_PROTO_IP6]  = mfib_entry_ip6_nodes,
+};
+
+void
+mfib_entry_module_init (void)
+{
+    fib_node_register_type (FIB_NODE_TYPE_MFIB_ENTRY, &mfib_entry_vft);
+    dpo_register(DPO_MFIB_ENTRY, &mfib_entry_dpo_vft, mfib_entry_nodes);
+}
+
+/* Walk the entry's parent path-list, passing api_rpaths to fib_path_encode. */
+void
+mfib_entry_encode (fib_node_index_t mfib_entry_index,
+                  fib_route_path_encode_t **api_rpaths)
+{
+    const mfib_entry_t *entry = mfib_entry_get(mfib_entry_index);
+
+    fib_path_list_walk(entry->mfe_parent, fib_path_encode, api_rpaths);
+}
+
+/* Copy the entry's prefix into *pfx. */
+void
+mfib_entry_get_prefix (fib_node_index_t mfib_entry_index,
+                      mfib_prefix_t *pfx)
+{
+    const mfib_entry_t *entry = mfib_entry_get(mfib_entry_index);
+
+    *pfx = entry->mfe_prefix;
+}
+
+/* Return the index of the FIB table the entry is in. */
+u32
+mfib_entry_get_fib_index (fib_node_index_t mfib_entry_index)
+{
+    const mfib_entry_t *entry;
+
+    entry = mfib_entry_get(mfib_entry_index);
+    return (entry->mfe_fib_index);
+}
+
+/*
+ * An IP mFIB entry can only provide a forwarding chain that is the
+ * same IP proto as its prefix; any other chain type requested is given
+ * the drop DPO for that proto.
+ */
+void
+mfib_entry_contribute_forwarding (fib_node_index_t mfib_entry_index,
+                                  fib_forward_chain_type_t type,
+                                  dpo_id_t *dpo)
+{
+    const mfib_entry_t *entry = mfib_entry_get(mfib_entry_index);
+    dpo_proto_t proto = fib_proto_to_dpo(entry->mfe_prefix.fp_proto);
+    const dpo_id_t *fwd;
+
+    if (type == fib_forw_chain_type_from_dpo_proto(proto))
+    {
+        /* matching chain type: hand out the entry's replicate DPO */
+        fwd = &entry->mfe_rep;
+    }
+    else
+    {
+        fwd = drop_dpo_get(proto);
+    }
+
+    dpo_copy(dpo, fwd);
+}
+
+u32
+mfib_entry_pool_size (void)
+{
+    return (pool_elts(mfib_entry_pool));
+}
+
+static clib_error_t *
+show_mfib_entry_command (vlib_main_t * vm,
+                        unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+    fib_node_index_t fei;
+    /* CLI handler: an optional entry index may follow the command */
+    if (unformat (input, "%d", &fei))
+    {
+        /*
+         * an index was given: show that one entry in detail, if in use
+         */
+        if (!pool_is_free_index(mfib_entry_pool, fei))
+        {
+            vlib_cli_output (vm, "%d@%U",
+                             fei,
+                             format_mfib_entry, fei,
+                             MFIB_ENTRY_FORMAT_DETAIL2);
+        }
+        else
+        {
+            vlib_cli_output (vm, "entry %d invalid", fei);
+        }
+    }
+    else
+    {
+        /*
+         * no index given: show every entry in the pool, in brief
+         */
+        vlib_cli_output (vm, "FIB Entries:");
+        pool_foreach_index(fei, mfib_entry_pool,
+        ({
+            vlib_cli_output (vm, "%d@%U",
+                             fei,
+                             format_mfib_entry, fei,
+                             MFIB_ENTRY_FORMAT_BRIEF);
+        }));
+    }
+
+    return (NULL);
+}
+
+VLIB_CLI_COMMAND (show_mfib_entry, static) = {
+  .path = "show mfib entry",
+  .function = show_mfib_entry_command,
+  .short_help = "show mfib entry",
+};
diff --git a/src/vnet/mfib/mfib_entry.h b/src/vnet/mfib/mfib_entry.h
new file mode 100644
index 00000000..cc5d5326
--- /dev/null
+++ b/src/vnet/mfib/mfib_entry.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MFIB_ENTRY_H__
+#define __MFIB_ENTRY_H__
+
+#include <vnet/fib/fib_node.h>
+#include <vnet/mfib/mfib_types.h>
+#include <vnet/mfib/mfib_itf.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * An entry in a FIB table.
+ *
+ * This entry represents a route added to the FIB that is stored
+ * in one of the FIB tables.
+ */
+typedef struct mfib_entry_t_ {
+    CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
+    /**
+     * Base class. The entry's node representation in the graph.
+     */
+    fib_node_t mfe_node;
+    /**
+     * The prefix of the route
+     */
+    mfib_prefix_t mfe_prefix;
+    /**
+     * The index of the FIB table this entry is in
+     */
+    u32 mfe_fib_index;
+    /**
+     * the path-list for which this entry is a child. This is also the path-list
+     * that is contributing forwarding for this entry.
+     */
+    fib_node_index_t mfe_parent;
+    /**
+     * index of this entry in the parent's child list.
+     * This is set when this entry is added as a child, but can also
+     * be changed by the parent as it manages its list.
+     */
+    u32 mfe_sibling;
+
+    /**
+     * A vector of sources contributing forwarding
+     */
+    struct mfib_entry_src_t_ *mfe_srcs;
+
+    /**
+     * 2nd cache line has the members used in the data plane
+     */
+    CLIB_CACHE_LINE_ALIGN_MARK(cacheline1);
+
+    /**
+     * The Replicate used for forwarding.
+     */
+    dpo_id_t mfe_rep;
+
+    /**
+     * Route flags
+     */
+    mfib_entry_flags_t mfe_flags;
+
+    /**
+     * A hash table of interfaces
+     */
+    mfib_itf_t *mfe_itfs;
+} mfib_entry_t;
+
+#define MFIB_ENTRY_FORMAT_BRIEF   (0x0)
+#define MFIB_ENTRY_FORMAT_DETAIL  (0x1)
+#define MFIB_ENTRY_FORMAT_DETAIL2 (0x2)
+
+extern u8 *format_mfib_entry(u8 * s, va_list * args);
+
+
+extern fib_node_index_t mfib_entry_create(u32 fib_index,
+                                          mfib_source_t source,
+                                          const mfib_prefix_t *prefix,
+                                          mfib_entry_flags_t entry_flags);
+
+extern int mfib_entry_update(fib_node_index_t fib_entry_index,
+                             mfib_source_t source,
+                             mfib_entry_flags_t entry_flags);
+
+extern void mfib_entry_path_update(fib_node_index_t fib_entry_index,
+                                   mfib_source_t source,
+                                   const fib_route_path_t *rpath,
+                                   mfib_itf_flags_t itf_flags);
+
+
+extern int mfib_entry_path_remove(fib_node_index_t fib_entry_index,
+                                  mfib_source_t source,
+                                  const fib_route_path_t *rpath);
+
+extern int mfib_entry_delete(fib_node_index_t mfib_entry_index,
+                             mfib_source_t source);
+
+extern int mfib_entry_cmp_for_sort(void *i1, void *i2);
+
+extern u32 mfib_entry_child_add(fib_node_index_t mfib_entry_index,
+                                fib_node_type_t type,
+                                fib_node_index_t child_index);
+extern void mfib_entry_child_remove(fib_node_index_t mfib_entry_index,
+                                    u32 sibling_index);
+
+extern void mfib_entry_lock(fib_node_index_t fib_entry_index);
+extern void mfib_entry_unlock(fib_node_index_t fib_entry_index);
+
+extern void mfib_entry_get_prefix(fib_node_index_t fib_entry_index,
+                                  mfib_prefix_t *pfx);
+extern u32 mfib_entry_get_fib_index(fib_node_index_t fib_entry_index);
+
+extern void mfib_entry_contribute_forwarding(
+    fib_node_index_t mfib_entry_index,
+    fib_forward_chain_type_t type,
+    dpo_id_t *dpo);
+
+extern void mfib_entry_module_init(void);
+
+
+extern mfib_entry_t *mfib_entry_pool;
+
+static inline mfib_entry_t *
+mfib_entry_get (fib_node_index_t index)
+{
+    return (pool_elt_at_index(mfib_entry_pool, index));
+}
+static inline fib_node_index_t
+mfib_entry_get_index (const mfib_entry_t *mfe)
+{
+    return (mfe - mfib_entry_pool);
+}
+
+
+/*
+ * Look up sw_if_index in the interface hash 'itfs'.
+ * Returns the matching mfib_itf_t, or NULL when there is no entry.
+ */
+static inline mfib_itf_t *
+mfib_entry_itf_find (mfib_itf_t *itfs,
+                     u32 sw_if_index)
+{
+    uword *slot = hash_get(itfs, sw_if_index);
+
+    /*
+     * the value stored in the hash is the interface object's index
+     */
+    return ((NULL == slot) ? NULL : mfib_itf_get(*slot));
+}
+
+static inline mfib_itf_t *
+mfib_entry_get_itf (const mfib_entry_t *mfe,
+                    u32 sw_if_index)
+{
+    return (mfib_entry_itf_find(mfe->mfe_itfs, sw_if_index));
+}
+
+#endif
diff --git a/src/vnet/mfib/mfib_forward.c b/src/vnet/mfib/mfib_forward.c
new file mode 100644
index 00000000..5fe0a57c
--- /dev/null
+++ b/src/vnet/mfib/mfib_forward.c
@@ -0,0 +1,512 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mfib/mfib_itf.h>
+#include <vnet/mfib/mfib_entry.h>
+#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/mfib/ip4_mfib.h>
+#include <vnet/mfib/ip6_mfib.h>
+#include <vnet/mfib/mfib_signal.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+
+#include <vnet/ip/ip4.h>
+#include <vnet/vnet.h>
+
+typedef struct mfib_forward_lookup_trace_t_ {
+    u32 entry_index;
+    u32 fib_index;
+} mfib_forward_lookup_trace_t;
+
+/* Render a lookup trace record as "fib <index> entry <index>". */
+static u8 *
+format_mfib_forward_lookup_trace (u8 * s, va_list * args)
+{
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    mfib_forward_lookup_trace_t * trace;
+    trace = va_arg (*args, mfib_forward_lookup_trace_t *);
+    return format (s, "fib %d entry %d", trace->fib_index, trace->entry_index);
+}
+
+/*
+ * Common trace function for the mfib-forward-lookup nodes.
+ * For every traced buffer in the frame, record the mfib entry index the
+ * lookup stored in ip.adj_index[VLIB_TX] and the mfib table index bound
+ * to the buffer's own RX interface.
+ */
+void
+mfib_forward_lookup_trace (vlib_main_t * vm,
+                           vlib_node_runtime_t * node,
+                           vlib_frame_t * frame)
+{
+    u32 * from, n_left;
+    /* NOTE(review): the ip6 lookup node also calls this, but only the ip4
+     * interface-to-table map is consulted here - confirm. */
+    ip4_main_t * im = &ip4_main;
+
+    n_left = frame->n_vectors;
+    from = vlib_frame_vector_args (frame);
+
+    while (n_left >= 4)
+    {
+        mfib_forward_lookup_trace_t * t0, * t1;
+        vlib_buffer_t * b0, * b1;
+
+        /* Prefetch next iteration. */
+        vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+        vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
+
+        b0 = vlib_get_buffer (vm, from[0]);
+        b1 = vlib_get_buffer (vm, from[1]);
+
+        if (b0->flags & VLIB_BUFFER_IS_TRACED)
+        {
+            t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+            t0->entry_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+            t0->fib_index = vec_elt (im->mfib_index_by_sw_if_index,
+                                     vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+        }
+        if (b1->flags & VLIB_BUFFER_IS_TRACED)
+        {
+            t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
+            t1->entry_index = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
+            t1->fib_index = vec_elt (im->mfib_index_by_sw_if_index,
+                                     vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+        }
+        from += 2;
+        n_left -= 2;
+    }
+
+    while (n_left >= 1)
+    {
+        mfib_forward_lookup_trace_t * t0;
+        vlib_buffer_t * b0;
+
+        b0 = vlib_get_buffer (vm, from[0]);
+
+        if (b0->flags & VLIB_BUFFER_IS_TRACED)
+        {
+            t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+            t0->entry_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+            t0->fib_index = vec_elt (im->mfib_index_by_sw_if_index,
+                                     vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+        }
+        from += 1;
+        n_left -= 1;
+    }
+}
+
+typedef enum mfib_forward_lookup_next_t_ {
+    MFIB_FORWARD_LOOKUP_NEXT_RPF,
+    MFIB_FORWARD_LOOKUP_N_NEXT,
+} mfib_forward_lookup_next_t;
+
+static uword
+mfib_forward_lookup (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * frame,
+                     int is_v4)
+{
+    u32 n_left_from, n_left_to_next, * from, * to_next;
+    /* mfib lookup of each packet; every packet goes on to the RPF node */
+    from = vlib_frame_vector_args (frame);
+    n_left_from = frame->n_vectors;
+
+    while (n_left_from > 0)
+    {
+        vlib_get_next_frame (vm, node, MFIB_FORWARD_LOOKUP_NEXT_RPF,
+                             to_next, n_left_to_next);
+
+        while (n_left_from > 0 && n_left_to_next > 0)
+        {
+            fib_node_index_t mfei0;
+            vlib_buffer_t * p0;
+            u32 fib_index0;
+            u32 pi0;
+
+            pi0 = from[0];
+            to_next[0] = pi0;
+            from += 1;
+            to_next += 1;
+            n_left_to_next -= 1;
+            n_left_from -= 1;
+
+            p0 = vlib_get_buffer (vm, pi0);
+            /* (src, dst) lookup in the mfib bound to the RX interface */
+            if (is_v4)
+            {
+                ip4_header_t * ip0;
+
+                fib_index0 = vec_elt (ip4_main.mfib_index_by_sw_if_index,
+                                      vnet_buffer(p0)->sw_if_index[VLIB_RX]);
+                ip0 = vlib_buffer_get_current (p0);
+                mfei0 = ip4_mfib_table_lookup(ip4_mfib_get(fib_index0),
+                                              &ip0->src_address,
+                                              &ip0->dst_address,
+                                              64); /* presumably the full (S,G) mask length - confirm */
+            }
+            else
+            {
+                ip6_header_t * ip0;
+
+                fib_index0 = vec_elt (ip6_main.mfib_index_by_sw_if_index,
+                                      vnet_buffer(p0)->sw_if_index[VLIB_RX]);
+                ip0 = vlib_buffer_get_current (p0);
+                mfei0 = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index0),
+                                               &ip0->src_address,
+                                               &ip0->dst_address);
+            }
+            /* the RPF node reads the entry index from adj_index[VLIB_TX] */
+            vnet_buffer (p0)->ip.adj_index[VLIB_TX] = mfei0;
+        }
+
+        vlib_put_next_frame(vm, node,
+                            MFIB_FORWARD_LOOKUP_NEXT_RPF,
+                            n_left_to_next);
+    }
+
+    if (node->flags & VLIB_NODE_FLAG_TRACE)
+        mfib_forward_lookup_trace(vm, node, frame);
+
+    return frame->n_vectors;
+}
+
+static uword
+ip4_mfib_forward_lookup (vlib_main_t * vm,
+                         vlib_node_runtime_t * node,
+                         vlib_frame_t * frame)
+{
+    return (mfib_forward_lookup (vm, node, frame, 1));
+}
+
+VLIB_REGISTER_NODE (ip4_mfib_forward_lookup_node, static) = {
+    .function = ip4_mfib_forward_lookup,
+    .name = "ip4-mfib-forward-lookup",
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_mfib_forward_lookup_trace,
+
+    .n_next_nodes = MFIB_FORWARD_LOOKUP_N_NEXT,
+    .next_nodes = {
+        [MFIB_FORWARD_LOOKUP_NEXT_RPF] = "ip4-mfib-forward-rpf",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_mfib_forward_lookup_node,
+                              ip4_mfib_forward_lookup)
+
+static uword
+ip6_mfib_forward_lookup (vlib_main_t * vm,
+                         vlib_node_runtime_t * node,
+                         vlib_frame_t * frame)
+{
+    return (mfib_forward_lookup (vm, node, frame, 0));
+}
+
+VLIB_REGISTER_NODE (ip6_mfib_forward_lookup_node, static) = {
+    .function = ip6_mfib_forward_lookup,
+    .name = "ip6-mfib-forward-lookup",
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_mfib_forward_lookup_trace,
+
+    .n_next_nodes = MFIB_FORWARD_LOOKUP_N_NEXT,
+    .next_nodes = {
+        [MFIB_FORWARD_LOOKUP_NEXT_RPF] = "ip6-mfib-forward-rpf",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_mfib_forward_lookup_node,
+                              ip6_mfib_forward_lookup)
+
+
+typedef struct mfib_forward_rpf_trace_t_ {
+    u32 entry_index;
+    u32 sw_if_index;
+    mfib_itf_flags_t itf_flags;
+} mfib_forward_rpf_trace_t;
+
+typedef enum mfib_forward_rpf_next_t_ {
+    MFIB_FORWARD_RPF_NEXT_DROP,
+    MFIB_FORWARD_RPF_N_NEXT,
+} mfib_forward_rpf_next_t;
+
+static u8 *
+format_mfib_forward_rpf_trace (u8 * s, va_list * args)
+{
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    mfib_forward_rpf_trace_t * t = va_arg (*args, mfib_forward_rpf_trace_t *);
+
+    s = format (s, "entry %d", t->entry_index);
+    s = format (s, " %d", t->sw_if_index);
+    s = format (s, " %U", format_mfib_itf_flags, t->itf_flags);
+
+    return s;
+}
+
+/*
+ * Look up the source address of the IP packet in the unicast FIB
+ * bound to sw_if_index. Returns non-zero if the matching entry is
+ * attached. Only IPv4 is implemented; the IPv6 case asserts and
+ * returns 0.
+ */
+static int
+mfib_forward_connected_check (vlib_buffer_t * b0,
+                              u32 sw_if_index,
+                              int is_v4)
+{
+    const load_balance_t *lb;
+    ip4_header_t *ip4;
+    u32 fib_index;
+    index_t lbi;
+
+    if (!is_v4)
+    {
+        /* not implemented for IPv6 */
+        ASSERT(0);
+        return (0);
+    }
+
+    ip4 = vlib_buffer_get_current(b0);
+
+    /* unicast lookup of the source in the table bound to sw_if_index */
+    fib_index = ip4_fib_table_get_index_for_sw_if_index(sw_if_index);
+    lbi = ip4_fib_forwarding_lookup(fib_index, &ip4->src_address);
+    lb = load_balance_get(lbi);
+
+    /* the attached flag is read from the load-balance's FIB entry flags */
+    return (FIB_ENTRY_FLAG_ATTACHED & lb->lb_fib_entry_flags);
+}
+
+/*
+ * Raise a signal for the control plane on interface mfi of entry mfe.
+ *
+ * The signal-present flag is set atomically; only the caller that
+ * actually flips it pushes the signal, so at most one signal is
+ * outstanding per interface until the flag is cleared again.
+ *
+ * vm is unused here.
+ */
+static void
+mfib_forward_itf_signal (vlib_main_t *vm,
+                         const mfib_entry_t *mfe,
+                         mfib_itf_t *mfi,
+                         vlib_buffer_t *b0)
+{
+    mfib_itf_flags_t prev_flags;
+    vlib_buffer_t *preserved;
+
+    prev_flags = __sync_fetch_and_or(&mfi->mfi_flags,
+                                     MFIB_ITF_FLAG_SIGNAL_PRESENT);
+
+    if (prev_flags & MFIB_ITF_FLAG_SIGNAL_PRESENT)
+    {
+        /*
+         * there is already a signal present on this interface that the
+         * control plane has not yet acknowledged
+         */
+        return;
+    }
+
+    /*
+     * we were the lucky ones to set the signal present flag.
+     * Unless told not to, preserve a copy of the packet for the control
+     * plane to examine. Only one packet is preserved at a time, since
+     * when the signal present flag is cleared so is the preserved packet.
+     * With DONT_PRESERVE the control plane just wants the signal.
+     */
+    preserved = (prev_flags & MFIB_ITF_FLAG_DONT_PRESERVE) ? NULL : b0;
+
+    mfib_signal_push(mfe, mfi, preserved);
+}
+
+/*
+ * RPF check and forward. The lookup node left the mfib entry index in
+ * ip.adj_index[VLIB_TX]. A packet is passed on to the entry's replicate
+ * DPO when its RX interface has the accept flag, or the entry accepts
+ * on all interfaces; otherwise it is dropped. Signals for the control
+ * plane are raised on the RX interface's ITF object where flagged.
+ */
+always_inline uword
+mfib_forward_rpf (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_frame_t * frame,
+                  int is_v4)
+{
+    u32 n_left_from, n_left_to_next, * from, * to_next;
+    mfib_forward_rpf_next_t next;
+
+    from = vlib_frame_vector_args (frame);
+    n_left_from = frame->n_vectors;
+    next = MFIB_FORWARD_RPF_NEXT_DROP;
+
+    while (n_left_from > 0)
+    {
+        vlib_get_next_frame (vm, node, next,
+                             to_next, n_left_to_next);
+
+        while (n_left_from > 0 && n_left_to_next > 0)
+        {
+            fib_node_index_t mfei0;
+            const mfib_entry_t *mfe0;
+            mfib_itf_t *mfi0;
+            vlib_buffer_t * b0;
+            u32 pi0, next0;
+            mfib_itf_flags_t iflags0;
+            mfib_entry_flags_t eflags0;
+
+            pi0 = from[0];
+            to_next[0] = pi0;
+            from += 1;
+            to_next += 1;
+            n_left_to_next -= 1;
+            n_left_from -= 1;
+
+            b0 = vlib_get_buffer (vm, pi0);
+            mfei0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+            mfe0 = mfib_entry_get(mfei0);
+            mfi0 = mfib_entry_get_itf(mfe0,
+                                      vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+
+            /*
+             * throughout this function we are 'PREDICT' optimising
+             * for the case of throughput traffic that is not replicated
+             * to the host stack nor sets local flags
+             */
+            iflags0 = (PREDICT_TRUE(NULL != mfi0) ?
+                       mfi0->mfi_flags : MFIB_ITF_FLAG_NONE);
+            eflags0 = mfe0->mfe_flags;
+
+            if (PREDICT_FALSE(eflags0 & MFIB_ENTRY_FLAG_CONNECTED))
+            {
+                /*
+                 * lookup the source in the unicast FIB - check it
+                 * matches a connected. No ITF, nothing to signal on.
+                 */
+                if (NULL != mfi0 &&
+                    mfib_forward_connected_check(
+                        b0, vnet_buffer(b0)->sw_if_index[VLIB_RX], is_v4))
+                {
+                    mfib_forward_itf_signal(vm, mfe0, mfi0, b0);
+                }
+            }
+            if (PREDICT_FALSE((eflags0 & MFIB_ENTRY_FLAG_SIGNAL) ^
+                              (iflags0 & MFIB_ITF_FLAG_NEGATE_SIGNAL)))
+            {
+                /*
+                 * Entry signal XOR interface negate-signal
+                 */
+                if (NULL != mfi0)
+                {
+                    mfib_forward_itf_signal(vm, mfe0, mfi0, b0);
+                }
+            }
+
+            if (PREDICT_TRUE((iflags0 & MFIB_ITF_FLAG_ACCEPT) ||
+                             (eflags0 & MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF)))
+            {
+                /*
+                 * This interface is accepting packets for the matching entry
+                 */
+                next0 = mfe0->mfe_rep.dpoi_next_node;
+
+                vnet_buffer(b0)->ip.adj_index[VLIB_TX] =
+                    mfe0->mfe_rep.dpoi_index;
+            }
+            else
+            {
+                next0 = MFIB_FORWARD_RPF_NEXT_DROP;
+            }
+
+            if (b0->flags & VLIB_BUFFER_IS_TRACED)
+            {
+                mfib_forward_rpf_trace_t *t0;
+
+                t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+                t0->entry_index = mfei0;
+                if (NULL == mfi0)
+                {
+                    t0->sw_if_index = ~0;
+                    t0->itf_flags = MFIB_ITF_FLAG_NONE;
+                }
+                else
+                {
+                    t0->sw_if_index = mfi0->mfi_sw_if_index;
+                    t0->itf_flags = mfi0->mfi_flags;
+                }
+            }
+            vlib_validate_buffer_enqueue_x1 (vm, node, next,
+                                             to_next, n_left_to_next,
+                                             pi0, next0);
+        }
+
+        vlib_put_next_frame(vm, node, next, n_left_to_next);
+    }
+
+    return frame->n_vectors;
+}
+
+static uword
+ip4_mfib_forward_rpf (vlib_main_t * vm,
+                      vlib_node_runtime_t * node,
+                      vlib_frame_t * frame)
+{
+    return (mfib_forward_rpf(vm, node, frame, 1));
+}
+
+
+VLIB_REGISTER_NODE (ip4_mfib_forward_rpf_node, static) = {
+    .function = ip4_mfib_forward_rpf,
+    .name = "ip4-mfib-forward-rpf",
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_mfib_forward_rpf_trace,
+
+    .n_next_nodes = MFIB_FORWARD_RPF_N_NEXT,
+    .next_nodes = {
+        [MFIB_FORWARD_RPF_NEXT_DROP] = "error-drop",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_mfib_forward_rpf_node,
+                              ip4_mfib_forward_rpf)
+
+static uword
+ip6_mfib_forward_rpf (vlib_main_t * vm,
+                      vlib_node_runtime_t * node,
+                      vlib_frame_t * frame)
+{
+    return (mfib_forward_rpf(vm, node, frame, 0 /* is_v4: this is IPv6 */));
+}
+
+
+VLIB_REGISTER_NODE (ip6_mfib_forward_rpf_node, static) = {
+    .function = ip6_mfib_forward_rpf,
+    .name = "ip6-mfib-forward-rpf",
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_mfib_forward_rpf_trace,
+
+    .n_next_nodes = MFIB_FORWARD_RPF_N_NEXT,
+    .next_nodes = {
+        [MFIB_FORWARD_RPF_NEXT_DROP] = "error-drop",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_mfib_forward_rpf_node,
+                              ip6_mfib_forward_rpf)
+
diff --git a/src/vnet/mfib/mfib_itf.c b/src/vnet/mfib/mfib_itf.c
new file mode 100644
index 00000000..b9fa1ec6
--- /dev/null
+++ b/src/vnet/mfib/mfib_itf.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <vnet/mfib/mfib_itf.h>
+#include <vnet/mfib/mfib_signal.h>
+
+mfib_itf_t *mfib_itf_pool;
+
+index_t
+mfib_itf_create (u32 sw_if_index,
+                 mfib_itf_flags_t mfi_flags)
+{
+    mfib_itf_t *mfib_itf;
+
+    pool_get_aligned(mfib_itf_pool, mfib_itf,
+                     CLIB_CACHE_LINE_BYTES);
+
+    mfib_itf->mfi_sw_if_index = sw_if_index;
+    mfib_itf->mfi_flags = mfi_flags;
+    mfib_itf->mfi_si = INDEX_INVALID;
+
+    return (mfib_itf - mfib_itf_pool);
+}
+
+void
+mfib_itf_delete (mfib_itf_t *mfi)
+{
+    mfib_signal_remove_itf(mfi);
+    pool_put(mfib_itf_pool, mfi);
+}
+
+/**
+ * @brief Format an MFIB interface, given its pool index (an index_t), as
+ * " <interface-name>: <flags>", or " local: <flags>" if it has no SW interface
+ */
+u8 *
+format_mfib_itf (u8 * s, va_list * args)
+{
+    index_t mfi = va_arg (*args, index_t);
+    mfib_itf_t *mfib_itf = mfib_itf_get(mfi);
+    vnet_main_t *vnm = vnet_get_main();
+
+    if (~0 == mfib_itf->mfi_sw_if_index)
+    {
+        /* no SW interface is associated with this MFIB interface */
+        s = format(s, " local: %U",
+                   format_mfib_itf_flags, mfib_itf->mfi_flags);
+    }
+    else
+    {
+        s = format(s, " %U: %U",
+                   format_vnet_sw_interface_name,
+                   vnm,
+                   vnet_get_sw_interface(vnm, mfib_itf->mfi_sw_if_index),
+                   format_mfib_itf_flags, mfib_itf->mfi_flags);
+    }
+
+    return (s);
+}
+
+static clib_error_t *
+show_mfib_itf_command (vlib_main_t * vm,
+                        unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+    index_t mfii;
+
+    if (unformat (input, "%d", &mfii))
+    {
+        /*
+         * show one in detail
+         */
+        if (!pool_is_free_index(mfib_itf_pool, mfii))
+        {
+            vlib_cli_output (vm, "%d@%U",
+                             mfii,
+                             format_mfib_itf, mfii);
+        }
+        else
+        {
+            vlib_cli_output (vm, "itf %d invalid", mfii);
+        }
+    }
+    else
+    {
+        /*
+         * show all
+         */
+        vlib_cli_output (vm, "mFIB interfaces::");
+        pool_foreach_index(mfii, mfib_itf_pool,
+        ({
+            vlib_cli_output (vm, "%d@%U",
+                             mfii,
+                             format_mfib_itf, mfii);
+        }));
+    }
+
+    return (NULL);
+}
+
+VLIB_CLI_COMMAND (show_mfib_itf, static) = {
+  .path = "show mfib interface",
+  .function = show_mfib_itf_command,
+  .short_help = "show mfib interface",
+};
diff --git a/src/vnet/mfib/mfib_itf.h b/src/vnet/mfib/mfib_itf.h
new file mode 100644
index 00000000..5f26a476
--- /dev/null
+++ b/src/vnet/mfib/mfib_itf.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MFIB_ITF_H__
+#define __MFIB_ITF_H__
+
+#include <vlib/vlib.h>
+#include <vnet/mfib/mfib_types.h>
+
+/**
+ * @brief An interface associated with a particular MFIB entry
+ */
+typedef struct mfib_itf_t_
+{
+    /**
+     * @brief Flags on the interface
+     */
+    mfib_itf_flags_t mfi_flags;
+
+    /**
+     * The SW IF index that this MFIB interface represents
+     */
+    u32 mfi_sw_if_index;
+
+    /**
+     * The index of the signal in the pending list
+     */
+    u32 mfi_si;
+} mfib_itf_t;
+
+
+extern index_t mfib_itf_create(u32 sw_if_index,
+                               mfib_itf_flags_t mfi_flags);
+extern void mfib_itf_delete(mfib_itf_t *mfi);
+
+extern u8 *format_mfib_itf(u8 * s, va_list * args);
+
+extern mfib_itf_t *mfib_itf_pool;
+
+static inline mfib_itf_t *
+mfib_itf_get (index_t mi)
+{
+    return (pool_elt_at_index(mfib_itf_pool, mi));
+}
+static inline index_t
+mfib_itf_get_index (const mfib_itf_t *mfi)
+{
+    return (mfi - mfib_itf_pool);
+}
+
+#endif
diff --git a/src/vnet/mfib/mfib_signal.c b/src/vnet/mfib/mfib_signal.c
new file mode 100644
index 00000000..9f6205de
--- /dev/null
+++ b/src/vnet/mfib/mfib_signal.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <vnet/vnet.h>
+#include <vnet/mfib/mfib_signal.h>
+#include <vppinfra/dlist.h>
+
+/**
+ * @brief Pool of signals
+ */
+static mfib_signal_t *mfib_signal_pool;
+
+/**
+ * @brief pool of dlist elements
+ */
+static dlist_elt_t *mfib_signal_dlist_pool;
+
+/**
+ * the list/set of interfaces with signals pending
+ */
+typedef struct mfib_signal_q_t_
+{
+    /**
+     * the dlist index that is the head of the list
+     */
+    u32 mip_head;
+
+    /**
+     * Spin lock to protect the list
+     */
+    int mip_lock;
+} mfib_signal_q_t;
+
+/**
+ * @brief The pending queue of signals to deliver to the control plane
+ */
+static mfib_signal_q_t mfib_signal_pending ;
+
+static void
+mfib_signal_list_init (void)
+{
+    dlist_elt_t *head;
+    u32 hi;
+
+    pool_get(mfib_signal_dlist_pool, head);
+    hi = head - mfib_signal_dlist_pool;
+
+    mfib_signal_pending.mip_head = hi;
+    clib_dlist_init(mfib_signal_dlist_pool, hi);
+}
+
+void
+mfib_signal_module_init (void)
+{
+    mfib_signal_list_init();
+}
+
+/**
+ * @brief Pop one pending signal, if any, and pass it to
+ * vl_mfib_signal_send_one() together with q and context.
+ * @return 1 if a signal was popped and sent, 0 if nothing was popped.
+ */
+int
+mfib_signal_send_one (struct _unix_shared_memory_queue *q,
+                      u32 context)
+{
+    u32 li, si;
+
+    /* with the lock held, pop a signal from the q. */
+    while (__sync_lock_test_and_set (&mfib_signal_pending.mip_lock, 1))
+        ;
+    {
+        li = clib_dlist_remove_head(mfib_signal_dlist_pool,
+                                    mfib_signal_pending.mip_head);
+    }
+    /* unlock with a release barrier, pairing with the acquire above */
+    __sync_lock_release(&mfib_signal_pending.mip_lock);
+
+    if (~0 != li)
+    {
+        mfib_signal_t *mfs;
+        mfib_itf_t *mfi;
+        dlist_elt_t *elt;
+
+        elt = pool_elt_at_index(mfib_signal_dlist_pool, li);
+        si = elt->value;
+
+        mfs = pool_elt_at_index(mfib_signal_pool, si);
+        mfi = mfib_itf_get(mfs->mfs_itf);
+        mfi->mfi_si = INDEX_INVALID;
+        __sync_fetch_and_and(&mfi->mfi_flags,
+                             ~MFIB_ITF_FLAG_SIGNAL_PRESENT);
+
+        vl_mfib_signal_send_one(q, context, mfs);
+
+        /* with the lock held, return the resources of the signal posted */
+        while (__sync_lock_test_and_set(&mfib_signal_pending.mip_lock, 1))
+            ;
+        {
+            pool_put_index(mfib_signal_pool, si);
+            pool_put_index(mfib_signal_dlist_pool, li);
+        }
+        __sync_lock_release(&mfib_signal_pending.mip_lock);
+        return (1);
+    }
+    return (0);
+}
+
+/**
+ * @brief Queue a signal for (mfe, mfi) on the pending list, with a copy of
+ * at most MFIB_SIGNAL_BUFFER_SIZE bytes of b0's current data (if b0 != NULL).
+ */
+void
+mfib_signal_push (const mfib_entry_t *mfe,
+                  mfib_itf_t *mfi,
+                  vlib_buffer_t *b0)
+{
+    mfib_signal_t *mfs;
+    dlist_elt_t *elt;
+    u32 si, li, len;
+
+    /* clamp first: mfs_buffer_len is a u8, so storing the raw length
+     * would wrap before it could be compared with the buffer size */
+    len = (NULL != b0 ? b0->current_length : 0);
+    len = (len > MFIB_SIGNAL_BUFFER_SIZE ? MFIB_SIGNAL_BUFFER_SIZE : len);
+
+    while (__sync_lock_test_and_set (&mfib_signal_pending.mip_lock, 1))
+        ;
+    {
+        pool_get(mfib_signal_pool, mfs);
+        pool_get(mfib_signal_dlist_pool, elt);
+
+        si = mfs - mfib_signal_pool;
+        li = elt - mfib_signal_dlist_pool;
+
+        /* complete the signal while the lock is held: once it is linked a
+         * reader can pop it, and another pool_get may move the pool */
+        mfs->mfs_entry = mfib_entry_get_index(mfe);
+        mfs->mfs_itf = mfib_itf_get_index(mfi);
+        mfs->mfs_buffer_len = len;
+        if (0 != len)
+            memcpy(mfs->mfs_buffer, vlib_buffer_get_current(b0), len);
+
+        elt->value = si;
+        mfi->mfi_si = li;
+
+        clib_dlist_addhead(mfib_signal_dlist_pool,
+                           mfib_signal_pending.mip_head,
+                           li);
+    }
+    __sync_lock_release(&mfib_signal_pending.mip_lock);
+}
+
+/**
+ * @brief Remove the signal (if any) that mfi has on the pending list and
+ * return the signal and its list element to their pools.
+ */
+void
+mfib_signal_remove_itf (const mfib_itf_t *mfi)
+{
+    u32 li;
+
+    /* mfi_si is read without the lock; it is only taken if there is work */
+    li = mfi->mfi_si;
+
+    if (INDEX_INVALID != li)
+    {
+        /* it's in the pending q */
+        while (__sync_lock_test_and_set (&mfib_signal_pending.mip_lock, 1))
+            ;
+        {
+            dlist_elt_t *elt;
+
+            /*
+             * with the lock held;
+             *  - remove the signal from the pending list
+             *  - free up the signal and list entry objects
+             */
+            clib_dlist_remove(mfib_signal_dlist_pool, li);
+
+            elt = pool_elt_at_index(mfib_signal_dlist_pool, li);
+            pool_put_index(mfib_signal_pool, elt->value);
+            pool_put(mfib_signal_dlist_pool, elt);
+        }
+        /* unlock with a release barrier, pairing with the acquire above */
+        __sync_lock_release(&mfib_signal_pending.mip_lock);
+    }
+}
diff --git a/src/vnet/mfib/mfib_signal.h b/src/vnet/mfib/mfib_signal.h
new file mode 100644
index 00000000..732d8aff
--- /dev/null
+++ b/src/vnet/mfib/mfib_signal.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MFIB_SIGNAL_H__
+#define __MFIB_SIGNAL_H__
+
+#include <vlib/vlib.h>
+#include <vnet/mfib/mfib_types.h>
+#include <vnet/mfib/mfib_itf.h>
+#include <vnet/mfib/mfib_entry.h>
+
+#define MFIB_SIGNAL_BUFFER_SIZE 255
+
+/**
+ * A pair of indices, for the entry and interface respectively.
+ */
+typedef struct mfib_signal_t_
+{
+    fib_node_index_t mfs_entry;
+    index_t mfs_itf;
+
+    /**
+     * @brief A buffer copied from the DP plane that triggered the signal
+     */
+    u8 mfs_buffer[MFIB_SIGNAL_BUFFER_SIZE];
+
+    u8 mfs_buffer_len;
+} mfib_signal_t;
+
+
+extern void mfib_signal_push(const mfib_entry_t *mfe,
+                             mfib_itf_t *mfi,
+                             vlib_buffer_t *b0);
+extern void mfib_signal_remove_itf(const mfib_itf_t *mfi);
+
+extern void mfib_signal_module_init(void);
+
+struct _unix_shared_memory_queue;
+
+extern void vl_mfib_signal_send_one(struct _unix_shared_memory_queue *q,
+                                    u32 context,
+                                    const mfib_signal_t *mfs);
+extern int mfib_signal_send_one(struct _unix_shared_memory_queue *q,
+                                u32 context);
+
+#endif
+
diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c
new file mode 100644
index 00000000..e4c0936d
--- /dev/null
+++ b/src/vnet/mfib/mfib_table.c
@@ -0,0 +1,489 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/mfib/ip4_mfib.h>
+#include <vnet/mfib/ip6_mfib.h>
+#include <vnet/mfib/mfib_entry.h>
+#include <vnet/mfib/mfib_signal.h>
+
+mfib_table_t *
+mfib_table_get (fib_node_index_t index,
+                fib_protocol_t proto)
+{
+    switch (proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        return (pool_elt_at_index(ip4_main.mfibs, index));
+    case FIB_PROTOCOL_IP6:
+        return (pool_elt_at_index(ip6_main.mfibs, index));
+    case FIB_PROTOCOL_MPLS:
+        break;
+    }
+    ASSERT(0);
+    return (NULL);
+}
+
+static inline fib_node_index_t
+mfib_table_lookup_i (const mfib_table_t *mfib_table,
+                     const mfib_prefix_t *prefix)
+{
+    switch (prefix->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        return (ip4_mfib_table_lookup(&mfib_table->v4,
+                                      &prefix->fp_src_addr.ip4,
+                                      &prefix->fp_grp_addr.ip4,
+                                      prefix->fp_len));
+    case FIB_PROTOCOL_IP6:
+        return (ip6_mfib_table_lookup(&mfib_table->v6,
+                                      &prefix->fp_src_addr.ip6,
+                                      &prefix->fp_grp_addr.ip6,
+                                      prefix->fp_len));
+    case FIB_PROTOCOL_MPLS:
+        break;
+    }
+    return (FIB_NODE_INDEX_INVALID);
+}
+
+fib_node_index_t
+mfib_table_lookup (u32 fib_index,
+                   const mfib_prefix_t *prefix)
+{
+    return (mfib_table_lookup_i(mfib_table_get(fib_index, prefix->fp_proto), prefix));
+}
+
+static inline fib_node_index_t
+mfib_table_lookup_exact_match_i (const mfib_table_t *mfib_table,
+                                 const mfib_prefix_t *prefix)
+{
+    switch (prefix->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        return (ip4_mfib_table_lookup_exact_match(&mfib_table->v4,
+                                                  &prefix->fp_grp_addr.ip4,
+                                                  &prefix->fp_src_addr.ip4,
+                                                  prefix->fp_len));
+    case FIB_PROTOCOL_IP6:
+        return (ip6_mfib_table_lookup_exact_match(&mfib_table->v6,
+                                                  &prefix->fp_grp_addr.ip6,
+                                                  &prefix->fp_src_addr.ip6,
+                                                  prefix->fp_len));
+    case FIB_PROTOCOL_MPLS:
+        break;
+    }
+    return (FIB_NODE_INDEX_INVALID);
+}
+
+fib_node_index_t
+mfib_table_lookup_exact_match (u32 fib_index,
+                              const mfib_prefix_t *prefix)
+{
+    return (mfib_table_lookup_exact_match_i(mfib_table_get(fib_index,
+                                                          prefix->fp_proto),
+                                            prefix));
+}
+
+static void
+mfib_table_entry_remove (mfib_table_t *mfib_table,
+                         const mfib_prefix_t *prefix,
+                         fib_node_index_t fib_entry_index)
+{
+    vlib_smp_unsafe_warning();
+
+    mfib_table->mft_total_route_counts--;
+
+    switch (prefix->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        ip4_mfib_table_entry_remove(&mfib_table->v4,
+                                    &prefix->fp_grp_addr.ip4,
+                                    &prefix->fp_src_addr.ip4,
+                                    prefix->fp_len);
+        break;
+    case FIB_PROTOCOL_IP6:
+        ip6_mfib_table_entry_remove(&mfib_table->v6,
+                                    &prefix->fp_grp_addr.ip6,
+                                    &prefix->fp_src_addr.ip6,
+                                    prefix->fp_len);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        ASSERT(0);
+        break;
+    }
+
+    mfib_entry_unlock(fib_entry_index);
+}
+
+static void
+mfib_table_entry_insert (mfib_table_t *mfib_table,
+                         const mfib_prefix_t *prefix,
+                         fib_node_index_t mfib_entry_index)
+{
+    vlib_smp_unsafe_warning();
+
+    mfib_entry_lock(mfib_entry_index);
+    mfib_table->mft_total_route_counts++;
+
+    switch (prefix->fp_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        ip4_mfib_table_entry_insert(&mfib_table->v4,
+                                    &prefix->fp_grp_addr.ip4,
+                                    &prefix->fp_src_addr.ip4,
+                                    prefix->fp_len,
+                                    mfib_entry_index);
+        break;
+    case FIB_PROTOCOL_IP6:
+        ip6_mfib_table_entry_insert(&mfib_table->v6,
+                                    &prefix->fp_grp_addr.ip6,
+                                    &prefix->fp_src_addr.ip6,
+                                    prefix->fp_len,
+                                    mfib_entry_index);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        break;
+    }
+}
+
+fib_node_index_t
+mfib_table_entry_update (u32 fib_index,
+                         const mfib_prefix_t *prefix,
+                         mfib_source_t source,
+                         mfib_entry_flags_t entry_flags)
+{
+    fib_node_index_t mfib_entry_index;
+    mfib_table_t *mfib_table;
+
+    mfib_table = mfib_table_get(fib_index, prefix->fp_proto);
+    mfib_entry_index = mfib_table_lookup_exact_match_i(mfib_table, prefix);
+
+    if (FIB_NODE_INDEX_INVALID == mfib_entry_index)
+    {
+        if (MFIB_ENTRY_FLAG_NONE != entry_flags)
+        {
+            /*
+             * update to a non-existing entry with non-zero flags
+             */
+            mfib_entry_index = mfib_entry_create(fib_index, source,
+                                                 prefix, entry_flags);
+
+            mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index);
+        }
+        /*
+         * else
+         *   the entry doesn't exist and the request is to set no flags
+         *   the result would be an entry that doesn't exist - so do nothing
+         */
+    }
+    else
+    {
+        mfib_entry_lock(mfib_entry_index);
+
+        if (mfib_entry_update(mfib_entry_index, source, entry_flags))
+        {
+            /*
+             * this update means we can now remove the entry.
+             */
+            mfib_table_entry_remove(mfib_table, prefix, mfib_entry_index);
+        }
+
+        mfib_entry_unlock(mfib_entry_index);
+    }
+
+    return (mfib_entry_index);
+}
+
+fib_node_index_t
+mfib_table_entry_path_update (u32 fib_index,
+                              const mfib_prefix_t *prefix,
+                              mfib_source_t source,
+                              const fib_route_path_t *rpath,
+                              mfib_itf_flags_t itf_flags)
+{
+    fib_node_index_t mfib_entry_index;
+    mfib_table_t *mfib_table;
+
+    mfib_table = mfib_table_get(fib_index, prefix->fp_proto);
+    mfib_entry_index = mfib_table_lookup_exact_match_i(mfib_table, prefix);
+
+    if (FIB_NODE_INDEX_INVALID == mfib_entry_index)
+    {
+        mfib_entry_index = mfib_entry_create(fib_index,
+                                             source,
+                                             prefix,
+                                             MFIB_ENTRY_FLAG_NONE);
+
+        mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index);
+    }
+
+    mfib_entry_path_update(mfib_entry_index,
+                           source,
+                           rpath,
+                           itf_flags);
+
+    return (mfib_entry_index);
+}
+
+void
+mfib_table_entry_path_remove (u32 fib_index,
+                              const mfib_prefix_t *prefix,
+                              mfib_source_t source,
+                              const fib_route_path_t *rpath)
+{
+    fib_node_index_t mfib_entry_index;
+    mfib_table_t *mfib_table;
+
+    mfib_table = mfib_table_get(fib_index, prefix->fp_proto);
+    mfib_entry_index = mfib_table_lookup_exact_match_i(mfib_table, prefix);
+
+    if (FIB_NODE_INDEX_INVALID == mfib_entry_index)
+    {
+        /*
+         * removing an entry that does not exist. i'll allow it.
+         */
+    }
+    else
+    {
+        int no_more_sources;
+
+        /*
+         * don't nobody go nowhere
+         */
+        mfib_entry_lock(mfib_entry_index);
+
+        no_more_sources = mfib_entry_path_remove(mfib_entry_index,
+                                                 source,
+                                                 rpath);
+
+        if (no_more_sources)
+        {
+            /*
+             * last source gone. remove from the table
+             */
+            mfib_table_entry_remove(mfib_table, prefix, mfib_entry_index);
+        }
+
+        mfib_entry_unlock(mfib_entry_index);
+    }
+}
+
+static void
+mfib_table_entry_delete_i (u32 fib_index,
+                           fib_node_index_t mfib_entry_index,
+                           const mfib_prefix_t *prefix,
+                           mfib_source_t source)
+{
+    mfib_table_t *mfib_table;
+
+    mfib_table = mfib_table_get(fib_index, prefix->fp_proto);
+
+    /*
+     * don't nobody go nowhere
+     */
+    mfib_entry_lock(mfib_entry_index);
+
+    if (mfib_entry_delete(mfib_entry_index, source))
+    {
+        /*
+         * last source gone. remove from the table
+         */
+        mfib_table_entry_remove(mfib_table, prefix, mfib_entry_index);
+    }
+    /*
+     * else
+     *   still has sources, leave it be.
+     */
+
+    mfib_entry_unlock(mfib_entry_index);
+}
+
+void
+mfib_table_entry_delete (u32 fib_index,
+                         const mfib_prefix_t *prefix,
+                         mfib_source_t source)
+{
+    fib_node_index_t mfib_entry_index;
+
+    mfib_entry_index = mfib_table_lookup_exact_match(fib_index, prefix);
+
+    if (FIB_NODE_INDEX_INVALID == mfib_entry_index)
+    {
+        /*
+         * removing an entry that does not exist.
+         * i'll allow it, but i won't like it.
+         */
+        clib_warning("%U not in FIB", format_mfib_prefix, prefix);
+    }
+    else
+    {
+        mfib_table_entry_delete_i(fib_index, mfib_entry_index,
+                                  prefix, source);
+    }
+}
+
+void
+mfib_table_entry_delete_index (fib_node_index_t mfib_entry_index,
+                               mfib_source_t source)
+{
+    mfib_prefix_t prefix;
+
+    mfib_entry_get_prefix(mfib_entry_index, &prefix);
+
+    mfib_table_entry_delete_i(mfib_entry_get_fib_index(mfib_entry_index),
+                              mfib_entry_index, &prefix, source);
+}
+
+u32
+mfib_table_get_index_for_sw_if_index (fib_protocol_t proto,
+                                      u32 sw_if_index)
+{
+    switch (proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        return (ip4_mfib_table_get_index_for_sw_if_index(sw_if_index));
+    case FIB_PROTOCOL_IP6:
+        return (ip6_mfib_table_get_index_for_sw_if_index(sw_if_index));
+    case FIB_PROTOCOL_MPLS:
+        ASSERT(0);
+        break;
+    }
+    return (~0);
+}
+
+u32
+mfib_table_find (fib_protocol_t proto,
+                 u32 table_id)
+{
+    switch (proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        return (ip4_mfib_index_from_table_id(table_id));
+    case FIB_PROTOCOL_IP6:
+        return (ip6_mfib_index_from_table_id(table_id));
+    case FIB_PROTOCOL_MPLS:
+        ASSERT(0);
+        break;
+    }
+    return (~0);
+}
+
+/** Per-protocol find_or_create_and_lock; returns the table index, or ~0 if proto is not IP4/IP6. */
+u32
+mfib_table_find_or_create_and_lock (fib_protocol_t proto,
+                                    u32 table_id)
+{
+    mfib_table_t *mfib_table;
+    fib_node_index_t fi;
+
+    switch (proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        fi = ip4_mfib_table_find_or_create_and_lock(table_id);
+        break;
+    case FIB_PROTOCOL_IP6:
+        fi = ip6_mfib_table_find_or_create_and_lock(table_id);
+        break;
+    case FIB_PROTOCOL_MPLS:
+    default:
+        return (~0);
+    }
+
+    mfib_table = mfib_table_get(fi, proto);
+    /* describe once: re-formatting on a find would leak the old vector (assumes create zeroes mft_desc - confirm) */
+    if (NULL == mfib_table->mft_desc)
+        mfib_table->mft_desc = format(NULL, "%U-VRF:%d",
+                                      format_fib_protocol, proto, table_id);
+    return (fi);
+}
+
+static void
+mfib_table_destroy (mfib_table_t *mfib_table)
+{
+    vec_free(mfib_table->mft_desc);
+
+    switch (mfib_table->mft_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+        ip4_mfib_table_destroy(&mfib_table->v4);
+        break;
+    case FIB_PROTOCOL_IP6:
+        ip6_mfib_table_destroy(&mfib_table->v6);
+        break;
+    case FIB_PROTOCOL_MPLS:
+        ASSERT(0);
+        break;
+    }
+}
+
+void
+mfib_table_unlock (u32 fib_index,
+                   fib_protocol_t proto)
+{
+    mfib_table_t *mfib_table;
+
+    mfib_table = mfib_table_get(fib_index, proto);
+    mfib_table->mft_locks--;
+
+    if (0 == mfib_table->mft_locks)
+    {
+        mfib_table_destroy(mfib_table);
+    }
+}
+
+void
+mfib_table_lock (u32 fib_index,
+                 fib_protocol_t proto)
+{
+    mfib_table_t *mfib_table;
+
+    mfib_table = mfib_table_get(fib_index, proto);
+    mfib_table->mft_locks++;
+}
+
+u8*
+format_mfib_table_name (u8* s, va_list ap)
+{
+    fib_node_index_t fib_index = va_arg(ap, fib_node_index_t);
+    fib_protocol_t proto = va_arg(ap, int); // int promotion
+    mfib_table_t *mfib_table;
+    /* NOTE(review): takes va_list by value, unlike the va_list * of format_mfib_itf - confirm this is portable */
+    mfib_table = mfib_table_get(fib_index, proto);
+    /* append the table's description vector (mft_desc) to s */
+    s = format(s, "%v", mfib_table->mft_desc);
+
+    return (s);
+}
+
+static clib_error_t *
+mfib_module_init (vlib_main_t * vm)
+{
+    clib_error_t * error;
+
+    if ((error = vlib_call_init_function (vm, fib_module_init)))
+        return (error);
+    if ((error = vlib_call_init_function (vm, rn_module_init)))
+        return (error);
+
+    mfib_entry_module_init();
+    mfib_signal_module_init();
+
+    return (error);
+}
+
+VLIB_INIT_FUNCTION(mfib_module_init);
diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h
new file mode 100644
index 00000000..4faa69ee
--- /dev/null
+++ b/src/vnet/mfib/mfib_table.h
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MFIB_TABLE_H__
+#define __MFIB_TABLE_H__
+
+#include <vnet/ip/ip.h>
+#include <vnet/adj/adj.h>
+
+#include <vnet/mfib/mfib_types.h>
+
+/**
+ * @brief
+ *   A protocol Independent IP multicast FIB table
+ */
+typedef struct mfib_table_t_
+{
+    /**
+     * A union of the protocol specific FIBs that provide the
+     * underlying LPM mechanism.
+     * This element is first in the struct so that it is in the
+     * first cache line.
+     */
+    union {
+        ip4_mfib_t v4;
+        ip6_mfib_t v6;
+    };
+
+    /**
+     * Which protocol this table serves. Used to switch on the union above.
+     */
+    fib_protocol_t mft_proto;
+
+    /**
+     * number of locks on the table
+     */
+    u16 mft_locks;
+
+    /**
+     * Table ID (hash key) for this FIB.
+     */
+    u32 mft_table_id;
+
+    /**
+     * Index into FIB vector.
+     */
+    fib_node_index_t mft_index;
+
+    /**
+     * Total route counters
+     */
+    u32 mft_total_route_counts;
+
+    /**
+     * Table description
+     */
+    u8* mft_desc;
+} mfib_table_t;
+
+/**
+ * @brief
+ *  Format the description/name of the table
+ */
+extern u8* format_mfib_table_name(u8* s, va_list ap);
+
+/**
+ * @brief
+ *  Perform a longest prefix match in the non-forwarding table
+ *
+ * @param fib_index
+ *  The index of the FIB
+ *
+ * @param prefix
+ *  The prefix to lookup
+ *
+ * @return
+ *  The index of the fib_entry_t for the best match, which may be the default route
+ */
+extern fib_node_index_t mfib_table_lookup(u32 fib_index,
+                                         const mfib_prefix_t *prefix);
+
+/**
+ * @brief
+ *  Perform an exact match in the non-forwarding table
+ *
+ * @param fib_index
+ *  The index of the FIB
+ *
+ * @param prefix
+ *  The prefix to lookup
+ *
+ * @return
+ *  The index of the fib_entry_t for the exact match, or INVALID
+ *  if there is no match.
+ */
+extern fib_node_index_t mfib_table_lookup_exact_match(u32 fib_index,
+                                                      const mfib_prefix_t *prefix);
+
+/**
+ * @brief
+ * Add a new (with no replication) or lock an existing entry
+ *
+ * @param prefix
+ *  The prefix for the entry to add
+ *
+ * @return
+ *  the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t mfib_table_entry_update(u32 fib_index,
+                                                const mfib_prefix_t *prefix,
+                                                mfib_source_t source,
+                                                mfib_entry_flags_t flags);
+
+/**
+ * @brief
+ *  Add n paths to an entry (aka route) in the FIB. If the entry does not
+ *  exist, it will be created.
+ * See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ *  The index of the FIB
+ *
+ * @param prefix
+ *  The prefix for the entry to add
+ *
+ * @param source
+ *  The ID of the client/source adding the entry.
+ *
+ * @param rpath
+ *  A vector of paths.
+ *
+ * @param flags
+ *  Interface flags (mfib_itf_flags_t) passed with the path.
+ *
+ * @return
+ *  the index of the fib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t mfib_table_entry_path_update(u32 fib_index,
+                                                     const mfib_prefix_t *prefix,
+                                                     mfib_source_t source,
+                                                     const fib_route_path_t *rpath,
+                                                     mfib_itf_flags_t flags);
+
+/**
+ * @brief
+ * Remove n paths to an entry (aka route) in the FIB. If this is the entry's
+ * last path, then the entry will be removed, unless it has other sources.
+ * See the documentation for fib_route_path_t for more descriptions of
+ * the path parameters.
+ *
+ * @param fib_index
+ *  The index of the FIB
+ *
+ * @param prefix
+ *  The prefix for the entry to add
+ *
+ * @param source
+ *  The ID of the client/source adding the entry.
+ *
+ * @param paths
+ *  A vector of paths.
+ */
+extern void mfib_table_entry_path_remove(u32 fib_index,
+                                         const mfib_prefix_t *prefix,
+                                         mfib_source_t source,
+                                         const fib_route_path_t *paths);
+
+
+
+/**
+ * @brief
+ *  Delete a FIB entry. If the entry has no more sources, then it is
+ * removed from the table.
+ *
+ * @param fib_index
+ *  The index of the FIB
+ *
+ * @param prefix
+ *  The prefix for the entry to remove
+ *
+ * @param source
+ *  The ID of the client/source adding the entry.
+ */
+extern void mfib_table_entry_delete(u32 fib_index,
+                                    const mfib_prefix_t *prefix,
+                                    mfib_source_t source);
+
+/**
+ * @brief
+ *  Delete a FIB entry. If the entry has no more sources, then it is
+ * removed from the table.
+ *
+ * @param entry_index
+ *  The index of the FIB entry
+ *
+ * @param source
+ *  The ID of the client/source adding the entry.
+ */
+extern void mfib_table_entry_delete_index(fib_node_index_t entry_index,
+                                          mfib_source_t source);
+
+/**
+ * @brief
+ *  Flush all entries from a table for the source
+ *
+ * @param fib_index
+ *  The index of the FIB
+ *
+ * @param proto
+ *  The protocol of the entries in the table
+ *
+ * @param source
+ *  the source to flush
+ */
+extern void mfib_table_flush(u32 fib_index,
+                             fib_protocol_t proto);
+
+/**
+ * @brief
+ *  Get the index of the FIB bound to the interface
+ *
+ * @param proto
+ *  The protocol of the FIB (and thus the entries therein)
+ *
+ * @param sw_if_index
+ *  The interface index
+ *
+ * @return fib_index
+ *  The index of the FIB
+ */
+extern u32 mfib_table_get_index_for_sw_if_index(fib_protocol_t proto,
+                                                u32 sw_if_index);
+
+/**
+ * @brief
+ *  Get the index of the FIB for a Table-ID. This DOES NOT create the
+ * FIB if it does not exist.
+ *
+ * @param proto
+ *  The protocol of the FIB (and thus the entries therein)
+ *
+ * @param table-id
+ *  The Table-ID
+ *
+ * @return fib_index
+ *  The index of the FIB, which may be INVALID.
+ */
+extern u32 mfib_table_find(fib_protocol_t proto, u32 table_id);
+
+
+/**
+ * @brief
+ *  Get the index of the FIB for a Table-ID. This DOES create the
+ * FIB if it does not exist.
+ *
+ * @param proto
+ *  The protocol of the FIB (and thus the entries therein)
+ *
+ * @param table-id
+ *  The Table-ID
+ *
+ * @return fib_index
+ *  The index of the FIB
+ */
+extern u32 mfib_table_find_or_create_and_lock(fib_protocol_t proto,
+                                              u32 table_id);
+
+
+/**
+ * @brief
+ * Release a reference counting lock on the table. When the last lock has gone, the FIB is deleted.
+ *
+ * @param fib_index
+ *  The index of the FIB
+ *
+ * @param proto
+ *  The protocol of the FIB (and thus the entries therein)
+ */
+extern void mfib_table_unlock(u32 fib_index,
+                              fib_protocol_t proto);
+
+/**
+ * @brief
+ * Take a reference counting lock on the table. The table is not deleted
+ * while it has locks (see mfib_table_unlock).
+ *
+ * @param fib_index
+ *  The index of the FIB
+ *
+ * @param proto
+ *  The protocol of the FIB (and thus the entries therein)
+ */
+extern void mfib_table_lock(u32 fib_index,
+                            fib_protocol_t proto);
+
+/**
+ * @brief
+ * Return the number of entries in the FIB added by a given source.
+ *
+ * @param fib_index
+ *  The index of the FIB
+ *
+ * @param proto
+ *  The protocol of the FIB (and thus the entries therein)
+ *
+ * @return number of sourced entries.
+ */
+extern u32 mfib_table_get_num_entries(u32 fib_index,
+                                      fib_protocol_t proto);
+
+/**
+ * @brief
+ * Get a pointer to a FIB table
+ */
+extern mfib_table_t *mfib_table_get(fib_node_index_t index,
+                                    fib_protocol_t proto);
+
+#endif
diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c
new file mode 100644
index 00000000..8735bfa7
--- /dev/null
+++ b/src/vnet/mfib/mfib_test.c
@@ -0,0 +1,1225 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mpls/mpls_types.h>
+
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/mfib/mfib_entry.h>
+#include <vnet/mfib/mfib_signal.h>
+#include <vnet/mfib/ip6_mfib.h>
+
+#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/adj/adj_mcast.h>
+
+/* Report PASS/FAIL for _cond on stderr, tagged with the invoking line; */
+/* the whole expression yields the value _cond evaluated to.            */
+#define MFIB_TEST_I(_cond, _comment, _args...)			\
+({								\
+    int _result = (_cond);					\
+    if (_result) {						\
+        fformat(stderr, "PASS:%d: " _comment "\n", __LINE__, ##_args);	\
+    } else {							\
+        fformat(stderr, "FAIL:%d: " _comment "\n", __LINE__, ##_args);	\
+    }								\
+    _result;							\
+})
+/* Run a reported check; on failure make the enclosing function return 1. */
+#define MFIB_TEST(_cond, _comment, _args...)			\
+{								\
+    if (!MFIB_TEST_I(_cond, _comment, ##_args)) {		\
+        return 1;						\
+    }								\
+}
+/* As MFIB_TEST, for checks that need no descriptive message of their own. */
+#define MFIB_TEST_NS(_cond)                                     \
+{								\
+    if (!MFIB_TEST_I(_cond, "")) {                              \
+        return 1;						\
+    }								\
+}
+
+/**
+ * Store of test data; deliberately simple rather than efficient.
+ */
+typedef struct test_main_t_ {
+    /**
+     * HW interface indices, one per test interface
+     */
+    u32 hw_if_indicies[4];
+    /**
+     * HW interfaces, looked up from the matching hw_if_indicies slot
+     */
+    vnet_hw_interface_t * hw[4];
+
+} test_main_t;
+static test_main_t test_main;
+
+/* Name formatter of the fake ethernet device class: "test-ethN", distinct from "fake-ethX" */
+static u8 *
+format_test_interface_name (u8 * s, va_list * args)
+{
+  return format (s, "test-eth%d", va_arg (*args, u32));
+}
+
+/* TX function of the test device class; being called at all is warned about. */
+static uword dummy_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
+                                 vlib_frame_t * frame)
+{
+  clib_warning ("you shouldn't be here, leaking buffers...");
+  return frame->n_vectors;
+}
+
+/* Admin up/down handler: the HW link state simply follows the admin state. */
+static clib_error_t *
+test_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+  u32 hw_flags = 0;
+  if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+    hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
+  vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+  return 0;
+}
+
+VNET_DEVICE_CLASS (test_interface_device_class,static) = { /* device class of the fake test interfaces */
+  .name = "Test interface",
+  .format_device_name = format_test_interface_name,
+  .tx_function = dummy_interface_tx,
+  .admin_up_down_function = test_interface_admin_up_down,
+};
+
+static u8 *hw_address; /* MAC address vector passed when registering the test interfaces */
+
+/**
+ * Create ninterfaces test ethernet interfaces, bind each to FIB and MFIB
+ * index 0 for IP4 and IP6, and bring them link- and admin-up.
+ *
+ * @return 0 on success; 1 (via MFIB_TEST) at the first step that fails
+ */
+static int
+mfib_test_mk_intf (u32 ninterfaces)
+{
+    clib_error_t * error = NULL;
+    test_main_t *tm = &test_main;
+    u32 sw_if_index, i;
+
+    ASSERT(ninterfaces <= ARRAY_LEN(tm->hw_if_indicies));
+
+    /* append MAC bytes 0xd0..0xd5; byte 5 is overwritten per interface */
+    for (i = 0; i < 6; i++)
+        vec_add1(hw_address, (u8) (0xd0 + i));
+
+    for (i = 0; i < ninterfaces; i++)
+    {
+        hw_address[5] = i;
+
+        error = ethernet_register_interface(vnet_get_main(),
+                                            test_interface_device_class.index,
+                                            i /* instance */,
+                                            hw_address,
+                                            &tm->hw_if_indicies[i],
+                                            /* flag change */ 0);
+        MFIB_TEST((NULL == error), "ADD interface %d", i);
+
+        error = vnet_hw_interface_set_flags(vnet_get_main(),
+                                            tm->hw_if_indicies[i],
+                                            VNET_HW_INTERFACE_FLAG_LINK_UP);
+        MFIB_TEST((NULL == error), "LINK-UP interface %d", i);
+
+        tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
+                                          tm->hw_if_indicies[i]);
+        sw_if_index = tm->hw[i]->sw_if_index;
+
+        vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
+        vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
+        ip4_main.fib_index_by_sw_if_index[sw_if_index] = 0;
+        ip6_main.fib_index_by_sw_if_index[sw_if_index] = 0;
+
+        vec_validate (ip4_main.mfib_index_by_sw_if_index, sw_if_index);
+        vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index);
+        ip4_main.mfib_index_by_sw_if_index[sw_if_index] = 0;
+        ip6_main.mfib_index_by_sw_if_index[sw_if_index] = 0;
+
+        error = vnet_sw_interface_set_flags(vnet_get_main(), sw_if_index,
+                                            VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+        MFIB_TEST((NULL == error), "UP interface %d", i);
+    }
+
+    /* re-fetch the HW interface pointers after the inevitable realloc */
+    for (i = 0; i < ninterfaces; i++)
+    {
+        tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
+                                          tm->hw_if_indicies[i]);
+    }
+
+    return (0);
+}
+
+/* As MFIB_TEST, but a failed check makes the enclosing function return 0. */
+#define MFIB_TEST_REP(_cond, _comment, _args...)		\
+{								\
+    if (!MFIB_TEST_I(_cond, _comment, ##_args))		\
+        return (0);						\
+}
+
+/* Compare a replicate's buckets against (DPO type, adjacency index) pairs */
+/* read from ap; yields non-zero when all match and 0 on a mismatch.      */
+static int
+mfib_test_validate_rep_v (const replicate_t *rep, u16 n_buckets, va_list ap)
+{
+    const dpo_id_t *bucket_dpo;
+    adj_index_t expected_ai;
+    dpo_type_t expected_type;
+    int ii;
+
+    MFIB_TEST_REP((n_buckets == rep->rep_n_buckets),
+                  "n_buckets = %d", rep->rep_n_buckets);
+
+    for (ii = 0; ii < n_buckets; ii++)
+    {
+        /* the DPO type is read as an int because of type promotion */
+        expected_type = va_arg(ap, int);
+        expected_ai = va_arg(ap, adj_index_t);
+        bucket_dpo = replicate_get_bucket_i(rep, ii);
+        MFIB_TEST_REP((expected_type == bucket_dpo->dpoi_type),
+                      "bucket %d stacks on %U",
+                      ii,
+                      format_dpo_type, bucket_dpo->dpoi_type);
+
+        /* the index comparison is skipped for receive buckets */
+        if (DPO_RECEIVE != expected_type) {
+            MFIB_TEST_REP((expected_ai == bucket_dpo->dpoi_index),
+                          "bucket %d stacks on %U",
+                          ii,
+                          format_dpo_id, bucket_dpo, 0);
+        }
+    }
+    return (!0);
+}
+
+/* Map a FIB protocol to its unicast forwarding chain type; only IP4 */
+/* and IP6 are handled, anything else asserts and yields 0.          */
+static fib_forward_chain_type_t
+fib_forw_chain_type_from_fib_proto (fib_protocol_t proto)
+{
+    if (FIB_PROTOCOL_IP4 == proto)
+        return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+    if (FIB_PROTOCOL_IP6 == proto)
+        return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
+
+    /* every other protocol value is treated as a caller error: */
+    /* assert, then fall back to returning 0                    */
+    ASSERT(0);
+    return (0);
+}
+
+
+/**
+ * Check an entry's flags and forwarding. With n_buckets == 0 the entry must
+ * link to a drop DPO; otherwise the varargs are n_buckets pairs of
+ * (DPO type, adjacency index). Returns non-zero on pass, 0 on failure.
+ */
+static int
+mfib_test_entry (fib_node_index_t fei, mfib_entry_flags_t eflags,
+                 u16 n_buckets, ...)
+{
+    const mfib_entry_t *mfe;
+    dpo_id_t tmp = DPO_INVALID;
+    mfib_prefix_t pfx;
+    va_list ap;
+    int res;
+
+    mfe = mfib_entry_get(fei);
+    mfib_entry_get_prefix(fei, &pfx);
+
+    MFIB_TEST_REP((eflags == mfe->mfe_flags),
+                  "%U has %U expect %U",
+                  format_mfib_prefix, &pfx,
+                  format_mfib_entry_flags, mfe->mfe_flags,
+                  format_mfib_entry_flags, eflags);
+
+    if (0 == n_buckets)
+    {
+        MFIB_TEST_REP((DPO_DROP == mfe->mfe_rep.dpoi_type),
+                      "%U links to %U",
+                      format_mfib_prefix, &pfx,
+                      format_dpo_id, &mfe->mfe_rep, 0);
+        return (!0);
+    }
+
+    mfib_entry_contribute_forwarding(
+        fei, fib_forw_chain_type_from_fib_proto(pfx.fp_proto), &tmp);
+
+    res = MFIB_TEST_I((DPO_REPLICATE == tmp.dpoi_type),
+                      "%U links to %U",
+                      format_mfib_prefix, &pfx,
+                      format_dpo_type, tmp.dpoi_type);
+    if (res)
+    {
+        /* look the replicate up only once the DPO type is confirmed */
+        va_start(ap, n_buckets);
+        res = mfib_test_validate_rep_v(replicate_get(tmp.dpoi_index),
+                                       n_buckets, ap);
+        va_end(ap);
+    }
+    /* reset tmp whether or not the checks passed */
+    dpo_reset(&tmp);
+
+    return (res);
+}
+
+/* Check that the entry has an interface for sw_if_index carrying exactly */
+/* the expected flags; yields non-zero on pass and 0 on failure.          */
+static int
+mfib_test_entry_itf (fib_node_index_t fei, u32 sw_if_index,
+                     mfib_itf_flags_t flags)
+{
+    const mfib_entry_t *mfe;
+    const mfib_itf_t *mfi;
+    mfib_prefix_t pfx;
+
+    mfe = mfib_entry_get(fei);
+    mfi = mfib_entry_get_itf(mfe, sw_if_index);
+    mfib_entry_get_prefix(fei, &pfx);
+    MFIB_TEST_REP((NULL != mfi),
+                  "%U has interface %d",
+                  format_mfib_prefix, &pfx, sw_if_index);
+    /* print the actual flags first, then the expected ones */
+    MFIB_TEST_REP((flags == mfi->mfi_flags),
+                  "%U interface %d has flags %U expect %U",
+                  format_mfib_prefix, &pfx, sw_if_index,
+                  format_mfib_itf_flags, mfi->mfi_flags,
+                  format_mfib_itf_flags, flags);
+
+    return (!0);
+}
+
+/* Check that the entry has no interface for sw_if_index. */
+static int
+mfib_test_entry_no_itf (fib_node_index_t fei, u32 sw_if_index)
+{
+    const mfib_itf_t *itf;
+    mfib_prefix_t prefix;
+
+    itf = mfib_entry_get_itf(mfib_entry_get(fei), sw_if_index);
+    mfib_entry_get_prefix(fei, &prefix);
+
+    /* a NULL lookup result is the pass condition; a failed check */
+    /* returns 0 from here through MFIB_TEST_REP                  */
+    MFIB_TEST_REP((NULL == itf),
+                  "%U has no interface %d",
+                  format_mfib_prefix, &prefix, sw_if_index);
+
+    return (!0);
+}
+
+static int
+mfib_test_i (fib_protocol_t PROTO,
+             vnet_link_t LINKT,
+             const mfib_prefix_t *pfx_no_forward,
+             const mfib_prefix_t *pfx_s_g,
+             const mfib_prefix_t *pfx_star_g_1,
+             const mfib_prefix_t *pfx_star_g_2,
+             const mfib_prefix_t *pfx_star_g_3,
+             const mfib_prefix_t *pfx_star_g_slash_m)
+{
+    fib_node_index_t mfei, mfei_dflt, mfei_no_f, mfei_s_g, mfei_g_1, mfei_g_2, mfei_g_3, mfei_g_m;
+    u32 fib_index, n_entries, n_itfs, n_reps;
+    fib_node_index_t ai_1, ai_2, ai_3;
+    test_main_t *tm;
+
+    mfib_prefix_t all_1s;
+    memset(&all_1s, 0xfd, sizeof(all_1s));
+
+    n_entries = pool_elts(mfib_entry_pool);
+    n_itfs = pool_elts(mfib_itf_pool);
+    n_reps = pool_elts(replicate_pool);
+    tm = &test_main;
+
+    ai_1 = adj_mcast_add_or_lock(PROTO,
+                                 LINKT,
+                                 tm->hw[1]->sw_if_index);
+    ai_2 = adj_mcast_add_or_lock(PROTO,
+                                 LINKT,
+                                 tm->hw[2]->sw_if_index);
+    ai_3 = adj_mcast_add_or_lock(PROTO,
+                                 LINKT,
+                                 tm->hw[3]->sw_if_index);
+
+    MFIB_TEST(3 == adj_mcast_db_size(), "3 MCAST adjs");
+
+    /* Find or create FIB table 11 */
+    fib_index = mfib_table_find_or_create_and_lock(PROTO, 11);
+
+    mfib_prefix_t pfx_dft = {
+        .fp_len = 0,
+        .fp_proto = PROTO,
+    };
+    mfei_dflt = mfib_table_lookup_exact_match(fib_index, &pfx_dft);
+    MFIB_TEST(FIB_NODE_INDEX_INVALID != mfei_dflt, "(*,*) presnet");
+    MFIB_TEST(mfib_test_entry(mfei_dflt,
+                              MFIB_ENTRY_FLAG_DROP,
+                              0),
+              "(*,*) no replcaitions");
+
+    MFIB_TEST(FIB_NODE_INDEX_INVALID != mfei_dflt, "(*,*) presnet");
+    MFIB_TEST(mfib_test_entry(mfei_dflt,
+                              MFIB_ENTRY_FLAG_DROP,
+                              0),
+              "(*,*) no replcaitions");
+
+
+    fib_route_path_t path_via_if0 = {
+        .frp_proto = PROTO,
+        .frp_addr = zero_addr,
+        .frp_sw_if_index = tm->hw[0]->sw_if_index,
+        .frp_fib_index = ~0,
+        .frp_weight = 0,
+        .frp_flags = 0,
+    };
+
+    mfib_table_entry_path_update(fib_index,
+                                 pfx_no_forward,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if0,
+                                 MFIB_ITF_FLAG_ACCEPT);
+
+    mfei_no_f = mfib_table_lookup_exact_match(fib_index, pfx_no_forward);
+    MFIB_TEST(mfib_test_entry(mfei_no_f,
+                              MFIB_ENTRY_FLAG_NONE,
+                              0),
+              "%U no replcaitions",
+              format_mfib_prefix, pfx_no_forward);
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei_no_f, tm->hw[0]->sw_if_index,
+                                     MFIB_ITF_FLAG_ACCEPT));
+
+    fib_route_path_t path_via_if1 = {
+        .frp_proto = PROTO,
+        .frp_addr = zero_addr,
+        .frp_sw_if_index = tm->hw[1]->sw_if_index,
+        .frp_fib_index = ~0,
+        .frp_weight = 0,
+        .frp_flags = 0,
+    };
+    fib_route_path_t path_via_if2 = {
+        .frp_proto = PROTO,
+        .frp_addr = zero_addr,
+        .frp_sw_if_index = tm->hw[2]->sw_if_index,
+        .frp_fib_index = ~0,
+        .frp_weight = 0,
+        .frp_flags = 0,
+    };
+    fib_route_path_t path_via_if3 = {
+        .frp_proto = PROTO,
+        .frp_addr = zero_addr,
+        .frp_sw_if_index = tm->hw[3]->sw_if_index,
+        .frp_fib_index = ~0,
+        .frp_weight = 0,
+        .frp_flags = 0,
+    };
+    fib_route_path_t path_for_us = {
+        .frp_proto = PROTO,
+        .frp_addr = zero_addr,
+        .frp_sw_if_index = 0xffffffff,
+        .frp_fib_index = ~0,
+        .frp_weight = 0,
+        .frp_flags = FIB_ROUTE_PATH_LOCAL,
+    };
+
+    /*
+     * An (S,G) with 1 accepting and 3 forwarding paths
+     */
+    mfib_table_entry_path_update(fib_index,
+                                 pfx_s_g,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if0,
+                                 MFIB_ITF_FLAG_ACCEPT);
+    mfib_table_entry_path_update(fib_index,
+                                 pfx_s_g,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if1,
+                                 MFIB_ITF_FLAG_FORWARD);
+    mfib_table_entry_path_update(fib_index,
+                                 pfx_s_g,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if2,
+                                 MFIB_ITF_FLAG_FORWARD);
+    mfib_table_entry_path_update(fib_index,
+                                 pfx_s_g,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if3,
+                                 (MFIB_ITF_FLAG_FORWARD |
+                                  MFIB_ITF_FLAG_NEGATE_SIGNAL));
+
+    mfei_s_g = mfib_table_lookup_exact_match(fib_index, pfx_s_g);
+
+    MFIB_TEST(FIB_NODE_INDEX_INVALID != mfei_s_g,
+              "%U present",
+              format_mfib_prefix, pfx_s_g);
+    MFIB_TEST(mfib_test_entry(mfei_s_g,
+                              MFIB_ENTRY_FLAG_NONE,
+                              3,
+                              DPO_ADJACENCY_MCAST, ai_1,
+                              DPO_ADJACENCY_MCAST, ai_2,
+                              DPO_ADJACENCY_MCAST, ai_3),
+              "%U replicate ok",
+              format_mfib_prefix, pfx_s_g);
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei_s_g, tm->hw[0]->sw_if_index,
+                                     MFIB_ITF_FLAG_ACCEPT));
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei_s_g, tm->hw[1]->sw_if_index,
+                                     MFIB_ITF_FLAG_FORWARD));
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei_s_g, tm->hw[2]->sw_if_index,
+                                     MFIB_ITF_FLAG_FORWARD));
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei_s_g, tm->hw[3]->sw_if_index,
+                                     (MFIB_ITF_FLAG_FORWARD |
+                                      MFIB_ITF_FLAG_NEGATE_SIGNAL)));
+
+    /*
+     * A (*,G), which the same G as the (S,G).
+     * different paths. test our LPM.
+     */
+    mfei_g_1 = mfib_table_entry_path_update(fib_index,
+                                            pfx_star_g_1,
+                                            MFIB_SOURCE_API,
+                                            &path_via_if0,
+                                            MFIB_ITF_FLAG_ACCEPT);
+    mfib_table_entry_path_update(fib_index,
+                                 pfx_star_g_1,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if1,
+                                 MFIB_ITF_FLAG_FORWARD);
+
+    /*
+     * test we find the *,G and S,G via LPM and exact matches
+     */
+    mfei = mfib_table_lookup_exact_match(fib_index,
+                                         pfx_star_g_1);
+    MFIB_TEST(mfei == mfei_g_1,
+              "%U found via exact match",
+              format_mfib_prefix, pfx_star_g_1);
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              1,
+                              DPO_ADJACENCY_MCAST, ai_1),
+              "%U replicate ok",
+              format_mfib_prefix, pfx_star_g_1);
+
+    mfei = mfib_table_lookup(fib_index,
+                             pfx_star_g_1);
+    MFIB_TEST(mfei == mfei_g_1,
+              "%U found via LP match",
+              format_mfib_prefix, pfx_star_g_1);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              1,
+                              DPO_ADJACENCY_MCAST, ai_1),
+              "%U replicate ok",
+              format_mfib_prefix, pfx_star_g_1);
+
+    mfei = mfib_table_lookup_exact_match(fib_index, pfx_s_g);
+    MFIB_TEST(mfei == mfei_s_g,
+              "%U found via exact match",
+              format_mfib_prefix, pfx_s_g);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              3,
+                              DPO_ADJACENCY_MCAST, ai_1,
+                              DPO_ADJACENCY_MCAST, ai_2,
+                              DPO_ADJACENCY_MCAST, ai_3),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_s_g);
+    mfei = mfib_table_lookup(fib_index, pfx_s_g);
+    MFIB_TEST(mfei == mfei_s_g,
+              "%U found via LP match",
+              format_mfib_prefix, pfx_s_g);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              3,
+                              DPO_ADJACENCY_MCAST, ai_1,
+                              DPO_ADJACENCY_MCAST, ai_2,
+                              DPO_ADJACENCY_MCAST, ai_3),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_s_g);
+
+    /*
+     * A (*,G/m), which the same root G as the (*,G).
+     * different paths. test our LPM.
+     */
+    mfei_g_m = mfib_table_entry_path_update(fib_index,
+                                            pfx_star_g_slash_m,
+                                            MFIB_SOURCE_API,
+                                            &path_via_if2,
+                                            MFIB_ITF_FLAG_ACCEPT);
+    mfib_table_entry_path_update(fib_index,
+                                 pfx_star_g_slash_m,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if3,
+                                 MFIB_ITF_FLAG_FORWARD);
+
+    /*
+     * test we find the (*,G/m), (*,G) and (S,G) via LPM and exact matches
+     */
+    mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_1);
+    MFIB_TEST((mfei_g_1 == mfei),
+              "%U found via DP LPM: %d",
+              format_mfib_prefix, pfx_star_g_1, mfei);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              1,
+                              DPO_ADJACENCY_MCAST, ai_1),
+              "%U replicate ok",
+              format_mfib_prefix, pfx_star_g_1);
+
+    mfei = mfib_table_lookup(fib_index, pfx_star_g_1);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              1,
+                              DPO_ADJACENCY_MCAST, ai_1),
+              "%U replicate ok",
+              format_mfib_prefix, pfx_star_g_1);
+
+    mfei = mfib_table_lookup_exact_match(fib_index, pfx_s_g);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              3,
+                              DPO_ADJACENCY_MCAST, ai_1,
+                              DPO_ADJACENCY_MCAST, ai_2,
+                              DPO_ADJACENCY_MCAST, ai_3),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_s_g);
+    mfei = mfib_table_lookup(fib_index, pfx_s_g);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              3,
+                              DPO_ADJACENCY_MCAST, ai_1,
+                              DPO_ADJACENCY_MCAST, ai_2,
+                              DPO_ADJACENCY_MCAST, ai_3),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_s_g);
+
+    mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_slash_m);
+    MFIB_TEST(mfei = mfei_g_m,
+              "%U Found via exact match",
+              format_mfib_prefix, pfx_star_g_slash_m);
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              1,
+                              DPO_ADJACENCY_MCAST, ai_3),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_star_g_slash_m);
+    MFIB_TEST(mfei_g_m == mfib_table_lookup(fib_index, pfx_star_g_slash_m),
+              "%U found via LPM",
+              format_mfib_prefix, pfx_star_g_slash_m);
+
+    /*
+     * Add a for-us path
+     */
+    mfei = mfib_table_entry_path_update(fib_index,
+                                        pfx_s_g,
+                                        MFIB_SOURCE_API,
+                                        &path_for_us,
+                                        MFIB_ITF_FLAG_FORWARD);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              4,
+                              DPO_ADJACENCY_MCAST, ai_1,
+                              DPO_ADJACENCY_MCAST, ai_2,
+                              DPO_ADJACENCY_MCAST, ai_3,
+                              DPO_RECEIVE, 0),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_s_g);
+
+    /*
+     * remove a for-us path
+     */
+    mfib_table_entry_path_remove(fib_index,
+                                 pfx_s_g,
+                                 MFIB_SOURCE_API,
+                                 &path_for_us);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              3,
+                              DPO_ADJACENCY_MCAST, ai_1,
+                              DPO_ADJACENCY_MCAST, ai_2,
+                              DPO_ADJACENCY_MCAST, ai_3),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_s_g);
+
+    /*
+     * update an existing forwarding path to be only accepting
+     *   - expect it to be removed from the replication set.
+     */
+    mfib_table_entry_path_update(fib_index,
+                                 pfx_s_g,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if3,
+                                 MFIB_ITF_FLAG_ACCEPT);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              2,
+                              DPO_ADJACENCY_MCAST, ai_1,
+                              DPO_ADJACENCY_MCAST, ai_2),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_s_g);
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[0]->sw_if_index,
+                                     MFIB_ITF_FLAG_ACCEPT));
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[1]->sw_if_index,
+                                     MFIB_ITF_FLAG_FORWARD));
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index,
+                                     MFIB_ITF_FLAG_FORWARD));
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[3]->sw_if_index,
+                                     MFIB_ITF_FLAG_ACCEPT));
+    /*
+     * Make the path forwarding again
+     *  - expect it to be added back to the replication set
+     */
+    mfib_table_entry_path_update(fib_index,
+                                 pfx_s_g,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if3,
+                                 (MFIB_ITF_FLAG_FORWARD |
+                                  MFIB_ITF_FLAG_ACCEPT |
+                                  MFIB_ITF_FLAG_NEGATE_SIGNAL));
+
+    mfei = mfib_table_lookup_exact_match(fib_index,
+                                         pfx_s_g);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              3,
+                              DPO_ADJACENCY_MCAST, ai_1,
+                              DPO_ADJACENCY_MCAST, ai_2,
+                              DPO_ADJACENCY_MCAST, ai_3),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_s_g);
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[0]->sw_if_index,
+                                     MFIB_ITF_FLAG_ACCEPT));
+     MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[1]->sw_if_index,
+                                     MFIB_ITF_FLAG_FORWARD));
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index,
+                                     MFIB_ITF_FLAG_FORWARD));
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[3]->sw_if_index,
+                                     (MFIB_ITF_FLAG_FORWARD |
+                                      MFIB_ITF_FLAG_ACCEPT |
+                                      MFIB_ITF_FLAG_NEGATE_SIGNAL)));
+
+    /*
+     * update flags on the entry
+     */
+    mfib_table_entry_update(fib_index,
+                            pfx_s_g,
+                            MFIB_SOURCE_API,
+                            MFIB_ENTRY_FLAG_SIGNAL);
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_SIGNAL,
+                              3,
+                              DPO_ADJACENCY_MCAST, ai_1,
+                              DPO_ADJACENCY_MCAST, ai_2,
+                              DPO_ADJACENCY_MCAST, ai_3),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_s_g);
+
+    /*
+     * remove paths
+     */
+    mfib_table_entry_path_remove(fib_index,
+                                 pfx_s_g,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if3);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_SIGNAL,
+                              2,
+                              DPO_ADJACENCY_MCAST, ai_1,
+                              DPO_ADJACENCY_MCAST, ai_2),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_s_g);
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[0]->sw_if_index,
+                                     MFIB_ITF_FLAG_ACCEPT));
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[1]->sw_if_index,
+                                     MFIB_ITF_FLAG_FORWARD));
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index,
+                                     MFIB_ITF_FLAG_FORWARD));
+    MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[3]->sw_if_index));
+
+    mfib_table_entry_path_remove(fib_index,
+                                 pfx_s_g,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if1);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_SIGNAL,
+                              1,
+                              DPO_ADJACENCY_MCAST, ai_2),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_s_g);
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[0]->sw_if_index,
+                                     MFIB_ITF_FLAG_ACCEPT));
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index,
+                                     MFIB_ITF_FLAG_FORWARD));
+    MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[3]->sw_if_index));
+
+    /*
+     * remove the accpeting only interface
+     */
+    mfib_table_entry_path_remove(fib_index,
+                                 pfx_s_g,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if0);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_SIGNAL,
+                              1,
+                              DPO_ADJACENCY_MCAST, ai_2),
+              "%U replicate OK",
+              format_mfib_prefix, pfx_s_g);
+    MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index,
+                                     MFIB_ITF_FLAG_FORWARD));
+    MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[0]->sw_if_index));
+    MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[1]->sw_if_index));
+    MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[3]->sw_if_index));
+
+    /*
+     * remove the last path, the entry still has flags so it remains
+     */
+    mfib_table_entry_path_remove(fib_index,
+                                 pfx_s_g,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if2);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_SIGNAL,
+                              0),
+              "%U no replications",
+              format_mfib_prefix, pfx_s_g);
+
+    /*
+     * update flags on the entry
+     */
+    mfib_table_entry_update(fib_index,
+                            pfx_s_g,
+                            MFIB_SOURCE_API,
+                            (MFIB_ENTRY_FLAG_SIGNAL |
+                             MFIB_ENTRY_FLAG_CONNECTED));
+    MFIB_TEST(mfib_test_entry(mfei,
+                              (MFIB_ENTRY_FLAG_SIGNAL |
+                               MFIB_ENTRY_FLAG_CONNECTED),
+                              0),
+              "%U no replications",
+              format_mfib_prefix, pfx_s_g);
+
+    /*
+     * An entry with a NS interface
+     */
+    mfei_g_2 = mfib_table_entry_path_update(fib_index,
+                                            pfx_star_g_2,
+                                            MFIB_SOURCE_API,
+                                            &path_via_if0,
+                                            (MFIB_ITF_FLAG_ACCEPT |
+                                             MFIB_ITF_FLAG_NEGATE_SIGNAL));
+    MFIB_TEST(mfib_test_entry(mfei_g_2,
+                              MFIB_ENTRY_FLAG_NONE,
+                              0),
+              "%U No replications",
+              format_mfib_prefix, pfx_star_g_2);
+
+    /*
+     * Simulate a signal from the data-plane
+     */
+    {
+        mfib_entry_t *mfe;
+        mfib_itf_t *mfi;
+
+        mfe = mfib_entry_get(mfei_g_2);
+        mfi = mfib_entry_get_itf(mfe, path_via_if0.frp_sw_if_index);
+
+        mfib_signal_push(mfe, mfi, NULL);
+    }
+
+    /*
+     * An entry with a NS interface
+     */
+    mfei_g_3 = mfib_table_entry_path_update(fib_index,
+                                            pfx_star_g_3,
+                                            MFIB_SOURCE_API,
+                                            &path_via_if0,
+                                            (MFIB_ITF_FLAG_ACCEPT |
+                                             MFIB_ITF_NEGATE_SIGNAL));
+    MFIB_TEST(mfib_test_entry(mfei_g_3,
+                              MFIB_ENTRY_FLAG_NONE,
+                              0),
+              "%U No replications",
+              format_mfib_prefix, pfx_star_g_3);
+
+    /*
+     * Simulate a signal from the data-plane
+     */
+    {
+        mfib_entry_t *mfe;
+        mfib_itf_t *mfi;
+
+        mfe = mfib_entry_get(mfei_g_3);
+        mfi = mfib_entry_get_itf(mfe, path_via_if0.frp_sw_if_index);
+
+        mfib_signal_push(mfe, mfi, NULL);
+    }
+
+    if (FIB_PROTOCOL_IP6 == PROTO)
+    {
+        /*
+         * All the entries are present. let's ensure we can find them all
+         * via exact and longest prefix matches.
+         */
+        /*
+         * A source address we will never match
+         */
+        ip6_address_t src = {
+            .as_u64[0] = clib_host_to_net_u64(0x3001000000000000),
+            .as_u64[1] = clib_host_to_net_u64(0xffffffffffffffff),
+        };
+
+        /*
+         * Find the (*,G/m)
+         */
+        MFIB_TEST((mfei_g_m == ip6_mfib_table_lookup2(
+                                   ip6_mfib_get(fib_index),
+                                   &src,
+                                   &pfx_star_g_slash_m->fp_grp_addr.ip6)),
+                  "%U found via DP LPM grp=%U",
+                  format_mfib_prefix, pfx_star_g_slash_m,
+                  format_ip6_address, &pfx_star_g_slash_m->fp_grp_addr.ip6);
+
+        ip6_address_t tmp = pfx_star_g_slash_m->fp_grp_addr.ip6;
+        tmp.as_u8[15] = 0xff;
+
+        MFIB_TEST((mfei_g_m == ip6_mfib_table_lookup2(
+                                   ip6_mfib_get(fib_index),
+                                   &pfx_s_g->fp_src_addr.ip6,
+                                   &tmp)),
+                  "%U found via DP LPM grp=%U",
+                  format_mfib_prefix, pfx_star_g_slash_m,
+                  format_ip6_address, &tmp);
+
+        /*
+         * Find the (S,G).
+         */
+        mfei = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index),
+                                      &pfx_s_g->fp_src_addr.ip6,
+                                      &pfx_s_g->fp_grp_addr.ip6);
+        MFIB_TEST((mfei_s_g == mfei),
+                  "%U found via DP LPM: %d",
+                  format_mfib_prefix, pfx_s_g, mfei);
+
+        /*
+         * Find the 3 (*,G) s
+         */
+        mfei = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index),
+                                      &src,
+                                      &pfx_star_g_1->fp_grp_addr.ip6);
+        MFIB_TEST((mfei_g_1 == mfei),
+                  "%U found via DP LPM: %d",
+                  format_mfib_prefix, pfx_star_g_1, mfei);
+        mfei = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index),
+                                      &src,
+                                      &pfx_star_g_2->fp_grp_addr.ip6);
+        MFIB_TEST((mfei_g_2 == mfei),
+                  "%U found via DP LPM: %d",
+                  format_mfib_prefix, pfx_star_g_2, mfei);
+        mfei = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index),
+                                      &src,
+                                      &pfx_star_g_3->fp_grp_addr.ip6);
+        MFIB_TEST((mfei_g_3 == mfei),
+                  "%U found via DP LPM: %d",
+                  format_mfib_prefix, pfx_star_g_3, mfei);
+    }
+
+    /*
+     * remove flags on the entry. This is the last of the
+     * state associated with the entry, so now it goes.
+     */
+    mfib_table_entry_update(fib_index,
+                            pfx_s_g,
+                            MFIB_SOURCE_API,
+                            MFIB_ENTRY_FLAG_NONE);
+    mfei = mfib_table_lookup_exact_match(fib_index,
+                                         pfx_s_g);
+    MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei,
+              "%U gone",
+              format_mfib_prefix, pfx_s_g);
+
+    /*
+     * remove the last path on the no forward entry - the last entry
+     */
+    mfib_table_entry_path_remove(fib_index,
+                                 pfx_no_forward,
+                                 MFIB_SOURCE_API,
+                                 &path_via_if0);
+
+    mfei = mfib_table_lookup_exact_match(fib_index, pfx_no_forward);
+    MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei,
+              "%U gone",
+              format_mfib_prefix, pfx_no_forward);
+
+    /*
+     * hard delete the (*,232.1.1.1)
+     */
+    mfib_table_entry_delete(fib_index,
+                            pfx_star_g_1,
+                            MFIB_SOURCE_API);
+
+    mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_1);
+    MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei,
+              "%U gone",
+              format_mfib_prefix, pfx_star_g_1);
+    /*
+     * remove the entry whilst the signal is pending
+     */
+    mfib_table_entry_delete(fib_index,
+                            pfx_star_g_2,
+                            MFIB_SOURCE_API);
+
+    mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_2);
+    MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei,
+              "%U Gone",
+              format_mfib_prefix, pfx_star_g_2);
+    mfib_table_entry_delete(fib_index,
+                            pfx_star_g_3,
+                            MFIB_SOURCE_API);
+
+    mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_3);
+    MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei,
+              "%U Gone",
+              format_mfib_prefix, pfx_star_g_3);
+
+    mfib_table_entry_delete(fib_index,
+                            pfx_star_g_slash_m,
+                            MFIB_SOURCE_API);
+
+    mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_slash_m);
+    MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei,
+              "%U Gone",
+              format_mfib_prefix, pfx_star_g_slash_m);
+
+    /*
+     * Unlock the table - it's the last lock so should be gone thereafter
+     */
+    mfib_table_unlock(fib_index, PROTO);
+
+    MFIB_TEST((FIB_NODE_INDEX_INVALID ==
+               mfib_table_find(PROTO, fib_index)),
+              "MFIB table %d gone", fib_index);
+
+    adj_unlock(ai_1);
+    adj_unlock(ai_2);
+    adj_unlock(ai_3);
+
+    /*
+     * test we've leaked no resources
+     */
+    MFIB_TEST(0 == adj_mcast_db_size(), "%d MCAST adjs", adj_mcast_db_size());
+    MFIB_TEST(n_reps == pool_elts(replicate_pool), "%d=%d replicates",
+              n_reps, pool_elts(replicate_pool));
+    MFIB_TEST(n_entries == pool_elts(mfib_entry_pool),
+              " No more entries %d!=%d",
+              n_entries, pool_elts(mfib_entry_pool));
+    MFIB_TEST(n_itfs == pool_elts(mfib_itf_pool),
+              " No more Interfaces %d!=%d",
+              n_itfs, pool_elts(mfib_itf_pool));
+
+    return (0);
+}
+
+static int
+mfib_test_v4 (void)
+{   /* IPv4 run: the six prefixes below are passed to mfib_test_i() */
+    const mfib_prefix_t pfx_224_s_8 = {
+        .fp_len = 8,
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_grp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0xe0000000), /* 224.0.0.0 */
+        }
+    };
+    const mfib_prefix_t pfx_1_1_1_1_c_239_1_1_1 = {
+        .fp_len = 64, /* NOTE(review): presumably 32 group + 32 source bits */
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_grp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0xef010101), /* 239.1.1.1 */
+        },
+        .fp_src_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0x01010101), /* 1.1.1.1 */
+        },
+    };
+    const mfib_prefix_t pfx_239_1_1_1 = {
+        .fp_len = 32,
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_grp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0xef010101), /* 239.1.1.1 */
+        },
+        .fp_src_addr = {
+            .ip4.as_u32 = 0, /* no source */
+        },
+    };
+    const mfib_prefix_t pfx_239_1_1_2 = {
+        .fp_len = 32,
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_grp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0xef010102), /* 239.1.1.2 */
+        },
+        .fp_src_addr = {
+            .ip4.as_u32 = 0, /* no source */
+        },
+    };
+    const mfib_prefix_t pfx_239_1_1_3 = {
+        .fp_len = 32,
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_grp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0xef010103), /* 239.1.1.3 */
+        },
+        .fp_src_addr = {
+            .ip4.as_u32 = 0, /* no source */
+        },
+    };
+    const mfib_prefix_t pfx_239 = {
+        .fp_len = 8,
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_grp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0xef000000), /* 239.0.0.0 */
+        },
+        .fp_src_addr = {
+            .ip4.as_u32 = 0, /* no source */
+        },
+    };
+
+    return (mfib_test_i(FIB_PROTOCOL_IP4,
+                        VNET_LINK_IP4,
+                        &pfx_224_s_8,
+                        &pfx_1_1_1_1_c_239_1_1_1,
+                        &pfx_239_1_1_1,
+                        &pfx_239_1_1_2,
+                        &pfx_239_1_1_3,
+                        &pfx_239));
+}
+
+static int
+mfib_test_v6 (void)
+{   /* IPv6 run: the six prefixes below are passed to mfib_test_i() */
+    const mfib_prefix_t pfx_ffd_s_12 = {
+        .fp_len = 12,
+        .fp_proto = FIB_PROTOCOL_IP6,
+        .fp_grp_addr = { /* ffd0:: */
+            .ip6.as_u64[0] = clib_host_to_net_u64(0xffd0000000000000),
+        }
+    };
+    const mfib_prefix_t pfx_2001_1_c_ff_1 = {
+        .fp_len = 256, /* NOTE(review): presumably 128 group + 128 source bits */
+        .fp_proto = FIB_PROTOCOL_IP6,
+        .fp_grp_addr = { /* ff01::1 */
+            .ip6.as_u64[0] = clib_host_to_net_u64(0xff01000000000000),
+            .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000001),
+        },
+        .fp_src_addr = { /* 2001::1 */
+            .ip6.as_u64[0] = clib_host_to_net_u64(0x2001000000000000),
+            .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000001),
+        },
+    };
+    const mfib_prefix_t pfx_ff_1 = {
+        .fp_len = 128,
+        .fp_proto = FIB_PROTOCOL_IP6,
+        .fp_grp_addr = { /* ff01::1 */
+            .ip6.as_u64[0] = clib_host_to_net_u64(0xff01000000000000),
+            .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000001),
+        },
+    };
+    const mfib_prefix_t pfx_ff_2 = {
+        .fp_len = 128,
+        .fp_proto = FIB_PROTOCOL_IP6,
+        .fp_grp_addr = { /* ff01::2 */
+            .ip6.as_u64[0] = clib_host_to_net_u64(0xff01000000000000),
+            .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000002),
+        },
+    };
+    const mfib_prefix_t pfx_ff_3 = {
+        /* Described as the ALL DHCP routers address. NOTE(review): the
+         * value below is ff02:1::2, while All_DHCP_Relay_Agents_and_Servers
+         * (RFC 8415) is ff02::1:2 - confirm which one is intended. */
+        .fp_len = 128,
+        .fp_proto = FIB_PROTOCOL_IP6,
+        .fp_grp_addr = {
+            .ip6.as_u64[0] = clib_host_to_net_u64(0xff02000100000000),
+            .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000002),
+        },
+    };
+    const mfib_prefix_t pfx_ff = {
+        .fp_len = 16,
+        .fp_proto = FIB_PROTOCOL_IP6,
+        .fp_grp_addr = { /* ff01:: */
+            .ip6.as_u64[0] = clib_host_to_net_u64(0xff01000000000000),
+            .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000000),
+        },
+    };
+
+    return (mfib_test_i(FIB_PROTOCOL_IP6,
+                        VNET_LINK_IP6,
+                        &pfx_ffd_s_12,
+                        &pfx_2001_1_c_ff_1,
+                        &pfx_ff_1,
+                        &pfx_ff_2,
+                        &pfx_ff_3,
+                        &pfx_ff));
+}
+
+static clib_error_t *
+mfib_test (vlib_main_t * vm,
+           unformat_input_t * input,
+           vlib_cli_command_t * cmd_arg)
+{
+    /*
+     * Runs mfib_test_mk_intf(4), then the v4 and the v6 suite, summing
+     * what they return. A non-zero sum is reported as a failure.
+     */
+    int rc_sum;
+
+    rc_sum = mfib_test_mk_intf(4);
+    rc_sum += mfib_test_v4();
+    rc_sum += mfib_test_v6();
+
+    if (0 == rc_sum)
+        return (NULL);
+
+    return clib_error_return(0, "MFIB Unit Test Failed");
+}
+
+VLIB_CLI_COMMAND (test_fib_command, static) = {
+    .path = "test mfib",
+    .short_help = "mfib unit tests - DO NOT RUN ON A LIVE SYSTEM",
+    .function = mfib_test,      /* runs the IPv4 and the IPv6 MFIB suites */
+};
+
+clib_error_t *
+mfib_test_init (vlib_main_t *vm)
+{
+    return (NULL); /* nothing to set up for the tests; NULL is "no error" */
+}
+
+VLIB_INIT_FUNCTION (mfib_test_init);
diff --git a/src/vnet/mfib/mfib_types.c b/src/vnet/mfib/mfib_types.c
new file mode 100644
index 00000000..6d77c3d8
--- /dev/null
+++ b/src/vnet/mfib/mfib_types.c
@@ -0,0 +1,213 @@
+ /*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mfib/mfib_types.h>
+
+#include <vnet/ip/ip.h>
+
+/**
+ * Display names for the entry and the interface flags, indexed by attribute
+ */
+static const char *mfib_flag_names[] = MFIB_ENTRY_NAMES_SHORT;
+static const char *mfib_flag_names_long[] = MFIB_ENTRY_NAMES_LONG;
+
+static const char *mfib_itf_flag_long_names[] = MFIB_ITF_NAMES_LONG;
+static const char *mfib_itf_flag_names[] = MFIB_ITF_NAMES_SHORT;
+
+u8 *
+format_mfib_prefix (u8 * s, va_list * args)
+{
+    mfib_prefix_t *pfx = va_arg (*args, mfib_prefix_t *);
+
+    /*
+     * Renders "(<src>, <grp>/<len>)"; an all-zero source prints as "*".
+     * <len> is fp_len capped at the address width, and the group is
+     * masked with the matching fib_masks entry before it is printed.
+     * Done per protocol so that ::/0 prints correctly; a prefix that is
+     * neither IPv6 nor IPv4 leaves s untouched.
+     * The single va_arg consumed is a pointer to the prefix.
+     */
+    if (FIB_PROTOCOL_IP6 == pfx->fp_proto)
+    {
+        const u32 grp_len = (pfx->fp_len > 128 ? 128 : pfx->fp_len);
+        ip6_address_t grp = pfx->fp_grp_addr.ip6;
+
+        /* work on a copy so the caller's prefix is not modified */
+        ip6_address_mask(&grp, &(ip6_main.fib_masks[grp_len]));
+
+        if (!ip6_address_is_zero(&pfx->fp_src_addr.ip6))
+        {
+            s = format (s, "(%U, ", format_ip6_address, &pfx->fp_src_addr.ip6);
+        }
+        else
+        {
+            s = format(s, "(*, ");
+        }
+        s = format (s, "%U", format_ip6_address, &grp);
+        s = format (s, "/%d)", grp_len);
+    }
+    else if (FIB_PROTOCOL_IP4 == pfx->fp_proto)
+    {
+        const u32 grp_len = (pfx->fp_len > 32 ? 32 : pfx->fp_len);
+        ip4_address_t grp = pfx->fp_grp_addr.ip4;
+
+        /* work on a copy so the caller's prefix is not modified */
+        grp.as_u32 &= ip4_main.fib_masks[grp_len];
+
+        if (0 != pfx->fp_src_addr.ip4.as_u32)
+        {
+            s = format (s, "(%U, ", format_ip4_address, &pfx->fp_src_addr.ip4);
+        }
+        else
+        {
+            s = format(s, "(*, ");
+        }
+        s = format (s, "%U", format_ip4_address, &grp);
+        s = format (s, "/%d)", grp_len);
+    }
+
+    return (s);
+}
+
+u8 *
+format_mfib_entry_flags (u8 * s, va_list * args)
+{
+    mfib_entry_flags_t eflags = va_arg (*args, mfib_entry_flags_t);
+    mfib_entry_attribute_t bit;
+
+    /* with no flag set nothing is appended, not even the label */
+    if (MFIB_ENTRY_FLAG_NONE == eflags)
+        return (s);
+
+    /* label, then the short name of each set flag followed by a comma */
+    s = format(s, " flags:");
+    FOR_EACH_MFIB_ATTRIBUTE(bit) {
+        if (eflags & (1<<bit))
+            s = format (s, "%s,", mfib_flag_names[bit]);
+    }
+
+    return (s);
+}
+
+u8 *
+format_mfib_itf_flags (u8 * s, va_list * args)
+{
+    mfib_itf_flags_t iflags = va_arg (*args, mfib_itf_flags_t);
+    mfib_itf_attribute_t bit;
+
+    /* long name of every set flag, each followed by a comma */
+    FOR_EACH_MFIB_ITF_ATTRIBUTE(bit) {
+        if (0 == (iflags & (1<<bit)))
+            continue;
+
+        s = format (s, "%s,", mfib_itf_flag_long_names[bit]);
+    }
+
+    return (s);
+}
+
+uword
+unformat_mfib_itf_flags (unformat_input_t * input,
+                         va_list * args)
+{
+    mfib_itf_flags_t *iflags = va_arg (*args, mfib_itf_flags_t*);
+    const mfib_itf_flags_t before = *iflags;
+    mfib_itf_attribute_t bit;
+
+    /* try every long name, then every short name, OR-ing in each match */
+    FOR_EACH_MFIB_ITF_ATTRIBUTE(bit) {
+        if (unformat (input, mfib_itf_flag_long_names[bit]))
+            *iflags |= (1 << bit);
+    }
+    FOR_EACH_MFIB_ITF_ATTRIBUTE(bit) {
+        if (unformat (input, mfib_itf_flag_names[bit]))
+            *iflags |= (1 << bit);
+    }
+    return (before != *iflags); /* 1 only if *iflags gained a bit */
+}
+
+uword
+unformat_mfib_entry_flags (unformat_input_t * input,
+                           va_list * args)
+{
+    mfib_entry_flags_t *eflags = va_arg (*args, mfib_entry_flags_t*);
+    const mfib_entry_flags_t before = *eflags;
+    mfib_entry_attribute_t bit;
+
+    /* only the short names are tried; each match is OR-ed into *eflags */
+    FOR_EACH_MFIB_ATTRIBUTE(bit) {
+        if (unformat (input, mfib_flag_names[bit]))
+            *eflags |= (1 << bit);
+    }
+    return (before != *eflags); /* 1 only if *eflags gained a bit */
+}
+
+clib_error_t *
+mfib_show_route_flags (vlib_main_t * vm,
+                       unformat_input_t * main_input,
+                       vlib_cli_command_t * cmd)
+{
+    mfib_entry_attribute_t bit;
+
+    /* "<short name> = <long name>" for every entry flag; input is unused */
+    FOR_EACH_MFIB_ATTRIBUTE(bit)
+        vlib_cli_output(vm, "%s = %s",
+                        mfib_flag_names[bit],
+                        mfib_flag_names_long[bit]);
+
+    return (NULL);
+}
+
+/*?
+ * This command displays the set of flags applicable to an MFIB route
+ */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (mfib_route_flags_command, static) =
+{
+  .path = "sh mfib route flags",
+  .short_help = "Flags applicable to an MFIB route",
+  .function = mfib_show_route_flags,
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+mfib_show_itf_flags (vlib_main_t * vm,
+                     unformat_input_t * main_input,
+                     vlib_cli_command_t * cmd)
+{
+    mfib_itf_attribute_t bit;
+
+    /* "<short name> = <long name>" for every itf flag; input is unused */
+    FOR_EACH_MFIB_ITF_ATTRIBUTE(bit)
+        vlib_cli_output(vm, "%s = %s",
+                        mfib_itf_flag_names[bit],
+                        mfib_itf_flag_long_names[bit]);
+
+    return (NULL);
+}
+
+/*?
+ * This command displays the set of flags applicable to an MFIB interface
+ */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (mfib_itf_flags_command, static) =
+{
+  .path = "sh mfib itf flags",
+  .short_help = "Flags applicable to an MFIB interfaces",
+  .function = mfib_show_itf_flags,
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/mfib/mfib_types.h b/src/vnet/mfib/mfib_types.h
new file mode 100644
index 00000000..37898a07
--- /dev/null
+++ b/src/vnet/mfib/mfib_types.h
@@ -0,0 +1,185 @@
+ /*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MFIB_TYPES_H__
+#define __MFIB_TYPES_H__
+
+#include <vnet/fib/fib_types.h>
+
+/**
+ * Aggregate type for a multicast prefix: a group and a source address
+ */
+typedef struct mfib_prefix_t_ {
+    /**
+     * The mask length (format_mfib_prefix caps it at 32 for v4, 128 for v6)
+     */
+    u16 fp_len;
+
+    /**
+     * protocol type
+     */
+    fib_protocol_t fp_proto;
+
+    /**
+     * Pad to keep the address 4 byte aligned
+     */
+    u8 ___fp___pad;
+
+    /**
+     * Group address, then source address. The address type is not
+     * derivable from these members: a v4 address has its first 3 u32s
+     * zero and a v6 one (even v4 mapped) has at least 2 non-zero u32s,
+     * but an all-zero address is ambiguous.
+     */
+    ip46_address_t fp_grp_addr;
+    ip46_address_t fp_src_addr;
+} mfib_prefix_t;
+
+typedef enum mfib_entry_attribute_t_
+{
+    MFIB_ENTRY_ATTRIBUTE_FIRST = 0,
+    /**
+     * The control plane needs packets matching this entry to generate
+     * a signal.
+     */
+    MFIB_ENTRY_SIGNAL =  MFIB_ENTRY_ATTRIBUTE_FIRST,
+    /**
+     * Drop all traffic to this route
+     */
+    MFIB_ENTRY_DROP,
+    /**
+     * The control plane needs to be informed of connected sources
+     */
+    MFIB_ENTRY_CONNECTED,
+    /**
+     * Accept packets from any incoming interface
+     *        Use with extreme caution
+     */
+    MFIB_ENTRY_ACCEPT_ALL_ITF,
+    MFIB_ENTRY_INHERIT_ACCEPT,
+    MFIB_ENTRY_ATTRIBUTE_LAST = MFIB_ENTRY_INHERIT_ACCEPT,
+} mfib_entry_attribute_t;
+
+#define FOR_EACH_MFIB_ATTRIBUTE(_item)   /* walks FIRST..LAST inclusive */ \
+    for ((_item) = MFIB_ENTRY_ATTRIBUTE_FIRST;                             \
+         (_item) <= MFIB_ENTRY_ATTRIBUTE_LAST;                             \
+         (_item)++)
+
+#define MFIB_ENTRY_NAMES_SHORT  { /* in mfib_entry_attribute_t order */ \
+    [MFIB_ENTRY_SIGNAL]         = "S",                                  \
+    [MFIB_ENTRY_DROP]           = "D",                                  \
+    [MFIB_ENTRY_CONNECTED]      = "C",                                  \
+    [MFIB_ENTRY_ACCEPT_ALL_ITF] = "AA",                                 \
+    [MFIB_ENTRY_INHERIT_ACCEPT] = "IA",                                 \
+}
+
+#define MFIB_ENTRY_NAMES_LONG  { /* in mfib_entry_attribute_t order */ \
+    [MFIB_ENTRY_SIGNAL]         = "Signal",                            \
+    [MFIB_ENTRY_DROP]           = "Drop",                              \
+    [MFIB_ENTRY_CONNECTED]      = "Connected",                         \
+    [MFIB_ENTRY_ACCEPT_ALL_ITF] = "Accept-all-itf",                    \
+    [MFIB_ENTRY_INHERIT_ACCEPT] = "Inherit-Accept",                    \
+}
+
+typedef enum mfib_entry_flags_t_  /* bit mask: one bit per entry attribute */
+{
+    MFIB_ENTRY_FLAG_NONE = 0,
+    MFIB_ENTRY_FLAG_SIGNAL = (1 << MFIB_ENTRY_SIGNAL),
+    MFIB_ENTRY_FLAG_DROP = (1 << MFIB_ENTRY_DROP),
+    MFIB_ENTRY_FLAG_CONNECTED = (1 << MFIB_ENTRY_CONNECTED),
+    MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF = (1 << MFIB_ENTRY_ACCEPT_ALL_ITF),
+    MFIB_ENTRY_FLAG_INHERIT_ACCEPT = (1 << MFIB_ENTRY_INHERIT_ACCEPT),
+} mfib_entry_flags_t;
+
+typedef enum mfib_itf_attribute_t_
+{
+    MFIB_ITF_ATTRIBUTE_FIRST = 0,
+    MFIB_ITF_NEGATE_SIGNAL = MFIB_ITF_ATTRIBUTE_FIRST, /* "NS" */
+    MFIB_ITF_ACCEPT,                                   /* "A"  */
+    MFIB_ITF_FORWARD,                                  /* "F"  */
+    MFIB_ITF_SIGNAL_PRESENT,                           /* "SP" */
+    MFIB_ITF_DONT_PRESERVE,                            /* "DP" */
+    MFIB_ITF_ATTRIBUTE_LAST = MFIB_ITF_DONT_PRESERVE,
+} mfib_itf_attribute_t;
+
+#define FOR_EACH_MFIB_ITF_ATTRIBUTE(_item) /* walks FIRST..LAST inclusive */ \
+    for ((_item) = MFIB_ITF_ATTRIBUTE_FIRST;                                 \
+         (_item) <= MFIB_ITF_ATTRIBUTE_LAST;                                 \
+         (_item)++)
+
+#define MFIB_ITF_NAMES_SHORT  { /* indexed by mfib_itf_attribute_t */ \
+    [MFIB_ITF_NEGATE_SIGNAL]  = "NS",                                 \
+    [MFIB_ITF_ACCEPT]         = "A",                                  \
+    [MFIB_ITF_FORWARD]        = "F",                                  \
+    [MFIB_ITF_SIGNAL_PRESENT] = "SP",                                 \
+    [MFIB_ITF_DONT_PRESERVE]  = "DP",                                 \
+}
+
+#define MFIB_ITF_NAMES_LONG  { /* indexed by mfib_itf_attribute_t */ \
+    [MFIB_ITF_NEGATE_SIGNAL]  = "Negate-Signal",                     \
+    [MFIB_ITF_ACCEPT]         = "Accept",                            \
+    [MFIB_ITF_FORWARD]        = "Forward",                           \
+    [MFIB_ITF_SIGNAL_PRESENT] = "Signal-Present",                    \
+    [MFIB_ITF_DONT_PRESERVE]  = "Don't-Preserve",                    \
+}
+
+typedef enum mfib_itf_flags_t_  /* bit mask: one bit per itf attribute */
+{
+    MFIB_ITF_FLAG_NONE = 0,
+    MFIB_ITF_FLAG_NEGATE_SIGNAL = (1 << MFIB_ITF_NEGATE_SIGNAL),
+    MFIB_ITF_FLAG_ACCEPT = (1 << MFIB_ITF_ACCEPT),
+    MFIB_ITF_FLAG_FORWARD = (1 << MFIB_ITF_FORWARD),
+    MFIB_ITF_FLAG_SIGNAL_PRESENT = (1 << MFIB_ITF_SIGNAL_PRESENT),
+    MFIB_ITF_FLAG_DONT_PRESERVE = (1 << MFIB_ITF_DONT_PRESERVE),
+} mfib_itf_flags_t;
+
+/**
+ * The [control plane] sources an MFIB entry can come from
+ */
+typedef enum mfib_source_t_
+{
+    MFIB_SOURCE_SPECIAL = 0,    /* "Special" */
+    MFIB_SOURCE_API,            /* "API" */
+    MFIB_SOURCE_CLI,            /* "CLI" */
+    MFIB_SOURCE_VXLAN,          /* "VXLAN" */
+    MFIB_SOURCE_DHCP,           /* "DHCP" */
+    MFIB_SOURCE_DEFAULT_ROUTE,  /* "Default Route" */
+} mfib_source_t;
+
+#define MFIB_SOURCE_NAMES { /* in mfib_source_t order */ \
+    [MFIB_SOURCE_SPECIAL]       = "Special",             \
+    [MFIB_SOURCE_API]           = "API",                 \
+    [MFIB_SOURCE_CLI]           = "CLI",                 \
+    [MFIB_SOURCE_VXLAN]         = "VXLAN",               \
+    [MFIB_SOURCE_DHCP]          = "DHCP",                \
+    [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route",       \
+}
+
+/**
+ * \brief Compare two prefixes for equality
+ */
+extern int mfib_prefix_cmp(const mfib_prefix_t *p1,
+                           const mfib_prefix_t *p2);
+
+extern u8 * format_mfib_prefix(u8 * s, va_list * args);
+
+extern u8 *format_mfib_entry_flags(u8 * s, va_list * args);
+extern u8 *format_mfib_itf_flags(u8 * s, va_list * args);
+extern uword unformat_mfib_itf_flags(unformat_input_t * input,
+                                     va_list * args);
+extern uword unformat_mfib_entry_flags(unformat_input_t * input,
+                                       va_list * args);
+
+#endif
diff --git a/src/vnet/misc.c b/src/vnet/misc.c
index 4c8c4cad..9cfe8394 100644
--- a/src/vnet/misc.c
+++ b/src/vnet/misc.c
@@ -83,6 +83,9 @@ vnet_main_init (vlib_main_t * vm)
   if ((error = vlib_call_init_function (vm, fib_module_init)))
     return error;
 
+  if ((error = vlib_call_init_function (vm, mfib_module_init)))
+    return error;
+
   if ((error = vlib_call_init_function (vm, ip_main_init)))
     return error;
 
diff --git a/src/vnet/rewrite.h b/src/vnet/rewrite.h
index 00c1efbd..ce2bce3a 100644
--- a/src/vnet/rewrite.h
+++ b/src/vnet/rewrite.h
@@ -64,6 +64,16 @@ typedef CLIB_PACKED (struct {
      Used for MTU check after packet rewrite. */
   u16 max_l3_packet_bytes;
 
+  /* When dynamically writing a multicast destination L2 address
+   * this is the offset within the address to start writing n
+   * bytes of the IP mcast address */
+  u8 dst_mcast_offset;
+
+  /* When dynamically writing a multicast destination L2 address
+   * this is the number of bytes of the dest IP address to write into
+   * the MAC rewrite */
+  u8 dst_mcast_n_bytes;
+
   /* Rewrite string starting at end and going backwards. */
   u8 data[0];
 }) vnet_rewrite_header_t;
@@ -261,6 +271,27 @@ _vnet_rewrite_two_headers (vnet_rewrite_header_t * h0,
 			     sizeof ((rw0).rewrite_data),		\
 			     (most_likely_size))
 
+always_inline void
+_vnet_fixup_one_header (vnet_rewrite_header_t * h0,
+			u8 * addr, u32 addr_len,
+			u8 * packet0, int clear_first_bit)
+{
+  /* Copies the last dst_mcast_n_bytes of the L3 destination address to the
+   * location dst_mcast_offset bytes before packet0. */
+  u8 *p0 = packet0 - h0->dst_mcast_offset;
+  u8 *a0 = addr + addr_len - h0->dst_mcast_n_bytes;
+  clib_memcpy (p0, a0, h0->dst_mcast_n_bytes);
+  /* optionally clear the most significant bit of the first byte written */
+  if (clear_first_bit)
+    *p0 &= 0x7f;
+}
+
+/* addr_len is sizeof (*(addr)): pass a pointer to the whole L3 address */
+#define vnet_fixup_one_header(rw0,addr,p0,clear_first_bit)              \
+  _vnet_fixup_one_header (&((rw0).rewrite_header),                      \
+                          (u8*)(addr), sizeof (*(addr)),                \
+                          (u8*)(p0), (clear_first_bit))
+
 #define VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST ((void *) 0)
 /** Deprecated */
 void vnet_rewrite_for_sw_interface (struct vnet_main_t *vnm,
diff --git a/src/vnet/sr/sr.c b/src/vnet/sr/sr.c
index 5d0275d9..f30c0da9 100644
--- a/src/vnet/sr/sr.c
+++ b/src/vnet/sr/sr.c
@@ -2161,7 +2161,7 @@ sr_fix_dst_addr (vlib_main_t * vm,
 
 	  adj0 =
 	    ip_get_adjacency (lm, vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  next0 = adj0->mcast_group_index;
+	  next0 = adj0->if_address_index;
 
 	  /* We should be pointing at an Ethernet header... */
 	  eh0 = vlib_buffer_get_current (b0);
@@ -3088,7 +3088,7 @@ set_ip6_sr_rewrite_fn (vlib_main_t * vm,
   adj->rewrite_header.node_index = sr_fix_dst_addr_node.index;
 
   /* $$$$$ hack... steal the mcast group index */
-  adj->mcast_group_index =
+  adj->if_address_index =
     vlib_node_add_next (vm, sr_fix_dst_addr_node.index,
 			hi->output_node_index);
 
diff --git a/src/vnet/util/radix.c b/src/vnet/util/radix.c
new file mode 100644
index 00000000..ff0b0f7b
--- /dev/null
+++ b/src/vnet/util/radix.c
@@ -0,0 +1,1104 @@
+/*	$NetBSD: radix.c,v 1.47 2016/12/12 03:55:57 ozaki-r Exp $	*/
+
+/*
+ * Copyright (c) 1988, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)radix.c	8.6 (Berkeley) 10/17/95
+ */
+
+/*
+ * Routines to build and maintain radix trees for routing lookups.
+ */
+
+#include <vnet/util/radix.h>
+
+typedef void (*rn_printer_t)(void *, const char *fmt, ...);
+
+static int max_keylen = 33; // me: caps the mask length in rn_addmask; sizes rn_zeros/rn_ones/addmask_key
+struct radix_mask *rn_mkfreelist;	/* not referenced anywhere else in this file */
+struct radix_node_head *mask_rnhead;	/* tree holding every distinct mask; created by rn_module_init */
+static char *addmask_key;		/* scratch buffer in which rn_addmask builds the mask to look up */
+static const char normal_chars[] =	/* [b] is the byte whose top b bits are set */
+    {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1};
+static char *rn_zeros, *rn_ones;	/* all-zero and all-ones keys, max_keylen bytes each */
+
+#define rn_masktop (mask_rnhead->rnh_treetop)
+
+static int rn_satisfies_leaf(const char *, struct radix_node *, int);
+static int rn_lexobetter(const void *, const void *);
+static struct radix_mask *rn_new_radix_mask(struct radix_node *,
+    struct radix_mask *);
+static struct radix_node *rn_walknext(struct radix_node *, rn_printer_t,
+    void *);
+static struct radix_node *rn_walkfirst(struct radix_node *, rn_printer_t,
+    void *);
+static void rn_nodeprint(struct radix_node *, rn_printer_t, void *,
+    const char *);
+
+#define	SUBTREE_OPEN	"[ "
+#define	SUBTREE_CLOSE	" ]"
+
+#ifdef RN_DEBUG
+static void rn_treeprint(struct radix_node_head *, rn_printer_t, void *);
+#endif /* RN_DEBUG */
+
+#define MIN(x,y) (((x)<(y))?(x):(y))
+
+static struct radix_mask*
+rm_alloc (void)
+{
+    /* Take one radix_mask from the clib heap and hand it back zero-filled. */
+    struct radix_mask *mask;
+
+    mask = clib_mem_alloc(sizeof(struct radix_mask));
+    return memset(mask, 0, sizeof(*mask));
+}
+
+static void	/* hand a radix_mask back to the clib heap */
+rm_free (struct radix_mask *rm)
+{
+    clib_mem_free(rm);
+}
+
+#define R_Malloc(p, t, n)                               \
+{                                                       \
+    p = (t) clib_mem_alloc((unsigned int)(n));          \
+    memset(p, 0, n);                                    \
+}
+#define Free(p) clib_mem_free((p))	/* release a block obtained with R_Malloc */
+#define log(a,b, c...)			/* expands to nothing: every log() call in this file is a no-op */
+#define bool i32			/* bool is a plain 32-bit integer in this file */
+
+/*
+ * The data structure for the keys is a radix tree with one way
+ * branching removed.  The index rn_b at an internal node n represents a bit
+ * position to be tested.  The tree is arranged so that all descendants
+ * of a node n have keys whose bits all agree up to position rn_b - 1.
+ * (We say the index of n is rn_b.)
+ *
+ * There is at least one descendant which has a one bit at position rn_b,
+ * and at least one with a zero there.
+ *
+ * A route is determined by a pair of key and mask.  We require the
+ * bit-wise logical AND of the key and mask to be the key.
+ * We define the index of a route to be the index of its mask, i.e.
+ * the first bit number in the mask where 0 occurs (with bit number 0
+ * representing the highest order bit).
+ *
+ * We say a mask is normal if every bit is 0, past the index of the mask.
+ * If a node n has a descendant (k, m) with index(m) == index(n) == rn_b,
+ * and m is a normal mask, then the route applies to every descendant of n.
+ * If the index(m) < rn_b, this implies the trailing last few bits of k
+ * before bit b are all 0, (and hence consequently true of every descendant
+ * of n), so the route applies to all descendants of the node as well.
+ *
+ * Similar logic shows that a non-normal mask m such that
+ * index(m) <= index(n) could potentially apply to many children of n.
+ * Thus, for each non-host route, we attach its mask to a list at an internal
+ * node as high in the tree as we can go.
+ *
+ * The present version of the code makes use of normal routes in short-
+ * circuiting an explicit mask and compare operation when testing whether
+ * a key satisfies a normal route, and also in remembering the unique leaf
+ * that governs a subtree.
+ */
+
+struct radix_node *
+rn_search(
+	const void *v_arg,
+	struct radix_node *head)
+{
+	/* Walk down from head, branching on one key bit per internal node. */
+	const u8 *key = v_arg;
+	struct radix_node *node = head;
+
+	/* internal nodes have rn_b >= 0; the first node with rn_b < 0 ends the walk */
+	while (node->rn_b >= 0) {
+		const int bit_set = (node->rn_bmask & key[node->rn_off]) != 0;
+		node = bit_set ? node->rn_r : node->rn_l;
+	}
+	return node;
+}
+
+struct radix_node *
+rn_search_m(
+	const void *v_arg,
+	struct radix_node *head,
+	const void *m_arg)
+{
+	/* Same descent as rn_search, except that the right branch is taken
+	 * only when the tested bit is set in the mask and in the key. */
+	const u8 *key = v_arg;
+	const u8 *mask = m_arg;
+	struct radix_node *node = head;
+
+	while (node->rn_b >= 0) {
+		const int off = node->rn_off;
+		const int go_right = (node->rn_bmask & mask[off]) && (node->rn_bmask & key[off]);
+		node = go_right ? node->rn_r : node->rn_l;
+	}
+	return node;
+}
+
+int	/* 1 if mask m has every bit that mask n has and the two differ, else 0 */
+rn_refines(
+	const void *m_arg,
+	const void *n_arg)
+{
+	const char *m = m_arg;
+	const char *n = n_arg;
+	const char *lim = n + *(const u8 *)n;	/* end of n: byte 0 of a mask is its length */
+	const char *lim2 = lim;
+	int longer = (*(const u8 *)n++) - (int)(*(const u8 *)m++);	/* len(n) - len(m); both pointers step past the length byte */
+	int masks_are_equal = 1;
+
+	if (longer > 0)
+		lim -= longer;	/* n is longer: first compare only the bytes m also has */
+	while (n < lim) {
+		if (*n & ~(*m))
+			return 0;	/* n has a bit that m lacks */
+		if (*n++ != *m++)
+			masks_are_equal = 0;
+	}
+	while (n < lim2)	/* any bytes of n beyond m's length must be zero */
+		if (*n++)
+			return 0;
+	if (masks_are_equal && (longer < 0))	/* m is longer: a non-zero extra byte makes it differ */
+		for (lim2 = m - longer; m < lim2; )
+			if (*m++)
+				return 1;
+	return !masks_are_equal;
+}
+
+struct radix_node *
+rn_lookup(
+	const void *v_arg,
+	const void *m_arg,
+	struct radix_node_head *head)
+{
+	struct radix_node *leaf;
+	const char *wanted = NULL;
+
+	if (m_arg != NULL) {
+		leaf = rn_addmask(m_arg, 1, head->rnh_treetop->rn_off);	/* search only */
+		if (leaf == NULL)
+			return NULL;
+		wanted = leaf->rn_key;
+	}
+	leaf = rn_match(v_arg, head);
+	/* with a mask given, walk the duplicate-key chain to the entry using it */
+	while (wanted != NULL && leaf != NULL && leaf->rn_mask != wanted)
+		leaf = leaf->rn_dupedkey;
+	return leaf;
+}
+
+static int	/* 1 if trial equals the leaf's key on every bit the leaf's mask selects, else 0 */
+rn_satisfies_leaf(
+	const char *trial,
+	struct radix_node *leaf,
+	int skip)
+{
+	const char *cp = trial;
+	const char *cp2 = leaf->rn_key;
+	const char *cp3 = leaf->rn_mask;
+	const char *cplim;
+	int length = MIN(*(const u8 *)cp, *(const u8 *)cp2);	/* byte 0 of a key holds its length */
+
+	if (cp3 == 0)
+		cp3 = rn_ones;	/* leaf has no mask: compare every bit */
+	else
+		length = MIN(length, *(const u8 *)cp3);
+	cplim = cp + length; cp3 += skip; cp2 += skip;	/* the first skip bytes are not compared */
+	for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
+		if ((*cp ^ *cp2) & *cp3)
+			return 0;
+	return 1;
+}
+
+struct radix_node *	/* leaf that best matches key v_arg in head's tree, or NULL if none applies */
+rn_match(
+	const void *v_arg,
+	struct radix_node_head *head)
+{
+	const char * const v = v_arg;
+	struct radix_node *t = head->rnh_treetop;
+	struct radix_node *top = t;
+	struct radix_node *x;
+	struct radix_node *saved_t;
+	const char *cp = v;
+	const char *cp2;
+	const char *cplim;
+	int off = t->rn_off;	/* byte offset at which key comparison starts */
+	int vlen = *(const u8 *)cp;	/* byte 0 of the key holds its length */
+	int matched_off;
+	int test, b, rn_b;
+
+	/*
+	 * Open code rn_search(v, top) to avoid overhead of extra
+	 * subroutine call.
+	 */
+	for (; t->rn_b >= 0; ) {
+		if (t->rn_bmask & cp[t->rn_off])
+			t = t->rn_r;
+		else
+			t = t->rn_l;
+	}
+	/*
+	 * See if we match exactly as a host destination
+	 * or at least learn how many bits match, for normal mask finesse.
+	 *
+	 * It doesn't hurt us to limit how many bytes to check
+	 * to the length of the mask, since if it matches we had a genuine
+	 * match and the leaf we have is the most specific one anyway;
+	 * if it didn't match with a shorter length it would fail
+	 * with a long one.  This wins big for class B&C netmasks which
+	 * are probably the most common case...
+	 */
+	if (t->rn_mask)
+		vlen = *(const u8 *)t->rn_mask;
+	cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
+	for (; cp < cplim; cp++, cp2++)
+		if (*cp != *cp2)
+			goto on1;
+	/*
+	 * This extra grot is in case we are explicitly asked
+	 * to look up the default.  Ugh!
+	 */
+	if ((t->rn_flags & RNF_ROOT) && t->rn_dupedkey)
+		t = t->rn_dupedkey;
+	return t;
+on1:
+	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
+	for (b = 7; (test >>= 1) > 0;)
+		b--;
+	matched_off = cp - v;	/* offset of the first byte that differs */
+	b += matched_off << 3;
+	rn_b = -1 - b;	/* same encoding as a leaf's rn_b, for direct comparison below */
+	/*
+	 * If there is a host route in a duped-key chain, it will be first.
+	 */
+	if ((saved_t = t)->rn_mask == 0)
+		t = t->rn_dupedkey;
+	for (; t; t = t->rn_dupedkey)
+		/*
+		 * Even if we don't match exactly as a host,
+		 * we may match if the leaf we wound up at is
+		 * a route to a net.
+		 */
+		if (t->rn_flags & RNF_NORMAL) {
+			if (rn_b <= t->rn_b)
+				return t;
+		} else if (rn_satisfies_leaf(v, t, matched_off))
+				return t;
+	t = saved_t;
+	/* start searching up the tree */
+	do {
+		struct radix_mask *m;
+		t = t->rn_p;
+		m = t->rn_mklist;
+		if (m) {
+			/*
+			 * If non-contiguous masks ever become important
+			 * we can restore the masking and open coding of
+			 * the search and satisfaction test and put the
+			 * calculation of "off" back before the "do".
+			 */
+			do {
+				if (m->rm_flags & RNF_NORMAL) {
+					if (rn_b <= m->rm_b)
+						return m->rm_leaf;
+				} else {
+					off = MIN(t->rn_off, matched_off);
+					x = rn_search_m(v, t, m->rm_mask);
+					while (x && x->rn_mask != m->rm_mask)
+						x = x->rn_dupedkey;
+					if (x && rn_satisfies_leaf(v, x, off))
+						return x;
+				}
+				m = m->rm_mklist;
+			} while (m);
+		}
+	} while (t != top);
+	return NULL;
+}
+
+static void	/* emit one node's address plus its parent/left/right pointers through printer */
+rn_nodeprint(struct radix_node *rn, rn_printer_t printer, void *arg,
+    const char *delim)
+{
+	(*printer)(arg, "%s(%s%p: p<%p> l<%p> r<%p>)",
+	    delim, ((void *)rn == arg) ? "*" : "", rn, rn->rn_p,	/* "*" flags the node whose address equals arg */
+	    rn->rn_l, rn->rn_r);
+}
+
+#ifdef RN_DEBUG
+int	rn_debug =  1;
+
+static void	/* rn_printer_t callback: forwards fmt and its arguments to vlog() at LOG_DEBUG */
+rn_dbg_print(void *arg, const char *fmt, ...)
+{
+	va_list ap;	/* arg itself is not used here */
+
+	va_start(ap, fmt);
+	vlog(LOG_DEBUG, fmt, ap);
+	va_end(ap);
+}
+
+static void	/* print every non-root leaf of h through printer; no-op when printer is NULL */
+rn_treeprint(struct radix_node_head *h, rn_printer_t printer, void *arg)
+{
+	struct radix_node *dup, *rn;
+	const char *delim;
+
+	if (printer == NULL)
+		return;
+
+	rn = rn_walkfirst(h->rnh_treetop, printer, arg);
+	for (;;) {
+		/* Process leaves */
+		delim = "";
+		for (dup = rn; dup != NULL; dup = dup->rn_dupedkey) {
+			if ((dup->rn_flags & RNF_ROOT) != 0)
+				continue;
+			rn_nodeprint(dup, printer, arg, delim);
+			delim = ", ";	/* separator between entries that share one key */
+		}
+		rn = rn_walknext(rn, printer, arg);
+		if (rn->rn_flags & RNF_ROOT)	/* reached a root leaf: the walk is complete */
+			return;
+	}
+	/* NOTREACHED */
+}
+
+#define	traverse(__head, __rn)	rn_treeprint((__head), rn_dbg_print, (__rn))
+#endif /* RN_DEBUG */
+
+struct radix_node *
+rn_newpair(
+	const void *v,
+	int b,
+	struct radix_node nodes[2])
+{
+	struct radix_node *leaf = &nodes[0];	/* will carry key v */
+	struct radix_node *inner = &nodes[1];	/* will test bit b; returned to the caller */
+	leaf->rn_b = -1; leaf->rn_key = v; leaf->rn_p = inner;
+	inner->rn_b = b; inner->rn_off = b >> 3;
+	inner->rn_bmask = 0x80 >> (b & 7); inner->rn_l = leaf;
+	leaf->rn_flags = inner->rn_flags = RNF_ACTIVE;
+	return inner;
+}
+
+struct radix_node *	/* leaf for key v_arg: the existing one (*dupentry = 1) or nodes[0] newly linked in (*dupentry = 0) */
+rn_insert(
+	const void *v_arg,
+	struct radix_node_head *head,
+	int *dupentry,
+	struct radix_node nodes[2])
+{
+	struct radix_node *top = head->rnh_treetop;
+	struct radix_node *t = rn_search(v_arg, top);
+	struct radix_node *tt;
+	const char *v = v_arg;
+	int head_off = top->rn_off;
+	int vlen = *((const u8 *)v);	/* byte 0 of the key holds its length */
+	const char *cp = v + head_off;
+	int b;
+    	/*
+	 * Find first bit at which v and t->rn_key differ
+	 */
+    {
+	const char *cp2 = t->rn_key + head_off;
+	const char *cplim = v + vlen;
+	int cmp_res;
+
+	while (cp < cplim)
+		if (*cp2++ != *cp++)
+			goto on1;
+	*dupentry = 1;	/* key is already present: nodes[] is left unused */
+	return t;
+on1:
+	*dupentry = 0;
+	cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
+	for (b = (cp - v) << 3; cmp_res; b--)
+		cmp_res >>= 1;
+    }
+    {
+	struct radix_node *p, *x = top;
+	cp = v;
+	do {
+		p = x;
+		if (cp[x->rn_off] & x->rn_bmask)
+			x = x->rn_r;
+		else x = x->rn_l;
+	} while (b > (unsigned) x->rn_b); /* x->rn_b < b && x->rn_b >= 0 */
+#ifdef RN_DEBUG
+	if (rn_debug)
+		log(LOG_DEBUG, "%s: Going In:\n", __func__), traverse(head, p);
+#endif
+	t = rn_newpair(v_arg, b, nodes); tt = t->rn_l;	/* t: new internal node testing bit b, tt: new leaf */
+	if ((cp[p->rn_off] & p->rn_bmask) == 0)
+		p->rn_l = t;
+	else
+		p->rn_r = t;
+	x->rn_p = t; t->rn_p = p; /* frees x, p as temp vars below */
+	if ((cp[t->rn_off] & t->rn_bmask) == 0) {
+		t->rn_r = x;
+	} else {
+		t->rn_r = tt; t->rn_l = x;
+	}
+#ifdef RN_DEBUG
+	if (rn_debug) {
+		log(LOG_DEBUG, "%s: Coming Out:\n", __func__),
+		    traverse(head, p);
+	}
+#endif /* RN_DEBUG */
+    }
+	return tt;
+}
+
+struct radix_node *	/* node of the mask tree holding mask n_arg; with search != 0 only looks it up (0 if absent) */
+rn_addmask(
+	const void *n_arg,
+	int search,
+	int skip)
+{
+	const char *netmask = n_arg;
+	const char *cp;
+	const char *cplim;
+	struct radix_node *x;
+	struct radix_node *saved_x;
+	int b = 0, mlen, j;
+	int maskduplicated, m0, isnormal;
+	static int last_zeroed = 0;	/* kept between calls, together with the shared addmask_key buffer */
+
+	if ((mlen = *(const u8 *)netmask) > max_keylen)
+		mlen = max_keylen;
+	if (skip == 0)
+		skip = 1;
+	if (mlen <= skip)
+		return mask_rnhead->rnh_nodes;	/* no mask bytes past skip: answer with the mask tree's first node */
+	if (skip > 1)
+		memmove(addmask_key + 1, rn_ones + 1, skip - 1);
+	if ((m0 = mlen) > skip)
+		memmove(addmask_key + skip, netmask + skip, mlen - skip);
+	/*
+	 * Trim trailing zeroes.
+	 */
+	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
+		cp--;
+	mlen = cp - addmask_key;
+	if (mlen <= skip) {
+		if (m0 >= last_zeroed)
+			last_zeroed = mlen;
+		return mask_rnhead->rnh_nodes;
+	}
+	if (m0 < last_zeroed)
+		memset(addmask_key + m0, 0, last_zeroed - m0);
+	*addmask_key = last_zeroed = mlen;	/* byte 0 of the stored mask is its trimmed length */
+	x = rn_search(addmask_key, rn_masktop);
+	if (memcmp(addmask_key, x->rn_key, mlen) != 0)
+		x = 0;
+	if (x || search)
+		return x;	/* already stored, or lookup-only (x is 0 when the mask is unknown) */
+	R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
+	if ((saved_x = x) == NULL)
+		return NULL;
+	memset(x, 0, max_keylen + 2 * sizeof (*x));	/* repeats the zeroing R_Malloc already did */
+	cp = netmask = (void *)(x + 2);	/* the mask bytes live right after the two nodes */
+	memmove(x + 2, addmask_key, mlen);
+	x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
+	if (maskduplicated) {
+                log(LOG_ERR, "rn_addmask: mask impossibly already in tree\n");
+		Free(saved_x);
+		return x;
+	}
+	/*
+	 * Calculate index of mask, and check for normalcy.
+	 */
+	cplim = netmask + mlen; isnormal = 1;
+	for (cp = netmask + skip; (cp < cplim) && *(const u8 *)cp == 0xff;)
+		cp++;
+	if (cp != cplim) {
+		for (j = 0x80; (j & *cp) != 0; j >>= 1)
+			b++;
+		if (*cp != normal_chars[b] || cp != (cplim - 1))
+			isnormal = 0;
+	}
+	b += (cp - netmask) << 3;
+	x->rn_b = -1 - b;	/* leaf encoding: -1 - index(mask) */
+	if (isnormal)
+		x->rn_flags |= RNF_NORMAL;
+	return x;
+}
+
+static int	/* XXX: arbitrary ordering for non-contiguous masks */
+rn_lexobetter(
+	const void *m_arg,
+	const void *n_arg)
+{
+	const u8 *lhs = m_arg;
+	const u8 *rhs = n_arg;
+	int i;
+
+	if (lhs[0] != rhs[0])
+		return lhs[0] > rhs[0];  /* not really, but need to check longer one first */
+	/* equal lengths: 1 as soon as some byte of m exceeds the same byte of n */
+	for (i = 1; i < lhs[0]; i++)
+		if (lhs[i] > rhs[i])
+			return 1;
+	return 0;
+}
+
+static struct radix_mask *	/* new annotation for leaf tt, chained in front of next; NULL if none was obtained */
+rn_new_radix_mask(
+	struct radix_node *tt,
+	struct radix_mask *next)
+{
+	struct radix_mask *m;
+
+	m = rm_alloc();
+	if (m == NULL) {
+		log(LOG_ERR, "Mask for route not entered\n");
+		return NULL;
+	}
+	/* rm_alloc() hands back zeroed memory, so rm_refs and friends start at 0 */
+	m->rm_b = tt->rn_b;
+	m->rm_flags = tt->rn_flags;
+	if (tt->rn_flags & RNF_NORMAL)
+		m->rm_leaf = tt;	/* normal route: remember the leaf itself */
+	else
+		m->rm_mask = tt->rn_mask;	/* otherwise remember only the mask */
+	m->rm_mklist = next;
+	tt->rn_mklist = m;
+	return m;
+}
+
+struct radix_node *	/* new leaf for (key v_arg, mask n_arg) built in treenodes[]; NULL if the mask can't be entered or the pair already exists */
+rn_addroute(
+	const void *v_arg,
+	const void *n_arg,
+	struct radix_node_head *head,
+	struct radix_node treenodes[2])
+{
+	const char *v = v_arg, *netmask = n_arg;
+	struct radix_node *t, *x = NULL, *tt;
+	struct radix_node *saved_tt, *top = head->rnh_treetop;
+	short b = 0, b_leaf = 0;
+	int keyduplicated;
+	const char *mmask;
+	struct radix_mask *m, **mp;
+
+	/*
+	 * In dealing with non-contiguous masks, there may be
+	 * many different routes which have the same mask.
+	 * We will find it useful to have a unique pointer to
+	 * the mask to speed avoiding duplicate references at
+	 * nodes and possibly save time in calculating indices.
+	 */
+	if (netmask != NULL) {
+		if ((x = rn_addmask(netmask, 0, top->rn_off)) == NULL)
+			return NULL;
+		b_leaf = x->rn_b;	/* leaf encoding: -1 - index(mask) */
+		b = -1 - x->rn_b;	/* index(mask) as a plain bit number */
+		netmask = x->rn_key;	/* from here on masks are compared by pointer */
+	}
+	/*
+	 * Deal with duplicated keys: attach node to previous instance
+	 */
+	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
+	if (keyduplicated) {
+		for (t = tt; tt != NULL; t = tt, tt = tt->rn_dupedkey) {
+			if (tt->rn_mask == netmask)
+				return NULL;	/* this key/mask pair is already in the tree */
+			if (netmask == NULL ||
+			    (tt->rn_mask != NULL &&
+			     (b_leaf < tt->rn_b || /* index(netmask) > node */
+			       rn_refines(netmask, tt->rn_mask) ||
+			       rn_lexobetter(netmask, tt->rn_mask))))
+				break;
+		}
+		/*
+		 * If the mask is not duplicated, we wouldn't
+		 * find it among possible duplicate key entries
+		 * anyway, so the above test doesn't hurt.
+		 *
+		 * We sort the masks for a duplicated key the same way as
+		 * in a masklist -- most specific to least specific.
+		 * This may require the unfortunate nuisance of relocating
+		 * the head of the list.
+		 *
+		 * We also reverse, or doubly link the list through the
+		 * parent pointer.
+		 */
+		if (tt == saved_tt) {
+			struct	radix_node *xx = x;
+			/* link in at head of list */
+			(tt = treenodes)->rn_dupedkey = t;
+			tt->rn_flags = t->rn_flags;
+			tt->rn_p = x = t->rn_p;
+			t->rn_p = tt;
+			if (x->rn_l == t)
+				x->rn_l = tt;
+			else
+				x->rn_r = tt;
+			saved_tt = tt;
+			x = xx;
+		} else {
+			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
+			t->rn_dupedkey = tt;
+			tt->rn_p = t;
+			if (tt->rn_dupedkey)
+				tt->rn_dupedkey->rn_p = tt;
+		}
+		tt->rn_key = v;
+		tt->rn_b = -1;
+		tt->rn_flags = RNF_ACTIVE;
+	}
+	/*
+	 * Put mask in tree.
+	 */
+	if (netmask != NULL) {
+		tt->rn_mask = netmask;
+		tt->rn_b = x->rn_b;
+		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
+	}
+	t = saved_tt->rn_p;
+	if (keyduplicated)
+		goto on2;
+	b_leaf = -1 - t->rn_b;
+	if (t->rn_r == saved_tt)
+		x = t->rn_l;
+	else
+		x = t->rn_r;
+	/* Promote general routes from below */
+	if (x->rn_b < 0) {
+		for (mp = &t->rn_mklist; x != NULL; x = x->rn_dupedkey) {
+			if (x->rn_mask != NULL && x->rn_b >= b_leaf &&
+			    x->rn_mklist == NULL) {
+				*mp = m = rn_new_radix_mask(x, NULL);
+				if (m != NULL)
+					mp = &m->rm_mklist;
+			}
+		}
+	} else if (x->rn_mklist != NULL) {
+		/*
+		 * Skip over masks whose index is > that of new node
+		 */
+		for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
+			if (m->rm_b >= b_leaf)
+				break;
+		t->rn_mklist = m;
+		*mp = NULL;
+	}
+on2:
+	/* Add new route to highest possible ancestor's list */
+	if (netmask == NULL || b > t->rn_b)
+		return tt; /* can't lift at all */
+	b_leaf = tt->rn_b;
+	do {
+		x = t;
+		t = t->rn_p;
+	} while (b <= t->rn_b && x != top);
+	/*
+	 * Search through routes associated with node to
+	 * insert new route according to index.
+	 * Need same criteria as when sorting dupedkeys to avoid
+	 * double loop on deletion.
+	 */
+	for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) {
+		if (m->rm_b < b_leaf)
+			continue;
+		if (m->rm_b > b_leaf)
+			break;
+		if (m->rm_flags & RNF_NORMAL) {
+			mmask = m->rm_leaf->rn_mask;
+			if (tt->rn_flags & RNF_NORMAL) {
+				log(LOG_ERR, "Non-unique normal route,"
+				    " mask not entered\n");
+				return tt;
+			}
+		} else
+			mmask = m->rm_mask;
+		if (mmask == netmask) {
+			m->rm_refs++;	/* share the existing annotation instead of adding another */
+			tt->rn_mklist = m;
+			return tt;
+		}
+		if (rn_refines(netmask, mmask) || rn_lexobetter(netmask, mmask))
+			break;
+	}
+	*mp = rn_new_radix_mask(tt, *mp);
+	return tt;
+}
+
+struct radix_node *	/* leaf removed for (v_arg, netmask_arg), now marked inactive; NULL if absent, a root, or inconsistent */
+rn_delete1(
+	const void *v_arg,
+	const void *netmask_arg,
+	struct radix_node_head *head,
+	struct radix_node *rn)	/* rn is never read in this body */
+{
+	struct radix_node *t, *p, *x, *tt;
+	struct radix_mask *m, *saved_m, **mp;
+	struct radix_node *dupedkey, *saved_tt, *top;
+	const char *v, *netmask;
+	int b, head_off, vlen;
+
+	v = v_arg;
+	netmask = netmask_arg;
+	x = head->rnh_treetop;
+	tt = rn_search(v, x);
+	head_off = x->rn_off;
+	vlen =  *(const u8 *)v;
+	saved_tt = tt;
+	top = x;
+	if (tt == NULL ||
+	    memcmp(v + head_off, tt->rn_key + head_off, vlen - head_off) != 0)
+		return NULL;	/* key is not in the tree */
+	/*
+	 * Delete our route from mask lists.
+	 */
+	if (netmask != NULL) {
+		if ((x = rn_addmask(netmask, 1, head_off)) == NULL)
+			return NULL;
+		netmask = x->rn_key;	/* shared copy, so the loop below can compare by pointer */
+		while (tt->rn_mask != netmask)
+			if ((tt = tt->rn_dupedkey) == NULL)
+				return NULL;
+	}
+	if (tt->rn_mask == NULL || (saved_m = m = tt->rn_mklist) == NULL)
+		goto on1;
+	if (tt->rn_flags & RNF_NORMAL) {
+		if (m->rm_leaf != tt || m->rm_refs > 0) {
+			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
+			return NULL;  /* dangling ref could cause disaster */
+		}
+	} else {
+		if (m->rm_mask != tt->rn_mask) {
+			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
+			goto on1;
+		}
+		if (--m->rm_refs >= 0)	/* other routes still share this annotation: keep it */
+			goto on1;
+	}
+	b = -1 - tt->rn_b;
+	t = saved_tt->rn_p;
+	if (b > t->rn_b)
+		goto on1; /* Wasn't lifted at all */
+	do {
+		x = t;
+		t = t->rn_p;
+	} while (b <= t->rn_b && x != top);
+	for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) {
+		if (m == saved_m) {
+			*mp = m->rm_mklist;
+			rm_free(m);
+			break;
+		}
+	}
+	if (m == NULL) {
+		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
+		if (tt->rn_flags & RNF_NORMAL)
+			return NULL; /* Dangling ref to us */
+	}
+on1:
+	/*
+	 * Eliminate us from tree
+	 */
+	if (tt->rn_flags & RNF_ROOT)
+		return NULL;
+#ifdef RN_DEBUG
+	if (rn_debug)
+		log(LOG_DEBUG, "%s: Going In:\n", __func__), traverse(head, tt);
+#endif
+	t = tt->rn_p;
+	dupedkey = saved_tt->rn_dupedkey;
+	if (dupedkey != NULL) {
+		/*
+		 * Here, tt is the deletion target, and
+		 * saved_tt is the head of the dupedkey chain.
+		 */
+		if (tt == saved_tt) {
+			x = dupedkey;
+			x->rn_p = t;
+			if (t->rn_l == tt)
+				t->rn_l = x;
+			else
+				t->rn_r = x;
+		} else {
+			/* find node in front of tt on the chain */
+			for (x = p = saved_tt;
+			     p != NULL && p->rn_dupedkey != tt;)
+				p = p->rn_dupedkey;
+			if (p != NULL) {
+				p->rn_dupedkey = tt->rn_dupedkey;
+				if (tt->rn_dupedkey != NULL)
+					tt->rn_dupedkey->rn_p = p;
+			} else
+				log(LOG_ERR, "rn_delete: couldn't find us\n");
+		}
+		t = tt + 1;	/* the internal node that was handed in together with tt */
+		if  (t->rn_flags & RNF_ACTIVE) {
+			*++x = *t;
+			p = t->rn_p;
+			if (p->rn_l == t)
+				p->rn_l = x;
+			else
+				p->rn_r = x;
+			x->rn_l->rn_p = x;
+			x->rn_r->rn_p = x;
+		}
+		goto out;
+	}
+	if (t->rn_l == tt)
+		x = t->rn_r;
+	else
+		x = t->rn_l;
+	p = t->rn_p;
+	if (p->rn_r == t)
+		p->rn_r = x;
+	else
+		p->rn_l = x;
+	x->rn_p = p;
+	/*
+	 * Demote routes attached to us.
+	 */
+	if (t->rn_mklist == NULL)
+		;
+	else if (x->rn_b >= 0) {
+		for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
+			;
+		*mp = t->rn_mklist;
+	} else {
+		/* If there are any key,mask pairs in a sibling
+		   duped-key chain, some subset will appear sorted
+		   in the same order attached to our mklist */
+		for (m = t->rn_mklist;
+		     m != NULL && x != NULL;
+		     x = x->rn_dupedkey) {
+			if (m == x->rn_mklist) {
+				struct radix_mask *mm = m->rm_mklist;
+				x->rn_mklist = NULL;
+				if (--(m->rm_refs) < 0)
+					rm_free(m);
+				m = mm;
+			}
+		}
+		if (m != NULL) {
+			log(LOG_ERR, "rn_delete: Orphaned Mask %p at %p\n",
+			    m, x);
+		}
+	}
+	/*
+	 * We may be holding an active internal node in the tree.
+	 */
+	x = tt + 1;
+	if (t != x) {
+		*t = *x;
+		t->rn_l->rn_p = t;
+		t->rn_r->rn_p = t;
+		p = x->rn_p;
+		if (p->rn_l == x)
+			p->rn_l = t;
+		else
+			p->rn_r = t;
+	}
+out:
+#ifdef RN_DEBUG
+	if (rn_debug) {
+		log(LOG_DEBUG, "%s: Coming Out:\n", __func__),
+		    traverse(head, tt);
+	}
+#endif /* RN_DEBUG */
+	tt->rn_flags &= ~RNF_ACTIVE;	/* both nodes of the pair are flagged as no longer in use */
+	tt[1].rn_flags &= ~RNF_ACTIVE;
+	return tt;
+}
+
+struct radix_node *	/* thin wrapper: rn_delete1() with NULL as its last argument, result passed through */
+rn_delete(
+	const void *v_arg,
+	const void *netmask_arg,
+	struct radix_node_head *head)
+{
+	return rn_delete1(v_arg, netmask_arg, head, NULL);
+}
+
+static struct radix_node *	/* next leaf after rn in left-to-right order; printer (may be NULL) gets subtree markers */
+rn_walknext(struct radix_node *rn, rn_printer_t printer, void *arg)
+{
+	/* If at right child go back up, otherwise, go right */
+	while (rn->rn_p->rn_r == rn && (rn->rn_flags & RNF_ROOT) == 0) {
+		if (printer != NULL)
+			(*printer)(arg, SUBTREE_CLOSE);
+		rn = rn->rn_p;
+	}
+	if (printer)
+		rn_nodeprint(rn->rn_p, printer, arg, "");
+	/* Find the next *leaf* since next node might vanish, too */
+	for (rn = rn->rn_p->rn_r; rn->rn_b >= 0;) {	/* step into the right subtree, then keep going left */
+		if (printer != NULL)
+			(*printer)(arg, SUBTREE_OPEN);
+		rn = rn->rn_l;
+	}
+	return rn;
+}
+
+static struct radix_node *
+rn_walkfirst(struct radix_node *rn, rn_printer_t printer, void *arg)
+{
+	/* Follow left links down to the first leaf, announcing each subtree entered. */
+	for (; rn->rn_b >= 0; rn = rn->rn_l) {
+		if (printer == NULL)
+			continue;
+		(*printer)(arg, SUBTREE_OPEN);
+	}
+	return rn;
+}
+
+int	/* calls f(leaf, w) on every non-root leaf; returns the first non-zero result from f, else 0 */
+rn_walktree(
+	struct radix_node_head *h,
+	int (*f)(struct radix_node *, void *),
+	void *w)
+{
+	int error;
+	struct radix_node *base, *next, *rn;
+	/*
+	 * This gets complicated because we may delete the node
+	 * while applying the function f to it, so we need to calculate
+	 * the successor node in advance.
+	 */
+	rn = rn_walkfirst(h->rnh_treetop, NULL, NULL);
+	for (;;) {
+		base = rn;
+		next = rn_walknext(rn, NULL, NULL);
+		/* Process leaves */
+		while ((rn = base) != NULL) {
+			base = rn->rn_dupedkey;	/* fetched before f runs, since f may delete rn */
+			if (!(rn->rn_flags & RNF_ROOT) && (error = (*f)(rn, w)))
+				return error;
+		}
+		rn = next;
+		if (rn->rn_flags & RNF_ROOT)	/* reached a root leaf: the walk is complete */
+			return 0;
+	}
+	/* NOTREACHED */
+}
+
+struct radix_node *	/* first non-root leaf for which matcher(leaf, w) returns non-zero, or NULL */
+rn_search_matched(struct radix_node_head *h,
+    int (*matcher)(struct radix_node *, void *), void *w)
+{
+	bool matched;
+	struct radix_node *base, *next, *rn;
+	/*
+	 * This gets complicated because the node may be deleted
+	 * while the matcher is applied to it, so we need to calculate
+	 * the successor node in advance.
+	 */
+	rn = rn_walkfirst(h->rnh_treetop, NULL, NULL);
+	for (;;) {
+		base = rn;
+		next = rn_walknext(rn, NULL, NULL);
+		/* Process leaves */
+		while ((rn = base) != NULL) {
+			base = rn->rn_dupedkey;	/* fetched before the matcher runs */
+			if (!(rn->rn_flags & RNF_ROOT)) {
+				matched = (*matcher)(rn, w);
+				if (matched)
+					return rn;
+			}
+		}
+		rn = next;
+		if (rn->rn_flags & RNF_ROOT)	/* reached a root leaf without a match */
+			return NULL;
+	}
+	/* NOTREACHED */
+}
+
+int
+rn_inithead(void **head, int off)
+{
+	struct radix_node_head *fresh;
+	if (*head != NULL)	/* slot already populated: nothing to do, report success */
+		return 1;
+	R_Malloc(fresh, struct radix_node_head *, sizeof (*fresh));
+	if (fresh != NULL) {
+		*head = fresh;
+		return rn_inithead0(fresh, off);
+	}
+	return 0;	/* no memory for the head */
+}
+
+int	/* turn rnh into an empty tree whose top node tests bit off; always returns 1 */
+rn_inithead0(struct radix_node_head *rnh, int off)
+{
+	struct radix_node *t;
+	struct radix_node *tt;
+	struct radix_node *ttt;
+
+	memset(rnh, 0, sizeof(*rnh));
+	t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);	/* rnh_nodes[0]: leaf keyed rn_zeros, [1]: top node */
+	ttt = rnh->rnh_nodes + 2;	/* rnh_nodes[2] becomes the right-hand leaf */
+	t->rn_r = ttt;
+	t->rn_p = t;	/* the top node is its own parent */
+	tt = t->rn_l;
+	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
+	tt->rn_b = -1 - off;
+	*ttt = *tt;
+	ttt->rn_key = rn_ones;	/* right leaf is a copy of the left one, keyed rn_ones instead */
+	rnh->rnh_addaddr = rn_addroute;
+	rnh->rnh_deladdr = rn_delete;
+	rnh->rnh_matchaddr = rn_match;
+	rnh->rnh_lookup = rn_lookup;
+	rnh->rnh_treetop = t;
+	return 1;
+}
+
+static clib_error_t *
+rn_module_init (vlib_main_t * vm)
+{
+	/* One zero-filled block carved into three max_keylen-byte regions:
+	 * rn_zeros (all 0x00), rn_ones (all 0xff) and addmask_key (scratch). */
+	R_Malloc(rn_zeros, char *, 3 * max_keylen);
+	if (rn_zeros == NULL)
+		return (clib_error_return (0, "RN Zeros..."));
+
+	rn_ones = rn_zeros + max_keylen;
+	addmask_key = rn_ones + max_keylen;
+	memset(rn_ones, 0xff, max_keylen);
+	if (rn_inithead((void *)&mask_rnhead, 0) == 0) {
+		Free(rn_zeros);		/* do not leak the key buffers when the mask tree cannot be created */
+		rn_zeros = rn_ones = addmask_key = NULL;
+		return (clib_error_return (0, "RN Init 2"));
+	}
+	return (NULL);
+}
+
+VLIB_INIT_FUNCTION(rn_module_init);
diff --git a/src/vnet/util/radix.h b/src/vnet/util/radix.h
new file mode 100644
index 00000000..d9ba6659
--- /dev/null
+++ b/src/vnet/util/radix.h
@@ -0,0 +1,147 @@
+/*	$NetBSD: radix.h,v 1.23 2016/11/15 01:50:06 ozaki-r Exp $	*/
+
+/*
+ * Copyright (c) 1988, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)radix.h	8.2 (Berkeley) 10/31/94
+ */
+
+#ifndef _NET_RADIX_H_
+#define	_NET_RADIX_H_
+
+#include <vlib/vlib.h>
+
+/*
+ * Radix search tree node layout.
+ */
+
+struct radix_node {	/* one tree node: a leaf when rn_b < 0, an internal node otherwise */
+	struct	radix_mask *rn_mklist;	/* list of masks contained in subtree */
+	struct	radix_node *rn_p;	/* parent */
+	i16	rn_b;			/* bit offset; -1-index(netmask) */
+	u8	rn_bmask;		/* node: mask for bit test*/
+	u8	rn_flags;		/* RNF_* bits, enumerated next */
+#define RNF_NORMAL	1		/* leaf contains normal route */
+#define RNF_ROOT	2		/* leaf is root leaf for tree */
+#define RNF_ACTIVE	4		/* This node is alive (for rtfree) */
+	union {
+		struct {			/* leaf only data: */
+			const char *rn_Key;	/* object of search */
+			const char *rn_Mask;	/* netmask, if present */
+			struct	radix_node *rn_Dupedkey;	/* next leaf carrying the same key */
+		} rn_leaf;
+		struct {			/* node only data: */
+			int	rn_Off;		/* where to start compare */
+			struct	radix_node *rn_L;/* progeny */
+			struct	radix_node *rn_R;/* progeny */
+		} rn_node;
+	} rn_u;
+#ifdef RN_DEBUG
+	i32 rn_info;
+	struct radix_node *rn_twin;
+	struct radix_node *rn_ybro;
+#endif
+};
+
+#define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey
+#define rn_key rn_u.rn_leaf.rn_Key
+#define rn_mask rn_u.rn_leaf.rn_Mask
+#define rn_off rn_u.rn_node.rn_Off
+#define rn_l rn_u.rn_node.rn_L
+#define rn_r rn_u.rn_node.rn_R
+
+/*
+ * Annotations to tree concerning potential routes applying to subtrees.
+ */
+
+struct radix_mask {	/* one entry of a node's rn_mklist */
+	i16	rm_b;			/* bit offset; -1-index(netmask) */
+	i8	rm_unused;		/* cf. rn_bmask */
+	u8	rm_flags;		/* cf. rn_flags */
+	struct	radix_mask *rm_mklist;	/* more masks to try */
+	union	{
+		const char *rmu_mask;		/* the mask */
+		struct	radix_node *rmu_leaf;	/* for normal routes */
+	}	rm_rmu;
+	i32	rm_refs;		/* # of references beyond the first: 0 when new, freed once it drops below 0 */
+};
+
+#define rm_mask rm_rmu.rmu_mask
+#define rm_leaf rm_rmu.rmu_leaf		/* extra field would make 32 bytes */
+
+struct radix_node_head {	/* a tree plus its operations; rn_inithead0 fills in only the *addr and lookup hooks */
+	struct	radix_node *rnh_treetop;	/* top internal node, where every search starts */
+	i32	rnh_addrsize;		/* permit, but not require fixed keys */
+	i32	rnh_pktsize;		/* permit, but not require fixed keys */
+	struct	radix_node *(*rnh_addaddr)	/* add based on sockaddr */
+		(const void *v, const void *mask,
+		     struct radix_node_head *head, struct radix_node nodes[]);
+	struct	radix_node *(*rnh_addpkt)	/* add based on packet hdr */
+		(const void *v, const void *mask,
+		     struct radix_node_head *head, struct radix_node nodes[]);
+	struct	radix_node *(*rnh_deladdr)	/* remove based on sockaddr */
+		(const void *v, const void *mask, struct radix_node_head *head);
+	struct	radix_node *(*rnh_delpkt)	/* remove based on packet hdr */
+		(const void *v, const void *mask, struct radix_node_head *head);
+	struct	radix_node *(*rnh_matchaddr)	/* locate based on sockaddr */
+		(const void *v, struct radix_node_head *head);
+	struct	radix_node *(*rnh_lookup)	/* locate based on sockaddr */
+		(const void *v, const void *mask, struct radix_node_head *head);
+	struct	radix_node *(*rnh_matchpkt)	/* locate based on packet hdr */
+		(const void *v, struct radix_node_head *head);
+	struct	radix_node rnh_nodes[3];	/* empty tree for common case: [0] left leaf, [1] top node, [2] right leaf */
+};
+
+void	rn_init(void);			/* NOTE(review): no definition in the radix.c added alongside this header */
+int	rn_inithead(void **, int);	/* allocate and initialise a head unless *head is already set */
+void	rn_delayedinit(void **, int);	/* NOTE(review): no definition in the radix.c added alongside this header */
+int	rn_inithead0(struct radix_node_head *, int);	/* initialise caller-provided head storage */
+int	rn_refines(const void *, const void *);
+int	rn_walktree(struct radix_node_head *,
+	            int (*)(struct radix_node *, void *),
+		    void *);
+struct radix_node *
+	rn_search_matched(struct radix_node_head *,
+	                  int (*)(struct radix_node *, void *),
+		          void *);
+struct radix_node
+	 *rn_addmask(const void *, int, int),
+	 *rn_addroute(const void *, const void *, struct radix_node_head *,
+			struct radix_node [2]),
+	 *rn_delete1(const void *, const void *, struct radix_node_head *,
+			struct radix_node *),
+	 *rn_delete(const void *, const void *, struct radix_node_head *),
+	 *rn_insert(const void *, struct radix_node_head *, int *,
+			struct radix_node [2]),
+	 *rn_lookup(const void *, const void *, struct radix_node_head *),
+	 *rn_match(const void *, struct radix_node_head *),
+	 *rn_newpair(const void *, int, struct radix_node[2]),
+	 *rn_search(const void *, struct radix_node *),
+	 *rn_search_m(const void *, struct radix_node *, const void *);
+
+#endif /* !_NET_RADIX_H_ */
diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c
index f749429f..9ed42875 100644
--- a/src/vnet/vxlan/vxlan.c
+++ b/src/vnet/vxlan/vxlan.c
@@ -16,7 +16,8 @@
 #include <vnet/ip/format.h>
 #include <vnet/fib/fib_entry.h>
 #include <vnet/fib/fib_table.h>
-#include <vnet/dpo/receive_dpo.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/adj/adj_mcast.h>
 #include <vlib/vlib.h>
 
 /**
@@ -337,7 +338,7 @@ vtep_addr_unref(ip46_address_t *ip)
 
 typedef CLIB_PACKED(union {
   struct {
-    fib_node_index_t fib_entry_index;
+    fib_node_index_t mfib_entry_index;
     adj_index_t mcast_adj_index;
   };
   u64 as_u64;
@@ -353,11 +354,28 @@ mcast_shared_get(ip46_address_t * ip)
 }
 
 static inline void
-ip46_multicast_ethernet_address(u8 * ethernet_address, ip46_address_t * ip) {
-          if (ip46_address_is_ip4(ip))
-              ip4_multicast_ethernet_address(ethernet_address, &ip->ip4);
-          else
-              ip6_multicast_ethernet_address(ethernet_address, ip->ip6.as_u32[0]);
+mcast_shared_add(ip46_address_t *dst,
+                 fib_node_index_t mfei,
+                 adj_index_t ai)
+{
+    mcast_shared_t new_ep = {
+        .mcast_adj_index = ai,
+        .mfib_entry_index = mfei,
+    };
+
+    hash_set_key_copy (&vxlan_main.mcast_shared, dst, new_ep.as_u64);
+}
+
+static inline void
+mcast_shared_remove(ip46_address_t *dst)
+{
+    mcast_shared_t ep = mcast_shared_get(dst); /* state stored for dst */
+
+    adj_unlock(ep.mcast_adj_index); /* release the shared mcast adj */
+    mfib_table_entry_delete_index(ep.mfib_entry_index,
+                                  MFIB_SOURCE_VXLAN);
+
+    hash_unset_key_free (&vxlan_main.mcast_shared, dst); /* forget dst */
 }
 
 int vnet_vxlan_add_del_tunnel 
@@ -503,28 +521,65 @@ int vnet_vxlan_add_del_tunnel
 	   */
           fib_protocol_t fp = (is_ip6) ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
           dpo_id_t dpo = DPO_INVALID;
-	  dpo_proto_t dproto = fib_proto_to_dpo(fp);
+          mcast_shared_t ep;
 
 	  if (vtep_addr_ref(&t->dst) == 1)
-	    {
-               u8 mcast_mac[6];
-
-               ip46_multicast_ethernet_address(mcast_mac, &t->dst);
-               receive_dpo_add_or_lock(dproto, ~0, NULL, &dpo);
-	       mcast_shared_t new_ep = {
-		 .mcast_adj_index = adj_rewrite_add_and_lock
-                   (fp, fib_proto_to_link(fp), a->mcast_sw_if_index, mcast_mac),
-                 /* Add VRF local mcast adj. */
-                 .fib_entry_index = fib_table_entry_special_dpo_add
-                   (t->encap_fib_index, &tun_dst_pfx,
-                    FIB_SOURCE_SPECIAL, FIB_ENTRY_FLAG_NONE, &dpo)
-		   };
-	       hash_set_key_copy (&vxm->mcast_shared, &t->dst, new_ep.as_u64);
-	       dpo_reset(&dpo);
-	    }
+          {
+              fib_node_index_t mfei;
+              adj_index_t ai;
+              fib_route_path_t path = {
+                  .frp_proto = fp,
+                  .frp_addr = zero_addr,
+                  .frp_sw_if_index = 0xffffffff,
+                  .frp_fib_index = ~0,
+                  .frp_weight = 0,
+                  .frp_flags = FIB_ROUTE_PATH_LOCAL,
+              };
+              const mfib_prefix_t mpfx = {
+                  .fp_proto = fp,
+                  .fp_len = (is_ip6 ? 128 : 32),
+                  .fp_grp_addr = tun_dst_pfx.fp_addr,
+              };
+
+              /*
+               * Setup the (*,G) to receive traffic on the mcast group
+               *  - the forwarding interface is for-us
+               *  - the accepting interface is that from the API
+               */
+              mfib_table_entry_path_update(t->encap_fib_index,
+                                           &mpfx,
+                                           MFIB_SOURCE_VXLAN,
+                                           &path,
+                                           MFIB_ITF_FLAG_FORWARD);
+
+              path.frp_sw_if_index = a->mcast_sw_if_index;
+              path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
+              mfei = mfib_table_entry_path_update(t->encap_fib_index,
+                                                  &mpfx,
+                                                  MFIB_SOURCE_VXLAN,
+                                                  &path,
+                                                  MFIB_ITF_FLAG_ACCEPT);
+
+              /*
+               * Create the mcast adjacency to send traffic to the group
+               */
+              ai = adj_mcast_add_or_lock(fp,
+                                         fib_proto_to_link(fp),
+                                         a->mcast_sw_if_index);
+
+              /*
+               * create a new end-point
+               */
+              mcast_shared_add(&t->dst, mfei, ai);
+          }
+
+          ep = mcast_shared_get(&t->dst);
+
           /* Stack shared mcast dst mac addr rewrite on encap */
-	  mcast_shared_t ep = mcast_shared_get(&t->dst);
-          dpo_set (&dpo, DPO_ADJACENCY, dproto, ep.mcast_adj_index);
+          dpo_set (&dpo, DPO_ADJACENCY,
+                   fib_proto_to_dpo(fp),
+                   ep.mcast_adj_index);
+
           dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
           dpo_reset (&dpo);
 	  flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
@@ -563,10 +618,7 @@ int vnet_vxlan_add_del_tunnel
         }
       else if (vtep_addr_unref(&t->dst) == 0)
         {
-	  mcast_shared_t ep = mcast_shared_get(&t->dst);
-	  adj_unlock(ep.mcast_adj_index);
-	  fib_table_entry_delete_index(ep.fib_entry_index, FIB_SOURCE_SPECIAL);
-          hash_unset_key_free (&vxm->mcast_shared, &t->dst);
+	  mcast_shared_remove(&t->dst);
         }
 
       fib_node_deinit(&t->node);
diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c
index e6227a68..6c8800ec 100644
--- a/src/vpp/api/api.c
+++ b/src/vpp/api/api.c
@@ -964,7 +964,6 @@ ip4_reset_fib_t_handler (vl_api_reset_fib_t * mp)
 
     for (i = 0; i < vec_len (sw_if_indices_to_shut); i++) {
       sw_if_index = sw_if_indices_to_shut[i];
-      // vec_foreach (sw_if_index, sw_if_indices_to_shut) {
 
       u32 flags = vnet_sw_interface_get_flags (vnm, sw_if_index);
       flags &= ~(VNET_SW_INTERFACE_FLAG_ADMIN_UP);
@@ -972,7 +971,6 @@ ip4_reset_fib_t_handler (vl_api_reset_fib_t * mp)
     }
 
     fib_table_flush(fib->index, FIB_PROTOCOL_IP4, FIB_SOURCE_API);
-    fib_table_flush(fib->index, FIB_PROTOCOL_IP4, FIB_SOURCE_INTERFACE);
 
     rv = 0;
     break;
@@ -1013,15 +1011,14 @@ ip6_reset_fib_t_handler (vl_api_reset_fib_t * mp)
 
     /* Shut down interfaces in this FIB / clean out intfc routes */
     pool_foreach (si, im->sw_interfaces,
-                  ({
-                    if (im6->fib_index_by_sw_if_index[si->sw_if_index] ==
-                        fib->index)
-                      vec_add1 (sw_if_indices_to_shut, si->sw_if_index);
-                  }));
+    ({
+      if (im6->fib_index_by_sw_if_index[si->sw_if_index] ==
+          fib->index)
+        vec_add1 (sw_if_indices_to_shut, si->sw_if_index);
+    }));
 
     for (i = 0; i < vec_len (sw_if_indices_to_shut); i++) {
       sw_if_index = sw_if_indices_to_shut[i];
-      // vec_foreach (sw_if_index, sw_if_indices_to_shut) {
 
       u32 flags = vnet_sw_interface_get_flags (vnm, sw_if_index);
       flags &= ~(VNET_SW_INTERFACE_FLAG_ADMIN_UP);
@@ -1029,7 +1026,6 @@ ip6_reset_fib_t_handler (vl_api_reset_fib_t * mp)
     }
 
     fib_table_flush(fib->index, FIB_PROTOCOL_IP6, FIB_SOURCE_API);
-    fib_table_flush(fib->index, FIB_PROTOCOL_IP6, FIB_SOURCE_INTERFACE);
 
     rv = 0;
     break;
diff --git a/src/vppinfra.am b/src/vppinfra.am
index 19485d2d..724f21c9 100644
--- a/src/vppinfra.am
+++ b/src/vppinfra.am
@@ -38,7 +38,7 @@ TESTS  +=  test_bihash_template \
 	   test_time \
 	   test_timing_wheel \
 	   test_vec \
-	   test_zvec 
+	   test_zvec
 endif
 
 noinst_PROGRAMS = $(TESTS)
diff --git a/src/vppinfra/dlist.h b/src/vppinfra/dlist.h
index 7d09b2bb..e445b39f 100644
--- a/src/vppinfra/dlist.h
+++ b/src/vppinfra/dlist.h
@@ -121,7 +121,7 @@ clib_dlist_remove_head (dlist_elt_t * pool, u32 head_index)
 
   ASSERT (head->value == ~0);
 
-  if (head->next == ~0)
+  if (head->next == ~0 || (head->next == head_index))
     return ~0;
 
   rv = head->next;
diff --git a/src/vppinfra/format.c b/src/vppinfra/format.c
index 78e52e9a..999b093c 100644
--- a/src/vppinfra/format.c
+++ b/src/vppinfra/format.c
@@ -150,13 +150,13 @@ justify (u8 * s, format_info_t * fi, uword s_len_orig)
   return s;
 }
 
-static u8 *
-do_percent (u8 ** _s, u8 * fmt, va_list * va)
+static const u8 *
+do_percent (u8 ** _s, const u8 * fmt, va_list * va)
 {
   u8 *s = *_s;
   uword c;
 
-  u8 *f = fmt;
+  const u8 *f = fmt;
 
   format_info_t fi = {
     .justify = '+',
@@ -385,7 +385,7 @@ done:
 u8 *
 va_format (u8 * s, const char *fmt, va_list * va)
 {
-  u8 *f = (u8 *) fmt, *g;
+  const u8 *f = (u8 *) fmt, *g;
   u8 c;
 
   g = f;
diff --git a/src/vppinfra/format.h b/src/vppinfra/format.h
index bc0d6d15..bec1b6b4 100644
--- a/src/vppinfra/format.h
+++ b/src/vppinfra/format.h
@@ -234,7 +234,7 @@ typedef uword (unformat_function_t) (unformat_input_t * input,
 /* External functions. */
 
 /* General unformatting function with programmable input stream. */
-uword unformat (unformat_input_t * i, char *fmt, ...);
+uword unformat (unformat_input_t * i, const char *fmt, ...);
 
 /* Call user defined parse function.
    unformat_user (i, f, ...) is equivalent to unformat (i, "%U", f, ...) */
@@ -242,7 +242,7 @@ uword unformat_user (unformat_input_t * input, unformat_function_t * func,
 		     ...);
 
 /* Alternate version which allows for extensions. */
-uword va_unformat (unformat_input_t * i, char *fmt, va_list * args);
+uword va_unformat (unformat_input_t * i, const char *fmt, va_list * args);
 
 /* Setup for unformat of Unix style command line. */
 void unformat_init_command_line (unformat_input_t * input, char *argv[]);
diff --git a/src/vppinfra/unformat.c b/src/vppinfra/unformat.c
index ac8b7ddc..7c636ccc 100644
--- a/src/vppinfra/unformat.c
+++ b/src/vppinfra/unformat.c
@@ -681,8 +681,8 @@ error:
   return 0;
 }
 
-static char *
-match_input_with_format (unformat_input_t * input, char *f)
+static const char *
+match_input_with_format (unformat_input_t * input, const char *f)
 {
   uword cf, ci;
 
@@ -703,8 +703,8 @@ match_input_with_format (unformat_input_t * input, char *f)
   return f;
 }
 
-static char *
-do_percent (unformat_input_t * input, va_list * va, char *f)
+static const char *
+do_percent (unformat_input_t * input, va_list * va, const char *f)
 {
   uword cf, n, data_bytes = ~0;
 
@@ -824,9 +824,9 @@ unformat_skip_white_space (unformat_input_t * input)
 }
 
 uword
-va_unformat (unformat_input_t * input, char *fmt, va_list * va)
+va_unformat (unformat_input_t * input, const char *fmt, va_list * va)
 {
-  char *f;
+  const char *f;
   uword input_matches_format;
   uword default_skip_input_white_space;
   uword n_input_white_space_skipped;
@@ -937,7 +937,7 @@ va_unformat (unformat_input_t * input, char *fmt, va_list * va)
 
       else
 	{
-	  char *g = match_input_with_format (input, f);
+	  const char *g = match_input_with_format (input, f);
 	  if (!g)
 	    goto parse_fail;
 	  last_non_white_space_match_format = g > f;
@@ -963,7 +963,7 @@ parse_fail:
 }
 
 uword
-unformat (unformat_input_t * input, char *fmt, ...)
+unformat (unformat_input_t * input, const char *fmt, ...)
 {
   va_list va;
   uword result;
diff --git a/test/test_dhcp.py b/test/test_dhcp.py
index bdff679c..04ab2e11 100644
--- a/test/test_dhcp.py
+++ b/test/test_dhcp.py
@@ -4,8 +4,6 @@ import unittest
 import socket
 
 from framework import VppTestCase, VppTestRunner
-from vpp_ip_route import IpRoute, RoutePath
-from vpp_lo_interface import VppLoInterface
 
 from scapy.layers.l2 import Ether, getmacbyip
 from scapy.layers.inet import IP, UDP, ICMP
@@ -482,17 +480,6 @@ class TestDHCP(VppTestCase):
         server_addr_vrf1 = self.pg1.remote_ip6n
         src_addr_vrf1 = self.pg1.local_ip6n
 
-        #
-        # Add the Route to receive the DHCP packets
-        #
-        route_dhcp_vrf0 = IpRoute(self, dhcp_solicit_dst, 128,
-                                  [], is_local=1, is_ip6=1)
-        route_dhcp_vrf0.add_vpp_config()
-        route_dhcp_vrf1 = IpRoute(self, dhcp_solicit_dst, 128,
-                                  [], is_local=1, is_ip6=1,
-                                  table_id=1)
-        route_dhcp_vrf1.add_vpp_config()
-
         dmac = in6_getnsmac(inet_pton(socket.AF_INET6, dhcp_solicit_dst))
         p_solicit_vrf0 = (Ether(dst=dmac, src=self.pg2.remote_mac) /
                           IPv6(src=dhcp_solicit_src_vrf0,
@@ -732,8 +719,5 @@ class TestDHCP(VppTestCase):
                                     is_ipv6=1,
                                     is_add=0)
 
-        route_dhcp_vrf0.remove_vpp_config()
-        route_dhcp_vrf1.remove_vpp_config()
-
 if __name__ == '__main__':
     unittest.main(testRunner=VppTestRunner)
diff --git a/test/test_ip6.py b/test/test_ip6.py
index ea669b70..e188970a 100644
--- a/test/test_ip6.py
+++ b/test/test_ip6.py
@@ -5,6 +5,7 @@ import socket
 
 from framework import VppTestCase, VppTestRunner
 from vpp_sub_interface import VppSubInterface, VppDot1QSubint
+from vpp_pg_interface import is_ipv6_misc
 
 from scapy.packet import Raw
 from scapy.layers.l2 import Ether, Dot1Q
@@ -12,10 +13,9 @@ from scapy.layers.inet6 import IPv6, UDP, ICMPv6ND_NS, ICMPv6ND_RS, \
     ICMPv6ND_RA, ICMPv6NDOptSrcLLAddr, getmacbyip6, ICMPv6MRD_Solicitation
 from util import ppp
 from scapy.utils6 import in6_getnsma, in6_getnsmac, in6_ptop, in6_islladdr, \
-    in6_mactoifaceid
+    in6_mactoifaceid, in6_ismaddr
 from scapy.utils import inet_pton, inet_ntop
 
-
 def mk_ll_addr(mac):
     euid = in6_mactoifaceid(mac)
     addr = "fe80::" + euid
@@ -287,28 +287,39 @@ class TestIPv6(VppTestCase):
         self.send_and_assert_no_replies(self.pg0, pkts,
                                         "No response to NS for unknown target")
 
-    def send_and_expect_ra(self, intf, pkts, remark, src_ip=None):
-        if not src_ip:
-            src_ip = intf.remote_ip6
-        intf.add_stream(pkts)
-        self.pg0.add_stream(pkts)
-        self.pg_enable_capture(self.pg_interfaces)
-        self.pg_start()
-        rx = intf.get_capture(1)
+    def validate_ra(self, intf, rx, dst_ip=None):
+        if not dst_ip:
+            dst_ip = intf.remote_ip6
 
-        self.assertEqual(len(rx), 1)
-        rx = rx[0]
+        # unicast RAs must be addressed to the remote host's unicast MAC
+        self.assertEqual(rx[Ether].dst, intf.remote_mac)
+
+        # and from the router's MAC
+        self.assertEqual(rx[Ether].src, intf.local_mac)
 
         # the rx'd RA should be addressed to the sender's source
         self.assertTrue(rx.haslayer(ICMPv6ND_RA))
         self.assertEqual(in6_ptop(rx[IPv6].dst),
-                         in6_ptop(src_ip))
+                         in6_ptop(dst_ip))
 
         # and come from the router's link local
         self.assertTrue(in6_islladdr(rx[IPv6].src))
         self.assertEqual(in6_ptop(rx[IPv6].src),
                          in6_ptop(mk_ll_addr(intf.local_mac)))
 
+
+    def send_and_expect_ra(self, intf, pkts, remark, dst_ip=None,
+                           filter_out_fn=is_ipv6_misc):
+        intf.add_stream(pkts)
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        rx = intf.get_capture(1, filter_out_fn=filter_out_fn)
+
+        self.assertEqual(len(rx), 1)
+        rx = rx[0]
+        self.validate_ra(intf, rx, dst_ip)
+
     def test_rs(self):
         """ IPv6 Router Solicitation Exceptions
 
@@ -319,6 +330,9 @@ class TestIPv6(VppTestCase):
         # Before we begin change the IPv6 RA responses to use the unicast
         # address - that way we will not confuse them with the periodic
         # RAs which go to the mcast address
+        # Sit and wait for the first periodic RA.
+        #
+        # TODO
         #
         self.pg0.ip6_ra_config(send_unicast=1)
 
@@ -365,8 +379,23 @@ class TestIPv6(VppTestCase):
              IPv6(dst=self.pg0.local_ip6, src=ll) /
              ICMPv6ND_RS())
         pkts = [p]
-        self.send_and_expect_ra(
-            self.pg0, pkts, "RS sourced from link-local", src_ip=ll)
+        self.send_and_expect_ra(self.pg0, pkts,
+                                "RS sourced from link-local",
+                                dst_ip=ll)
+
+        #
+        # Send the RS multicast
+        #
+        self.pg0.ip6_ra_config(send_unicast=1)
+        dmac = in6_getnsmac(inet_pton(socket.AF_INET6, "ff02::2"))
+        ll = mk_ll_addr(self.pg0.remote_mac)
+        p = (Ether(dst=dmac, src=self.pg0.remote_mac) /
+             IPv6(dst="ff02::2", src=ll) /
+             ICMPv6ND_RS())
+        pkts = [p]
+        self.send_and_expect_ra(self.pg0, pkts,
+                                "RS sourced from link-local",
+                                dst_ip=ll)
 
         #
         # Source from the unspecified address ::. This happens when the RS
@@ -376,74 +405,20 @@ class TestIPv6(VppTestCase):
         # If we happen to pick up the periodic RA at this point then so be it,
         # it's not an error.
         #
-        self.pg0.ip6_ra_config(send_unicast=1)
-        p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
-             IPv6(dst=self.pg0.local_ip6, src="::") /
+        self.pg0.ip6_ra_config(send_unicast=1, suppress=1)
+        p = (Ether(dst=dmac, src=self.pg0.remote_mac) /
+             IPv6(dst="ff02::2", src="::") /
              ICMPv6ND_RS())
         pkts = [p]
-
-        self.pg0.add_stream(pkts)
-        self.pg0.add_stream(pkts)
-        self.pg_enable_capture(self.pg_interfaces)
-        self.pg_start()
-        capture = self.pg0.get_capture(1, filter_out_fn=None)
-        found = 0
-        for rx in capture:
-            if (rx.haslayer(ICMPv6ND_RA)):
-                # and come from the router's link local
-                self.assertTrue(in6_islladdr(rx[IPv6].src))
-                self.assertEqual(in6_ptop(rx[IPv6].src),
-                                 in6_ptop(mk_ll_addr(self.pg0.local_mac)))
-                # sent to the all hosts mcast
-                self.assertEqual(in6_ptop(rx[IPv6].dst), "ff02::1")
-
-                found = 1
-        self.assertTrue(found)
-
-    @unittest.skip("Unsupported")
-    def test_mrs(self):
-        """ IPv6 Multicast Router Solicitation Exceptions
-
-        Test scenario:
-        """
-
-        #
-        # An RS from a link source address
-        #  - expect an RA in return
-        #
-        nsma = in6_getnsma(inet_pton(socket.AF_INET6, self.pg0.local_ip6))
-        d = inet_ntop(socket.AF_INET6, nsma)
-
-        p = (Ether(dst=getmacbyip6("ff02::2")) /
-             IPv6(dst=d, src=self.pg0.remote_ip6) /
-             ICMPv6MRD_Solicitation())
-        pkts = [p]
-
-        self.pg0.add_stream(pkts)
-        self.pg_enable_capture(self.pg_interfaces)
-        self.pg_start()
-        self.pg0.assert_nothing_captured(
-            remark="No response to NS source by address not on sub-net")
+        self.send_and_expect_ra(self.pg0, pkts,
+                                "RS sourced from unspecified",
+                                dst_ip="ff02::1",
+                                filter_out_fn=None)
 
         #
-        # An RS from a non link source address
+        # Reset the periodic advertisements back to default values
         #
-        nsma = in6_getnsma(inet_pton(socket.AF_INET6, self.pg0.local_ip6))
-        d = inet_ntop(socket.AF_INET6, nsma)
-
-        p = (Ether(dst=getmacbyip6("ff02::2")) /
-             IPv6(dst=d, src="2002::2") /
-             ICMPv6MRD_Solicitation())
-        pkts = [p]
-
-        self.send_and_assert_no_replies(self.pg0, pkts,
-                                        "RA rate limited")
-        self.pg0.add_stream(pkts)
-        self.pg_enable_capture(self.pg_interfaces)
-        self.pg_start()
-        self.pg0.assert_nothing_captured(
-            remark="No response to NS source by address not on sub-net")
-
+        self.pg0.ip6_ra_config(no=1, suppress=1, send_unicast=0)
 
 if __name__ == '__main__':
     unittest.main(testRunner=VppTestRunner)
diff --git a/test/test_ip_mcast.py b/test/test_ip_mcast.py
new file mode 100644
index 00000000..028853d2
--- /dev/null
+++ b/test/test_ip_mcast.py
@@ -0,0 +1,612 @@
+#!/usr/bin/env python
+
+import unittest
+
+from framework import VppTestCase, VppTestRunner
+from vpp_sub_interface import VppSubInterface, VppDot1QSubint, VppDot1ADSubint
+from vpp_ip_route import IpMRoute, MRoutePath, MFibSignal
+
+from scapy.packet import Raw
+from scapy.layers.l2 import Ether
+from scapy.layers.inet import IP, UDP, getmacbyip
+from scapy.layers.inet6 import IPv6, getmacbyip6
+from util import ppp
+
+
+class MRouteItfFlags:  # interface flag values given to MRoutePath
+    MFIB_ITF_FLAG_NONE = 0
+    MFIB_ITF_FLAG_NEGATE_SIGNAL = 1
+    MFIB_ITF_FLAG_ACCEPT = 2  # set on the accepting interface below
+    MFIB_ITF_FLAG_FORWARD = 4  # set on the forwarding interfaces below
+    MFIB_ITF_FLAG_SIGNAL_PRESENT = 8
+    MFIB_ITF_FLAG_INTERNAL_COPY = 16
+
+
+class MRouteEntryFlags:  # entry flag values given to IpMRoute
+    MFIB_ENTRY_FLAG_NONE = 0  # the only value the routes below use
+    MFIB_ENTRY_FLAG_SIGNAL = 1
+    MFIB_ENTRY_FLAG_DROP = 2
+    MFIB_ENTRY_FLAG_CONNECTED = 4
+    MFIB_ENTRY_FLAG_INHERIT_ACCEPT = 8
+
+
+class TestIPMcast(VppTestCase):
+    """ IP Multicast Test Case """
+
+    def setUp(self):
+        super(TestIPMcast, self).setUp()
+
+        # create 4 pg interfaces; the tests refer to them as pg0..pg3
+        self.create_pg_interfaces(range(4))
+
+        # bring each one up, configure IPv4 and IPv6, resolve ARP and ND
+        for i in self.pg_interfaces:
+            i.admin_up()
+            i.config_ip4()
+            i.config_ip6()
+            i.resolve_arp()
+            i.resolve_ndp()
+
+    def create_stream_ip4(self, src_if, src_ip, dst_ip):
+        """ Build 65 UDP/IPv4 packets src_ip -> dst_ip using src_if's MACs """
+        stream = []
+        for _ in range(65):
+            info = self.create_packet_info(src_if, src_if)
+            pkt = (Ether(dst=src_if.local_mac, src=src_if.remote_mac) /
+                   IP(src=src_ip, dst=dst_ip) /
+                   UDP(sport=1234, dport=1234) /
+                   Raw(self.info_to_payload(info)))
+            info.data = pkt.copy()
+            stream.append(pkt)
+        return stream
+
+    def create_stream_ip6(self, src_if, src_ip, dst_ip):
+        """ Build 65 UDP/IPv6 packets src_ip -> dst_ip using src_if's MACs """
+        stream = []
+        for _ in range(65):
+            info = self.create_packet_info(src_if, src_if)
+            pkt = (Ether(dst=src_if.local_mac, src=src_if.remote_mac) /
+                   IPv6(src=src_ip, dst=dst_ip) /
+                   UDP(sport=1234, dport=1234) /
+                   Raw(self.info_to_payload(info)))
+            info.data = pkt.copy()
+            stream.append(pkt)
+        return stream
+
+    def verify_filter(self, capture, sent):
+        """ Drop IPv6 packets (e.g. RAs) if len(capture) != len(sent) """
+        if len(capture) != len(sent):
+            # build a new list: removing from 'capture' while iterating
+            # over it skips the packet that follows each removed one
+            capture = [p for p in capture if not p.haslayer(IPv6)]
+        return capture
+
+    def verify_capture_ip4(self, src_if, sent):
+        """ Verify the packets in 'sent' were replicated to src_if
+
+        :param src_if: pg interface whose capture is checked (not just pg1)
+        :param sent: the transmitted packets, in the order they were sent
+        """
+        rxd = src_if.get_capture(len(sent))
+        capture = self.verify_filter(rxd, sent)
+
+        self.assertEqual(len(capture), len(sent))
+
+        for i in range(len(capture)):
+            tx = sent[i]
+            rx = capture[i]
+
+            # the rx'd packet must carry the IPv4 ethertype
+            eth = rx[Ether]
+            self.assertEqual(eth.type, 0x800)
+
+            tx_ip = tx[IP]
+            rx_ip = rx[IP]
+
+            # check the MAC address on the RX'd packet is correctly formed
+            self.assertEqual(eth.dst, getmacbyip(rx_ip.dst))
+
+            self.assertEqual(rx_ip.src, tx_ip.src)
+            self.assertEqual(rx_ip.dst, tx_ip.dst)
+            # the rx'd TTL must be one less than the transmitted TTL
+            self.assertEqual(rx_ip.ttl + 1, tx_ip.ttl)
+
+    def verify_capture_ip6(self, src_if, sent):
+        """ Verify the IPv6 packets in 'sent' were replicated to src_if """
+        capture = src_if.get_capture(len(sent))
+        self.assertEqual(len(capture), len(sent))
+
+        for i in range(len(capture)):
+            tx = sent[i]
+            rx = capture[i]
+
+            # the rx'd packet must carry the IPv6 ethertype
+            eth = rx[Ether]
+            self.assertEqual(eth.type, 0x86DD)
+
+            tx_ip = tx[IPv6]
+            rx_ip = rx[IPv6]
+
+            # check the MAC address on the RX'd packet is correctly formed
+            self.assertEqual(eth.dst, getmacbyip6(rx_ip.dst))
+
+            self.assertEqual(rx_ip.src, tx_ip.src)
+            self.assertEqual(rx_ip.dst, tx_ip.dst)
+            # the rx'd hop limit must be one less than the transmitted one
+            self.assertEqual(rx_ip.hlim + 1, tx_ip.hlim)
+
+    def test_ip_mcast(self):
+        """ IP Multicast Replication """
+
+        #
+        # a stream that matches the default route. gets dropped.
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip4(self.pg0, "1.1.1.1", "232.1.1.1")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        self.pg0.assert_nothing_captured(
+            remark="IP multicast packets forwarded on default route")
+
+        #
+        # A (*,G).
+        # one accepting interface, pg0, 3 forwarding interfaces
+        #
+        route_232_1_1_1 = IpMRoute(
+            self,
+            "0.0.0.0",
+            "232.1.1.1", 32,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            [MRoutePath(self.pg0.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT),
+             MRoutePath(self.pg1.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD),
+             MRoutePath(self.pg2.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD),
+             MRoutePath(self.pg3.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)])
+        route_232_1_1_1.add_vpp_config()
+
+        #
+        # An (S,G).
+        # one accepting interface, pg0, 2 forwarding interfaces
+        #
+        route_1_1_1_1_232_1_1_1 = IpMRoute(
+            self,
+            "1.1.1.1",
+            "232.1.1.1", 64,  # presumably 32 src + 32 grp bits; confirm
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            [MRoutePath(self.pg0.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT),
+             MRoutePath(self.pg1.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD),
+             MRoutePath(self.pg2.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)])
+        route_1_1_1_1_232_1_1_1.add_vpp_config()
+
+        #
+        # An (*,G/m).
+        # one accepting interface, pg0, 1 forwarding interface
+        #
+        route_232 = IpMRoute(
+            self,
+            "0.0.0.0",
+            "232.0.0.0", 8,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            [MRoutePath(self.pg0.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT),
+             MRoutePath(self.pg1.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)])
+        route_232.add_vpp_config()
+
+        #
+        # a stream that matches the route for (1.1.1.1,232.1.1.1)
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip4(self.pg0, "1.1.1.1", "232.1.1.1")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        # We expect replications on Pg1 and Pg2
+        self.verify_capture_ip4(self.pg1, tx)
+        self.verify_capture_ip4(self.pg2, tx)
+
+        # no replications on Pg0 nor Pg3
+        self.pg0.assert_nothing_captured(
+            remark="IP multicast packets forwarded on PG0")
+        self.pg3.assert_nothing_captured(
+            remark="IP multicast packets forwarded on PG3")
+
+        #
+        # a stream that matches the route for (*,232.0.0.0/8)
+        # Send packets with the 9th bit set so we test the correct clearing
+        # of that bit in the mac rewrite
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip4(self.pg0, "1.1.1.1", "232.255.255.255")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        # We expect replications on Pg1 only
+        self.verify_capture_ip4(self.pg1, tx)
+
+        # no replications on Pg0, Pg2 nor Pg3
+        self.pg0.assert_nothing_captured(
+            remark="IP multicast packets forwarded on PG0")
+        self.pg2.assert_nothing_captured(
+            remark="IP multicast packets forwarded on PG2")
+        self.pg3.assert_nothing_captured(
+            remark="IP multicast packets forwarded on PG3")
+
+        #
+        # a stream that matches the route for (*,232.1.1.1)
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip4(self.pg0, "1.1.1.2", "232.1.1.1")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        # We expect replications on Pg1, 2, 3.
+        self.verify_capture_ip4(self.pg1, tx)
+        self.verify_capture_ip4(self.pg2, tx)
+        self.verify_capture_ip4(self.pg3, tx)
+
+        # no replications on Pg0
+        self.pg0.assert_nothing_captured(
+            remark="IP multicast packets forwarded on PG0")
+
+        route_232_1_1_1.remove_vpp_config()
+        route_1_1_1_1_232_1_1_1.remove_vpp_config()
+        route_232.remove_vpp_config()
+
+    def test_ip6_mcast(self):
+        """ IPv6 Multicast Replication """
+
+        #
+        # a stream that matches the default route. gets dropped.
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip6(self.pg0, "2001::1", "ff01::1")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        self.pg0.assert_nothing_captured(
+            remark="IPv6 multicast packets forwarded on default route")
+
+        #
+        # A (*,G).
+        # one accepting interface, pg0, 3 forwarding interfaces
+        #
+        route_ff01_1 = IpMRoute(
+            self,
+            "::",
+            "ff01::1", 128,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            [MRoutePath(self.pg0.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT),
+             MRoutePath(self.pg1.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD),
+             MRoutePath(self.pg2.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD),
+             MRoutePath(self.pg3.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)],
+            is_ip6=1)
+        route_ff01_1.add_vpp_config()
+
+        #
+        # An (S,G).
+        # one accepting interface, pg0, 2 forwarding interfaces
+        #
+        route_2001_ff01_1 = IpMRoute(
+            self,
+            "2001::1",
+            "ff01::1", 256,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            [MRoutePath(self.pg0.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT),
+             MRoutePath(self.pg1.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD),
+             MRoutePath(self.pg2.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)],
+            is_ip6=1)
+        route_2001_ff01_1.add_vpp_config()
+
+        #
+        # An (*,G/m).
+        # one accepting interface, pg0, 1 forwarding interface
+        #
+        route_ff01 = IpMRoute(
+            self,
+            "::",
+            "ff01::", 16,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            [MRoutePath(self.pg0.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT),
+             MRoutePath(self.pg1.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)],
+            is_ip6=1)
+        route_ff01.add_vpp_config()
+
+        #
+        # a stream that matches the route for (*, ff01::/16)
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip6(self.pg0, "2002::1", "ff01:2::255")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        # We expect replications on Pg1
+        self.verify_capture_ip6(self.pg1, tx)
+
+        # no replications on Pg0, Pg2, Pg3
+        self.pg0.assert_nothing_captured(
+            remark="IP multicast packets forwarded on PG0")
+        self.pg2.assert_nothing_captured(
+            remark="IP multicast packets forwarded on PG2")
+        self.pg3.assert_nothing_captured(
+            remark="IP multicast packets forwarded on PG3")
+
+        #
+        # a stream that matches the route for (*,ff01::1)
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip6(self.pg0, "2002::2", "ff01::1")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        # We expect replications on Pg1, 2, 3.
+        self.verify_capture_ip6(self.pg1, tx)
+        self.verify_capture_ip6(self.pg2, tx)
+        self.verify_capture_ip6(self.pg3, tx)
+
+        # no replications on Pg0
+        self.pg0.assert_nothing_captured(
+            remark="IPv6 multicast packets forwarded on PG0")
+
+        #
+        # a stream that matches the route for (2001::1, ff01::1)
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip6(self.pg0, "2001::1", "ff01::1")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        # We expect replications on Pg1, 2.
+        self.verify_capture_ip6(self.pg1, tx)
+        self.verify_capture_ip6(self.pg2, tx)
+
+        # no replications on Pg0, Pg3
+        self.pg0.assert_nothing_captured(
+            remark="IP multicast packets forwarded on PG0")
+        self.pg3.assert_nothing_captured(
+            remark="IP multicast packets forwarded on PG3")
+
+        route_ff01.remove_vpp_config()
+        route_ff01_1.remove_vpp_config()
+        route_2001_ff01_1.remove_vpp_config()
+
+    def _mcast_connected_send_stream(self, dst_ip):
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip4(self.pg0,
+                                    self.pg0.remote_ip4,
+                                    dst_ip)
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        # We expect replications on Pg1.
+        self.verify_capture_ip4(self.pg1, tx)
+
+        return tx
+
+    def test_ip_mcast_connected(self):
+        """ IP Multicast Connected Source check """
+
+        #
+        # A (*,G).
+        # one accepting interface, pg0, 1 forwarding interface
+        #
+        route_232_1_1_1 = IpMRoute(
+            self,
+            "0.0.0.0",
+            "232.1.1.1", 32,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            [MRoutePath(self.pg0.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT),
+             MRoutePath(self.pg1.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)])
+
+        route_232_1_1_1.add_vpp_config()
+        route_232_1_1_1.update_entry_flags(
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_CONNECTED)
+
+        #
+        # Now the (*,G) is present, send from connected source
+        #
+        tx = self._mcast_connected_send_stream("232.1.1.1")
+
+        #
+        # Construct a representation of the signal we expect on pg0
+        #
+        signal_232_1_1_1_itf_0 = MFibSignal(self,
+                                            route_232_1_1_1,
+                                            self.pg0.sw_if_index,
+                                            tx[0])
+
+        #
+        # read the only expected signal
+        #
+        signals = self.vapi.mfib_signal_dump()
+
+        self.assertEqual(1, len(signals))
+
+        signal_232_1_1_1_itf_0.compare(signals[0])
+
+        #
+        # reading the signal allows for the generation of another
+        # so send more packets and expect the next signal
+        #
+        tx = self._mcast_connected_send_stream("232.1.1.1")
+
+        signals = self.vapi.mfib_signal_dump()
+        self.assertEqual(1, len(signals))
+        signal_232_1_1_1_itf_0.compare(signals[0])
+
+        #
+        # A Second entry with connected check
+        # one accepting interface, pg0, 1 forwarding interface
+        #
+        route_232_1_1_2 = IpMRoute(
+            self,
+            "0.0.0.0",
+            "232.1.1.2", 32,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            [MRoutePath(self.pg0.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT),
+             MRoutePath(self.pg1.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)])
+
+        route_232_1_1_2.add_vpp_config()
+        route_232_1_1_2.update_entry_flags(
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_CONNECTED)
+
+        #
+        # Send traffic to both entries. One read should net us two signals
+        #
+        signal_232_1_1_2_itf_0 = MFibSignal(self,
+                                            route_232_1_1_2,
+                                            self.pg0.sw_if_index,
+                                            tx[0])
+        tx = self._mcast_connected_send_stream("232.1.1.1")
+        tx2 = self._mcast_connected_send_stream("232.1.1.2")
+
+        #
+        # read the two expected signals
+        #
+        signals = self.vapi.mfib_signal_dump()
+
+        self.assertEqual(2, len(signals))
+
+        signal_232_1_1_1_itf_0.compare(signals[1])
+        signal_232_1_1_2_itf_0.compare(signals[0])
+
+        route_232_1_1_1.remove_vpp_config()
+        route_232_1_1_2.remove_vpp_config()
+
+    def test_ip_mcast_signal(self):
+        """ IP Multicast Signal """
+
+        #
+        # A (*,G).
+        # one accepting interface, pg0, 1 forwarding interface
+        #
+        route_232_1_1_1 = IpMRoute(
+            self,
+            "0.0.0.0",
+            "232.1.1.1", 32,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            [MRoutePath(self.pg0.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT),
+             MRoutePath(self.pg1.sw_if_index,
+                        MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)])
+
+        route_232_1_1_1.add_vpp_config()
+        route_232_1_1_1.update_entry_flags(
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_SIGNAL)
+
+        #
+        # Now the (*,G) is present, send from connected source
+        #
+        tx = self._mcast_connected_send_stream("232.1.1.1")
+
+        #
+        # Construct a representation of the signal we expect on pg0
+        #
+        signal_232_1_1_1_itf_0 = MFibSignal(self,
+                                            route_232_1_1_1,
+                                            self.pg0.sw_if_index,
+                                            tx[0])
+
+        #
+        # read the only expected signal
+        #
+        signals = self.vapi.mfib_signal_dump()
+
+        self.assertEqual(1, len(signals))
+
+        signal_232_1_1_1_itf_0.compare(signals[0])
+
+        #
+        # reading the signal allows for the generation of another
+        # so send more packets and expect the next signal
+        #
+        tx = self._mcast_connected_send_stream("232.1.1.1")
+
+        signals = self.vapi.mfib_signal_dump()
+        self.assertEqual(1, len(signals))
+        signal_232_1_1_1_itf_0.compare(signals[0])
+
+        #
+        # Set the negate-signal on the accepting interface - the signals
+        # should stop
+        #
+        route_232_1_1_1.update_path_flags(
+            self.pg0.sw_if_index,
+            (MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT |
+             MRouteItfFlags.MFIB_ITF_FLAG_NEGATE_SIGNAL))
+
+        tx = self._mcast_connected_send_stream("232.1.1.1")
+
+        signals = self.vapi.mfib_signal_dump()
+        self.assertEqual(0, len(signals))
+
+        #
+        # Clear the SIGNAL flag on the entry and the signals should
+        # come back since the interface is still NEGATE-SIGNAL
+        #
+        route_232_1_1_1.update_entry_flags(
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE)
+
+        tx = self._mcast_connected_send_stream("232.1.1.1")
+
+        signals = self.vapi.mfib_signal_dump()
+        self.assertEqual(1, len(signals))
+        signal_232_1_1_1_itf_0.compare(signals[0])
+
+        #
+        # Lastly remove the NEGATE-SIGNAL from the interface and the
+        # signals should stop
+        #
+        route_232_1_1_1.update_path_flags(self.pg0.sw_if_index,
+                                          MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT)
+
+        tx = self._mcast_connected_send_stream("232.1.1.1")
+        signals = self.vapi.mfib_signal_dump()
+        self.assertEqual(0, len(signals))
+
+        #
+        # Cleanup
+        #
+        route_232_1_1_1.remove_vpp_config()
+
+
+if __name__ == '__main__':
+    unittest.main(testRunner=VppTestRunner)
diff --git a/test/test_mfib.py b/test/test_mfib.py
new file mode 100644
index 00000000..4d0d2a3f
--- /dev/null
+++ b/test/test_mfib.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+import unittest
+
+from framework import VppTestCase, VppTestRunner
+
+
+class TestMFIB(VppTestCase):
+    """ MFIB Test Case """
+
+    def setUp(self):
+        super(TestMFIB, self).setUp()
+
+    def test_mfib(self):
+        """ MFIB Unit Tests """
+        error = self.vapi.cli("test mfib")
+
+        if error:
+            self.logger.critical(error)
+        self.assertEqual(error.find("Failed"), -1)
+
+if __name__ == '__main__':
+    unittest.main(testRunner=VppTestRunner)
diff --git a/test/vpp_interface.py b/test/vpp_interface.py
index ee4a9ef6..29edb70e 100644
--- a/test/vpp_interface.py
+++ b/test/vpp_interface.py
@@ -260,9 +260,10 @@ class VppInterface(object):
         """Configure IPv6 RA suppress on the VPP interface."""
         self.test.vapi.sw_interface_ra_suppress(self.sw_if_index)
 
-    def ip6_ra_config(self, suppress=0, send_unicast=0):
+    def ip6_ra_config(self, no=0, suppress=0, send_unicast=0):
         """Configure IPv6 RA suppress on the VPP interface."""
         self.test.vapi.ip6_sw_interface_ra_config(self.sw_if_index,
+                                                  no,
                                                   suppress,
                                                   send_unicast)
 
diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py
index fc9133fb..1804fbbd 100644
--- a/test/vpp_ip_route.py
+++ b/test/vpp_ip_route.py
@@ -11,7 +11,7 @@ MPLS_IETF_MAX_LABEL = 0xfffff
 MPLS_LABEL_INVALID = MPLS_IETF_MAX_LABEL + 1
 
 
-class RoutePath:
+class RoutePath(object):
 
     def __init__(
             self,
@@ -31,6 +31,14 @@ class RoutePath:
             self.nh_addr = socket.inet_pton(socket.AF_INET, nh_addr)
 
 
+class MRoutePath(RoutePath):
+
+    def __init__(self, nh_sw_if_index, flags):
+        super(MRoutePath, self).__init__("0.0.0.0",
+                                         nh_sw_if_index)
+        self.nh_i_flags = flags
+
+
 class IpRoute:
     """
     IP Route
@@ -94,6 +102,97 @@ class IpRoute:
                                                  is_add=0)
 
 
+class IpMRoute:
+    """
+    IP Multicast Route
+    """
+
+    def __init__(self, test, src_addr, grp_addr,
+                 grp_addr_len, e_flags, paths, table_id=0, is_ip6=0):
+        self._test = test
+        self.paths = paths
+        self.grp_addr_len = grp_addr_len
+        self.table_id = table_id
+        self.e_flags = e_flags
+        self.is_ip6 = is_ip6
+
+        if is_ip6:
+            self.grp_addr = socket.inet_pton(socket.AF_INET6, grp_addr)
+            self.src_addr = socket.inet_pton(socket.AF_INET6, src_addr)
+        else:
+            self.grp_addr = socket.inet_pton(socket.AF_INET, grp_addr)
+            self.src_addr = socket.inet_pton(socket.AF_INET, src_addr)
+
+    def add_vpp_config(self):
+        for path in self.paths:
+            self._test.vapi.ip_mroute_add_del(self.src_addr,
+                                              self.grp_addr,
+                                              self.grp_addr_len,
+                                              self.e_flags,
+                                              path.nh_itf,
+                                              path.nh_i_flags,
+                                              table_id=self.table_id,
+                                              is_ipv6=self.is_ip6)
+
+    def remove_vpp_config(self):
+        for path in self.paths:
+            self._test.vapi.ip_mroute_add_del(self.src_addr,
+                                              self.grp_addr,
+                                              self.grp_addr_len,
+                                              self.e_flags,
+                                              path.nh_itf,
+                                              path.nh_i_flags,
+                                              table_id=self.table_id,
+                                              is_add=0,
+                                              is_ipv6=self.is_ip6)
+
+    def update_entry_flags(self, flags):
+        self.e_flags = flags
+        self._test.vapi.ip_mroute_add_del(self.src_addr,
+                                          self.grp_addr,
+                                          self.grp_addr_len,
+                                          self.e_flags,
+                                          0xffffffff,
+                                          0,
+                                          table_id=self.table_id,
+                                          is_ipv6=self.is_ip6)
+
+    def update_path_flags(self, itf, flags):
+        for path in self.paths:
+            if path.nh_itf == itf:
+                path.nh_i_flags = flags
+                break
+        self._test.vapi.ip_mroute_add_del(self.src_addr,
+                                          self.grp_addr,
+                                          self.grp_addr_len,
+                                          self.e_flags,
+                                          path.nh_itf,
+                                          path.nh_i_flags,
+                                          table_id=self.table_id,
+                                          is_ipv6=self.is_ip6)
+
+
+class MFibSignal:
+    def __init__(self, test, route, interface, packet):
+        self.route = route
+        self.interface = interface
+        self.packet = packet
+        self.test = test
+
+    def compare(self, signal):
+        self.test.assertEqual(self.interface, signal.sw_if_index)
+        self.test.assertEqual(self.route.table_id, signal.table_id)
+        self.test.assertEqual(self.route.grp_addr_len,
+                              signal.grp_address_len)
+        for i in range(self.route.grp_addr_len / 8):
+            self.test.assertEqual(self.route.grp_addr[i],
+                                  signal.grp_address[i])
+        if (self.route.grp_addr_len > 32):
+            for i in range(4):
+                self.test.assertEqual(self.route.src_addr[i],
+                                      signal.src_address[i])
+
+
 class MplsIpBind:
     """
     MPLS to IP Binding
diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py
index 1b2895e9..90c954dc 100644
--- a/test/vpp_papi_provider.py
+++ b/test/vpp_papi_provider.py
@@ -252,10 +252,12 @@ class VppPapiProvider(object):
                         {'sw_if_index': sw_if_index})
 
     def ip6_sw_interface_ra_config(self, sw_if_index,
+                                   no,
                                    suppress,
-                                   send_unicast,):
+                                   send_unicast):
         return self.api(self.papi.sw_interface_ip6nd_ra_config,
                         {'sw_if_index': sw_if_index,
+                         'is_no': no,
                          'suppress': suppress,
                          'send_unicast': send_unicast})
 
@@ -1178,3 +1180,33 @@ class VppPapiProvider(object):
                 'is_add': is_add,
                 'oui': oui,
             })
+
+    def ip_mroute_add_del(self,
+                          src_address,
+                          grp_address,
+                          grp_address_length,
+                          e_flags,
+                          next_hop_sw_if_index,
+                          i_flags,
+                          table_id=0,
+                          create_vrf_if_needed=0,
+                          is_add=1,
+                          is_ipv6=0,
+                          is_local=0):
+        """
+        """
+        return self.api(
+            self.papi.ip_mroute_add_del,
+            {'next_hop_sw_if_index': next_hop_sw_if_index,
+             'entry_flags': e_flags,
+             'itf_flags': i_flags,
+             'create_vrf_if_needed': create_vrf_if_needed,
+             'is_add': is_add,
+             'is_ipv6': is_ipv6,
+             'is_local': is_local,
+             'grp_address_length': grp_address_length,
+             'grp_address': grp_address,
+             'src_address': src_address})
+
+    def mfib_signal_dump(self):
+        return self.api(self.papi.mfib_signal_dump, {})
-- 
cgit 1.2.3-korg


From a9374df5f351d25e968f5f90a827796203cbafdd Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Thu, 2 Feb 2017 02:18:18 -0800
Subject: Fix SR multicast post mfib commit  1 - use the SR policy to construct
 the replicate DPO. Each bucket therein is a SR tunnel.  2 - install a special
 mfib entry that links via this replicate  3 - forwarding is now mfib-lookup
 -> replicate -> sr_rewrite (per-tunnel)         no need for a separate
 sr_replicate node.  4 - Stack the sr tunnel on the forwarding DPO of the
 first-hop FIB entry.         no need for a second lookup post SR encap.  5 -
 fix some path-list lock leaks in the MFIB entry.

Change-Id: I20de96ea4c4be4fae252625bde159d9c435c8315
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/scripts/vnet/sr/mcast    |  58 +++++
 src/vnet.am                  |   1 -
 src/vnet/dpo/replicate_dpo.c |  14 +-
 src/vnet/mfib/mfib_entry.c   |  90 ++++++--
 src/vnet/mfib/mfib_entry.h   |   5 +-
 src/vnet/mfib/mfib_table.c   |  35 ++-
 src/vnet/mfib/mfib_table.h   |  32 +++
 src/vnet/mfib/mfib_test.c    |  56 ++++-
 src/vnet/mfib/mfib_types.h   |  14 +-
 src/vnet/sr/sr.c             | 327 ++++++++++++++++++----------
 src/vnet/sr/sr.h             |  30 ++-
 src/vnet/sr/sr_api.c         |   5 -
 src/vnet/sr/sr_replicate.c   | 491 -------------------------------------------
 13 files changed, 508 insertions(+), 650 deletions(-)
 create mode 100644 src/scripts/vnet/sr/mcast
 delete mode 100644 src/vnet/sr/sr_replicate.c

(limited to 'src/vnet/dpo')

diff --git a/src/scripts/vnet/sr/mcast b/src/scripts/vnet/sr/mcast
new file mode 100644
index 00000000..50e73efa
--- /dev/null
+++ b/src/scripts/vnet/sr/mcast
@@ -0,0 +1,58 @@
+
+loop create
+loop create
+loop create
+loop create
+
+set int state loop0 up
+set int state loop1 up
+set int state loop2 up
+set int state loop3 up
+
+set int ip address loop0 2001::1/64
+set int ip address loop1 2001:1::1/64
+set int ip address loop2 2001:2::1/64
+set int ip address loop3 2001:3::1/64
+
+set ip6 neighbor loop1 2001:1::2 00:00:dd:ee:cc:d1
+set ip6 neighbor loop2 2001:2::2 00:00:dd:ee:cc:d2
+set ip6 neighbor loop3 2001:3::2 00:00:dd:ee:cc:d3
+
+ip route 3001::1/128 via 2001:1::2 loop1
+ip route 3001::2/128 via 2001:2::2 loop2
+ip route 3001::3/128 via 2001:3::2 loop3
+
+sr tunnel name SR1 src aaaa::2:1 dst ff19::1/128 next 3001::1 clean
+sr tunnel name SR2 src aaaa::2:2 dst ff19::2/128 next 3001::2 clean
+sr tunnel name SR3 src aaaa::2:3 dst ff19::3/128 next 3001::3 clean
+
+sr policy name MCAST1 tunnel SR1 tunnel SR2 tunnel SR3
+
+sr multicast-map address ff18::1 sr-policy MCAST1
+
+packet-generator new {
+  name x
+  limit 1
+  node ethernet-input
+  size 64-64
+  no-recycle
+  data {
+    IP6: 1.2.3 -> 4.5.6
+    ICMP: 3002::2 -> ff18::1
+    ICMP echo_request
+    incrementing 100
+  }
+}
+trace add pg-input 100
+
+sr multicast-map del address ff18::1 sr-policy MCAST1
+sr policy del name MCAST1 tunnel SR1 tunnel SR2 tunnel SR3
+
+ip route del 3001::1/128 via 2001:1::2 loop1
+ip route del 3001::2/128 via 2001:2::2 loop2
+ip route del 3001::3/128 via 2001:3::2 loop3
+
+sr tunnel del name SR1 src aaaa::2:1 dst ff19::1/128 next 3001::1 clean
+sr tunnel del name SR2 src aaaa::2:2 dst ff19::2/128 next 3001::2 clean
+sr tunnel del name SR3 src aaaa::2:3 dst ff19::3/128 next 3001::3 clean
+
diff --git a/src/vnet.am b/src/vnet.am
index 78d864dc..9b148f69 100644
--- a/src/vnet.am
+++ b/src/vnet.am
@@ -694,7 +694,6 @@ nobase_include_HEADERS +=			\
 if WITH_LIBSSL
 libvnet_la_SOURCES +=				\
  vnet/sr/sr.c					\
- vnet/sr/sr_replicate.c				\
  vnet/sr/sr_api.c
 endif
 
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index a2d5fdb6..8bad75ee 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -601,8 +601,7 @@ VLIB_CLI_COMMAND (replicate_show_command, static) = {
 typedef struct replicate_trace_t_
 {
     index_t rep_index;
-    index_t dpo_index;
-    dpo_type_t dpo_type;
+    dpo_id_t dpo;
 } replicate_trace_t;
 
 static uword
@@ -656,8 +655,7 @@ replicate_inline (vlib_main_t * vm,
             {
                 replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
                 t->rep_index = repi0;
-                t->dpo_index = dpo0->dpoi_index;
-                t->dpo_type = dpo0->dpoi_type;
+                t->dpo = *dpo0;
             }
             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                              to_next, n_left_to_next,
@@ -682,8 +680,7 @@ replicate_inline (vlib_main_t * vm,
                 {
                     replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
                     t->rep_index = repi0;
-                    t->dpo_index = dpo0->dpoi_index;
-                    t->dpo_type = dpo0->dpoi_type;
+                    t->dpo = *dpo0;
                 }
 
                 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
@@ -705,10 +702,9 @@ format_replicate_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   replicate_trace_t *t = va_arg (*args, replicate_trace_t *);
 
-  s = format (s, "replicate: %d via %U:%d",
+  s = format (s, "replicate: %d via %U",
               t->rep_index,
-              format_dpo_type, t->dpo_type,
-              t->dpo_index);
+              format_dpo_id, &t->dpo);
   return s;
 }
 
diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c
index 479ce5f1..5170080c 100644
--- a/src/vnet/mfib/mfib_entry.c
+++ b/src/vnet/mfib/mfib_entry.c
@@ -292,6 +292,7 @@ mfib_entry_src_flush (mfib_entry_src_t *msrc)
     ({
         mfib_itf_delete(mfib_itf_get(mfii));
     }));
+    fib_path_list_unlock(msrc->mfes_pl);
 }
 
 static void
@@ -474,37 +475,60 @@ mfib_entry_src_collect_forwarding (fib_node_index_t pl_index,
 static void
 mfib_entry_stack (mfib_entry_t *mfib_entry)
 {
-    mfib_entry_collect_forwarding_ctx_t ctx = {
-        .next_hops = NULL,
-        .fct = mfib_entry_get_default_chain_type(mfib_entry),
-    };
     dpo_proto_t dp;
 
     dp = fib_proto_to_dpo(mfib_entry_get_proto(mfib_entry));
 
     if (FIB_NODE_INDEX_INVALID != mfib_entry->mfe_parent)
     {
+        mfib_entry_collect_forwarding_ctx_t ctx = {
+            .next_hops = NULL,
+            .fct = mfib_entry_get_default_chain_type(mfib_entry),
+        };
+
         fib_path_list_walk(mfib_entry->mfe_parent,
                            mfib_entry_src_collect_forwarding,
                            &ctx);
 
-        if (!dpo_id_is_valid(&mfib_entry->mfe_rep) ||
-            dpo_is_drop(&mfib_entry->mfe_rep))
+        if (!(MFIB_ENTRY_FLAG_EXCLUSIVE & mfib_entry->mfe_flags))
         {
-            dpo_id_t tmp_dpo = DPO_INVALID;
+            /*
+             * each path contributes a next-hop. form a replicate
+             * from those choices.
+             */
+            if (!dpo_id_is_valid(&mfib_entry->mfe_rep) ||
+                dpo_is_drop(&mfib_entry->mfe_rep))
+            {
+                dpo_id_t tmp_dpo = DPO_INVALID;
 
-            dpo_set(&tmp_dpo,
-                    DPO_REPLICATE, dp,
-                    replicate_create(0, dp));
+                dpo_set(&tmp_dpo,
+                        DPO_REPLICATE, dp,
+                        replicate_create(0, dp));
+
+                dpo_stack(DPO_MFIB_ENTRY, dp,
+                          &mfib_entry->mfe_rep,
+                          &tmp_dpo);
+
+                dpo_reset(&tmp_dpo);
+            }
+            replicate_multipath_update(&mfib_entry->mfe_rep,
+                                       ctx.next_hops);
+        }
+        else
+        {
+            /*
+             * for exclusive routes the source provided a replicate DPO
+             * which we stashed in the special path-list with one path,
+             * so we can stack directly on that.
+             */
+            ASSERT(1 == vec_len(ctx.next_hops));
 
             dpo_stack(DPO_MFIB_ENTRY, dp,
                       &mfib_entry->mfe_rep,
-                      &tmp_dpo);
-
-            dpo_reset(&tmp_dpo);
+                      &ctx.next_hops[0].path_dpo);
+            dpo_reset(&ctx.next_hops[0].path_dpo);
+            vec_free(ctx.next_hops);
         }
-        replicate_multipath_update(&mfib_entry->mfe_rep,
-                                   ctx.next_hops);
     }
     else
     {
@@ -521,6 +545,8 @@ mfib_entry_forwarding_path_add (mfib_entry_src_t *msrc,
     fib_node_index_t old_pl_index;
     fib_route_path_t *rpaths;
 
+    ASSERT(!(MFIB_ENTRY_FLAG_EXCLUSIVE & msrc->mfes_flags));
+
     /*
      * path-lists require a vector of paths
      */
@@ -555,6 +581,8 @@ mfib_entry_forwarding_path_remove (mfib_entry_src_t *msrc,
     fib_node_index_t old_pl_index;
     fib_route_path_t *rpaths;
 
+    ASSERT(!(MFIB_ENTRY_FLAG_EXCLUSIVE & msrc->mfes_flags));
+
     /*
      * path-lists require a vector of paths
      */
@@ -650,7 +678,8 @@ mfib_entry_src_ok_for_delete (const mfib_entry_src_t *msrc)
 int
 mfib_entry_update (fib_node_index_t mfib_entry_index,
                    mfib_source_t source,
-                   mfib_entry_flags_t entry_flags)
+                   mfib_entry_flags_t entry_flags,
+                   index_t repi)
 {
     mfib_entry_t *mfib_entry;
     mfib_entry_src_t *msrc;
@@ -659,6 +688,35 @@ mfib_entry_update (fib_node_index_t mfib_entry_index,
     msrc = mfib_entry_src_find_or_create(mfib_entry, source);
     msrc->mfes_flags = entry_flags;
 
+    if (INDEX_INVALID != repi)
+    {
+        /*
+         * The source is providing its own replicate DPO.
+         * Create a special path-list to manage it, that way
+         * this entry and the source are equivalent to a normal
+         * entry
+         */
+        fib_node_index_t old_pl_index;
+        fib_protocol_t fp;
+        dpo_id_t dpo = DPO_INVALID;
+
+        fp = mfib_entry_get_proto(mfib_entry);
+        old_pl_index = msrc->mfes_pl;
+
+        dpo_set(&dpo, DPO_REPLICATE,
+                fib_proto_to_dpo(fp),
+                repi);
+
+        msrc->mfes_pl =
+            fib_path_list_create_special(fp,
+                                         FIB_PATH_LIST_FLAG_EXCLUSIVE,
+                                         &dpo);
+
+        dpo_reset(&dpo);
+        fib_path_list_lock(msrc->mfes_pl);
+        fib_path_list_unlock(old_pl_index);
+    }
+
     if (mfib_entry_src_ok_for_delete(msrc))
     {
         /*
diff --git a/src/vnet/mfib/mfib_entry.h b/src/vnet/mfib/mfib_entry.h
index cc5d5326..36fc73e1 100644
--- a/src/vnet/mfib/mfib_entry.h
+++ b/src/vnet/mfib/mfib_entry.h
@@ -65,7 +65,7 @@ typedef struct mfib_entry_t_ {
     CLIB_CACHE_LINE_ALIGN_MARK(cacheline1);
 
     /**
-     * The Replicate used for forwarding.
+     * The Replicate DPO used for forwarding.
      */
     dpo_id_t mfe_rep;
 
@@ -94,7 +94,8 @@ extern fib_node_index_t mfib_entry_create(u32 fib_index,
 
 extern int mfib_entry_update(fib_node_index_t fib_entry_index,
                              mfib_source_t source,
-                             mfib_entry_flags_t entry_flags);
+                             mfib_entry_flags_t entry_flags,
+                             index_t rep_dpo);
 
 extern void mfib_entry_path_update(fib_node_index_t fib_entry_index,
                                    mfib_source_t source,
diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c
index e4c0936d..b4e855ff 100644
--- a/src/vnet/mfib/mfib_table.c
+++ b/src/vnet/mfib/mfib_table.c
@@ -195,7 +195,10 @@ mfib_table_entry_update (u32 fib_index,
     {
         mfib_entry_lock(mfib_entry_index);
 
-        if (mfib_entry_update(mfib_entry_index, source, entry_flags))
+        if (mfib_entry_update(mfib_entry_index,
+                              source,
+                              entry_flags,
+                              INDEX_INVALID))
         {
             /*
              * this update means we can now remove the entry.
@@ -283,6 +286,36 @@ mfib_table_entry_path_remove (u32 fib_index,
     }
 }
 
+fib_node_index_t
+mfib_table_entry_special_add (u32 fib_index,
+                              const mfib_prefix_t *prefix,
+                              mfib_source_t source,
+                              mfib_entry_flags_t entry_flags,
+                              index_t rep_dpo)
+{
+    fib_node_index_t mfib_entry_index;
+    mfib_table_t *mfib_table;
+
+    mfib_table = mfib_table_get(fib_index, prefix->fp_proto);
+    mfib_entry_index = mfib_table_lookup_exact_match_i(mfib_table, prefix);
+
+    if (FIB_NODE_INDEX_INVALID == mfib_entry_index)
+    {
+        mfib_entry_index = mfib_entry_create(fib_index,
+                                             source,
+                                             prefix,
+                                             MFIB_ENTRY_FLAG_NONE);
+
+        mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index);
+    }
+
+    mfib_entry_update(mfib_entry_index, source,
+                      (MFIB_ENTRY_FLAG_EXCLUSIVE | entry_flags),
+                      rep_dpo);
+
+    return (mfib_entry_index);
+}
+
 static void
 mfib_table_entry_delete_i (u32 fib_index,
                            fib_node_index_t mfib_entry_index,
diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h
index 4faa69ee..4c51b70f 100644
--- a/src/vnet/mfib/mfib_table.h
+++ b/src/vnet/mfib/mfib_table.h
@@ -18,6 +18,7 @@
 
 #include <vnet/ip/ip.h>
 #include <vnet/adj/adj.h>
+#include <vnet/dpo/replicate_dpo.h>
 
 #include <vnet/mfib/mfib_types.h>
 
@@ -212,6 +213,37 @@ extern void mfib_table_entry_delete(u32 fib_index,
 extern void mfib_table_entry_delete_index(fib_node_index_t entry_index,
                                           mfib_source_t source);
 
+/**
+ * @brief
+ *  Add a 'special' entry to the mFIB that links to the DPO passed
+ *  A special entry is an entry that the FIB is not expected to resolve
+ *  via the usual mechanisms (i.e. recursive or neighbour adj DB lookup).
+ *  Instead the client/source provides the index of a replicate DPO to link to.
+ *
+ * @param fib_index
+ *  The index of the FIB
+ *
+ * @param prefix
+ *  The prefix to add
+ *
+ * @param source
+ *  The ID of the client/source adding the entry.
+ *
+ * @param flags
+ *  Flags for the entry.
+ *
+ * @param rep_dpo
+ *  The replicate DPO index to link to.
+ *
+ * @return
+ *  the index of the mfib_entry_t that is created (or existed already).
+ */
+extern fib_node_index_t mfib_table_entry_special_add(u32 fib_index,
+                                                     const mfib_prefix_t *prefix,
+                                                     mfib_source_t source,
+                                                     mfib_entry_flags_t flags,
+                                                     index_t rep_dpo);
+
 /**
  * @brief
  *  Flush all entries from a table for the source
diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c
index 8082a6bb..36a303e8 100644
--- a/src/vnet/mfib/mfib_test.c
+++ b/src/vnet/mfib/mfib_test.c
@@ -19,6 +19,7 @@
 #include <vnet/mfib/mfib_entry.h>
 #include <vnet/mfib/mfib_signal.h>
 #include <vnet/mfib/ip6_mfib.h>
+#include <vnet/fib/fib_path_list.h>
 
 #include <vnet/dpo/replicate_dpo.h>
 #include <vnet/adj/adj_mcast.h>
@@ -337,7 +338,7 @@ mfib_test_i (fib_protocol_t PROTO,
              const mfib_prefix_t *pfx_star_g_slash_m)
 {
     fib_node_index_t mfei, mfei_dflt, mfei_no_f, mfei_s_g, mfei_g_1, mfei_g_2, mfei_g_3, mfei_g_m;
-    u32 fib_index, n_entries, n_itfs, n_reps;
+    u32 fib_index, n_entries, n_itfs, n_reps, n_pls;
     fib_node_index_t ai_1, ai_2, ai_3;
     test_main_t *tm;
 
@@ -347,6 +348,7 @@ mfib_test_i (fib_protocol_t PROTO,
     n_entries = pool_elts(mfib_entry_pool);
     n_itfs = pool_elts(mfib_itf_pool);
     n_reps = pool_elts(replicate_pool);
+    n_pls = fib_path_list_pool_size();
     tm = &test_main;
 
     ai_1 = adj_mcast_add_or_lock(PROTO,
@@ -1023,6 +1025,54 @@ mfib_test_i (fib_protocol_t PROTO,
               "%U Gone",
               format_mfib_prefix, pfx_star_g_slash_m);
 
+    /*
+     * Add a prefix as a special/exclusive route
+     */
+    dpo_id_t td = DPO_INVALID;
+    index_t repi = replicate_create(1, fib_proto_to_dpo(PROTO));
+
+    dpo_set(&td, DPO_ADJACENCY_MCAST, fib_proto_to_dpo(PROTO), ai_2);
+    replicate_set_bucket(repi, 0, &td);
+
+    mfei = mfib_table_entry_special_add(fib_index,
+                                        pfx_star_g_3,
+                                        MFIB_SOURCE_SRv6,
+                                        MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF,
+                                        repi);
+    MFIB_TEST(mfib_test_entry(mfei,
+                              (MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF |
+                               MFIB_ENTRY_FLAG_EXCLUSIVE),
+                              1,
+                              DPO_ADJACENCY_MCAST, ai_2),
+              "%U exclusive replicate OK",
+              format_mfib_prefix, pfx_star_g_3);
+
+    /*
+     * Update a special/exclusive route
+     */
+    index_t repi2 = replicate_create(1, fib_proto_to_dpo(PROTO));
+
+    dpo_set(&td, DPO_ADJACENCY_MCAST, fib_proto_to_dpo(PROTO), ai_1);
+    replicate_set_bucket(repi2, 0, &td);
+
+    mfei = mfib_table_entry_special_add(fib_index,
+                                        pfx_star_g_3,
+                                        MFIB_SOURCE_SRv6,
+                                        MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF,
+                                        repi2);
+    MFIB_TEST(mfib_test_entry(mfei,
+                              (MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF |
+                               MFIB_ENTRY_FLAG_EXCLUSIVE),
+                              1,
+                              DPO_ADJACENCY_MCAST, ai_1),
+              "%U exclusive update replicate OK",
+              format_mfib_prefix, pfx_star_g_3);
+
+    mfib_table_entry_delete(fib_index,
+                            pfx_star_g_3,
+                            MFIB_SOURCE_SRv6);
+    dpo_reset(&td);
+
     /*
      * Unlock the table - it's the last lock so should be gone thereafter
      */
@@ -1040,6 +1090,8 @@ mfib_test_i (fib_protocol_t PROTO,
      * test we've leaked no resources
      */
     MFIB_TEST(0 == adj_mcast_db_size(), "%d MCAST adjs", adj_mcast_db_size());
+    MFIB_TEST(n_pls == fib_path_list_pool_size(), "%d=%d path-lists",
+              n_pls, fib_path_list_pool_size());
     MFIB_TEST(n_reps == pool_elts(replicate_pool), "%d=%d replicates",
               n_reps, pool_elts(replicate_pool));
     MFIB_TEST(n_entries == pool_elts(mfib_entry_pool),
@@ -1214,7 +1266,7 @@ mfib_test (vlib_main_t * vm,
 
 VLIB_CLI_COMMAND (test_fib_command, static) = {
     .path = "test mfib",
-    .short_help = "fib unit tests - DO NOT RUN ON A LIVE SYSTEM",
+    .short_help = "mfib unit tests - DO NOT RUN ON A LIVE SYSTEM",
     .function = mfib_test,
 };
 
diff --git a/src/vnet/mfib/mfib_types.h b/src/vnet/mfib/mfib_types.h
index 37898a07..fe53aa68 100644
--- a/src/vnet/mfib/mfib_types.h
+++ b/src/vnet/mfib/mfib_types.h
@@ -68,6 +68,13 @@ typedef enum mfib_entry_attribute_t_
      *        Use with extreme caution
      */
     MFIB_ENTRY_ACCEPT_ALL_ITF,
+    /**
+     * Exclusive - like its unicast counterpart. The source has provided
+     * the forwarding DPO directly. The entry therefore does not resolve
+     * paths via a path-list
+     */
+    MFIB_ENTRY_EXCLUSIVE,
+
     MFIB_ENTRY_INHERIT_ACCEPT,
     MFIB_ENTRY_ATTRIBUTE_LAST = MFIB_ENTRY_INHERIT_ACCEPT,
 } mfib_entry_attribute_t;
@@ -83,6 +90,7 @@ typedef enum mfib_entry_attribute_t_
     [MFIB_ENTRY_DROP]           = "D",     \
     [MFIB_ENTRY_ACCEPT_ALL_ITF] = "AA",    \
     [MFIB_ENTRY_INHERIT_ACCEPT] = "IA",    \
+    [MFIB_ENTRY_EXCLUSIVE]      = "E",     \
 }
 
 #define MFIB_ENTRY_NAMES_LONG  {                    \
@@ -91,6 +99,7 @@ typedef enum mfib_entry_attribute_t_
     [MFIB_ENTRY_DROP]           = "Drop",           \
     [MFIB_ENTRY_ACCEPT_ALL_ITF] = "Accept-all-itf", \
     [MFIB_ENTRY_INHERIT_ACCEPT] = "Inherit-Accept", \
+    [MFIB_ENTRY_EXCLUSIVE]      = "Exclusive",      \
 }
 
 typedef enum mfib_entry_flags_t_
@@ -99,8 +108,9 @@ typedef enum mfib_entry_flags_t_
     MFIB_ENTRY_FLAG_SIGNAL = (1 << MFIB_ENTRY_SIGNAL),
     MFIB_ENTRY_FLAG_DROP = (1 << MFIB_ENTRY_DROP),
     MFIB_ENTRY_FLAG_CONNECTED = (1 << MFIB_ENTRY_CONNECTED),
-    MFIB_ENTRY_FLAG_INHERIT_ACCEPT = (1 << MFIB_ENTRY_INHERIT_ACCEPT),
     MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF = (1 << MFIB_ENTRY_ACCEPT_ALL_ITF),
+    MFIB_ENTRY_FLAG_EXCLUSIVE = (1 << MFIB_ENTRY_EXCLUSIVE),
+    MFIB_ENTRY_FLAG_INHERIT_ACCEPT = (1 << MFIB_ENTRY_INHERIT_ACCEPT),
 } mfib_entry_flags_t;
 
 typedef enum mfib_itf_attribute_t_
@@ -155,6 +165,7 @@ typedef enum mfib_source_t_
     MFIB_SOURCE_CLI,
     MFIB_SOURCE_VXLAN,
     MFIB_SOURCE_DHCP,
+    MFIB_SOURCE_SRv6,
     MFIB_SOURCE_DEFAULT_ROUTE,
 } mfib_source_t;
 
@@ -164,6 +175,7 @@ typedef enum mfib_source_t_
     [MFIB_SOURCE_CLI] = "CLI",                     \
     [MFIB_SOURCE_DHCP] = "DHCP",                   \
     [MFIB_SOURCE_VXLAN] = "VXLAN",                 \
+    [MFIB_SOURCE_SRv6] = "SRv6",                   \
     [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \
 }
 
diff --git a/src/vnet/sr/sr.c b/src/vnet/sr/sr.c
index f30c0da9..012d4542 100644
--- a/src/vnet/sr/sr.c
+++ b/src/vnet/sr/sr.c
@@ -23,7 +23,9 @@
 #include <vnet/vnet.h>
 #include <vnet/sr/sr.h>
 #include <vnet/fib/ip6_fib.h>
+#include <vnet/mfib/mfib_table.h>
 #include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
 
 #include <openssl/hmac.h>
 
@@ -35,6 +37,11 @@ static vlib_node_registration_t sr_local_node;
  */
 static dpo_type_t sr_dpo_type;
 
+/**
+ * @brief Dynamically added SR FIB Node type
+ */
+static fib_node_type_t sr_fib_node_type;
+
 /**
  * @brief Use passed HMAC key in ip6_sr_header_t in OpenSSL HMAC routines
  *
@@ -258,20 +265,10 @@ format_ip6_sr_header_with_length (u8 * s, va_list * args)
 
 /**
  * @brief Defined valid next nodes
- * @note Cannot call replicate yet without DPDK
 */
-#if DPDK > 0
-#define foreach_sr_rewrite_next                 \
-_(ERROR, "error-drop")                          \
-_(IP6_LOOKUP, "ip6-lookup")                     \
-_(SR_LOCAL, "sr-local")                         \
-_(SR_REPLICATE,"sr-replicate")
-#else
 #define foreach_sr_rewrite_next                 \
 _(ERROR, "error-drop")                          \
-_(IP6_LOOKUP, "ip6-lookup")                     \
 _(SR_LOCAL, "sr-local")
-#endif /* DPDK */
 
 /**
  * @brief Struct for defined valid next nodes
@@ -384,8 +381,8 @@ sr_rewrite (vlib_main_t * vm,
 	  ip6_header_t *ip0, *ip1;
 	  ip6_sr_header_t *sr0, *sr1;
 	  ip6_sr_tunnel_t *t0, *t1;
-	  u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP;
-	  u32 next1 = SR_REWRITE_NEXT_IP6_LOOKUP;
+	  u32 next0;
+	  u32 next1;
 	  u16 new_l0 = 0;
 	  u16 new_l1 = 0;
 
@@ -433,16 +430,6 @@ sr_rewrite (vlib_main_t * vm,
 
 	  ip0 = vlib_buffer_get_current (b0);
 	  ip1 = vlib_buffer_get_current (b1);
-#if DPDK > 0			/* Cannot call replication node yet without DPDK */
-	  /* add a replication node */
-	  if (PREDICT_FALSE (t0->policy_index != ~0))
-	    {
-	      vnet_buffer (b0)->ip.save_protocol = t0->policy_index;
-	      next0 = SR_REWRITE_NEXT_SR_REPLICATE;
-	      sr0 = (ip6_sr_header_t *) (t0->rewrite);
-	      goto processnext;
-	    }
-#endif /* DPDK */
 
 	  /*
 	   * SR-unaware service chaining case: pkt coming back from
@@ -506,8 +493,11 @@ sr_rewrite (vlib_main_t * vm,
 
 	      sr_fix_hmac (sm, ip0, sr0);
 
-	      next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) :
-		next0;
+	      vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+		t0->first_hop_dpo.dpoi_index;
+	      next0 = t0->first_hop_dpo.dpoi_next_node;
+	      next0 = (sr_local_cb ?
+		       sr_local_cb (vm, node, b0, ip0, sr0) : next0);
 
 	      /*
 	       * Ignore "do not rewrite" shtik in this path
@@ -519,17 +509,7 @@ sr_rewrite (vlib_main_t * vm,
 		    b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
 		}
 	    }
-#if DPDK > 0			/* Cannot call replication node yet without DPDK */
-	processnext:
-	  /* add a replication node */
-	  if (PREDICT_FALSE (t1->policy_index != ~0))
-	    {
-	      vnet_buffer (b1)->ip.save_protocol = t1->policy_index;
-	      next1 = SR_REWRITE_NEXT_SR_REPLICATE;
-	      sr1 = (ip6_sr_header_t *) (t1->rewrite);
-	      goto trace00;
-	    }
-#endif /* DPDK */
+
 	  if (PREDICT_FALSE (ip1->protocol == IPPROTO_IPV6_ROUTE))
 	    {
 	      vlib_buffer_advance (b1, sizeof (ip1));
@@ -584,8 +564,11 @@ sr_rewrite (vlib_main_t * vm,
 
 	      sr_fix_hmac (sm, ip1, sr1);
 
-	      next1 = sr_local_cb ? sr_local_cb (vm, node, b1, ip1, sr1) :
-		next1;
+	      vnet_buffer (b1)->ip.adj_index[VLIB_TX] =
+		t1->first_hop_dpo.dpoi_index;
+	      next1 = t1->first_hop_dpo.dpoi_next_node;
+	      next1 = (sr_local_cb ?
+		       sr_local_cb (vm, node, b1, ip1, sr1) : next1);
 
 	      /*
 	       * Ignore "do not rewrite" shtik in this path
@@ -597,9 +580,6 @@ sr_rewrite (vlib_main_t * vm,
 		    b1->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
 		}
 	    }
-#if DPDK > 0			/* Cannot run replicate without DPDK and only replicate uses this label */
-	trace00:
-#endif /* DPDK */
 
 	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
 	    {
@@ -641,7 +621,7 @@ sr_rewrite (vlib_main_t * vm,
 	  ip6_header_t *ip0 = 0;
 	  ip6_sr_header_t *sr0 = 0;
 	  ip6_sr_tunnel_t *t0;
-	  u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP;
+	  u32 next0;
 	  u16 new_l0 = 0;
 
 	  bi0 = from[0];
@@ -661,16 +641,6 @@ sr_rewrite (vlib_main_t * vm,
 	  t0 =
 	    pool_elt_at_index (sm->tunnels,
 			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-#if DPDK > 0			/* Cannot call replication node yet without DPDK */
-	  /* add a replication node */
-	  if (PREDICT_FALSE (t0->policy_index != ~0))
-	    {
-	      vnet_buffer (b0)->ip.save_protocol = t0->policy_index;
-	      next0 = SR_REWRITE_NEXT_SR_REPLICATE;
-	      sr0 = (ip6_sr_header_t *) (t0->rewrite);
-	      goto trace0;
-	    }
-#endif /* DPDK */
 
 	  ASSERT (VLIB_BUFFER_PRE_DATA_SIZE
 		  >= ((word) vec_len (t0->rewrite)) + b0->current_data);
@@ -740,8 +710,11 @@ sr_rewrite (vlib_main_t * vm,
 
 	      sr_fix_hmac (sm, ip0, sr0);
 
-	      next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) :
-		next0;
+	      vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+		t0->first_hop_dpo.dpoi_index;
+	      next0 = t0->first_hop_dpo.dpoi_next_node;
+	      next0 = (sr_local_cb ?
+		       sr_local_cb (vm, node, b0, ip0, sr0) : next0);
 
 	      /*
 	       * Ignore "do not rewrite" shtik in this path
@@ -753,9 +726,6 @@ sr_rewrite (vlib_main_t * vm,
 		    b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
 		}
 	    }
-#if DPDK > 0			/* Cannot run replicate without DPDK and only replicate uses this label */
-	trace0:
-#endif /* DPDK */
 
 	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
 	    {
@@ -809,20 +779,69 @@ VLIB_NODE_FUNCTION_MULTIARCH (sr_rewrite_node, sr_rewrite)
 /* *INDENT-ON* */
 
 static int
-ip6_delete_route_no_next_hop (ip6_address_t * dst_address_arg,
-			      u32 dst_address_length, u32 rx_table_id)
+ip6_routes_add_del (ip6_sr_tunnel_t * t, int is_del)
 {
+  ip6_sr_main_t *sm = &sr_main;
+
+  /*
+   * the prefix for the tunnel's destination
+   */
+  /* *INDENT-OFF* */
   fib_prefix_t pfx = {
-    .fp_len = dst_address_length,
     .fp_proto = FIB_PROTOCOL_IP6,
+    .fp_len = t->dst_mask_width,
     .fp_addr = {
-		.ip6 = *dst_address_arg,
-		}
+      .ip6 = t->key.dst,
+    }
   };
+  /* *INDENT-ON* */
 
-  fib_table_entry_delete (fib_table_id_find_fib_index (FIB_PROTOCOL_IP6,
-						       rx_table_id),
-			  &pfx, FIB_SOURCE_SR);
+  if (is_del)
+    {
+      fib_table_entry_delete (t->rx_fib_index, &pfx, FIB_SOURCE_SR);
+    }
+  else
+    {
+      dpo_id_t dpo = DPO_INVALID;
+
+      dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels);
+      fib_table_entry_special_dpo_add (t->rx_fib_index,
+				       &pfx,
+				       FIB_SOURCE_SR,
+				       FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+      dpo_reset (&dpo);
+    }
+
+  /*
+   * Track the first hop address so we don't need to perform an extra
+   * lookup in the data-path
+   */
+  /* *INDENT-OFF* */
+  const fib_prefix_t first_hop_pfx = {
+    .fp_len = 128,
+    .fp_proto = FIB_PROTOCOL_IP6,
+    .fp_addr = {
+      .ip6 = t->first_hop,
+    }
+  };
+  /* *INDENT-ON* */
+
+  if (is_del)
+    {
+      fib_entry_child_remove (t->fib_entry_index, t->sibling_index);
+      fib_table_entry_delete_index (t->fib_entry_index, FIB_SOURCE_RR);
+    }
+  else
+    {
+      t->fib_entry_index =
+	fib_table_entry_special_add (t->rx_fib_index,
+				     &first_hop_pfx,
+				     FIB_SOURCE_RR,
+				     FIB_ENTRY_FLAG_NONE, ADJ_INDEX_INVALID);
+      t->sibling_index =
+	fib_entry_child_add (t->fib_entry_index,
+			     sr_fib_node_type, t - sm->tunnels);
+    }
 
   return 0;
 }
@@ -885,6 +904,18 @@ find_or_add_shared_secret (ip6_sr_main_t * sm, u8 * secret, u32 * indexp)
   return (key);
 }
 
+/**
+ * @brief Stack a tunnel on the forwarding chain of the first-hop
+ */
+static void
+sr_tunnel_stack (ip6_sr_tunnel_t * st)
+{
+  dpo_stack (sr_dpo_type,
+	     DPO_PROTO_IP6,
+	     &st->first_hop_dpo,
+	     fib_entry_contribute_ip_forwarding (st->fib_entry_index));
+}
+
 /**
  * @brief Add or Delete a Segment Routing tunnel.
  *
@@ -909,7 +940,6 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a)
   u8 hmac_key_index = 0;
   ip6_sr_policy_t *pt;
   int i;
-  dpo_id_t dpo = DPO_INVALID;
 
   /* Make sure that the rx FIB exists */
   p = hash_get (im->fib_index_by_table_id, a->rx_table_id);
@@ -981,8 +1011,8 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a)
       /* Delete existing tunnel */
       t = pool_elt_at_index (sm->tunnels, p[0]);
 
-      ip6_delete_route_no_next_hop (&t->key.dst, t->dst_mask_width,
-				    a->rx_table_id);
+      ip6_routes_add_del (t, 1);
+
       vec_free (t->rewrite);
       /* Remove tunnel from any policy if associated */
       if (t->policy_index != ~0)
@@ -1014,6 +1044,7 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a)
 	  hash_unset_mem (sm->tunnel_index_by_name, t->name);
 	  vec_free (t->name);
 	}
+      dpo_reset (&t->first_hop_dpo);
       pool_put (sm->tunnels, t);
       hp = hash_get_pair (sm->tunnel_index_by_key, &key);
       key_copy = (void *) (hp->key);
@@ -1026,6 +1057,7 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a)
   pool_get (sm->tunnels, t);
   memset (t, 0, sizeof (*t));
   t->policy_index = ~0;
+  fib_node_init (&t->node, sr_fib_node_type);
 
   clib_memcpy (&t->key, &key, sizeof (t->key));
   t->dst_mask_width = a->dst_mask_width;
@@ -1124,20 +1156,13 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a)
    * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain
    * at some point...
    */
-  dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels);
 
-  fib_prefix_t pfx = {
-    .fp_proto = FIB_PROTOCOL_IP6,
-    .fp_len = a->dst_mask_width,
-    .fp_addr = {
-		.ip6 = *a->dst_address,
-		}
-  };
-  fib_table_entry_special_dpo_add (rx_fib_index,
-				   &pfx,
-				   FIB_SOURCE_SR,
-				   FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
-  dpo_reset (&dpo);
+  /*
+   * Add the routes for the tunnel destination and first-hop, then stack
+   * the tunnel on the appropriate forwarding DPOs.
+   */
+  ip6_routes_add_del (t, 0);
+  sr_tunnel_stack (t);
 
   if (a->policy_name)
     {
@@ -1197,7 +1222,7 @@ format_sr_dpo (u8 * s, va_list * args)
   return (format (s, "SR: tunnel:[%d]", index));
 }
 
-const static dpo_vft_t sr_vft = {
+const static dpo_vft_t sr_dpo_vft = {
   .dv_lock = sr_dpo_lock,
   .dv_unlock = sr_dpo_unlock,
   .dv_format = format_sr_dpo,
@@ -1212,6 +1237,65 @@ const static char *const *const sr_nodes[DPO_PROTO_NUM] = {
   [DPO_PROTO_IP6] = sr_ip6_nodes,
 };
 
+static ip6_sr_tunnel_t *
+sr_tunnel_from_fib_node (fib_node_t * node)
+{
+#if (CLIB_DEBUG > 0)
+  ASSERT (sr_fib_node_type == node->fn_type);
+#endif
+  return ((ip6_sr_tunnel_t *) (((char *) node) -
+			       STRUCT_OFFSET_OF (ip6_sr_tunnel_t, node)));
+}
+
+/**
+ * Function definition to backwalk a FIB node
+ */
+static fib_node_back_walk_rc_t
+sr_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
+{
+  sr_tunnel_stack (sr_tunnel_from_fib_node (node));
+
+  return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t *
+sr_tunnel_fib_node_get (fib_node_index_t index)
+{
+  ip6_sr_tunnel_t *st;
+  ip6_sr_main_t *sm;
+
+  sm = &sr_main;
+  st = pool_elt_at_index (sm->tunnels, index);
+
+  return (&st->node);
+}
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+sr_tunnel_last_lock_gone (fib_node_t * node)
+{
+  /*
+   * The SR tunnel is a root of the graph. As such
+   * it never has children and thus is never locked.
+   */
+  ASSERT (0);
+}
+
+/*
+ * Virtual function table registered by SR tunnels
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t sr_fib_vft = {
+  .fnv_get = sr_tunnel_fib_node_get,
+  .fnv_last_lock = sr_tunnel_last_lock_gone,
+  .fnv_back_walk = sr_tunnel_back_walk,
+};
+
 /**
  * @brief CLI parser for Add or Delete a Segment Routing tunnel.
  *
@@ -1764,6 +1848,8 @@ ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a)
   ip6_sr_tunnel_t *t;
   ip6_sr_main_t *sm = &sr_main;
   ip6_sr_policy_t *pt;
+  index_t rep;
+  u32 ii;
 
   if (a->is_del)
     {
@@ -1803,23 +1889,49 @@ ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a)
    * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain
    * at some point...
    */
-  dpo_id_t dpo = DPO_INVALID;
-
-  dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels);
 
-  /* Construct a FIB entry for multicast using the rx/tx fib from the first tunnel */
-  fib_prefix_t pfx = {
+  /*
+   * Construct an mFIB entry for the multicast address,
+   * using the rx/tx fib from the first tunnel.
+   * There is no RPF information for this address (I need to discuss this with
+   * Pablo), so for now accept from anywhere...
+   */
+  /* *INDENT-OFF* */
+  mfib_prefix_t pfx = {
     .fp_proto = FIB_PROTOCOL_IP6,
     .fp_len = 128,
-    .fp_addr = {
-		.ip6 = *a->multicast_address,
-		}
+    .fp_grp_addr = {
+      .ip6 = *a->multicast_address,
+    }
   };
-  fib_table_entry_special_dpo_add (t->rx_fib_index,
-				   &pfx,
-				   FIB_SOURCE_SR,
-				   FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
-  dpo_reset (&dpo);
+  /* *INDENT-ON* */
+
+  if (a->is_del)
+    mfib_table_entry_delete (t->rx_fib_index, &pfx, MFIB_SOURCE_SRv6);
+  else
+    {
+      /*
+       * Construct a replicate DPO that will replicate received packets over
+       * each tunnel in the policy
+       */
+      dpo_id_t dpo = DPO_INVALID;
+
+      rep = replicate_create (vec_len (pt->tunnel_indices), DPO_PROTO_IP6);
+
+      vec_foreach_index (ii, pt->tunnel_indices)
+      {
+	dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, pt->tunnel_indices[ii]);
+
+	replicate_set_bucket (rep, ii, &dpo);
+      }
+
+      mfib_table_entry_special_add (t->rx_fib_index,
+				    &pfx,
+				    MFIB_SOURCE_SRv6,
+				    MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF, rep);
+
+      dpo_reset (&dpo);
+    }
 
   u8 *mcast_copy = 0;
   mcast_copy = vec_new (ip6_address_t, 1);
@@ -1829,13 +1941,12 @@ ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a)
     {
       hash_unset_mem (sm->policy_index_by_multicast_address, mcast_copy);
       vec_free (mcast_copy);
-      return 0;
     }
-  /* else */
-
-  hash_set_mem (sm->policy_index_by_multicast_address, mcast_copy,
-		pt - sm->policies);
-
+  else
+    {
+      hash_set_mem (sm->policy_index_by_multicast_address, mcast_copy,
+		    pt - sm->policies);
+    }
 
   return 0;
 }
@@ -1888,12 +1999,7 @@ sr_add_del_multicast_map_command_fn (vlib_main_t * vm,
   a->multicast_address = &multicast_address;
   a->policy_name = policy_name;
 
-#if DPDK > 0			/*Cannot call replicate or configure multicast map yet without DPDK */
   rv = ip6_sr_add_del_multicastmap (a);
-#else
-  return clib_error_return (0,
-			    "cannot use multicast replicate spray case without DPDK installed");
-#endif /* DPDK */
 
   switch (rv)
     {
@@ -2295,12 +2401,6 @@ sr_init (vlib_main_t * vm)
   ip6_rewrite_node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite");
   ASSERT (ip6_rewrite_node);
 
-#if DPDK > 0			/* Cannot run replicate without DPDK */
-  /* Add a disposition to sr_replicate for the sr multicast replicate node */
-  sm->ip6_lookup_sr_replicate_index =
-    vlib_node_add_next (vm, ip6_lookup_node->index, sr_replicate_node.index);
-#endif /* DPDK */
-
   /* Add a disposition to ip6_rewrite for the sr dst address hack node */
   sm->ip6_rewrite_sr_next_index =
     vlib_node_add_next (vm, ip6_rewrite_node->index,
@@ -2311,7 +2411,8 @@ sr_init (vlib_main_t * vm)
   sm->md = (void *) EVP_get_digestbyname ("sha1");
   sm->hmac_ctx = clib_mem_alloc (sizeof (HMAC_CTX));
 
-  sr_dpo_type = dpo_register_new_type (&sr_vft, sr_nodes);
+  sr_dpo_type = dpo_register_new_type (&sr_dpo_vft, sr_nodes);
+  sr_fib_node_type = fib_node_register_new_type (&sr_fib_vft);
 
   return error;
 }
@@ -3087,7 +3188,7 @@ set_ip6_sr_rewrite_fn (vlib_main_t * vm,
   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
   adj->rewrite_header.node_index = sr_fix_dst_addr_node.index;
 
-  /* $$$$$ hack... steal the mcast group index */
+  /* $$$$$ hack... steal the interface address index */
   adj->if_address_index =
     vlib_node_add_next (vm, sr_fix_dst_addr_node.index,
 			hi->output_node_index);
diff --git a/src/vnet/sr/sr.h b/src/vnet/sr/sr.h
index 610b3699..3c50b735 100644
--- a/src/vnet/sr/sr.h
+++ b/src/vnet/sr/sr.h
@@ -15,8 +15,6 @@
 /**
  * @file
  * @brief Segment Routing header
- *
- * @note sr_replicate only works using DPDK today
  */
 #ifndef included_vnet_sr_h
 #define included_vnet_sr_h
@@ -71,6 +69,27 @@ typedef struct
   /** Indicates that this tunnel is part of a policy comprising
      of multiple tunnels. If == ~0 tunnel is not part of a policy */
   u32 policy_index;
+
+  /**
+   * The FIB node graph linkage
+   */
+  fib_node_t node;
+
+  /**
+   * The FIB entry index for the first hop. We track this so we
+   * don't need an extra lookup for it in the data plane
+   */
+  fib_node_index_t fib_entry_index;
+
+  /**
+   * This tunnel's sibling index in the children of the FIB entry
+   */
+  u32 sibling_index;
+
+  /**
+   * The DPO contributed by the first-hop FIB entry.
+   */
+  dpo_id_t first_hop_dpo;
 } ip6_sr_tunnel_t;
 
 /**
@@ -205,9 +224,6 @@ typedef struct
   /** ip6-rewrite next index for reinstalling the original dst address */
   u32 ip6_rewrite_sr_next_index;
 
-  /** ip6-replicate next index for multicast tunnel */
-  u32 ip6_lookup_sr_replicate_index;
-
   /** application API callback */
   void *sr_local_cb;
 
@@ -238,10 +254,6 @@ format_function_t format_ip6_sr_header_with_length;
 
 vlib_node_registration_t ip6_sr_input_node;
 
-#if DPDK > 0
-extern vlib_node_registration_t sr_replicate_node;
-#endif /* DPDK */
-
 int ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a);
 int ip6_sr_add_del_policy (ip6_sr_add_del_policy_args_t * a);
 int ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a);
diff --git a/src/vnet/sr/sr_api.c b/src/vnet/sr/sr_api.c
index 6c6eb9b6..bab0fc84 100644
--- a/src/vnet/sr/sr_api.c
+++ b/src/vnet/sr/sr_api.c
@@ -190,12 +190,7 @@ static void vl_api_sr_multicast_map_add_del_t_handler
       goto out;
     }
 
-#if DPDK > 0			/* Cannot call replicate without DPDK */
   rv = ip6_sr_add_del_multicastmap (a);
-#else
-  clib_warning ("multicast replication without DPDK not implemented");
-  rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif /* DPDK */
 
 out:
 
diff --git a/src/vnet/sr/sr_replicate.c b/src/vnet/sr/sr_replicate.c
deleted file mode 100644
index fa5a68c3..00000000
--- a/src/vnet/sr/sr_replicate.c
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- * sr_replicate.c: ipv6 segment routing replicator for multicast
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- *  @file
- *  @brief Functions for replicating packets across SR tunnels.
- *
- *  Leverages rte_pktmbuf_clone() so there is no memcpy for
- *  invariant parts of the packet.
- *
- *  @note Currently requires DPDK
-*/
-
-#if DPDK > 0			/* Cannot run replicate without DPDK */
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/pg/pg.h>
-#include <vnet/sr/sr.h>
-#include <vnet/devices/dpdk/dpdk.h>
-#include <vnet/devices/dpdk/dpdk_priv.h>
-#include <vnet/ip/ip.h>
-#include <vnet/fib/ip6_fib.h>
-
-#include <vppinfra/hash.h>
-#include <vppinfra/error.h>
-#include <vppinfra/elog.h>
-
-/**
- *   @brief sr_replicate state.
- *
-*/
-typedef struct
-{
-  /* convenience */
-  vlib_main_t *vlib_main;
-  vnet_main_t *vnet_main;
-} sr_replicate_main_t;
-
-sr_replicate_main_t sr_replicate_main;
-
-/**
- *    @brief Information to display in packet trace.
- *
-*/
-typedef struct
-{
-  ip6_address_t src, dst;
-  u16 length;
-  u32 next_index;
-  u32 tunnel_index;
-  u8 sr[256];
-} sr_replicate_trace_t;
-
-/**
- *  @brief packet trace format function.
- *
- *  @param *s u8 used for string output
- *  @param *args va_list  structured input to va_arg to output @ref sr_replicate_trace_t
- *  @return *s u8 - formatted trace output
-*/
-static u8 *
-format_sr_replicate_trace (u8 * s, va_list * args)
-{
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  sr_replicate_trace_t *t = va_arg (*args, sr_replicate_trace_t *);
-  ip6_sr_main_t *sm = &sr_main;
-  ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index);
-  ip6_fib_t *rx_fib, *tx_fib;
-
-  rx_fib = ip6_fib_get (tun->rx_fib_index);
-  tx_fib = ip6_fib_get (tun->tx_fib_index);
-
-  s = format
-    (s, "SR-REPLICATE: next %s ip6 src %U dst %U len %u\n"
-     "           rx-fib-id %d tx-fib-id %d\n%U",
-     "ip6-lookup",
-     format_ip6_address, &t->src,
-     format_ip6_address, &t->dst, t->length,
-     rx_fib->table_id, tx_fib->table_id,
-     format_ip6_sr_header, t->sr, 0 /* print_hmac */ );
-  return s;
-
-}
-
-#define foreach_sr_replicate_error \
-_(REPLICATED, "sr packets replicated") \
-_(NO_BUFFERS, "error allocating buffers for replicas") \
-_(NO_REPLICAS, "no replicas were needed") \
-_(NO_BUFFER_DROPS, "sr no buffer drops")
-
-/**
- * @brief Struct for SR replicate errors
- */
-typedef enum
-{
-#define _(sym,str) SR_REPLICATE_ERROR_##sym,
-  foreach_sr_replicate_error
-#undef _
-    SR_REPLICATE_N_ERROR,
-} sr_replicate_error_t;
-
-/**
- * @brief Error strings for SR replicate
- */
-static char *sr_replicate_error_strings[] = {
-#define _(sym,string) string,
-  foreach_sr_replicate_error
-#undef _
-};
-
-/**
- * @brief Defines next-nodes for packet processing.
- *
-*/
-typedef enum
-{
-  SR_REPLICATE_NEXT_IP6_LOOKUP,
-  SR_REPLICATE_N_NEXT,
-} sr_replicate_next_t;
-
-/**
- *   @brief Single loop packet replicator.
- *
- *   @node sr-replicate
- *   @param vm vlib_main_t
- *   @return frame->n_vectors uword
-*/
-static uword
-sr_replicate_node_fn (vlib_main_t * vm,
-		      vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
-  dpdk_main_t *dm = &dpdk_main;
-  u32 n_left_from, *from, *to_next;
-  sr_replicate_next_t next_index;
-  int pkts_replicated = 0;
-  ip6_sr_main_t *sm = &sr_main;
-  int no_buffer_drops = 0;
-  vlib_buffer_free_list_t *fl;
-  unsigned socket_id = rte_socket_id ();
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
-
-  fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  u32 bi0, hdr_bi0;
-	  vlib_buffer_t *b0, *orig_b0;
-	  struct rte_mbuf *orig_mb0 = 0, *hdr_mb0 = 0, *clone0 = 0;
-	  struct rte_mbuf **hdr_vec = 0, **rte_mbuf_vec = 0;
-	  ip6_sr_policy_t *pol0 = 0;
-	  ip6_sr_tunnel_t *t0 = 0;
-	  ip6_sr_header_t *hdr_sr0 = 0;
-	  ip6_header_t *ip0 = 0, *hdr_ip0 = 0;
-	  int num_replicas = 0;
-	  int i;
-	  u32 len_bytes = sizeof (ip6_header_t);
-	  u8 next_hdr, ip_next_hdr = IPPROTO_IPV6_ROUTE;
-
-	  bi0 = from[0];
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  orig_b0 = b0;
-
-	  pol0 = pool_elt_at_index (sm->policies,
-				    vnet_buffer (b0)->ip.save_protocol);
-
-	  ip0 = vlib_buffer_get_current (b0);
-	  /* Skip forward to the punch-in point */
-	  vlib_buffer_advance (b0, sizeof (*ip0));
-	  next_hdr = ip0->protocol;
-
-	  /* HBH must immediately follow ipv6 header */
-	  if (PREDICT_FALSE
-	      (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
-	    {
-	      ip6_hop_by_hop_ext_t *ext_hdr =
-		(ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
-	      u32 ext_hdr_len = 0;
-	      ext_hdr_len = ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
-	      len_bytes += ext_hdr_len;
-	      next_hdr = ext_hdr->next_hdr;
-	      ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
-	      ip_next_hdr = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS;
-	      /* Skip forward to the punch-in point */
-	      vlib_buffer_advance (b0, ext_hdr_len);
-
-	    }
-
-	  orig_mb0 = rte_mbuf_from_vlib_buffer (b0);
-
-	  i16 delta0 = vlib_buffer_length_in_chain (vm, orig_b0)
-	    - (i16) orig_mb0->pkt_len;
-
-	  u16 new_data_len0 = (u16) ((i16) orig_mb0->data_len + delta0);
-	  u16 new_pkt_len0 = (u16) ((i16) orig_mb0->pkt_len + delta0);
-
-	  orig_mb0->data_len = new_data_len0;
-	  orig_mb0->pkt_len = new_pkt_len0;
-	  orig_mb0->data_off += (u16) (b0->current_data);
-
-	  /*
-	     Before entering loop determine if we can allocate:
-	     - all the new HEADER RTE_MBUFs and assign them to a vector
-	     - all the clones
-
-	     if successful, then iterate over vectors of resources
-
-	   */
-	  num_replicas = vec_len (pol0->tunnel_indices);
-
-	  if (PREDICT_FALSE (num_replicas == 0))
-	    {
-	      b0->error = node->errors[SR_REPLICATE_ERROR_NO_REPLICAS];
-	      goto do_trace0;
-	    }
-
-	  vec_reset_length (hdr_vec);
-	  vec_reset_length (rte_mbuf_vec);
-
-	  for (i = 0; i < num_replicas; i++)
-	    {
-	      uint8_t nb_seg;
-	      struct rte_mbuf *clone0i;
-	      vlib_buffer_t *clone0_c, *clone_b0;
-
-	      t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]);
-	      hdr_mb0 = rte_pktmbuf_alloc (dm->pktmbuf_pools[socket_id]);
-
-	      if (i < (num_replicas - 1))
-		{
-		  /* Not the last tunnel to process */
-		  clone0 = rte_pktmbuf_clone
-		    (orig_mb0, dm->pktmbuf_pools[socket_id]);
-		  if (clone0 == 0)
-		    goto clone_fail;
-		  nb_seg = 0;
-		  clone0i = clone0;
-		  clone0_c = NULL;
-		  while ((clone0->nb_segs >= 1) && (nb_seg < clone0->nb_segs))
-		    {
-
-		      clone_b0 = vlib_buffer_from_rte_mbuf (clone0i);
-		      vlib_buffer_init_for_free_list (clone_b0, fl);
-
-		      ASSERT ((clone_b0->flags & VLIB_BUFFER_NEXT_PRESENT) ==
-			      0);
-		      ASSERT (clone_b0->current_data == 0);
-
-		      clone_b0->current_data =
-			(clone0i->buf_addr + clone0i->data_off) -
-			(void *) clone_b0->data;
-
-		      clone_b0->current_length = clone0i->data_len;
-		      if (PREDICT_FALSE (clone0_c != NULL))
-			{
-			  clone0_c->flags |= VLIB_BUFFER_NEXT_PRESENT;
-			  clone0_c->next_buffer =
-			    vlib_get_buffer_index (vm, clone_b0);
-			}
-		      clone0_c = clone_b0;
-		      clone0i = clone0i->next;
-		      nb_seg++;
-		    }
-		}
-	      else
-		/* First tunnel to process, use original MB */
-		clone0 = orig_mb0;
-
-
-	      if (PREDICT_FALSE (!clone0 || !hdr_mb0))
-		{
-		clone_fail:
-		  b0->error = node->errors[SR_REPLICATE_ERROR_NO_BUFFERS];
-
-		  vec_foreach_index (i, rte_mbuf_vec)
-		  {
-		    rte_pktmbuf_free (rte_mbuf_vec[i]);
-		  }
-		  vec_free (rte_mbuf_vec);
-
-		  vec_foreach_index (i, hdr_vec)
-		  {
-		    rte_pktmbuf_free (hdr_vec[i]);
-		  }
-		  vec_free (hdr_vec);
-
-		  goto do_trace0;
-		}
-
-	      vec_add1 (hdr_vec, hdr_mb0);
-	      vec_add1 (rte_mbuf_vec, clone0);
-
-	    }
-
-	  for (i = 0; i < num_replicas; i++)
-	    {
-	      vlib_buffer_t *hdr_b0;
-	      u16 new_l0 = 0;
-
-	      t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]);
-	      /* Our replicas */
-	      hdr_mb0 = hdr_vec[i];
-	      clone0 = rte_mbuf_vec[i];
-
-	      hdr_mb0->data_len = len_bytes + vec_len (t0->rewrite);
-	      hdr_mb0->pkt_len = hdr_mb0->data_len +
-		vlib_buffer_length_in_chain (vm, orig_b0);
-
-	      hdr_b0 = vlib_buffer_from_rte_mbuf (hdr_mb0);
-
-	      vlib_buffer_init_for_free_list (hdr_b0, fl);
-
-	      memcpy (hdr_b0->data, ip0, len_bytes);
-	      memcpy (hdr_b0->data + len_bytes, t0->rewrite,
-		      vec_len (t0->rewrite));
-
-	      hdr_b0->current_data = 0;
-	      hdr_b0->current_length = len_bytes + vec_len (t0->rewrite);
-	      hdr_b0->flags = orig_b0->flags | VLIB_BUFFER_NEXT_PRESENT;
-	      hdr_b0->trace_index = orig_b0->trace_index;
-	      vnet_buffer (hdr_b0)->l2_classify.opaque_index = 0;
-
-	      hdr_b0->total_length_not_including_first_buffer =
-		hdr_mb0->pkt_len - hdr_b0->current_length;
-	      vnet_buffer (hdr_b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index;
-
-	      hdr_ip0 = (ip6_header_t *) hdr_b0->data;
-	      new_l0 = clib_net_to_host_u16 (ip0->payload_length) +
-		vec_len (t0->rewrite);
-	      hdr_ip0->payload_length = clib_host_to_net_u16 (new_l0);
-	      hdr_sr0 = (ip6_sr_header_t *) ((u8 *) hdr_ip0 + len_bytes);
-	      /* $$$ tune */
-	      clib_memcpy (hdr_sr0, t0->rewrite, vec_len (t0->rewrite));
-	      hdr_sr0->protocol = next_hdr;
-	      hdr_ip0->protocol = ip_next_hdr;
-
-	      /* Copy dst address into the DA slot in the segment list */
-	      clib_memcpy (hdr_sr0->segments, ip0->dst_address.as_u64,
-			   sizeof (ip6_address_t));
-
-	      /* Rewrite the ip6 dst address */
-	      hdr_ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0];
-	      hdr_ip0->dst_address.as_u64[1] = t0->first_hop.as_u64[1];
-
-	      sr_fix_hmac (sm, hdr_ip0, hdr_sr0);
-
-	      /* prepend new header to invariant piece */
-	      hdr_mb0->next = clone0;
-	      hdr_b0->next_buffer =
-		vlib_get_buffer_index (vm,
-				       vlib_buffer_from_rte_mbuf (clone0));
-
-	      /* update header's fields */
-	      hdr_mb0->pkt_len =
-		(uint16_t) (hdr_mb0->data_len + clone0->pkt_len);
-	      hdr_mb0->nb_segs = (uint8_t) (clone0->nb_segs + 1);
-
-	      /* copy metadata from source packet */
-	      hdr_mb0->port = clone0->port;
-	      hdr_mb0->vlan_tci = clone0->vlan_tci;
-	      hdr_mb0->vlan_tci_outer = clone0->vlan_tci_outer;
-	      hdr_mb0->tx_offload = clone0->tx_offload;
-	      hdr_mb0->hash = clone0->hash;
-
-	      hdr_mb0->ol_flags = clone0->ol_flags & ~(IND_ATTACHED_MBUF);
-
-	      __rte_mbuf_sanity_check (hdr_mb0, 1);
-
-	      hdr_bi0 = vlib_get_buffer_index (vm, hdr_b0);
-
-	      to_next[0] = hdr_bi0;
-	      to_next += 1;
-	      n_left_to_next -= 1;
-
-	      if (n_left_to_next == 0)
-		{
-		  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-		  vlib_get_next_frame (vm, node, next_index,
-				       to_next, n_left_to_next);
-
-		}
-	      pkts_replicated++;
-	    }
-
-	  from += 1;
-	  n_left_from -= 1;
-
-	do_trace0:
-	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      sr_replicate_trace_t *tr = vlib_add_trace (vm, node,
-							 b0, sizeof (*tr));
-	      tr->tunnel_index = t0 - sm->tunnels;
-	      tr->length = 0;
-	      if (hdr_ip0)
-		{
-		  memcpy (tr->src.as_u8, hdr_ip0->src_address.as_u8,
-			  sizeof (tr->src.as_u8));
-		  memcpy (tr->dst.as_u8, hdr_ip0->dst_address.as_u8,
-			  sizeof (tr->dst.as_u8));
-		  if (hdr_ip0->payload_length)
-		    tr->length = clib_net_to_host_u16
-		      (hdr_ip0->payload_length);
-		}
-	      tr->next_index = next_index;
-	      if (hdr_sr0)
-		memcpy (tr->sr, hdr_sr0, sizeof (tr->sr));
-	    }
-
-	}
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  vlib_node_increment_counter (vm, sr_replicate_node.index,
-			       SR_REPLICATE_ERROR_REPLICATED,
-			       pkts_replicated);
-
-  vlib_node_increment_counter (vm, sr_replicate_node.index,
-			       SR_REPLICATE_ERROR_NO_BUFFER_DROPS,
-			       no_buffer_drops);
-
-  return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (sr_replicate_node) = {
-  .function = sr_replicate_node_fn,
-  .name = "sr-replicate",
-  .vector_size = sizeof (u32),
-  .format_trace = format_sr_replicate_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-
-  .n_errors = ARRAY_LEN(sr_replicate_error_strings),
-  .error_strings = sr_replicate_error_strings,
-
-  .n_next_nodes = SR_REPLICATE_N_NEXT,
-
-  .next_nodes = {
-        [SR_REPLICATE_NEXT_IP6_LOOKUP] = "ip6-lookup",
-  },
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (sr_replicate_node, sr_replicate_node_fn)
-/* *INDENT-ON* */
-
-clib_error_t *
-sr_replicate_init (vlib_main_t * vm)
-{
-  sr_replicate_main_t *msm = &sr_replicate_main;
-
-  msm->vlib_main = vm;
-  msm->vnet_main = vnet_get_main ();
-
-  return 0;
-}
-
-VLIB_INIT_FUNCTION (sr_replicate_init);
-
-#endif /* DPDK */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
-- 
cgit 1.2.3-korg


From 9bea8fb0b0b6377fbead21f4ff6bceb50080329e Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Fri, 3 Feb 2017 04:34:01 -0800
Subject: Next node frame over-flow after replication

Change-Id: I25077dd0739787de4f7512e5a70a62e8c34c28e4
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/vnet/dpo/replicate_dpo.c | 18 ++++++++++++++++--
 test/test_ip_mcast.py        | 16 ++++++++++++----
 2 files changed, 28 insertions(+), 6 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index 8bad75ee..c779516f 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -664,6 +664,20 @@ replicate_inline (vlib_main_t * vm,
             /* ship copies to the rest of the buckets */
             for (bucket = 1; bucket < rep0->rep_n_buckets; bucket++)
             {
+                /*
+                 * After the enqueue of the first buffer, and of all subsequent
+                 * buffers in this loop, it is possible that we over-flow the
+                 * frame of the to-next node. When this happens we need to 'put'
+                 * that full frame to the node and get a fresh empty one.
+                 * Note that these are macros with side effects that change
+                 * to_next & n_left_to_next
+                 */
+                if (PREDICT_FALSE(0 == n_left_to_next))
+                {
+                    vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+                    vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+                }
+
                 /* Make a copy */
                 c0 = vlib_buffer_copy(vm, b0);
                 ci0 = vlib_get_buffer_index(vm, c0);
@@ -676,9 +690,9 @@ replicate_inline (vlib_main_t * vm,
                 next0 = dpo0->dpoi_next_node;
                 vnet_buffer (c0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
-                if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+                if (PREDICT_FALSE(c0->flags & VLIB_BUFFER_IS_TRACED))
                 {
-                    replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+                    replicate_trace_t *t = vlib_add_trace (vm, node, c0, sizeof (*t));
                     t->rep_index = repi0;
                     t->dpo = *dpo0;
                 }
diff --git a/test/test_ip_mcast.py b/test/test_ip_mcast.py
index 028853d2..9b668e42 100644
--- a/test/test_ip_mcast.py
+++ b/test/test_ip_mcast.py
@@ -29,6 +29,14 @@ class MRouteEntryFlags:
     MFIB_ENTRY_FLAG_CONNECTED = 4
     MFIB_ENTRY_FLAG_INHERIT_ACCEPT = 8
 
+#
+# The number of packets sent is set to 90 so that when we replicate more than 3
+# times, which we do for some entries, we will generate more than 256 packets
+# to the next node in the VLIB graph. Thus we are testing the code's correctness
+# handling this over-flow
+#
+N_PKTS_IN_STREAM = 90
+
 
 class TestIPMcast(VppTestCase):
     """ IP Multicast Test Case """
@@ -49,7 +57,7 @@ class TestIPMcast(VppTestCase):
 
     def create_stream_ip4(self, src_if, src_ip, dst_ip):
         pkts = []
-        for i in range(0, 65):
+        for i in range(0, N_PKTS_IN_STREAM):
             info = self.create_packet_info(src_if, src_if)
             payload = self.info_to_payload(info)
             p = (Ether(dst=src_if.local_mac, src=src_if.remote_mac) /
@@ -62,7 +70,7 @@ class TestIPMcast(VppTestCase):
 
     def create_stream_ip6(self, src_if, src_ip, dst_ip):
         pkts = []
-        for i in range(0, 65):
+        for i in range(0, N_PKTS_IN_STREAM):
             info = self.create_packet_info(src_if, src_if)
             payload = self.info_to_payload(info)
             p = (Ether(dst=src_if.local_mac, src=src_if.remote_mac) /
@@ -82,7 +90,7 @@ class TestIPMcast(VppTestCase):
         return capture
 
     def verify_capture_ip4(self, src_if, sent):
-        rxd = self.pg1.get_capture(65)
+        rxd = self.pg1.get_capture(N_PKTS_IN_STREAM)
 
         try:
             capture = self.verify_filter(rxd, sent)
@@ -112,7 +120,7 @@ class TestIPMcast(VppTestCase):
             raise
 
     def verify_capture_ip6(self, src_if, sent):
-        capture = self.pg1.get_capture(65)
+        capture = self.pg1.get_capture(N_PKTS_IN_STREAM)
 
         self.assertEqual(len(capture), len(sent))
 
-- 
cgit 1.2.3-korg


From ce1b4c7f05ce28d7b73eb7ed0a8ea4bd483f09e9 Mon Sep 17 00:00:00 2001
From: Florin Coras <fcoras@cisco.com>
Date: Thu, 26 Jan 2017 14:25:34 -0800
Subject: Basic support for LISP-GPE encapsulated NSH packets

Change-Id: I97fedb0f70dd18ed9bbe985407cc5fe714e8a2e2
Signed-off-by: Florin Coras <fcoras@cisco.com>
---
 src/vnet.am                            |   2 +
 src/vnet/adj/adj_internal.h            |   4 +-
 src/vnet/adj/adj_midchain.c            |  20 +++
 src/vnet/adj/adj_nsh.c                 | 211 ++++++++++++++++++++++
 src/vnet/adj/adj_nsh.h                 |  31 ++++
 src/vnet/dpo/dpo.c                     |   2 +
 src/vnet/dpo/dpo.h                     |   6 +-
 src/vnet/fib/fib_entry_delegate.c      |   4 +
 src/vnet/fib/fib_entry_delegate.h      |   3 +-
 src/vnet/fib/fib_entry_src.c           |   1 +
 src/vnet/fib/fib_path.c                |   4 +
 src/vnet/fib/fib_types.c               |   6 +
 src/vnet/fib/fib_types.h               |  11 +-
 src/vnet/gre/gre.c                     |   3 +
 src/vnet/interface.c                   |   1 +
 src/vnet/interface.h                   |   4 +-
 src/vnet/lisp-cp/control.c             |  32 ++++
 src/vnet/lisp-cp/lisp_api.c            |   2 +
 src/vnet/lisp-cp/lisp_cp_dpo.c         |   5 +
 src/vnet/lisp-cp/lisp_types.c          |  43 ++++-
 src/vnet/lisp-cp/lisp_types.h          |  15 +-
 src/vnet/lisp-gpe/interface.c          | 196 ++++++++++++++++++++-
 src/vnet/lisp-gpe/lisp_gpe.c           |   2 +-
 src/vnet/lisp-gpe/lisp_gpe.h           |   9 +
 src/vnet/lisp-gpe/lisp_gpe_adjacency.c |   8 +-
 src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c | 309 ++++++++++++++++++++++++++++++++-
 src/vnet/lisp-gpe/lisp_gpe_fwd_entry.h |  31 +++-
 src/vnet/mfib/mfib_entry.c             |   1 +
 28 files changed, 944 insertions(+), 22 deletions(-)
 create mode 100644 src/vnet/adj/adj_nsh.c
 create mode 100644 src/vnet/adj/adj_nsh.h

(limited to 'src/vnet/dpo')

diff --git a/src/vnet.am b/src/vnet.am
index 9b148f69..a8cc696f 100644
--- a/src/vnet.am
+++ b/src/vnet.am
@@ -958,12 +958,14 @@ libvnet_la_SOURCES +=				\
   vnet/adj/adj_midchain.c   			\
   vnet/adj/adj_mcast.c   			\
   vnet/adj/adj_l2.c      			\
+  vnet/adj/adj_nsh.c      			\
   vnet/adj/adj.c
 
 nobase_include_HEADERS +=			\
   vnet/adj/adj.h				\
   vnet/adj/adj_types.h				\
   vnet/adj/adj_glean.h  			\
+  vnet/adj/adj_nsh.h  				\
   vnet/adj/adj_nbr.h
 
 ########################################
diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h
index ece59121..30668625 100644
--- a/src/vnet/adj/adj_internal.h
+++ b/src/vnet/adj/adj_internal.h
@@ -20,7 +20,7 @@
 #include <vnet/ip/ip.h>
 #include <vnet/mpls/mpls.h>
 #include <vnet/adj/adj_l2.h>
-
+#include <vnet/adj/adj_nsh.h>
 
 /**
  * big switch to turn on Adjacency debugging
@@ -53,6 +53,8 @@ adj_get_rewrite_node (vnet_link_t linkt)
 	return (mpls_output_node.index);
     case VNET_LINK_ETHERNET:
 	return (adj_l2_rewrite_node.index);
+    case VNET_LINK_NSH:
+        return (adj_nsh_rewrite_node.index);
     case VNET_LINK_ARP:
 	break;
     }
diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
index 8c6ab5aa..35cdb003 100644
--- a/src/vnet/adj/adj_midchain.c
+++ b/src/vnet/adj/adj_midchain.c
@@ -16,6 +16,7 @@
 #include <vnet/adj/adj_nbr.h>
 #include <vnet/adj/adj_internal.h>
 #include <vnet/adj/adj_l2.h>
+#include <vnet/adj/adj_nsh.h>
 #include <vnet/adj/adj_midchain.h>
 #include <vnet/ethernet/arp_packet.h>
 #include <vnet/dpo/drop_dpo.h>
@@ -308,6 +309,18 @@ VNET_FEATURE_INIT (adj_midchain_tx_no_count_ethernet, static) = {
     .runs_before = VNET_FEATURES ("error-drop"),
     .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_ETHERNET],
 };
+VNET_FEATURE_INIT (adj_midchain_tx_nsh, static) = {
+    .arc_name = "nsh-output",
+    .node_name = "adj-midchain-tx",
+    .runs_before = VNET_FEATURES ("error-drop"),
+    .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_NSH],
+};
+VNET_FEATURE_INIT (adj_midchain_tx_no_count_nsh, static) = {
+    .arc_name = "nsh-output",
+    .node_name = "adj-midchain-tx-no-count",
+    .runs_before = VNET_FEATURES ("error-drop"),
+    .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_NSH],
+};
 
 static inline u32
 adj_get_midchain_node (vnet_link_t link)
@@ -321,6 +334,8 @@ adj_get_midchain_node (vnet_link_t link)
 	return (mpls_midchain_node.index);
     case VNET_LINK_ETHERNET:
 	return (adj_l2_midchain_node.index);
+    case VNET_LINK_NSH:
+        return (adj_nsh_midchain_node.index);
     case VNET_LINK_ARP:
 	break;
     }
@@ -354,6 +369,11 @@ adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj)
 	    arc = ethernet_main.output_feature_arc_index;
 	    break;
 	}
+    case VNET_LINK_NSH:
+        {
+          arc = nsh_main_dummy.output_feature_arc_index;
+          break;
+        }
     case VNET_LINK_ARP:
 	ASSERT(0);
 	break;
diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c
new file mode 100644
index 00000000..9a0f9d8b
--- /dev/null
+++ b/src/vnet/adj/adj_nsh.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/adj/adj_nsh.h>
+#include <vnet/ip/ip.h>
+
+nsh_main_dummy_t nsh_main_dummy;
+
+/**
+ * @brief Trace data for the NSH rewrite and midchain nodes
+ */
+typedef struct adj_nsh_trace_t_ {
+    /** Adjacency index taken. */
+    u32 adj_index;
+} adj_nsh_trace_t;
+
+static u8 *
+format_adj_nsh_trace (u8 * s, va_list * args)
+{
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    adj_nsh_trace_t * t = va_arg (*args, adj_nsh_trace_t *);
+
+    s = format (s, "adj-idx %d : %U",
+                t->adj_index,
+                format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE);
+    return s;
+}
+
+typedef enum adj_nsh_rewrite_next_t_
+{
+    ADJ_NSH_REWRITE_NEXT_DROP,
+} adj_gpe_rewrite_next_t;
+
+always_inline uword
+adj_nsh_rewrite_inline (vlib_main_t * vm,
+                       vlib_node_runtime_t * node,
+                       vlib_frame_t * frame,
+                       int is_midchain)
+{
+    u32 * from = vlib_frame_vector_args (frame);
+    u32 n_left_from, n_left_to_next, * to_next, next_index;
+    u32 cpu_index = os_get_cpu_number();
+
+    n_left_from = frame->n_vectors;
+    next_index = node->cached_next_index;
+
+    while (n_left_from > 0)
+    {
+        vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+        while (n_left_from > 0 && n_left_to_next > 0)
+        {
+            ip_adjacency_t * adj0;
+            vlib_buffer_t * p0;
+            char *h0;
+            u32 pi0, rw_len0, adj_index0, next0 = 0;
+            u32 tx_sw_if_index0;
+
+            pi0 = to_next[0] = from[0];
+            from += 1;
+            n_left_from -= 1;
+            to_next += 1;
+            n_left_to_next -= 1;
+
+            p0 = vlib_get_buffer (vm, pi0);
+            h0 = vlib_buffer_get_current (p0);
+
+            adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+            /* We should never rewrite a pkt using the MISS adjacency */
+            ASSERT(adj_index0);
+
+            adj0 = adj_get (adj_index0);
+
+            /* Guess we are only writing on simple IP4 header. */
+            vnet_rewrite_one_header(adj0[0], h0, sizeof(ip4_header_t));
+
+            /* Update packet buffer attributes/set output interface. */
+            rw_len0 = adj0[0].rewrite_header.data_bytes;
+            vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
+
+            vlib_increment_combined_counter(&adjacency_counters,
+                                            cpu_index,
+                                            adj_index0,
+                                            /* packet increment */ 0,
+                                            /* byte increment */ rw_len0);
+
+            /* Check MTU of outgoing interface. */
+            if (PREDICT_TRUE((vlib_buffer_length_in_chain (vm, p0)  <=
+                              adj0[0].rewrite_header.max_l3_packet_bytes)))
+            {
+                /* Don't adjust the buffer for ttl issue; icmp-error node wants
+                 * to see the IP header */
+                p0->current_data -= rw_len0;
+                p0->current_length += rw_len0;
+                tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+                if (is_midchain)
+                {
+                    adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+                }
+
+                vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+                /*
+                 * Follow the feature ARC. this will result eventually in
+                 * the midchain-tx node
+                 */
+                vnet_feature_arc_start (nsh_main_dummy.output_feature_arc_index,
+                                        tx_sw_if_index0, &next0, p0);
+            }
+            else
+            {
+                /* can't fragment NSH */
+                next0 = ADJ_NSH_REWRITE_NEXT_DROP;
+            }
+
+            if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                adj_nsh_trace_t *tr = vlib_add_trace (vm, node,
+                                                     p0, sizeof (*tr));
+                tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+            }
+
+            vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                             to_next, n_left_to_next,
+                                             pi0, next0);
+        }
+
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+    return frame->n_vectors;
+}
+
+static uword
+adj_nsh_rewrite (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * frame)
+{
+    return adj_nsh_rewrite_inline (vm, node, frame, 0);
+}
+
+static uword
+adj_nsh_midchain (vlib_main_t * vm,
+                 vlib_node_runtime_t * node,
+                 vlib_frame_t * frame)
+{
+    return adj_nsh_rewrite_inline (vm, node, frame, 1);
+}
+
+VLIB_REGISTER_NODE (adj_nsh_rewrite_node) = {
+    .function = adj_nsh_rewrite,
+    .name = "adj-nsh-rewrite",
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_adj_nsh_trace,
+
+    .n_next_nodes = 1,
+    .next_nodes = {
+        [ADJ_NSH_REWRITE_NEXT_DROP] = "error-drop",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (adj_nsh_rewrite_node, adj_nsh_rewrite)
+
+VLIB_REGISTER_NODE (adj_nsh_midchain_node) = {
+    .function = adj_nsh_midchain,
+    .name = "adj-nsh-midchain",
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_adj_nsh_trace,
+
+    .n_next_nodes = 1,
+    .next_nodes = {
+        [ADJ_NSH_REWRITE_NEXT_DROP] = "error-drop",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (adj_nsh_midchain_node, adj_nsh_midchain)
+
+/* Built-in NSH output (tx) feature arc definition */
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (nsh_output, static) =
+{
+  .arc_name  = "nsh-output",
+  .start_nodes = VNET_FEATURES ("adj-nsh-midchain"),
+  .arc_index_ptr = &nsh_main_dummy.output_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (nsh_tx_drop, static) =
+{
+  .arc_name = "nsh-output",
+  .node_name = "error-drop",
+  .runs_before = 0,     /* not before any other features */
+};
+/* *INDENT-ON* */
diff --git a/src/vnet/adj/adj_nsh.h b/src/vnet/adj/adj_nsh.h
new file mode 100644
index 00000000..5501fbb9
--- /dev/null
+++ b/src/vnet/adj/adj_nsh.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ADJ_NSH_H__
+#define __ADJ_NSH_H__
+
+#include <vnet/adj/adj.h>
+
+extern vlib_node_registration_t adj_nsh_midchain_node;
+extern vlib_node_registration_t adj_nsh_rewrite_node;
+
+typedef struct _nsh_main_dummy
+{
+  u8 output_feature_arc_index;
+} nsh_main_dummy_t;
+
+extern nsh_main_dummy_t nsh_main_dummy;
+
+#endif
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
index cc2fa0eb..d8e075a7 100644
--- a/src/vnet/dpo/dpo.c
+++ b/src/vnet/dpo/dpo.c
@@ -98,6 +98,8 @@ vnet_link_to_dpo_proto (vnet_link_t linkt)
         return (DPO_PROTO_MPLS);
     case VNET_LINK_ETHERNET:
         return (DPO_PROTO_ETHERNET);
+    case VNET_LINK_NSH:
+        return (DPO_PROTO_NSH);
     case VNET_LINK_ARP:
 	break;
     }
diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h
index aff4e1b8..48b92d3d 100644
--- a/src/vnet/dpo/dpo.h
+++ b/src/vnet/dpo/dpo.h
@@ -67,9 +67,10 @@ typedef enum dpo_proto_t_
     DPO_PROTO_IP6,
     DPO_PROTO_ETHERNET,
     DPO_PROTO_MPLS,
+    DPO_PROTO_NSH,
 } __attribute__((packed)) dpo_proto_t;
 
-#define DPO_PROTO_NUM ((dpo_proto_t)(DPO_PROTO_MPLS+1))
+#define DPO_PROTO_NUM ((dpo_proto_t)(DPO_PROTO_NSH+1))
 #define DPO_PROTO_NONE ((dpo_proto_t)(DPO_PROTO_NUM+1))
 
 #define DPO_PROTOS {		\
@@ -77,11 +78,12 @@ typedef enum dpo_proto_t_
     [DPO_PROTO_IP6]  = "ip6",	\
     [DPO_PROTO_ETHERNET]  = "ethernet", \
     [DPO_PROTO_MPLS] = "mpls",	\
+    [DPO_PROTO_NSH] = "nsh",    \
 }
 
 #define FOR_EACH_DPO_PROTO(_proto)    \
     for (_proto = DPO_PROTO_IP4;      \
-	 _proto <= DPO_PROTO_MPLS;    \
+	 _proto <= DPO_PROTO_NSH;    \
 	 _proto++)
 
 /**
diff --git a/src/vnet/fib/fib_entry_delegate.c b/src/vnet/fib/fib_entry_delegate.c
index efe402d1..70840b16 100644
--- a/src/vnet/fib/fib_entry_delegate.c
+++ b/src/vnet/fib/fib_entry_delegate.c
@@ -122,6 +122,8 @@ fib_entry_chain_type_to_delegate_type (fib_forward_chain_type_t fct)
     case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
     case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
         break;
+    case FIB_FORW_CHAIN_TYPE_NSH:
+        return (FIB_ENTRY_DELEGATE_CHAIN_NSH);
     }
     ASSERT(0);
     return (FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4);
@@ -142,6 +144,8 @@ fib_entry_delegate_type_to_chain_type (fib_entry_delegate_type_t fdt)
         return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
     case FIB_ENTRY_DELEGATE_CHAIN_ETHERNET:
         return (FIB_FORW_CHAIN_TYPE_ETHERNET);
+    case FIB_ENTRY_DELEGATE_CHAIN_NSH:
+        return (FIB_FORW_CHAIN_TYPE_NSH);
     case FIB_ENTRY_DELEGATE_COVERED:
     case FIB_ENTRY_DELEGATE_ATTACHED_IMPORT:
     case FIB_ENTRY_DELEGATE_ATTACHED_EXPORT:
diff --git a/src/vnet/fib/fib_entry_delegate.h b/src/vnet/fib/fib_entry_delegate.h
index 6d3a6549..d9183c5f 100644
--- a/src/vnet/fib/fib_entry_delegate.h
+++ b/src/vnet/fib/fib_entry_delegate.h
@@ -35,6 +35,7 @@ typedef enum fib_entry_delegate_type_t_ {
     FIB_ENTRY_DELEGATE_CHAIN_MPLS_EOS = FIB_FORW_CHAIN_TYPE_MPLS_EOS,
     FIB_ENTRY_DELEGATE_CHAIN_MPLS_NON_EOS = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
     FIB_ENTRY_DELEGATE_CHAIN_ETHERNET = FIB_FORW_CHAIN_TYPE_ETHERNET,
+    FIB_ENTRY_DELEGATE_CHAIN_NSH = FIB_FORW_CHAIN_TYPE_NSH,
     /**
      * Dependency list of covered entries.
      * these are more specific entries that are interested in changes
@@ -51,7 +52,7 @@ typedef enum fib_entry_delegate_type_t_ {
 #define FOR_EACH_DELEGATE_CHAIN(_entry, _fdt, _fed, _body)    \
 {                                                             \
     for (_fdt = FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4;         \
-         _fdt <= FIB_ENTRY_DELEGATE_CHAIN_ETHERNET;           \
+         _fdt <= FIB_ENTRY_DELEGATE_CHAIN_NSH;                \
          _fdt++)                                              \
     {                                                         \
         _fed = fib_entry_delegate_get(_entry, _fdt);          \
diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c
index d54787cd..57109153 100644
--- a/src/vnet/fib/fib_entry_src.c
+++ b/src/vnet/fib/fib_entry_src.c
@@ -355,6 +355,7 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index,
             break;
         }
         case FIB_FORW_CHAIN_TYPE_ETHERNET:
+        case FIB_FORW_CHAIN_TYPE_NSH:
 	    ASSERT(0);
 	    break;
         }
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 080057f3..aa545b5e 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -1755,6 +1755,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
 	    case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
 	    case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
 	    case FIB_FORW_CHAIN_TYPE_ETHERNET:
+	    case FIB_FORW_CHAIN_TYPE_NSH:
 	    {
 		adj_index_t ai;
 
@@ -1787,6 +1788,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
 	    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
 	    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
 	    case FIB_FORW_CHAIN_TYPE_ETHERNET:
+	    case FIB_FORW_CHAIN_TYPE_NSH:
 		ASSERT(0);
 		break;
 	    }
@@ -1809,6 +1811,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
 	    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
 	    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
 	    case FIB_FORW_CHAIN_TYPE_ETHERNET:
+	    case FIB_FORW_CHAIN_TYPE_NSH:
 		ASSERT(0);
 		break;
             }
@@ -1824,6 +1827,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
 	    case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
 	    case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
 	    case FIB_FORW_CHAIN_TYPE_ETHERNET:
+	    case FIB_FORW_CHAIN_TYPE_NSH:
                 break;
 	    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
 	    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
diff --git a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c
index 3ecb38e8..2837a59d 100644
--- a/src/vnet/fib/fib_types.c
+++ b/src/vnet/fib/fib_types.c
@@ -279,6 +279,8 @@ fib_forw_chain_type_from_dpo_proto (dpo_proto_t proto)
 	return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
     case DPO_PROTO_ETHERNET:
 	return (FIB_FORW_CHAIN_TYPE_ETHERNET);
+    case DPO_PROTO_NSH:
+        return (FIB_FORW_CHAIN_TYPE_NSH);
     }
     ASSERT(0);
     return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
@@ -297,6 +299,8 @@ fib_forw_chain_type_to_link_type (fib_forward_chain_type_t fct)
 	return (VNET_LINK_IP6);
     case FIB_FORW_CHAIN_TYPE_ETHERNET:
 	return (VNET_LINK_ETHERNET);
+    case FIB_FORW_CHAIN_TYPE_NSH:
+        return (VNET_LINK_NSH);
     case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
 	/*
 	 * insufficient information to to convert
@@ -322,6 +326,8 @@ fib_forw_chain_type_to_dpo_proto (fib_forward_chain_type_t fct)
 	return (DPO_PROTO_IP6);
     case FIB_FORW_CHAIN_TYPE_ETHERNET:
 	return (DPO_PROTO_ETHERNET);
+    case FIB_FORW_CHAIN_TYPE_NSH:
+        return (DPO_PROTO_NSH);
     case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
     case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
 	return (DPO_PROTO_MPLS);
diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h
index c51bc9c0..05e0e0af 100644
--- a/src/vnet/fib/fib_types.h
+++ b/src/vnet/fib/fib_types.h
@@ -105,10 +105,14 @@ typedef enum fib_forward_chain_type_t_ {
     FIB_FORW_CHAIN_TYPE_MCAST_IP6,
     /**
      * Contribute an object that is to be used to forward Ethernet packets.
+     */
+    FIB_FORW_CHAIN_TYPE_ETHERNET,
+    /**
+     * Contribute an object that is to be used to forward NSH packets.
      * This is last in the list since it is not valid for many FIB objects,
      * and thus their array of per-chain-type DPOs can be sized smaller.
      */
-    FIB_FORW_CHAIN_TYPE_ETHERNET,
+    FIB_FORW_CHAIN_TYPE_NSH,
 }  __attribute__ ((packed)) fib_forward_chain_type_t;
 
 #define FIB_FORW_CHAINS {					\
@@ -119,14 +123,15 @@ typedef enum fib_forward_chain_type_t_ {
     [FIB_FORW_CHAIN_TYPE_MCAST_IP6]     = "multicast-ip6",	\
     [FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS]  = "mpls-neos",	        \
     [FIB_FORW_CHAIN_TYPE_MPLS_EOS]      = "mpls-eos",	        \
+    [FIB_FORW_CHAIN_TYPE_NSH]           = "nsh",                \
 }
 
-#define FIB_FORW_CHAIN_NUM (FIB_FORW_CHAIN_TYPE_MPLS_ETHERNET+1)
+#define FIB_FORW_CHAIN_NUM (FIB_FORW_CHAIN_TYPE_NSH+1)
 #define FIB_FORW_CHAIN_MPLS_NUM (FIB_FORW_CHAIN_TYPE_MPLS_EOS+1)
 
 #define FOR_EACH_FIB_FORW_CHAIN(_item)			  \
     for (_item = FIB_FORW_CHAIN_TYPE_UNICAST_IP4;   	  \
-	 _item <= FIB_FORW_CHAIN_TYPE_ETHERNET;		  \
+	 _item <= FIB_FORW_CHAIN_TYPE_NSH;		  \
 	 _item++)
 
 #define FOR_EACH_FIB_FORW_MPLS_CHAIN(_item)		  \
diff --git a/src/vnet/gre/gre.c b/src/vnet/gre/gre.c
index 0faed13e..cd43a3af 100644
--- a/src/vnet/gre/gre.c
+++ b/src/vnet/gre/gre.c
@@ -177,6 +177,9 @@ gre_proto_from_vnet_link (vnet_link_t link)
         return (GRE_PROTOCOL_teb);
     case VNET_LINK_ARP:
         return (GRE_PROTOCOL_arp);
+    case VNET_LINK_NSH:
+        ASSERT(0);
+        break;
     }
     ASSERT(0);
     return (GRE_PROTOCOL_ip4);
diff --git a/src/vnet/interface.c b/src/vnet/interface.c
index 9454ac18..2a1e70e8 100644
--- a/src/vnet/interface.c
+++ b/src/vnet/interface.c
@@ -1364,6 +1364,7 @@ vnet_link_to_l3_proto (vnet_link_t link)
     case VNET_LINK_ARP:
       return (VNET_L3_PACKET_TYPE_ARP);
     case VNET_LINK_ETHERNET:
+    case VNET_LINK_NSH:
       ASSERT (0);
       break;
     }
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index d42e5fda..7b791751 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -240,6 +240,7 @@ typedef enum vnet_link_t_
   VNET_LINK_MPLS,
   VNET_LINK_ETHERNET,
   VNET_LINK_ARP,
+  VNET_LINK_NSH,
 } __attribute__ ((packed)) vnet_link_t;
 
 #define VNET_LINKS {                   \
@@ -248,13 +249,14 @@ typedef enum vnet_link_t_
     [VNET_LINK_IP6] = "ipv6",          \
     [VNET_LINK_MPLS] = "mpls",         \
     [VNET_LINK_ARP] = "arp",	       \
+    [VNET_LINK_NSH] = "nsh",           \
 }
 
 /**
  * @brief Number of link types. Not part of the enum so it does not have to be included in
  * switch statements
  */
-#define VNET_LINK_NUM (VNET_LINK_ARP+1)
+#define VNET_LINK_NUM (VNET_LINK_NSH+1)
 
 /**
  * @brief Convert a link to to an Ethertype
diff --git a/src/vnet/lisp-cp/control.c b/src/vnet/lisp-cp/control.c
index cc73dfc5..f0383e16 100644
--- a/src/vnet/lisp-cp/control.c
+++ b/src/vnet/lisp-cp/control.c
@@ -2700,6 +2700,11 @@ get_src_and_dst_eids_from_buffer (lisp_cp_main_t * lcm, vlib_buffer_t * b,
       gid_address_vni (dst) = vni;
       gid_address_vni (src) = vni;
     }
+  else if (LISP_AFI_LCAF == type)
+    {
+      /* Eventually extend this to support NSH and other */
+      ASSERT (0);
+    }
 }
 
 static uword
@@ -2818,6 +2823,14 @@ lisp_cp_lookup_l2 (vlib_main_t * vm,
   return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_MAC));
 }
 
+static uword
+lisp_cp_lookup_nsh (vlib_main_t * vm,
+		    vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+  /* TODO decide if NSH should be propagated as LCAF or not */
+  return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_LCAF));
+}
+
 /* *INDENT-OFF* */
 VLIB_REGISTER_NODE (lisp_cp_lookup_ip4_node) = {
   .function = lisp_cp_lookup_ip4,
@@ -2875,6 +2888,25 @@ VLIB_REGISTER_NODE (lisp_cp_lookup_l2_node) = {
 };
 /* *INDENT-ON* */
 
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lisp_cp_lookup_nsh_node) = {
+  .function = lisp_cp_lookup_nsh,
+  .name = "lisp-cp-lookup-nsh",
+  .vector_size = sizeof (u32),
+  .format_trace = format_lisp_cp_lookup_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = LISP_CP_LOOKUP_N_ERROR,
+  .error_strings = lisp_cp_lookup_error_strings,
+
+  .n_next_nodes = LISP_CP_LOOKUP_N_NEXT,
+
+  .next_nodes = {
+      [LISP_CP_LOOKUP_NEXT_DROP] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
 /* lisp_cp_input statistics */
 #define foreach_lisp_cp_input_error                               \
 _(DROP, "drop")                                                   \
diff --git a/src/vnet/lisp-cp/lisp_api.c b/src/vnet/lisp-cp/lisp_api.c
index a877540b..78d32e17 100644
--- a/src/vnet/lisp-cp/lisp_api.c
+++ b/src/vnet/lisp-cp/lisp_api.c
@@ -714,6 +714,8 @@ fid_type_to_api_type (fid_address_t * fid)
 
     case FID_ADDR_MAC:
       return 2;
+    case FID_ADDR_NSH:
+      return 3;
     }
 
   return ~0;
diff --git a/src/vnet/lisp-cp/lisp_cp_dpo.c b/src/vnet/lisp-cp/lisp_cp_dpo.c
index 185b07a2..848f621e 100644
--- a/src/vnet/lisp-cp/lisp_cp_dpo.c
+++ b/src/vnet/lisp-cp/lisp_cp_dpo.c
@@ -79,12 +79,17 @@ const static char *const lisp_cp_ethernet_nodes[] = {
   NULL,
 };
 
+const static char *const lisp_cp_nsh_nodes[] = {
+  "lisp-cp-lookup-nsh",
+  NULL,
+};
 
 const static char *const *const lisp_cp_nodes[DPO_PROTO_NUM] = {
   [DPO_PROTO_IP4] = lisp_cp_ip4_nodes,
   [DPO_PROTO_IP6] = lisp_cp_ip6_nodes,
   [DPO_PROTO_ETHERNET] = lisp_cp_ethernet_nodes,
   [DPO_PROTO_MPLS] = NULL,
+  [DPO_PROTO_NSH] = lisp_cp_nsh_nodes,
 };
 
 clib_error_t *
diff --git a/src/vnet/lisp-cp/lisp_types.c b/src/vnet/lisp-cp/lisp_types.c
index 748905d2..4a3d05b7 100644
--- a/src/vnet/lisp-cp/lisp_types.c
+++ b/src/vnet/lisp-cp/lisp_types.c
@@ -202,6 +202,20 @@ format_mac_address (u8 * s, va_list * args)
 		 a[0], a[1], a[2], a[3], a[4], a[5]);
 }
 
+uword
+unformat_nsh_address (unformat_input_t * input, va_list * args)
+{
+  nsh_t *a = va_arg (*args, nsh_t *);
+  return unformat (input, "SPI:%d SI:%d", &a->spi, &a->si);
+}
+
+u8 *
+format_nsh_address (u8 * s, va_list * args)
+{
+  nsh_t *a = va_arg (*args, nsh_t *);
+  return format (s, "SPI:%d SI:%d", a->spi, a->si);
+}
+
 u8 *
 format_fid_address (u8 * s, va_list * args)
 {
@@ -211,9 +225,10 @@ format_fid_address (u8 * s, va_list * args)
     {
     case FID_ADDR_IP_PREF:
       return format (s, "%U", format_ip_prefix, &fid_addr_ippref (a));
-
     case FID_ADDR_MAC:
       return format (s, "%U", format_mac_address, &fid_addr_mac (a));
+    case FID_ADDR_NSH:
+      return format (s, "%U", format_nsh_address, &fid_addr_nsh (a));
 
     default:
       clib_warning ("Can't format fid address type %d!", fid_addr_type (a));
@@ -239,6 +254,8 @@ format_gid_address (u8 * s, va_list * args)
     case GID_ADDR_MAC:
       return format (s, "[%d] %U", gid_address_vni (a), format_mac_address,
 		     &gid_address_mac (a));
+    case GID_ADDR_NSH:
+      return format (s, "%U", format_nsh_address, &gid_address_nsh (a));
     default:
       clib_warning ("Can't format gid type %d", type);
       return 0;
@@ -252,6 +269,7 @@ unformat_fid_address (unformat_input_t * i, va_list * args)
   fid_address_t *a = va_arg (*args, fid_address_t *);
   ip_prefix_t ippref;
   u8 mac[6] = { 0 };
+  nsh_t nsh;
 
   if (unformat (i, "%U", unformat_ip_prefix, &ippref))
     {
@@ -263,6 +281,11 @@ unformat_fid_address (unformat_input_t * i, va_list * args)
       fid_addr_type (a) = FID_ADDR_MAC;
       mac_copy (fid_addr_mac (a), mac);
     }
+  else if (unformat (i, "%U", unformat_nsh_address, &nsh))
+    {
+      fid_addr_type (a) = FID_ADDR_NSH;
+      fid_addr_nsh (a) = nsh.spi << 8 | nsh.si;	/* packed SPI+SI key */
+    }
   else
     return 0;
 
@@ -301,6 +324,7 @@ unformat_gid_address (unformat_input_t * input, va_list * args)
   u8 mac[6] = { 0 };
   ip_prefix_t ippref;
   fid_address_t sim1, sim2;
+  nsh_t nsh;
 
   memset (&ippref, 0, sizeof (ippref));
   memset (&sim1, 0, sizeof (sim1));
@@ -323,6 +347,11 @@ unformat_gid_address (unformat_input_t * input, va_list * args)
       mac_copy (gid_address_mac (a), mac);
       gid_address_type (a) = GID_ADDR_MAC;
     }
+  else if (unformat (input, "%U", unformat_nsh_address, &nsh))
+    {
+      nsh_copy (&gid_address_nsh (a), &nsh);
+      gid_address_type (a) = GID_ADDR_NSH;
+    }
   else
     return 0;
 
@@ -588,6 +617,10 @@ fid_addr_parse (u8 * p, fid_address_t * a)
 
     case FID_ADDR_IP_PREF:
       return ip_address_parse (p, afi, ip_addr);
+
+    case FID_ADDR_NSH:
+      ASSERT (0);
+      break;
     }
   return ~0;
 }
@@ -917,6 +950,12 @@ mac_copy (void *dst, void *src)
   clib_memcpy (dst, src, 6);
 }
 
+void
+nsh_copy (void *dst, void *src)
+{
+  clib_memcpy (dst, src, sizeof (nsh_t));
+}
+
 void
 sd_copy (void *dst, void *src)
 {
@@ -1083,6 +1122,8 @@ fid_address_length (fid_address_t * a)
       return ip_prefix_length (&fid_addr_ippref (a));
     case FID_ADDR_MAC:
       return 0;
+    case FID_ADDR_NSH:
+      return 0;
     }
   return 0;
 }
diff --git a/src/vnet/lisp-cp/lisp_types.h b/src/vnet/lisp-cp/lisp_types.h
index ac58b894..e43f5ab0 100644
--- a/src/vnet/lisp-cp/lisp_types.h
+++ b/src/vnet/lisp-cp/lisp_types.h
@@ -89,6 +89,7 @@ typedef enum
   GID_ADDR_LCAF,
   GID_ADDR_MAC,
   GID_ADDR_SRC_DST,
+  GID_ADDR_NSH,
   GID_ADDR_NO_ADDRESS,
   GID_ADDR_TYPES
 } gid_address_type_t;
@@ -106,7 +107,8 @@ typedef enum
 typedef enum fid_addr_type_t_
 {
   FID_ADDR_IP_PREF,
-  FID_ADDR_MAC
+  FID_ADDR_MAC,
+  FID_ADDR_NSH
 } __attribute__ ((packed)) fid_addr_type_t;
 
 /* flat address type */
@@ -116,6 +118,7 @@ typedef struct
   {
     ip_prefix_t ippref;
     u8 mac[6];
+    u32 nsh;
   };
   fid_addr_type_t type;
 } fid_address_t;
@@ -124,6 +127,7 @@ typedef fid_address_t dp_address_t;
 
 #define fid_addr_ippref(_a) (_a)->ippref
 #define fid_addr_mac(_a) (_a)->mac
+#define fid_addr_nsh(_a) (_a)->nsh
 #define fid_addr_type(_a) (_a)->type
 u8 *format_fid_address (u8 * s, va_list * args);
 
@@ -153,6 +157,12 @@ typedef struct
 #define vni_mask_len(_a) (_a)->vni_mask_len
 #define vni_gid(_a) (_a)->gid_addr
 
+typedef struct
+{
+  u32 spi;
+  u8 si;
+} nsh_t;
+
 typedef struct
 {
   /* the union needs to be at the beginning! */
@@ -177,6 +187,7 @@ typedef struct _gid_address_t
     lcaf_t lcaf;
     u8 mac[6];
     source_dest_t sd;
+    nsh_t nsh;
   };
   u8 type;
   u32 vni;
@@ -232,6 +243,7 @@ void gid_address_ip_set (gid_address_t * dst, void *src, u8 version);
 #define gid_address_ip_version(_a) ip_addr_version(&gid_address_ip(_a))
 #define gid_address_lcaf(_a) (_a)->lcaf
 #define gid_address_mac(_a) (_a)->mac
+#define gid_address_nsh(_a) (_a)->nsh
 #define gid_address_vni(_a) (_a)->vni
 #define gid_address_vni_mask(_a) (_a)->vni_mask
 #define gid_address_sd_dst_ippref(_a) sd_dst_ippref(&(_a)->sd)
@@ -249,6 +261,7 @@ void gid_address_ip_set (gid_address_t * dst, void *src, u8 version);
   _(ip_prefix)                    \
   _(lcaf)                         \
   _(mac)                          \
+  _(nsh)                          \
   _(sd)
 
 /* *INDENT-OFF* */
diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c
index 3288b241..d12dc362 100644
--- a/src/vnet/lisp-gpe/interface.c
+++ b/src/vnet/lisp-gpe/interface.c
@@ -201,7 +201,7 @@ VNET_HW_INTERFACE_CLASS (lisp_gpe_hw_class) = {
 
 typedef struct
 {
-  u32 lb_index;
+  u32 dpo_index;
 } l2_lisp_gpe_tx_trace_t;
 
 static u8 *
@@ -211,7 +211,7 @@ format_l2_lisp_gpe_tx_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   l2_lisp_gpe_tx_trace_t *t = va_arg (*args, l2_lisp_gpe_tx_trace_t *);
 
-  s = format (s, "L2-LISP-GPE-TX: load-balance %d", t->lb_index);
+  s = format (s, "L2-LISP-GPE-TX: load-balance %d", t->dpo_index);
   return s;
 }
 
@@ -278,7 +278,7 @@ l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
 	    {
 	      l2_lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0,
 							   sizeof (*tr));
-	      tr->lb_index = lbi0;
+	      tr->dpo_index = lbi0;
 	    }
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
 					   n_left_to_next, bi0, l2_arc_to_lb);
@@ -306,6 +306,110 @@ VNET_DEVICE_CLASS (l2_lisp_gpe_device_class,static) = {
 };
 /* *INDENT-ON* */
 
+typedef struct
+{
+  u32 dpo_index;
+} nsh_lisp_gpe_tx_trace_t;
+
+u8 *
+format_nsh_lisp_gpe_tx_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  nsh_lisp_gpe_tx_trace_t *t = va_arg (*args, nsh_lisp_gpe_tx_trace_t *);
+
+  s = format (s, "NSH-GPE-TX: tunnel %d", t->dpo_index);
+  return s;
+}
+
+/**
+ * @brief LISP-GPE interface TX for NSH overlays.
+ * @node nsh_lisp_gpe_interface_tx
+ *
+ * The NSH LISP-GPE interface TX function.
+ *
+ * @param[in]   vm        vlib_main_t corresponding to the current thread.
+ * @param[in]   node      vlib_node_runtime_t data for this node.
+ * @param[in]   frame     vlib_frame_t whose contents should be dispatched.
+ *
+ * @return number of vectors in frame.
+ */
+static uword
+nsh_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
+			   vlib_frame_t * from_frame)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+  lisp_gpe_main_t *lgm = &lisp_gpe_main;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  vlib_buffer_t *b0;
+	  u32 bi0;
+	  u32 *nsh0, next0;
+	  const dpo_id_t *dpo0;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  nsh0 = vlib_buffer_get_current (b0);
+
+	  vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_LCAF;
+
+	  /* lookup SPI + SI (second word of the NSH header).
+	   * NB: Load balancing was done by the control plane */
+	  dpo0 = lisp_nsh_fib_lookup (lgm, nsh0[1]);
+
+	  next0 = dpo0->dpoi_next_node;
+	  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      nsh_lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0,
+							    sizeof (*tr));
+	      tr->dpo_index = dpo0->dpoi_index;
+	    }
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return from_frame->n_vectors;
+}
+
+static u8 *
+format_nsh_lisp_gpe_name (u8 * s, va_list * args)
+{
+  u32 dev_instance = va_arg (*args, u32);
+  return format (s, "nsh_lisp_gpe%d", dev_instance);
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (nsh_lisp_gpe_device_class,static) = {
+  .name = "NSH_LISP_GPE",
+  .format_device_name = format_nsh_lisp_gpe_name,
+  .format_tx_trace = format_nsh_lisp_gpe_tx_trace,
+  .tx_function = nsh_lisp_gpe_interface_tx,
+};
+/* *INDENT-ON* */
+
 static vnet_hw_interface_t *
 lisp_gpe_create_iface (lisp_gpe_main_t * lgm, u32 vni, u32 dp_table,
 		       vnet_device_class_t * dev_class,
@@ -615,6 +719,72 @@ lisp_gpe_del_l2_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id)
   lisp_gpe_remove_iface (lgm, hip[0], bd_index, &lgm->l2_ifaces);
 }
 
+/**
+ * @brief Add LISP-GPE NSH interface.
+ *
+ * Creates the single NSH LISP-GPE interface (vni 0, table 0) and brings
+ * it admin/link up.
+ *
+ * @param[in]   lgm     Reference to @ref lisp_gpe_main_t.
+ *
+ * @return sw_if_index, or ~0 if the NSH interface already exists.
+ */
+u32
+lisp_gpe_add_nsh_iface (lisp_gpe_main_t * lgm)
+{
+  vnet_main_t *vnm = lgm->vnet_main;
+  tunnel_lookup_t *nsh_ifaces = &lgm->nsh_ifaces;
+  vnet_hw_interface_t *hi;
+  uword *hip, *si;
+
+  hip = hash_get (nsh_ifaces->hw_if_index_by_dp_table, 0);
+
+  if (hip)
+    {
+      clib_warning ("NSH interface 0 already exists");
+      return ~0;
+    }
+
+  si = hash_get (nsh_ifaces->sw_if_index_by_vni, 0);
+  if (si)
+    {
+      clib_warning ("NSH interface already exists");
+      return ~0;
+    }
+
+  /* create lisp iface and populate tunnel tables */
+  hi = lisp_gpe_create_iface (lgm, 0, 0,
+			      &nsh_lisp_gpe_device_class, &lgm->nsh_ifaces);
+
+  /* enable interface */
+  vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
+			       VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+  vnet_hw_interface_set_flags (vnm, hi->hw_if_index,
+			       VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+  return (hi->sw_if_index);
+}
+
+/**
+ * @brief Del LISP-GPE NSH interface.
+ *
+ */
+void
+lisp_gpe_del_nsh_iface (lisp_gpe_main_t * lgm)
+{
+  tunnel_lookup_t *nsh_ifaces = &lgm->nsh_ifaces;
+  uword *hip;
+
+  hip = hash_get (nsh_ifaces->hw_if_index_by_dp_table, 0);
+
+  if (hip == 0)
+    {
+      clib_warning ("The NSH 0 interface doesn't exist");
+      return;
+    }
+  lisp_gpe_remove_iface (lgm, hip[0], 0, &lgm->nsh_ifaces);
+}
+
 static clib_error_t *
 lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input,
 				   vlib_cli_command_t * cmd)
@@ -623,6 +793,7 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input,
   u8 is_add = 1;
   u32 table_id, vni, bd_id;
   u8 vni_is_set = 0, vrf_is_set = 0, bd_index_is_set = 0;
+  u8 nsh_iface = 0;
 
   if (vnet_lisp_gpe_enable_disable_status () == 0)
     {
@@ -651,6 +822,10 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input,
 	{
 	  bd_index_is_set = 1;
 	}
+      else if (unformat (line_input, "nsh"))
+	{
+	  nsh_iface = 1;
+	}
       else
 	{
 	  return clib_error_return (0, "parse error: '%U'",
@@ -689,6 +864,21 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input,
 	lisp_gpe_tenant_l3_iface_unlock (vni);
     }
 
+  if (nsh_iface)
+    {
+      if (is_add)
+	{
+	  if (~0 == lisp_gpe_add_nsh_iface (&lisp_gpe_main))
+	    {
+	      return clib_error_return (0, "NSH interface not created");
+	    }
+	}
+      else
+	{
+	  lisp_gpe_del_nsh_iface (&lisp_gpe_main);
+	}
+    }
+
   return (NULL);
 }
 
diff --git a/src/vnet/lisp-gpe/lisp_gpe.c b/src/vnet/lisp-gpe/lisp_gpe.c
index e78d45c9..e76c03f0 100644
--- a/src/vnet/lisp-gpe/lisp_gpe.c
+++ b/src/vnet/lisp-gpe/lisp_gpe.c
@@ -151,6 +151,7 @@ lisp_gpe_add_del_fwd_entry_command_fn (vlib_main_t * vm,
   gid_address_copy (&a->lcl_eid, leid);
   gid_address_copy (&a->rmt_eid, reid);
   a->locator_pairs = pairs;
+  a->action = action;
 
   rv = vnet_lisp_gpe_add_del_fwd_entry (a, 0);
   if (0 != rv)
@@ -291,7 +292,6 @@ format_vnet_lisp_gpe_status (u8 * s, va_list * args)
   return format (s, "%s", lgm->is_en ? "enabled" : "disabled");
 }
 
-
 /** LISP-GPE init function. */
 clib_error_t *
 lisp_gpe_init (vlib_main_t * vm)
diff --git a/src/vnet/lisp-gpe/lisp_gpe.h b/src/vnet/lisp-gpe/lisp_gpe.h
index 3288c99f..e92df385 100644
--- a/src/vnet/lisp-gpe/lisp_gpe.h
+++ b/src/vnet/lisp-gpe/lisp_gpe.h
@@ -119,6 +119,15 @@ typedef struct lisp_gpe_main
   /** Load-balance for a miss in the table */
   dpo_id_t l2_lb_cp_lkup;
 
+  /* NSH data structures
+   * ================== */
+
+    BVT (clib_bihash) nsh_fib;
+
+  tunnel_lookup_t nsh_ifaces;
+
+  const dpo_id_t *nsh_cp_lkup;
+
   /** convenience */
   vlib_main_t *vlib_main;
   vnet_main_t *vnet_main;
diff --git a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c
index 8c96a25c..1dbf8677 100644
--- a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c
+++ b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c
@@ -211,6 +211,8 @@ lisp_gpe_adj_proto_from_vnet_link_type (vnet_link_t linkt)
       return (LISP_GPE_NEXT_PROTO_IP6);
     case VNET_LINK_ETHERNET:
       return (LISP_GPE_NEXT_PROTO_ETHERNET);
+    case VNET_LINK_NSH:
+      return (LISP_GPE_NEXT_PROTO_NSH);
     default:
       ASSERT (0);
     }
@@ -254,14 +256,14 @@ lisp_gpe_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
   ladj = pool_elt_at_index (lisp_adj_pool, lai);
   lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
   linkt = adj_get_link_type (ai);
-
   adj_nbr_midchain_update_rewrite
     (ai, lisp_gpe_fixup,
      (VNET_LINK_ETHERNET == linkt ?
       ADJ_MIDCHAIN_FLAG_NO_COUNT :
       ADJ_MIDCHAIN_FLAG_NONE),
-     lisp_gpe_tunnel_build_rewrite
-     (lgt, ladj, lisp_gpe_adj_proto_from_vnet_link_type (linkt)));
+     lisp_gpe_tunnel_build_rewrite (lgt, ladj,
+				    lisp_gpe_adj_proto_from_vnet_link_type
+				    (linkt)));
 
   lisp_gpe_adj_stack_one (ladj, ai);
 }
diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
index 7ad8679e..e51b585e 100644
--- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
+++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
@@ -340,10 +340,14 @@ gid_to_dp_address (gid_address_t * g, dp_address_t * d)
       d->type = FID_ADDR_IP_PREF;
       break;
     case GID_ADDR_MAC:
-    default:
       mac_copy (&d->mac, &gid_address_mac (g));
       d->type = FID_ADDR_MAC;
       break;
+    case GID_ADDR_NSH:
+    default:
+      d->nsh = gid_address_nsh (g).spi << 8 | gid_address_nsh (g).si;
+      d->type = FID_ADDR_NSH;
+      break;
     }
 }
 
@@ -671,7 +675,7 @@ del_l2_fwd_entry (lisp_gpe_main_t * lgm,
 }
 
 /**
- * @brief Construct and insert the forwarding information used by a L2 entry
+ * @brief Construct and insert the forwarding information used by an L2 entry
  */
 static void
 lisp_gpe_l2_update_fwding (lisp_gpe_fwd_entry_t * lfe)
@@ -688,7 +692,16 @@ lisp_gpe_l2_update_fwding (lisp_gpe_fwd_entry_t * lfe)
     }
   else
     {
-      dpo_copy (&dpo, &lgm->l2_lb_cp_lkup);
+      switch (lfe->action)
+	{
+	case SEND_MAP_REQUEST:
+	  dpo_copy (&dpo, &lgm->l2_lb_cp_lkup);
+	  break;
+	case NO_ACTION:
+	case FORWARD_NATIVE:
+	case DROP:
+	  dpo_copy (&dpo, drop_dpo_get (DPO_PROTO_ETHERNET));
+	}
     }
 
   /* add entry to l2 lisp fib */
@@ -784,6 +797,276 @@ add_l2_fwd_entry (lisp_gpe_main_t * lgm,
   return 0;
 }
 
+/**
+ * @brief Lookup NSH FIB entry
+ *
+ * Does an SPI+SI lookup in the NSH LISP FIB.
+ *
+ * @param[in]   lgm             Reference to @ref lisp_gpe_main_t.
+ * @param[in]   spi_si          SPI + SI.
+ *
+ * @return forwarding DPO of the matching entry, or the CP lookup DPO on a miss.
+ */
+const dpo_id_t *
+lisp_nsh_fib_lookup (lisp_gpe_main_t * lgm, u32 spi_si)
+{
+  int rv;
+  BVT (clib_bihash_kv) kv, value;
+
+  memset (&kv, 0, sizeof (kv));
+  kv.key[0] = spi_si;
+  rv = BV (clib_bihash_search_inline_2) (&lgm->nsh_fib, &kv, &value);
+
+  if (rv != 0)
+    {
+      return lgm->nsh_cp_lkup;
+    }
+  else
+    {
+      lisp_gpe_fwd_entry_t *lfe;
+      lfe = pool_elt_at_index (lgm->lisp_fwd_entry_pool, value.value);
+      return &lfe->nsh.choice;
+    }
+}
+
+/**
+ * @brief Add/del NSH FIB entry
+ *
+ * Inserts value in NSH FIB keyed by SPI+SI. If entry is
+ * overwritten the associated value is returned.
+ *
+ * @param[in]   spi_si          SPI + SI.
+ * @param[in]   lfei            Forwarding entry index stored on add.
+ * @param[in]   is_add          Non-zero to add the entry, zero to delete it.
+ *
+ * @return ~0 or value of overwritten entry.
+ */
+static u32
+lisp_nsh_fib_add_del_entry (u32 spi_si, u32 lfei, u8 is_add)
+{
+  lisp_gpe_main_t *lgm = &lisp_gpe_main;
+  BVT (clib_bihash_kv) kv, value;
+  u32 old_val = ~0;
+
+  memset (&kv, 0, sizeof (kv));
+  kv.key[0] = spi_si;
+  kv.value = 0ULL;
+
+  if (BV (clib_bihash_search) (&lgm->nsh_fib, &kv, &value) == 0)
+    old_val = value.value;
+
+  if (!is_add)
+    BV (clib_bihash_add_del) (&lgm->nsh_fib, &kv, 0 /* is_add */ );
+  else
+    {
+      kv.value = lfei;
+      BV (clib_bihash_add_del) (&lgm->nsh_fib, &kv, 1 /* is_add */ );
+    }
+  return old_val;
+}
+
+#define NSH_FIB_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define NSH_FIB_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+static void
+nsh_fib_init (lisp_gpe_main_t * lgm)
+{
+  BV (clib_bihash_init) (&lgm->nsh_fib, "nsh fib",
+			 1 << max_log2 (NSH_FIB_DEFAULT_HASH_NUM_BUCKETS),
+			 NSH_FIB_DEFAULT_HASH_MEMORY_SIZE);
+
+  /*
+   * the result from a 'miss' in a NSH Table
+   */
+  lgm->nsh_cp_lkup = lisp_cp_dpo_get (DPO_PROTO_NSH);
+}
+
+static void
+del_nsh_fwd_entry_i (lisp_gpe_main_t * lgm, lisp_gpe_fwd_entry_t * lfe)
+{
+  lisp_fwd_path_t *path;
+
+  if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
+    {
+      vec_foreach (path, lfe->paths)
+      {
+	lisp_gpe_adjacency_unlock (path->lisp_adj);
+      }
+      fib_path_list_child_remove (lfe->nsh.path_list_index,
+				  lfe->nsh.child_index);
+      dpo_reset (&lfe->nsh.choice);
+    }
+
+  lisp_nsh_fib_add_del_entry (fid_addr_nsh (&lfe->key->rmt), (u32) ~ 0, 0);
+
+  hash_unset_mem (lgm->lisp_gpe_fwd_entries, lfe->key);
+  clib_mem_free (lfe->key);
+  pool_put (lgm->lisp_fwd_entry_pool, lfe);
+}
+
+/**
+ * @brief Delete LISP NSH forwarding entry.
+ *
+ * Coordinates the removal of forwarding entries for NSH LISP overlay.
+ *
+ * @param[in]   lgm     Reference to @ref lisp_gpe_main_t.
+ * @param[in]   a       Parameters for building the forwarding entry.
+ *
+ * @return 0 on success.
+ */
+static int
+del_nsh_fwd_entry (lisp_gpe_main_t * lgm,
+		   vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
+{
+  lisp_gpe_fwd_entry_key_t key;
+  lisp_gpe_fwd_entry_t *lfe;
+
+  lfe = find_fwd_entry (lgm, a, &key);
+
+  if (NULL == lfe)
+    return VNET_API_ERROR_INVALID_VALUE;
+
+  del_nsh_fwd_entry_i (lgm, lfe);
+
+  return (0);
+}
+
+/**
+ * @brief Construct and insert the forwarding information used by an NSH entry
+ */
+static void
+lisp_gpe_nsh_update_fwding (lisp_gpe_fwd_entry_t * lfe)
+{
+  lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
+  dpo_id_t dpo = DPO_INVALID;
+  vnet_hw_interface_t *hi;
+  uword *hip;
+
+  if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
+    {
+      fib_path_list_contribute_forwarding (lfe->nsh.path_list_index,
+					   FIB_FORW_CHAIN_TYPE_NSH,
+					   &lfe->nsh.dpo);
+
+      /*
+       * LISP encap is always the same for this SPI+SI so we do that hash now
+       * and stack on the choice.
+       */
+      if (DPO_LOAD_BALANCE == lfe->nsh.dpo.dpoi_type)
+	{
+	  const dpo_id_t *tmp;
+	  const load_balance_t *lb;
+	  int hash;
+
+	  lb = load_balance_get (lfe->nsh.dpo.dpoi_index);
+	  hash = fid_addr_nsh (&lfe->key->rmt) % lb->lb_n_buckets;
+	  tmp =
+	    load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
+
+	  dpo_copy (&dpo, tmp);
+	}
+    }
+  else
+    {
+      switch (lfe->action)
+	{
+	case SEND_MAP_REQUEST:
+	  dpo_copy (&dpo, lgm->nsh_cp_lkup);
+	  break;
+	case NO_ACTION:
+	case FORWARD_NATIVE:
+	case DROP:
+	  dpo_copy (&dpo, drop_dpo_get (DPO_PROTO_NSH));
+	}
+    }
+
+  /* We have only one nsh-lisp interface (no NSH virtualization) */
+  hip = hash_get (lgm->nsh_ifaces.hw_if_index_by_dp_table, 0);
+  hi = hip ? vnet_get_hw_interface (lgm->vnet_main, hip[0]) : NULL;
+  if (hi)			/* interface may not have been created yet */
+    dpo_stack_from_node (hi->tx_node_index, &lfe->nsh.choice, &dpo);
+
+  /* add entry to nsh lisp fib */
+  lisp_nsh_fib_add_del_entry (fid_addr_nsh (&lfe->key->rmt),
+			      lfe - lgm->lisp_fwd_entry_pool, 1);
+
+  dpo_reset (&dpo);
+}
+
+/**
+ * @brief Add LISP NSH forwarding entry.
+ *
+ * Coordinates the creation of forwarding entries for NSH LISP overlay:
+ * creates the path-list for the entry and injects it in the NSH FIB.
+ *
+ * @param[in]   lgm     Reference to @ref lisp_gpe_main_t.
+ * @param[in]   a       Parameters for building the forwarding entry.
+ *
+ * @return 0 on success, VNET_API_ERROR_INVALID_VALUE if the entry exists.
+ */
+static int
+add_nsh_fwd_entry (lisp_gpe_main_t * lgm,
+		   vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
+{
+  lisp_gpe_fwd_entry_key_t key;
+  lisp_gpe_fwd_entry_t *lfe;
+
+  lfe = find_fwd_entry (lgm, a, &key);
+
+  if (NULL != lfe)
+    /* don't support updates */
+    return VNET_API_ERROR_INVALID_VALUE;
+
+  pool_get (lgm->lisp_fwd_entry_pool, lfe);
+  memset (lfe, 0, sizeof (*lfe));
+  lfe->key = clib_mem_alloc (sizeof (key));
+  memcpy (lfe->key, &key, sizeof (key));
+
+  hash_set_mem (lgm->lisp_gpe_fwd_entries, lfe->key,
+		lfe - lgm->lisp_fwd_entry_pool);
+
+  lfe->type = (a->is_negative ?
+	       LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE :
+	       LISP_GPE_FWD_ENTRY_TYPE_NORMAL);
+  lfe->tenant = 0;
+
+  if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
+    {
+      fib_route_path_t *rpaths;
+
+      /*
+       * Make the sorted array of LISP paths with their resp. adjacency
+       */
+      lisp_gpe_fwd_entry_mk_paths (lfe, a);
+
+      /*
+       * From the LISP paths, construct a FIB path list that will
+       * contribute a load-balance.
+       */
+      rpaths = lisp_gpe_mk_fib_paths (lfe->paths);
+
+      lfe->nsh.path_list_index =
+	fib_path_list_create (FIB_PATH_LIST_FLAG_NONE, rpaths);
+
+      /*
+       * become a child of the path-list so we receive updates when
+       * its forwarding state changes. this includes an implicit lock.
+       */
+      lfe->nsh.child_index =
+	fib_path_list_child_add (lfe->nsh.path_list_index,
+				 FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY,
+				 lfe - lgm->lisp_fwd_entry_pool);
+    }
+  else
+    {
+      lfe->action = a->action;
+    }
+
+  lisp_gpe_nsh_update_fwding (lfe);
+
+  return 0;
+}
+
 /**
  * @brief conver from the embedded fib_node_t struct to the LSIP entry
  */
@@ -802,7 +1085,12 @@ static fib_node_back_walk_rc_t
 lisp_gpe_fib_node_back_walk (fib_node_t * node,
 			     fib_node_back_walk_ctx_t * ctx)
 {
-  lisp_gpe_l2_update_fwding (lisp_gpe_fwd_entry_from_fib_node (node));
+  lisp_gpe_fwd_entry_t *lfe = lisp_gpe_fwd_entry_from_fib_node (node);
+
+  if (fid_addr_type (&lfe->key->rmt) == FID_ADDR_MAC)
+    lisp_gpe_l2_update_fwding (lfe);
+  else if (fid_addr_type (&lfe->key->rmt) == FID_ADDR_NSH)
+    lisp_gpe_nsh_update_fwding (lfe);
 
   return (FIB_NODE_BACK_WALK_CONTINUE);
 }
@@ -877,6 +1165,11 @@ vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a,
 	return add_l2_fwd_entry (lgm, a);
       else
 	return del_l2_fwd_entry (lgm, a);
+    case GID_ADDR_NSH:
+      if (a->is_add)
+	return add_nsh_fwd_entry (lgm, a);
+      else
+	return del_nsh_fwd_entry (lgm, a);
     default:
       clib_warning ("Forwarding entries for type %d not supported!", type);
       return -1;
@@ -903,6 +1196,9 @@ vnet_lisp_gpe_fwd_entry_flush (void)
       case FID_ADDR_IP_PREF:
 	del_ip_fwd_entry_i (lgm, lfe);
 	break;
+      case FID_ADDR_NSH:
+        del_nsh_fwd_entry_i (lgm, lfe);
+        break;
       }
   }));
   /* *INDENT-ON* */
@@ -967,6 +1263,10 @@ format_lisp_gpe_fwd_entry (u8 * s, va_list ap)
 	  s = format (s, " fib-path-list:%d\n", lfe->l2.path_list_index);
 	  s = format (s, " dpo:%U\n", format_dpo_id, &lfe->l2.dpo, 0);
 	  break;
+	case FID_ADDR_NSH:
+	  s = format (s, " fib-path-list:%d\n", lfe->nsh.path_list_index);
+	  s = format (s, " dpo:%U\n", format_dpo_id, &lfe->nsh.dpo, 0);
+	  break;
 	case FID_ADDR_IP_PREF:
 	  break;
 	}
@@ -1036,6 +1336,7 @@ lisp_gpe_fwd_entry_init (vlib_main_t * vm)
     return (error);
 
   l2_fib_init (lgm);
+  nsh_fib_init (lgm);
 
   fib_node_register_type (FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY, &lisp_fwd_vft);
 
diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.h b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.h
index f7923671..d58895a3 100644
--- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.h
+++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.h
@@ -81,7 +81,7 @@ typedef struct lisp_gpe_fwd_entry_t_
   fib_node_t node;
 
   /**
-   * The Entry's key: {lEID,r-EID,vni}
+   * The Entry's key: {lEID,rEID,vni}
    */
   lisp_gpe_fwd_entry_key_t *key;
 
@@ -150,6 +150,33 @@ typedef struct lisp_gpe_fwd_entry_t_
        */
       dpo_id_t dpo;
     } l2;
+
+    /**
+     * Fields relevant to an NSH entry
+     */
+    struct
+    {
+      /**
+       * The path-list created for the forwarding
+       */
+      fib_node_index_t path_list_index;
+
+      /**
+       * Child index of this entry on the path-list
+       */
+      u32 child_index;
+
+      /**
+       * The DPO contributed by NSH
+       */
+      dpo_id_t dpo;
+
+      /**
+       * The DPO used for forwarding. Obtained after stacking tx node
+       * onto lb choice
+       */
+      dpo_id_t choice;
+    } nsh;
   };
 
   union
@@ -177,6 +204,8 @@ extern void vnet_lisp_gpe_fwd_entry_flush (void);
 extern u32 lisp_l2_fib_lookup (lisp_gpe_main_t * lgm,
 			       u16 bd_index, u8 src_mac[8], u8 dst_mac[8]);
 
+extern const dpo_id_t *lisp_nsh_fib_lookup (lisp_gpe_main_t * lgm,
+					    u32 spi_si);
 #endif
 
 /*
diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c
index f1b6e8ee..acbe90bb 100644
--- a/src/vnet/mfib/mfib_entry.c
+++ b/src/vnet/mfib/mfib_entry.c
@@ -465,6 +465,7 @@ mfib_entry_src_collect_forwarding (fib_node_index_t pl_index,
     case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
     case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
     case FIB_FORW_CHAIN_TYPE_ETHERNET:
+    case FIB_FORW_CHAIN_TYPE_NSH:
         ASSERT(0);
         break;
     }
-- 
cgit 1.2.3-korg


From b69111e167f5be70b3721ed5c2e5e02b971c3f67 Mon Sep 17 00:00:00 2001
From: Florin Coras <fcoras@cisco.com>
Date: Mon, 13 Feb 2017 23:55:27 -0800
Subject: Add NSH load-balance and drop DPO

Also adds missing gpe nsh address type functions.

Change-Id: I3353a23c0518da9ce3b221ddf8c5bd0364930154
Signed-off-by: Florin Coras <fcoras@cisco.com>
---
 src/vnet/adj/adj_midchain.c   |   6 +++
 src/vnet/dpo/drop_dpo.c       |   6 +++
 src/vnet/dpo/load_balance.c   | 100 +++++++++++++++++++++++++++++++++++++++++-
 src/vnet/lisp-cp/lisp_types.c |  43 ++++++++++++++----
 src/vnet/lisp-gpe/lisp_gpe.c  |   8 ++--
 5 files changed, 148 insertions(+), 15 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
index 35cdb003..7d315651 100644
--- a/src/vnet/adj/adj_midchain.c
+++ b/src/vnet/adj/adj_midchain.c
@@ -563,6 +563,11 @@ const static char* const midchain_ethernet_nodes[] =
     "adj-l2-midchain",
     NULL,
 };
+const static char* const midchain_nsh_nodes[] =
+{
+    "adj-nsh-midchain",
+    NULL,
+};
 
 const static char* const * const midchain_nodes[DPO_PROTO_NUM] =
 {
@@ -570,6 +575,7 @@ const static char* const * const midchain_nodes[DPO_PROTO_NUM] =
     [DPO_PROTO_IP6]  = midchain_ip6_nodes,
     [DPO_PROTO_MPLS] = midchain_mpls_nodes,
     [DPO_PROTO_ETHERNET] = midchain_ethernet_nodes,
+    [DPO_PROTO_NSH] = midchain_nsh_nodes,
 };
 
 void
diff --git a/src/vnet/dpo/drop_dpo.c b/src/vnet/dpo/drop_dpo.c
index 5118d2a4..a1821ddd 100644
--- a/src/vnet/dpo/drop_dpo.c
+++ b/src/vnet/dpo/drop_dpo.c
@@ -91,12 +91,18 @@ const static char* const drop_ethernet_nodes[] =
     "error-drop",
     NULL,
 };
+const static char* const drop_nsh_nodes[] =
+{
+    "error-drop",
+    NULL,
+};
 const static char* const * const drop_nodes[DPO_PROTO_NUM] =
 {
     [DPO_PROTO_IP4]  = drop_ip4_nodes,
     [DPO_PROTO_IP6]  = drop_ip6_nodes,
     [DPO_PROTO_MPLS] = drop_mpls_nodes,
     [DPO_PROTO_ETHERNET] = drop_ethernet_nodes,
+    [DPO_PROTO_NSH] = drop_nsh_nodes,
 };
 
 void
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index f11b4e4d..e9fb5d9d 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -810,12 +810,18 @@ const static char* const load_balance_l2_nodes[] =
     "l2-load-balance",
     NULL,
 };
+const static char* const load_balance_nsh_nodes[] =
+{
+    "nsh-load-balance",
+    NULL,
+};
 const static char* const * const load_balance_nodes[DPO_PROTO_NUM] =
 {
     [DPO_PROTO_IP4]  = load_balance_ip4_nodes,
     [DPO_PROTO_IP6]  = load_balance_ip6_nodes,
     [DPO_PROTO_MPLS] = load_balance_mpls_nodes,
     [DPO_PROTO_ETHERNET] = load_balance_l2_nodes,
+    [DPO_PROTO_NSH] = load_balance_nsh_nodes,
 };
 
 void
@@ -981,7 +987,7 @@ l2_load_balance (vlib_main_t * vm,
 }
 
 static u8 *
-format_load_balance_trace (u8 * s, va_list * args)
+format_l2_load_balance_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
@@ -999,7 +1005,97 @@ VLIB_REGISTER_NODE (l2_load_balance_node) = {
   .name = "l2-load-balance",
   .vector_size = sizeof (u32),
 
-  .format_trace = format_load_balance_trace,
+  .format_trace = format_l2_load_balance_trace,
+  .n_next_nodes = 1,
+  .next_nodes = {
+      [0] = "error-drop",
+  },
+};
+
+static uword
+nsh_load_balance (vlib_main_t * vm,
+                 vlib_node_runtime_t * node,
+                 vlib_frame_t * frame)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t *b0;
+          u32 bi0, lbi0, next0, *nsh0;
+          const dpo_id_t *dpo0;
+          const load_balance_t *lb0;
+
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          lbi0 =  vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+          lb0 = load_balance_get(lbi0);
+
+          /* SPI + SI are the second word of the NSH header */
+          nsh0 = vlib_buffer_get_current (b0);
+          vnet_buffer(b0)->ip.flow_hash = nsh0[1] % lb0->lb_n_buckets;
+
+          dpo0 = load_balance_get_bucket_i(lb0,
+                                           vnet_buffer(b0)->ip.flow_hash &
+                                           (lb0->lb_n_buckets_minus_1));
+
+          next0 = dpo0->dpoi_next_node;
+          vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+          if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
+                                                         sizeof (*tr));
+              tr->lb_index = lbi0;
+            }
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+static u8 *
+format_nsh_load_balance_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
+
+  s = format (s, "NSH-load-balance: index %d", t->lb_index);
+  return s;
+}
+
+/**
+ * @brief NSH load-balance node: picks a load-balance bucket from the NSH SPI+SI word
+ */
+VLIB_REGISTER_NODE (nsh_load_balance_node) = {
+  .function = nsh_load_balance,
+  .name = "nsh-load-balance",
+  .vector_size = sizeof (u32),
+
+  .format_trace = format_nsh_load_balance_trace,
   .n_next_nodes = 1,
   .next_nodes = {
       [0] = "error-drop",
diff --git a/src/vnet/lisp-cp/lisp_types.c b/src/vnet/lisp-cp/lisp_types.c
index 4a3d05b7..b6466686 100644
--- a/src/vnet/lisp-cp/lisp_types.c
+++ b/src/vnet/lisp-cp/lisp_types.c
@@ -31,16 +31,16 @@ typedef int (*cmp_fct) (void *, void *);
 
 size_to_write_fct size_to_write_fcts[GID_ADDR_TYPES] =
   { ip_prefix_size_to_write, lcaf_size_to_write, mac_size_to_write,
-  sd_size_to_write
+  sd_size_to_write, nsh_size_to_write
 };
 serdes_fct write_fcts[GID_ADDR_TYPES] =
-  { ip_prefix_write, lcaf_write, mac_write, sd_write };
+  { ip_prefix_write, lcaf_write, mac_write, sd_write, nsh_write };
 cast_fct cast_fcts[GID_ADDR_TYPES] =
-  { ip_prefix_cast, lcaf_cast, mac_cast, sd_cast };
+  { ip_prefix_cast, lcaf_cast, mac_cast, sd_cast, nsh_cast };
 addr_len_fct addr_len_fcts[GID_ADDR_TYPES] =
-  { ip_prefix_length, lcaf_length, mac_length, sd_length };
+  { ip_prefix_length, lcaf_length, mac_length, sd_length, nsh_length };
 copy_fct copy_fcts[GID_ADDR_TYPES] =
-  { ip_prefix_copy, lcaf_copy, mac_copy, sd_copy };
+  { ip_prefix_copy, lcaf_copy, mac_copy, sd_copy, nsh_copy };
 
 #define foreach_lcaf_type \
   _(1, no_addr)      \
@@ -951,15 +951,15 @@ mac_copy (void *dst, void *src)
 }
 
 void
-nsh_copy (void *dst, void *src)
+sd_copy (void *dst, void *src)
 {
-  clib_memcpy (dst, src, sizeof (nsh_t));
+  clib_memcpy (dst, src, sizeof (source_dest_t));
 }
 
 void
-sd_copy (void *dst, void *src)
+nsh_copy (void *dst, void *src)
 {
-  clib_memcpy (dst, src, sizeof (source_dest_t));
+  clib_memcpy (dst, src, sizeof (nsh_t));
 }
 
 int
@@ -1031,6 +1031,12 @@ sd_length (void *a)
   return 0;
 }
 
+u8
+nsh_length (void *a)
+{
+  return 0;
+}
+
 void *
 lcaf_cast (gid_address_t * a)
 {
@@ -1049,6 +1055,12 @@ sd_cast (gid_address_t * a)
   return &gid_address_sd (a);
 }
 
+void *
+nsh_cast (gid_address_t * a)
+{
+  return &gid_address_nsh (a);
+}
+
 u8
 no_addr_length (void *a)
 {
@@ -1167,6 +1179,13 @@ sd_write (u8 * p, void *a)
   return size;
 }
 
+u16
+nsh_write (u8 * p, void *a)
+{
+  clib_warning ("not done");
+  return 0;
+}
+
 u16
 vni_write (u8 * p, void *a)
 {
@@ -1287,6 +1306,12 @@ mac_size_to_write (void *a)
   return sizeof (u16) + 6;
 }
 
+u16
+nsh_size_to_write (void *a)
+{
+  return sizeof (u16) + 4;
+}
+
 u8
 gid_address_len (gid_address_t * a)
 {
diff --git a/src/vnet/lisp-gpe/lisp_gpe.c b/src/vnet/lisp-gpe/lisp_gpe.c
index e76c03f0..d2f7ad44 100644
--- a/src/vnet/lisp-gpe/lisp_gpe.c
+++ b/src/vnet/lisp-gpe/lisp_gpe.c
@@ -103,15 +103,15 @@ lisp_gpe_add_del_fwd_entry_command_fn (vlib_main_t * vm,
 	}
     }
 
-  if (!vni_set || !dp_table_set)
+  if (!reid_set)
     {
-      vlib_cli_output (vm, "vni and vrf/bd must be set!");
+      vlib_cli_output (vm, "remote eid must be set!");
       goto done;
     }
 
-  if (!reid_set)
+  if (gid_address_type (reid) != GID_ADDR_NSH && (!vni_set || !dp_table_set))
     {
-      vlib_cli_output (vm, "remote eid must be set!");
+      vlib_cli_output (vm, "vni and vrf/bd must be set!");
       goto done;
     }
 
-- 
cgit 1.2.3-korg


From 20a175a18414c67e38b5ce0709b33fb1df8069c9 Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Tue, 14 Feb 2017 07:28:41 -0800
Subject: dhcp: multiple additions

DHCP additions:
1) DHCPv4 will only relay a message back to the client, if the Option82 information is present. So make this the default.
2) It is no longer possible to select via the API to "insert circuit ID" - since this is now default
3) Remove the version 2 API since it's now the same as version 1.
4) Adding the VSS option is now conditional only on the presence of VSS config (not the 'insert' option in the set API)
5) DHCP proxy dump via API

Change-Id: Ia7271ba8c1d4dbf34a02c401d268ccfbb1b74f17
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/scripts/vnet/dhcp/proxy   |  21 ++
 src/vat/api_format.c          | 173 ++++++-----
 src/vnet/dhcp/client.c        |   1 +
 src/vnet/dhcp/client.h        |   3 +
 src/vnet/dhcp/dhcp.api        |  67 ++---
 src/vnet/dhcp/dhcp_api.c      | 112 ++++---
 src/vnet/dhcp/proxy.h         |  33 ++-
 src/vnet/dhcp/proxy_error.def |   3 +-
 src/vnet/dhcp/proxy_node.c    | 676 +++++++++++++++++++++++-------------------
 src/vnet/dhcpv6/proxy.h       |  19 +-
 src/vnet/dhcpv6/proxy_node.c  | 513 ++++++++++++++++++--------------
 src/vnet/dpo/receive_dpo.c    |   5 +
 src/vpp/api/custom_dump.c     |  34 ---
 test/test_dhcp.py             | 166 ++++++-----
 test/vpp_papi_provider.py     |   6 +-
 15 files changed, 978 insertions(+), 854 deletions(-)
 create mode 100644 src/scripts/vnet/dhcp/proxy

(limited to 'src/vnet/dpo')

diff --git a/src/scripts/vnet/dhcp/proxy b/src/scripts/vnet/dhcp/proxy
new file mode 100644
index 00000000..c709d87d
--- /dev/null
+++ b/src/scripts/vnet/dhcp/proxy
@@ -0,0 +1,21 @@
+loop create
+loop create
+
+set int state loop0 up
+set int state loop1 up
+
+set int ip  table loop1 1
+set int ip6 table loop1 1
+
+set int ip addr loop0 10.0.0.1/24
+set int ip addr loop1 10.0.1.1/24
+
+set int ip addr loop0 2001::1/64
+set int ip addr loop1 2001:1::1/64
+
+set dhcp proxy server 10.255.0.1 src-address 10.0.0.1 server-fib-id 0 rx-fib-id 0
+set dhcp proxy server 10.255.0.2 src-address 10.0.1.1 server-fib-id 1 rx-fib-id 1
+
+set dhcpv6 proxy server 3001::1 src-address 2001::1 server-fib-id 0 rx-fib-id 0
+set dhcpv6 proxy server 3002::1 src-address 2001:1::1 server-fib-id 1 rx-fib-id 1
+
diff --git a/src/vat/api_format.c b/src/vat/api_format.c
index 11e68214..78c5e279 100644
--- a/src/vat/api_format.c
+++ b/src/vat/api_format.c
@@ -3819,7 +3819,6 @@ _(reset_vrf_reply)                                      \
 _(oam_add_del_reply)                                    \
 _(reset_fib_reply)                                      \
 _(dhcp_proxy_config_reply)                              \
-_(dhcp_proxy_config_2_reply)                            \
 _(dhcp_proxy_set_vss_reply)                             \
 _(dhcp_client_config_reply)                             \
 _(set_ip_flow_hash_reply)                               \
@@ -4033,8 +4032,8 @@ _(CREATE_SUBIF_REPLY, create_subif_reply)                     		\
 _(OAM_ADD_DEL_REPLY, oam_add_del_reply)                                 \
 _(RESET_FIB_REPLY, reset_fib_reply)                                     \
 _(DHCP_PROXY_CONFIG_REPLY, dhcp_proxy_config_reply)                     \
-_(DHCP_PROXY_CONFIG_2_REPLY, dhcp_proxy_config_2_reply)                 \
 _(DHCP_PROXY_SET_VSS_REPLY, dhcp_proxy_set_vss_reply)                   \
+_(DHCP_PROXY_DETAILS, dhcp_proxy_details)                               \
 _(DHCP_CLIENT_CONFIG_REPLY, dhcp_client_config_reply)                   \
 _(SET_IP_FLOW_HASH_REPLY, set_ip_flow_hash_reply)                       \
 _(SW_INTERFACE_IP6_ENABLE_DISABLE_REPLY,                                \
@@ -7635,9 +7634,9 @@ api_dhcp_proxy_config (vat_main_t * vam)
 {
   unformat_input_t *i = vam->input;
   vl_api_dhcp_proxy_config_t *mp;
-  u32 vrf_id = 0;
+  u32 rx_vrf_id = 0;
+  u32 server_vrf_id = 0;
   u8 is_add = 1;
-  u8 insert_cid = 1;
   u8 v4_address_set = 0;
   u8 v6_address_set = 0;
   ip4_address_t v4address;
@@ -7653,9 +7652,9 @@ api_dhcp_proxy_config (vat_main_t * vam)
     {
       if (unformat (i, "del"))
 	is_add = 0;
-      else if (unformat (i, "vrf %d", &vrf_id))
+      else if (unformat (i, "rx_vrf_id %d", &rx_vrf_id))
 	;
-      else if (unformat (i, "insert-cid %d", &insert_cid))
+      else if (unformat (i, "server_vrf_id %d", &server_vrf_id))
 	;
       else if (unformat (i, "svr %U", unformat_ip4_address, &v4address))
 	v4_address_set = 1;
@@ -7701,9 +7700,9 @@ api_dhcp_proxy_config (vat_main_t * vam)
   /* Construct the API message */
   M (DHCP_PROXY_CONFIG, mp);
 
-  mp->insert_circuit_id = insert_cid;
   mp->is_add = is_add;
-  mp->vrf_id = ntohl (vrf_id);
+  mp->rx_vrf_id = ntohl (rx_vrf_id);
+  mp->server_vrf_id = ntohl (server_vrf_id);
   if (v6_address_set)
     {
       mp->is_ipv6 = 1;
@@ -7724,100 +7723,98 @@ api_dhcp_proxy_config (vat_main_t * vam)
   return ret;
 }
 
-static int
-api_dhcp_proxy_config_2 (vat_main_t * vam)
+#define vl_api_dhcp_proxy_details_t_endian vl_noop_handler
+#define vl_api_dhcp_proxy_details_t_print vl_noop_handler
+
+static void
+vl_api_dhcp_proxy_details_t_handler (vl_api_dhcp_proxy_details_t * mp)
 {
-  unformat_input_t *i = vam->input;
-  vl_api_dhcp_proxy_config_2_t *mp;
-  u32 rx_vrf_id = 0;
-  u32 server_vrf_id = 0;
-  u8 is_add = 1;
-  u8 insert_cid = 1;
-  u8 v4_address_set = 0;
-  u8 v6_address_set = 0;
-  ip4_address_t v4address;
-  ip6_address_t v6address;
-  u8 v4_src_address_set = 0;
-  u8 v6_src_address_set = 0;
-  ip4_address_t v4srcaddress;
-  ip6_address_t v6srcaddress;
-  int ret;
+  vat_main_t *vam = &vat_main;
 
-  /* Parse args required to build the message */
-  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
-    {
-      if (unformat (i, "del"))
-	is_add = 0;
-      else if (unformat (i, "rx_vrf_id %d", &rx_vrf_id))
-	;
-      else if (unformat (i, "server_vrf_id %d", &server_vrf_id))
-	;
-      else if (unformat (i, "insert-cid %d", &insert_cid))
-	;
-      else if (unformat (i, "svr %U", unformat_ip4_address, &v4address))
-	v4_address_set = 1;
-      else if (unformat (i, "svr %U", unformat_ip6_address, &v6address))
-	v6_address_set = 1;
-      else if (unformat (i, "src %U", unformat_ip4_address, &v4srcaddress))
-	v4_src_address_set = 1;
-      else if (unformat (i, "src %U", unformat_ip6_address, &v6srcaddress))
-	v6_src_address_set = 1;
-      else
-	break;
-    }
+  if (mp->is_ipv6)
+    print (vam->ofp,
+	   "RX Table-ID %d, Server Table-ID %d, Server Address %U, Source Address %U, VSS FIB-ID %d, VSS OUI %d",
+	   ntohl (mp->rx_vrf_id),
+	   ntohl (mp->server_vrf_id),
+	   format_ip6_address, mp->dhcp_server,
+	   format_ip6_address, mp->dhcp_src_address,
+	   ntohl (mp->vss_fib_id), ntohl (mp->vss_oui));
+  else
+    print (vam->ofp,
+	   "RX Table-ID %d, Server Table-ID %d, Server Address %U, Source Address %U, VSS FIB-ID %d, VSS OUI %d",
+	   ntohl (mp->rx_vrf_id),
+	   ntohl (mp->server_vrf_id),
+	   format_ip4_address, mp->dhcp_server,
+	   format_ip4_address, mp->dhcp_src_address,
+	   ntohl (mp->vss_fib_id), ntohl (mp->vss_oui));
+}
 
-  if (v4_address_set && v6_address_set)
-    {
-      errmsg ("both v4 and v6 server addresses set");
-      return -99;
-    }
-  if (!v4_address_set && !v6_address_set)
-    {
-      errmsg ("no server addresses set");
-      return -99;
-    }
+static void vl_api_dhcp_proxy_details_t_handler_json
+  (vl_api_dhcp_proxy_details_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  vat_json_node_t *node = NULL;
+  struct in_addr ip4;
+  struct in6_addr ip6;
 
-  if (v4_src_address_set && v6_src_address_set)
+  if (VAT_JSON_ARRAY != vam->json_tree.type)
     {
-      errmsg ("both v4 and v6  src addresses set");
-      return -99;
+      ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+      vat_json_init_array (&vam->json_tree);
     }
-  if (!v4_src_address_set && !v6_src_address_set)
+  node = vat_json_array_add (&vam->json_tree);
+
+  vat_json_init_object (node);
+  vat_json_object_add_uint (node, "rx-table-id", ntohl (mp->rx_vrf_id));
+  vat_json_object_add_uint (node, "server-table-id",
+			    ntohl (mp->server_vrf_id));
+  if (mp->is_ipv6)
     {
-      errmsg ("no src addresses set");
-      return -99;
+      clib_memcpy (&ip6, &mp->dhcp_server, sizeof (ip6));
+      vat_json_object_add_ip6 (node, "server_address", ip6);
+      clib_memcpy (&ip6, &mp->dhcp_src_address, sizeof (ip6));
+      vat_json_object_add_ip6 (node, "src_address", ip6);
     }
-
-  if (!(v4_src_address_set && v4_address_set) &&
-      !(v6_src_address_set && v6_address_set))
+  else
     {
-      errmsg ("no matching server and src addresses set");
-      return -99;
+      clib_memcpy (&ip4, &mp->dhcp_server, sizeof (ip4));
+      vat_json_object_add_ip4 (node, "server_address", ip4);
+      clib_memcpy (&ip4, &mp->dhcp_src_address, sizeof (ip4));
+      vat_json_object_add_ip4 (node, "src_address", ip4);
     }
+  vat_json_object_add_uint (node, "vss-fib-id", ntohl (mp->vss_fib_id));
+  vat_json_object_add_uint (node, "vss-oui", ntohl (mp->vss_oui));
+}
 
-  /* Construct the API message */
-  M (DHCP_PROXY_CONFIG_2, mp);
+static int
+api_dhcp_proxy_dump (vat_main_t * vam)
+{
+  unformat_input_t *i = vam->input;
+  vl_api_control_ping_t *mp_ping;
+  vl_api_dhcp_proxy_dump_t *mp;
+  u8 is_ipv6 = 0;
+  int ret;
 
-  mp->insert_circuit_id = insert_cid;
-  mp->is_add = is_add;
-  mp->rx_vrf_id = ntohl (rx_vrf_id);
-  mp->server_vrf_id = ntohl (server_vrf_id);
-  if (v6_address_set)
-    {
-      mp->is_ipv6 = 1;
-      clib_memcpy (mp->dhcp_server, &v6address, sizeof (v6address));
-      clib_memcpy (mp->dhcp_src_address, &v6srcaddress, sizeof (v6address));
-    }
-  else
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
     {
-      clib_memcpy (mp->dhcp_server, &v4address, sizeof (v4address));
-      clib_memcpy (mp->dhcp_src_address, &v4srcaddress, sizeof (v4address));
+      if (unformat (i, "ipv6"))
+	is_ipv6 = 1;
+      else
+	{
+	  clib_warning ("parse error '%U'", format_unformat_error, i);
+	  return -99;
+	}
     }
 
-  /* send it... */
+  M (DHCP_PROXY_DUMP, mp);
+
+  mp->is_ip6 = is_ipv6;
   S (mp);
 
-  /* Wait for a reply, return good/bad news  */
+  /* Use a control ping for synchronization */
+  M (CONTROL_PING, mp_ping);
+  S (mp_ping);
+
   W (ret);
   return ret;
 }
@@ -18187,12 +18184,10 @@ _(oam_add_del, "src <ip4-address> dst <ip4-address> [vrf <n>] [del]")   \
 _(reset_fib, "vrf <n> [ipv6]")                                          \
 _(dhcp_proxy_config,                                                    \
   "svr <v46-address> src <v46-address>\n"                               \
-   "insert-cid <n> [del]")                                              \
-_(dhcp_proxy_config_2,                                                  \
-  "svr <v46-address> src <v46-address>\n"                               \
-   "rx_vrf_id <nn> server_vrf_id <nn> insert-cid <n> [del]")            \
+   "rx_vrf_id <nn> server_vrf_id <nn>  [del]")                          \
 _(dhcp_proxy_set_vss,                                                   \
   "tbl_id <n> fib_id <n> oui <n> [ipv6] [del]")                         \
+_(dhcp_proxy_dump, "[ipv6]")                                            \
 _(dhcp_client_config,                                                   \
   "<intfc> | sw_if_index <id> [hostname <name>] [disable_event] [del]") \
 _(set_ip_flow_hash,                                                     \
diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c
index c352e310..8a1a43b3 100644
--- a/src/vnet/dhcp/client.c
+++ b/src/vnet/dhcp/client.c
@@ -13,6 +13,7 @@
  * limitations under the License.
  */
 #include <vlib/vlib.h>
+#include <vnet/dhcp/client.h>
 #include <vnet/dhcp/proxy.h>
 #include <vnet/fib/fib_table.h>
 
diff --git a/src/vnet/dhcp/client.h b/src/vnet/dhcp/client.h
index d15e686b..a74368cb 100644
--- a/src/vnet/dhcp/client.h
+++ b/src/vnet/dhcp/client.h
@@ -19,6 +19,9 @@
 #ifndef included_dhcp_client_h
 #define included_dhcp_client_h
 
+#include <vnet/ip/ip.h>
+#include <vnet/dhcp/packet.h>
+
 #define foreach_dhcp_client_state               \
 _(DHCP_DISCOVER)                                \
 _(DHCP_REQUEST)                                 \
diff --git a/src/vnet/dhcp/dhcp.api b/src/vnet/dhcp/dhcp.api
index c228cd04..8daadd8c 100644
--- a/src/vnet/dhcp/dhcp.api
+++ b/src/vnet/dhcp/dhcp.api
@@ -16,7 +16,8 @@
 /** \brief DHCP Proxy config add / del request
     @param client_index - opaque cookie to identify the sender
     @param context - sender context, to match reply w/ request
-    @param vrf_id - vrf id
+    @param rx_vrf_id - Rx/interface vrf id
+    @param server_vrf_id - server vrf id
     @param if_ipv6 - ipv6 if non-zero, else ipv4
     @param is_add - add the config if non-zero, else delete
     @param insert_circuit_id - option82 suboption 1 fib number
@@ -27,10 +28,10 @@ define dhcp_proxy_config
 {
   u32 client_index;
   u32 context;
-  u32 vrf_id;
+  u32 rx_vrf_id;
+  u32 server_vrf_id;
   u8 is_ipv6;
   u8 is_add;
-  u8 insert_circuit_id;
   u8 dhcp_server[16];
   u8 dhcp_src_address[16];
 };
@@ -45,40 +46,6 @@ define dhcp_proxy_config_reply
   i32 retval;
 };
 
-/** \brief DHCP Proxy config 2 add / del request
-    @param client_index - opaque cookie to identify the sender
-    @param context - sender context, to match reply w/ request
-    @param rx_vrf_id - receive vrf id
-    @param server_vrf_id - server vrf id
-    @param if_ipv6 - ipv6 if non-zero, else ipv4
-    @param is_add - add the config if non-zero, else delete
-    @param insert_circuit_id - option82 suboption 1 fib number
-    @param dhcp_server[] - server address
-    @param dhcp_src_address[] - <fix this, need details>
-*/
-define dhcp_proxy_config_2
-{
-  u32 client_index;
-  u32 context;
-  u32 rx_vrf_id;
-  u32 server_vrf_id;
-  u8 is_ipv6;
-  u8 is_add;
-  u8 insert_circuit_id;
-  u8 dhcp_server[16];
-  u8 dhcp_src_address[16];
-};
-
-/** \brief DHCP Proxy config 2 add / del response
-    @param context - sender context, to match reply w/ request
-    @param retval - return code for request
-*/
-define dhcp_proxy_config_2_reply
-{
-  u32 context;
-  i32 retval;
-};
-
 /** \brief DHCP Proxy set / unset vss request
     @param client_index - opaque cookie to identify the sender
     @param context - sender context, to match reply w/ request
@@ -159,6 +126,32 @@ define dhcp_compl_event
   u8 host_mac[6];
 };
 
+/** \brief Dump DHCP proxy table
+    @param client_index - opaque cookie to identify the sender
+    @param is_ip6 - True for IPv6 proxy table
+*/
+define dhcp_proxy_dump
+{
+  u32 client_index;
+  u32 context;
+  u8  is_ip6;
+};
+
+/** \brief DHCP proxy configuration details, sent in reply to a dhcp_proxy_dump request
+    @param context - sender context, to match reply w/ request
+*/
+define dhcp_proxy_details
+{
+  u32 context;
+  u32 rx_vrf_id;
+  u32 server_vrf_id;
+  u32 vss_oui;
+  u32 vss_fib_id;
+  u8 is_ipv6;
+  u8 dhcp_server[16];
+  u8 dhcp_src_address[16];
+};
+
 /*
  * Local Variables:
  * eval: (c-set-style "gnu")
diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c
index 88b32b24..ce9039b7 100644
--- a/src/vnet/dhcp/dhcp_api.c
+++ b/src/vnet/dhcp/dhcp_api.c
@@ -46,7 +46,8 @@
 
 #define foreach_vpe_api_msg                       \
 _(DHCP_PROXY_CONFIG,dhcp_proxy_config)            \
-_(DHCP_PROXY_CONFIG_2,dhcp_proxy_config_2)        \
+_(DHCP_PROXY_DUMP,dhcp_proxy_dump)                \
+_(DHCP_PROXY_DETAILS,dhcp_proxy_details)          \
 _(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss)          \
 _(DHCP_CLIENT_CONFIG, dhcp_client_config)
 
@@ -58,8 +59,8 @@ dhcpv4_proxy_config (vl_api_dhcp_proxy_config_t * mp)
 
   rv = dhcp_proxy_set_server ((ip4_address_t *) (&mp->dhcp_server),
 			      (ip4_address_t *) (&mp->dhcp_src_address),
-			      (u32) ntohl (mp->vrf_id),
-			      (int) mp->insert_circuit_id,
+			      (u32) ntohl (mp->rx_vrf_id),
+			      (u32) ntohl (mp->server_vrf_id),
 			      (int) (mp->is_add == 0));
 
   REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY);
@@ -74,44 +75,11 @@ dhcpv6_proxy_config (vl_api_dhcp_proxy_config_t * mp)
 
   rv = dhcpv6_proxy_set_server ((ip6_address_t *) (&mp->dhcp_server),
 				(ip6_address_t *) (&mp->dhcp_src_address),
-				(u32) ntohl (mp->vrf_id),
-				(int) mp->insert_circuit_id,
-				(int) (mp->is_add == 0));
-
-  REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY);
-}
-
-static void
-dhcpv4_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp)
-{
-  vl_api_dhcp_proxy_config_reply_t *rmp;
-  int rv;
-
-  rv = dhcp_proxy_set_server_2 ((ip4_address_t *) (&mp->dhcp_server),
-				(ip4_address_t *) (&mp->dhcp_src_address),
 				(u32) ntohl (mp->rx_vrf_id),
 				(u32) ntohl (mp->server_vrf_id),
-				(int) mp->insert_circuit_id,
 				(int) (mp->is_add == 0));
 
-  REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY);
-}
-
-
-static void
-dhcpv6_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp)
-{
-  vl_api_dhcp_proxy_config_reply_t *rmp;
-  int rv = -1;
-
-  rv = dhcpv6_proxy_set_server_2 ((ip6_address_t *) (&mp->dhcp_server),
-				  (ip6_address_t *) (&mp->dhcp_src_address),
-				  (u32) ntohl (mp->rx_vrf_id),
-				  (u32) ntohl (mp->server_vrf_id),
-				  (int) mp->insert_circuit_id,
-				  (int) (mp->is_add == 0));
-
-  REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY);
+  REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY);
 }
 
 
@@ -143,6 +111,67 @@ static void vl_api_dhcp_proxy_config_t_handler
     dhcpv6_proxy_config (mp);
 }
 
+static void
+vl_api_dhcp_proxy_dump_t_handler (vl_api_dhcp_proxy_dump_t * mp)
+{
+  unix_shared_memory_queue_t *q;
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    return;
+
+  if (mp->is_ip6 == 0)
+    dhcp_proxy_dump (q, mp->context);
+  else
+    dhcpv6_proxy_dump (q, mp->context);
+}
+
+void
+dhcp_send_details (void *opaque,
+		   u32 context,
+		   const ip46_address_t * server,
+		   const ip46_address_t * src,
+		   u32 server_fib_id,
+		   u32 rx_fib_id, u32 vss_fib_id, u32 vss_oui)
+{
+  vl_api_dhcp_proxy_details_t *mp;
+  unix_shared_memory_queue_t *q = opaque;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  if (!mp)
+    return;
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_DETAILS);
+  mp->context = context;
+
+  mp->rx_vrf_id = htonl (rx_fib_id);
+  mp->server_vrf_id = htonl (server_fib_id);
+  mp->vss_oui = htonl (vss_oui);
+  mp->vss_fib_id = htonl (vss_fib_id);
+
+  mp->is_ipv6 = !ip46_address_is_ip4 (server);
+
+  if (mp->is_ipv6)
+    {
+      memcpy (mp->dhcp_server, server, 16);
+      memcpy (mp->dhcp_src_address, src, 16);
+    }
+  else
+    {
+      /* put the address in the first bytes */
+      memcpy (mp->dhcp_server, &server->ip4, 4);
+      memcpy (mp->dhcp_src_address, &src->ip4, 4);
+    }
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+
+static void
+vl_api_dhcp_proxy_details_t_handler (vl_api_dhcp_proxy_details_t * mp)
+{
+  clib_warning ("BUG");
+}
+
 void
 dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname,
 			   u8 is_ipv6, u8 * host_address, u8 * router_address,
@@ -172,15 +201,6 @@ dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname,
   vl_msg_api_send_shmem (q, (u8 *) & mp);
 }
 
-static void vl_api_dhcp_proxy_config_2_t_handler
-  (vl_api_dhcp_proxy_config_2_t * mp)
-{
-  if (mp->is_ipv6 == 0)
-    dhcpv4_proxy_config_2 (mp);
-  else
-    dhcpv6_proxy_config_2 (mp);
-}
-
 static void vl_api_dhcp_client_config_t_handler
   (vl_api_dhcp_client_config_t * mp)
 {
diff --git a/src/vnet/dhcp/proxy.h b/src/vnet/dhcp/proxy.h
index e12c0d00..4b115c74 100644
--- a/src/vnet/dhcp/proxy.h
+++ b/src/vnet/dhcp/proxy.h
@@ -27,7 +27,6 @@
 #include <vnet/pg/pg.h>
 #include <vnet/ip/format.h>
 #include <vnet/ip/udp.h>
-#include <vnet/dhcp/client.h>
 
 typedef enum {
 #define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n,
@@ -49,9 +48,7 @@ typedef union {
 typedef struct {
   ip4_address_t dhcp_server;
   ip4_address_t dhcp_src_address;
-  u32 insert_option_82;
   u32 server_fib_index;
-  u32 valid;
 } dhcp_server_t;
 
 typedef struct {
@@ -64,29 +61,39 @@ typedef struct {
   /* to drop pkts in server-to-client direction */
   u32 error_drop_node_index;
 
-  vss_info *opt82vss;
+  vss_info *vss;
 
   /* hash lookup specific vrf_id -> option 82 vss suboption  */
-  uword * opt82vss_index_by_vrf_id;
+  u32 *vss_index_by_rx_fib_index;
 
   /* convenience */
-  dhcp_client_main_t * dhcp_client_main;
   vlib_main_t * vlib_main;
   vnet_main_t * vnet_main;
 } dhcp_proxy_main_t;
 
-dhcp_proxy_main_t dhcp_proxy_main;
+extern dhcp_proxy_main_t dhcp_proxy_main;
 
-int dhcp_proxy_set_server (ip4_address_t *addr, ip4_address_t *src_address,
-                           u32 fib_id, int insert_option_82, int is_del);
+void dhcp_send_details (void *opaque,
+                        u32 context,
+                        const ip46_address_t *server,
+                        const ip46_address_t *src,
+                        u32 server_fib_id,
+                        u32 rx_fib_id,
+                        u32 vss_fib_id,
+                        u32 vss_oui);
 
-int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address,
-                             u32 rx_fib_id,
-                             u32 server_fib_id, 
-                             int insert_option_82, int is_del);
+int dhcp_proxy_set_server (ip4_address_t *addr,
+                           ip4_address_t *src_address,
+                           u32 fib_id,
+                           u32 server_fib_id, 
+                           int is_del);
 
 int dhcp_proxy_set_option82_vss(u32 vrf_id,
                                 u32 oui,
                                 u32 fib_id, 
                                 int is_del);
+
+void dhcp_proxy_dump(void *opaque,
+                     u32 context);
+
 #endif /* included_dhcp_proxy_h */
diff --git a/src/vnet/dhcp/proxy_error.def b/src/vnet/dhcp/proxy_error.def
index 6aa06eb5..6d790d73 100644
--- a/src/vnet/dhcp/proxy_error.def
+++ b/src/vnet/dhcp/proxy_error.def
@@ -21,7 +21,8 @@ dhcp_proxy_error (RELAY_TO_SERVER, "DHCP packets relayed to the server")
 dhcp_proxy_error (RELAY_TO_CLIENT, "DHCP packets relayed to clients")
 dhcp_proxy_error (OPTION_82_ERROR, "DHCP failed to insert option 82")
 dhcp_proxy_error (NO_OPTION_82, "DHCP option 82 missing")
-dhcp_proxy_error (BAD_OPTION_82, "Bad DHCP option 82 value")
+dhcp_proxy_error (BAD_OPTION_82_ITF, "Bad DHCP option 82 interface value")
+dhcp_proxy_error (BAD_OPTION_82_ADDR, "Bad DHCP option 82 address value")
 dhcp_proxy_error (BAD_FIB_ID, "DHCP option 82 fib-id to fib-index map failure")
 dhcp_proxy_error (NO_INTERFACE_ADDRESS, "DHCP no interface address")
 dhcp_proxy_error (OPTION_82_VSS_NOT_PROCESSED, "DHCP VSS not processed by DHCP server")
diff --git a/src/vnet/dhcp/proxy_node.c b/src/vnet/dhcp/proxy_node.c
index 6a58fcdb..ab6819fe 100644
--- a/src/vnet/dhcp/proxy_node.c
+++ b/src/vnet/dhcp/proxy_node.c
@@ -18,6 +18,7 @@
 #include <vlib/vlib.h>
 #include <vnet/pg/pg.h>
 #include <vnet/dhcp/proxy.h>
+#include <vnet/dhcp/client.h>
 #include <vnet/fib/ip4_fib.h>
 
 static char * dhcp_proxy_error_strings[] = {
@@ -57,6 +58,8 @@ typedef struct {
 vlib_node_registration_t dhcp_proxy_to_server_node;
 vlib_node_registration_t dhcp_proxy_to_client_node;
 
+dhcp_proxy_main_t dhcp_proxy_main;
+
 u8 * format_dhcp_proxy_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
@@ -94,6 +97,42 @@ u8 * format_dhcp_proxy_header_with_length (u8 * s, va_list * args)
   return s;
 }
 
+/* Return the VSS info bound to rx_fib_index, or NULL when the index
+ * vector is too short or holds ~0 for that table. */
+static inline vss_info *
+dhcp_get_vss_info (dhcp_proxy_main_t *dm,
+                   u32 rx_fib_index)
+{
+  u32 vss_index;
+
+  /* tables beyond the end of the vector have never been configured */
+  if (rx_fib_index >= vec_len(dm->vss_index_by_rx_fib_index))
+      return (NULL);
+
+  vss_index = dm->vss_index_by_rx_fib_index[rx_fib_index];
+
+  if (~0 == vss_index)
+      return (NULL);
+
+  return (pool_elt_at_index (dm->vss, vss_index));
+}
+
+/* Return the DHCP server bound to rx_fib_index, or NULL if there is none. */
+static inline dhcp_server_t *
+dhcp_get_server (dhcp_proxy_main_t *dm,
+                 u32 rx_fib_index)
+{
+  u32 server_index;
+
+  if (rx_fib_index >= vec_len(dm->dhcp_server_index_by_rx_fib_index))
+      return (NULL);
+
+  server_index = dm->dhcp_server_index_by_rx_fib_index[rx_fib_index];
+  if (~0 == server_index)
+      return (NULL);
+  return (pool_elt_at_index (dm->dhcp_servers, server_index));
+}
+
 static uword
 dhcp_proxy_to_server_input (vlib_main_t * vm,
                             vlib_node_runtime_t * node,
@@ -131,9 +170,12 @@ dhcp_proxy_to_server_input (vlib_main_t * vm,
           u32 sw_if_index = 0;
           u32 original_sw_if_index = 0;
           u8  *end = NULL;
-          u32 fib_index, server_index;
+          u32 fib_index;
           dhcp_server_t * server;
           u32 rx_sw_if_index;
+          dhcp_option_t *o;
+          u32 len = 0;
+          vlib_buffer_free_list_t *fl;
 
 	  bi0 = from[0];
 	  to_next[0] = bi0;
@@ -166,26 +208,16 @@ dhcp_proxy_to_server_input (vlib_main_t * vm,
           rx_sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX];
 
           fib_index = im->fib_index_by_sw_if_index [rx_sw_if_index];
-
-          if (fib_index < vec_len(dpm->dhcp_server_index_by_rx_fib_index))
-            server_index = dpm->dhcp_server_index_by_rx_fib_index[fib_index];
-          else
-            server_index = 0;
+          server = dhcp_get_server(dpm, fib_index);
           
-          if (PREDICT_FALSE (pool_is_free_index (dpm->dhcp_servers, 
-                                                 server_index)))
+          if (PREDICT_FALSE (NULL == server))
             {
-            no_server:
               error0 = DHCP_PROXY_ERROR_NO_SERVER;
               next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP;
               pkts_no_server++;
               goto do_trace;
             }
           
-          server = pool_elt_at_index (dpm->dhcp_servers, server_index);
-          if (server->valid == 0)
-            goto no_server;
-
           vlib_buffer_advance (b0, -(sizeof(*ip0)));
           ip0 = vlib_buffer_get_current (b0);
 
@@ -216,142 +248,131 @@ dhcp_proxy_to_server_input (vlib_main_t * vm,
           h0->gateway_ip_address.as_u32 = server->dhcp_src_address.as_u32;
           pkts_to_server++;
 
-          if (server->insert_option_82) 
-            {
-              u32 fib_index, fib_id, opt82_fib_id=0, opt82_oui=0;
-	      ip4_fib_t * fib;
-              dhcp_option_t *o = (dhcp_option_t *) h0->options;
-              u32 len = 0;
-              vlib_buffer_free_list_t *fl;
+          o = (dhcp_option_t *) h0->options;
               
-              fib_index = im->fib_index_by_sw_if_index 
-                [vnet_buffer(b0)->sw_if_index[VLIB_RX]];
-	      fib = ip4_fib_get (fib_index);
-	      fib_id = fib->table_id;
-
-              end = b0->data + b0->current_data + b0->current_length;
-              /* TLVs are not performance-friendly... */
-              while  (o->option != 0xFF /* end of options */ && (u8 *)o < end) 
-                  o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
-
-              fl = vlib_buffer_get_free_list (vm, b0->free_list_index);
-              // start write at (option*)o, some packets have padding
-              if (((u8 *)o - (u8 *)b0->data + VPP_DHCP_OPTION82_SIZE) > fl->n_data_bytes)
-                {
-                  next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP;
-                  pkts_too_big++;
-                  goto do_trace;
-                }
+          fib_index = im->fib_index_by_sw_if_index 
+              [vnet_buffer(b0)->sw_if_index[VLIB_RX]];
+
+          end = b0->data + b0->current_data + b0->current_length;
+          /* TLVs are not performance-friendly... check bounds before reading o */
+          while  ((u8 *)o < end && o->option != 0xFF /* end of options */) 
+              o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
 
-              if ((o->option == 0xFF)  && ((u8 *)o <= end))
-                {  
-                  vnet_main_t *vnm = vnet_get_main();   
-                  u16 old_l0, new_l0;
-                  ip4_address_t _ia0, * ia0 = &_ia0;
-                  uword  *p_vss;
-                  vss_info *vss;
-                  vnet_sw_interface_t *swif;
-                  sw_if_index = 0;
-                  original_sw_if_index = 0;
+          fl = vlib_buffer_get_free_list (vm, b0->free_list_index);
+          // start write at (option*)o, some packets have padding
+          if (((u8 *)o - (u8 *)b0->data + VPP_DHCP_OPTION82_SIZE) > fl->n_data_bytes)
+          {
+              next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP;
+              pkts_too_big++;
+              goto do_trace;
+          }
+
+          if ((o->option == 0xFF)  && ((u8 *)o <= end))
+          {  
+              vnet_main_t *vnm = vnet_get_main();   
+              u16 old_l0, new_l0;
+              ip4_address_t _ia0, * ia0 = &_ia0;
+              vss_info *vss;
+              vnet_sw_interface_t *swif;
+              sw_if_index = 0;
+              original_sw_if_index = 0;
                   
-                  original_sw_if_index = sw_if_index = 
-                      vnet_buffer(b0)->sw_if_index[VLIB_RX];
-                  swif = vnet_get_sw_interface (vnm, sw_if_index);
-                  if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)
-                      sw_if_index = swif->unnumbered_sw_if_index;
+              original_sw_if_index = sw_if_index = 
+                  vnet_buffer(b0)->sw_if_index[VLIB_RX];
+              swif = vnet_get_sw_interface (vnm, sw_if_index);
+              if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)
+                  sw_if_index = swif->unnumbered_sw_if_index;
                   
-                  p_vss = hash_get (dpm->opt82vss_index_by_vrf_id,
-                                    fib_id);
-                  if (p_vss) 
-                    {
-                      vss = pool_elt_at_index (dpm->opt82vss, p_vss[0]);
-                      opt82_oui =  vss->vpn_id.oui;
-                      opt82_fib_id =  vss->vpn_id.fib_id;
-                    }
-                  /* 
-                   * Get the first ip4 address on the [client-side] 
-                   * RX interface, if not unnumbered. otherwise use
-                   * the loopback interface's ip address.
-                   */
-                  ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0);
+              /* 
+               * Get the first ip4 address on the [client-side] 
+               * RX interface, if not unnumbered. otherwise use
+               * the loopback interface's ip address.
+               */
+              ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0);
                   
-                  if (ia0 == 0)
-                    {
-                      error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS;
-                      next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP;
-                      pkts_no_interface_address++;
-                      goto do_trace;
-                    }
-
-                  /* Add option 82 */
-                  o->option = 82;   /* option 82 */
-                  o->length = 12;   /* 12 octets to follow */
-                  o->data[0] = 1;   /* suboption 1, circuit ID (=FIB id) */
-                  o->data[1] = 4;   /* length of suboption */
-                  o->data[2] = (original_sw_if_index >> 24) & 0xFF;
-                  o->data[3] = (original_sw_if_index >> 16) & 0xFF;
-                  o->data[4] = (original_sw_if_index >> 8)  & 0xFF;
-                  o->data[5] = (original_sw_if_index >> 0)  & 0xFF;
-		  o->data[6] = 5; /* suboption 5 (client RX intfc address) */
-		  o->data[7] = 4; /* length 4 */
-		  o->data[8] = ia0->as_u8[0];
-		  o->data[9] = ia0->as_u8[1];
-		  o->data[10] = ia0->as_u8[2];
-		  o->data[11] = ia0->as_u8[3];
-                  o->data[12] = 0xFF;
-                  if (opt82_oui !=0 || opt82_fib_id != 0)
-                    {
-                      o->data[12] = 151; /* vss suboption */
-                      if (255 == opt82_fib_id) {
-                          o->data[13] = 1;   /* length */
-                          o->data[14] = 255;   /* vss option type */
-                          o->data[15] = 152; /* vss control suboption */
-                          o->data[16] = 0;   /* length */
-                          /* and a new "end-of-options" option (0xff) */
-                          o->data[17] = 0xFF;
-                          o->length += 5;
-                      } else {
-                          o->data[13] = 8;   /* length */
-                          o->data[14] = 1;   /* vss option type */
-                          o->data[15] = (opt82_oui >> 16) & 0xff;
-                          o->data[16] = (opt82_oui >> 8) & 0xff;
-                          o->data[17] = (opt82_oui ) & 0xff;
-                          o->data[18] = (opt82_fib_id >> 24) & 0xff;
-                          o->data[19] = (opt82_fib_id >> 16) & 0xff;
-                          o->data[20] = (opt82_fib_id >> 8) & 0xff;
-                          o->data[21] = (opt82_fib_id) & 0xff;
-                          o->data[22] = 152; /* vss control suboption */
-                          o->data[23] = 0;   /* length */
+              if (ia0 == 0)
+              {
+                  error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS;
+                  next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP;
+                  pkts_no_interface_address++;
+                  goto do_trace;
+              }
+
+              /* Add option 82 */
+              o->option = 82;   /* option 82 */
+              o->length = 12;   /* 12 octets to follow */
+              o->data[0] = 1;   /* suboption 1, circuit ID (=FIB id) */
+              o->data[1] = 4;   /* length of suboption */
+              o->data[2] = (original_sw_if_index >> 24) & 0xFF;
+              o->data[3] = (original_sw_if_index >> 16) & 0xFF;
+              o->data[4] = (original_sw_if_index >> 8)  & 0xFF;
+              o->data[5] = (original_sw_if_index >> 0)  & 0xFF;
+              o->data[6] = 5; /* suboption 5 (client RX intfc address) */
+              o->data[7] = 4; /* length 4 */
+              o->data[8] = ia0->as_u8[0];
+              o->data[9] = ia0->as_u8[1];
+              o->data[10] = ia0->as_u8[2];
+              o->data[11] = ia0->as_u8[3];
+              o->data[12] = 0xFF;
+
+              vss = dhcp_get_vss_info (dpm, fib_index);
+              if (NULL != vss)
+              {
+                  u32 opt82_fib_id=0, opt82_oui=0;
+
+                  opt82_oui =  vss->vpn_id.oui;
+                  opt82_fib_id =  vss->vpn_id.fib_id;
+
+                  o->data[12] = 151; /* vss suboption */
+                  if (255 == opt82_fib_id) {
+                      o->data[13] = 1;   /* length */
+                      o->data[14] = 255;   /* vss option type */
+                      o->data[15] = 152; /* vss control suboption */
+                      o->data[16] = 0;   /* length */
+                      /* and a new "end-of-options" option (0xff) */
+                      o->data[17] = 0xFF;
+                      o->length += 5;
+                  } else {
+                      o->data[13] = 8;   /* length */
+                      o->data[14] = 1;   /* vss option type */
+                      o->data[15] = (opt82_oui >> 16) & 0xff;
+                      o->data[16] = (opt82_oui >> 8) & 0xff;
+                      o->data[17] = (opt82_oui ) & 0xff;
+                      o->data[18] = (opt82_fib_id >> 24) & 0xff;
+                      o->data[19] = (opt82_fib_id >> 16) & 0xff;
+                      o->data[20] = (opt82_fib_id >> 8) & 0xff;
+                      o->data[21] = (opt82_fib_id) & 0xff;
+                      o->data[22] = 152; /* vss control suboption */
+                      o->data[23] = 0;   /* length */
                           
-                          /* and a new "end-of-options" option (0xff) */
-                          o->data[24] = 0xFF;
-                          o->length += 12;
-                      }
+                      /* and a new "end-of-options" option (0xff) */
+                      o->data[24] = 0xFF;
+                      o->length += 12;
                   }
-
-                  len = o->length + 3;
-                  b0->current_length += len;
-                  /* Fix IP header length and checksum */
-                  old_l0 = ip0->length;
-                  new_l0 = clib_net_to_host_u16 (old_l0);
-                  new_l0 += len;
-                  new_l0 = clib_host_to_net_u16 (new_l0);
-                  ip0->length = new_l0;
-                  sum0 = ip0->checksum;
-                  sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
-                                         length /* changed member */);
-                  ip0->checksum = ip_csum_fold (sum0);
-
-                  /* Fix UDP length */
-                  new_l0 = clib_net_to_host_u16 (u0->length);
-                  new_l0 += len;
-                  u0->length = clib_host_to_net_u16 (new_l0);
-                } else {
-                  vlib_node_increment_counter 
-                      (vm, dhcp_proxy_to_server_node.index,
-                       DHCP_PROXY_ERROR_OPTION_82_ERROR, 1);
-                }
-            }
+              }
+
+              len = o->length + 3;
+              b0->current_length += len;
+              /* Fix IP header length and checksum */
+              old_l0 = ip0->length;
+              new_l0 = clib_net_to_host_u16 (old_l0);
+              new_l0 += len;
+              new_l0 = clib_host_to_net_u16 (new_l0);
+              ip0->length = new_l0;
+              sum0 = ip0->checksum;
+              sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+                                     length /* changed member */);
+              ip0->checksum = ip_csum_fold (sum0);
+
+              /* Fix UDP length */
+              new_l0 = clib_net_to_host_u16 (u0->length);
+              new_l0 += len;
+              u0->length = clib_host_to_net_u16 (new_l0);
+          } else {
+              vlib_node_increment_counter 
+                  (vm, dhcp_proxy_to_server_node.index,
+                   DHCP_PROXY_ERROR_OPTION_82_ERROR, 1);
+          }
           
           next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP;
 
@@ -451,11 +472,13 @@ dhcp_proxy_to_client_input (vlib_main_t * vm,
       vnet_sw_interface_t *si0;
       u32 error0 = (u32)~0;
       vnet_sw_interface_t *swif;
-      u32 server_index;
       u32 fib_index;
       dhcp_server_t * server;
       u32 original_sw_if_index = (u32) ~0;
-          
+      ip4_address_t relay_addr = {
+          .as_u32 = 0,
+      };
+
       bi0 = from[0];
       from += 1;
       n_left_from -= 1;
@@ -501,13 +524,21 @@ dhcp_proxy_to_client_input (vlib_main_t * vm,
                            and the sw_if_index */
                         if (sub->option == 1 && sub->length == 4)
                           {
-                            sw_if_index = (o->data[2] << 24)
-                                | (o->data[3] << 16)
-                                | (o->data[4] << 8)
-                                | (o->data[5]);
-                          } else if (sub->option == 151 &&
-                                     sub->length == 7 &&
-                                     sub->data[0] == 1)
+                            sw_if_index = ((sub->data[0] << 24) |
+                                           (sub->data[1] << 16) |
+                                           (sub->data[2] << 8)  |
+                                           (sub->data[3]));
+                          }
+                        else if (sub->option == 5 && sub->length == 4)
+                          {
+                              relay_addr.as_u8[0] = sub->data[0];
+                              relay_addr.as_u8[1] = sub->data[1];
+                              relay_addr.as_u8[2] = sub->data[2];
+                              relay_addr.as_u8[3] = sub->data[3];
+                          }
+                        else if (sub->option == 151 &&
+                                 sub->length == 7 &&
+                                 sub->data[0] == 1)
                             vss_exist = 1;
                         else if (sub->option == 152 && sub->length == 0)
                             vss_ctrl = 1;
@@ -539,34 +570,27 @@ dhcp_proxy_to_client_input (vlib_main_t * vm,
           goto do_trace;
         }
       
+      if (relay_addr.as_u32 == 0)
+        {
+          error0 = DHCP_PROXY_ERROR_BAD_OPTION_82_ADDR;
+          goto drop_packet;
+        }
 
       if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index))
         {
-          error0 = DHCP_PROXY_ERROR_BAD_OPTION_82;
+          error0 = DHCP_PROXY_ERROR_BAD_OPTION_82_ITF;
           goto drop_packet;
         }
 
       fib_index = im->fib_index_by_sw_if_index [sw_if_index];
+      server = dhcp_get_server(dpm, fib_index);
 
-      if (fib_index < vec_len(dpm->dhcp_server_index_by_rx_fib_index))
-        server_index = dpm->dhcp_server_index_by_rx_fib_index[fib_index];
-      else
-        server_index = 0;
-
-      if (PREDICT_FALSE (pool_is_free_index (dpm->dhcp_servers, 
-                                             server_index)))
-        {
-          error0 = DHCP_PROXY_ERROR_BAD_OPTION_82;
-          goto drop_packet;
-        }
-      
-      server = pool_elt_at_index (dpm->dhcp_servers, server_index);
-      if (server->valid == 0)
+      if (PREDICT_FALSE (NULL == server))
         {
           error0 = DHCP_PROXY_ERROR_NO_SERVER;
           goto drop_packet;
         }
-
+      
       if (ip0->src_address.as_u32 != server->dhcp_server.as_u32)
         {             
           error0 = DHCP_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS;
@@ -587,6 +611,12 @@ dhcp_proxy_to_client_input (vlib_main_t * vm,
           goto drop_packet;
         }
 
+      if (relay_addr.as_u32 != ia0->as_u32)
+        {             
+          error0 = DHCP_PROXY_ERROR_BAD_YIADDR;
+          goto drop_packet;
+        }
+
       u0->checksum = 0;
       u0->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcp_to_client);
       sum0 = ip0->checksum;
@@ -677,7 +707,7 @@ clib_error_t * dhcp_proxy_init (vlib_main_t * vm)
   error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
   dm->error_drop_node_index = error_drop_node->index;
 
-  dm->opt82vss_index_by_vrf_id = hash_create (0, sizeof (uword));
+  dm->vss_index_by_rx_fib_index = NULL;
 
   udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_client, 
                          dhcp_proxy_to_client_node.index, 1 /* is_ip4 */);
@@ -694,15 +724,17 @@ clib_error_t * dhcp_proxy_init (vlib_main_t * vm)
 
 VLIB_INIT_FUNCTION (dhcp_proxy_init);
 
-int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address,
-                             u32 rx_fib_id,
-                             u32 server_fib_id, 
-                             int insert_option_82, int is_del)
+int dhcp_proxy_set_server (ip4_address_t *addr,
+                           ip4_address_t *src_address,
+                           u32 rx_fib_id,
+                           u32 server_fib_id, 
+                           int is_del)
 {
   dhcp_proxy_main_t * dpm = &dhcp_proxy_main;
   dhcp_server_t * server = 0;
   u32 server_index = 0;
   u32 rx_fib_index = 0;
+
   const fib_prefix_t all_1s =
   {
       .fp_len = 32,
@@ -719,97 +751,68 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address,
   rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
                                                    rx_fib_id);
 
-  if (rx_fib_id == 0)
-    {
-      server = pool_elt_at_index (dpm->dhcp_servers, 0);
-      
-      if (is_del)
-        {
-          memset (server, 0, sizeof (*server));
-          fib_table_entry_special_remove(rx_fib_index,
-                                         &all_1s,
-                                         FIB_SOURCE_DHCP);
-         return 0;
-        }
-      if (!server->valid)
-          fib_table_entry_special_add(rx_fib_index,
-                                      &all_1s,
-                                      FIB_SOURCE_DHCP,
-                                      FIB_ENTRY_FLAG_LOCAL,
-                                      ADJ_INDEX_INVALID);
-
-      goto initialize_it;
-    }
-
   if (is_del)
     {
       if (rx_fib_index >= vec_len(dpm->dhcp_server_index_by_rx_fib_index))
         return VNET_API_ERROR_NO_SUCH_ENTRY;
       
       server_index = dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index];
-      ASSERT(server_index > 0);
+
+      if (server_index == ~0)
+        return VNET_API_ERROR_NO_SUCH_ENTRY;
 
       /* Use the default server again.  */
-      dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = 0;
+      dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = ~0;
       server = pool_elt_at_index (dpm->dhcp_servers, server_index);
-      memset (server, 0, sizeof (*server));
-      pool_put (dpm->dhcp_servers, server);
 
       fib_table_entry_special_remove(rx_fib_index,
                                      &all_1s,
                                      FIB_SOURCE_DHCP);
+      fib_table_unlock (rx_fib_index,
+                        FIB_PROTOCOL_IP4);
+      fib_table_unlock (server->server_fib_index,
+                        FIB_PROTOCOL_IP4);
 
+      memset (server, 0, sizeof (*server));
+      pool_put (dpm->dhcp_servers, server);
       return 0;
     }
-
-  if (rx_fib_index < vec_len(dpm->dhcp_server_index_by_rx_fib_index))
-    {
-      server_index = dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index];
-      if (server_index != 0)
-        {
-          server = pool_elt_at_index (dpm->dhcp_servers, server_index);
-          goto initialize_it;
-        }
-    }
-
-  pool_get (dpm->dhcp_servers, server);
-
-  fib_table_entry_special_add(rx_fib_index,
-                              &all_1s,
-                              FIB_SOURCE_DHCP,
-                              FIB_ENTRY_FLAG_LOCAL,
-                              ADJ_INDEX_INVALID);
-  
- initialize_it:
-
-
-  server->dhcp_server.as_u32 = addr->as_u32;
-  server->server_fib_index = 
-      fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
-	  				server_fib_id);
-  server->dhcp_src_address.as_u32 = src_address->as_u32;
-  server->insert_option_82 = insert_option_82;
-  server->valid = 1;
-  if (rx_fib_index)
-    {
-      vec_validate (dpm->dhcp_server_index_by_rx_fib_index, rx_fib_index);
+  else
+  {
+      vec_validate_init_empty(dpm->dhcp_server_index_by_rx_fib_index,
+                              rx_fib_index,
+                              ~0);
+
+      pool_get (dpm->dhcp_servers, server);
+
+      server->dhcp_server.as_u32 = addr->as_u32;
+      server->dhcp_src_address.as_u32 = src_address->as_u32;
+
+      fib_table_entry_special_add(rx_fib_index,
+                                  &all_1s,
+                                  FIB_SOURCE_DHCP,
+                                  FIB_ENTRY_FLAG_LOCAL,
+                                  ADJ_INDEX_INVALID);
+      fib_table_lock (rx_fib_index,
+                      FIB_PROTOCOL_IP4);
+
+      server->server_fib_index = 
+          fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
+                                            server_fib_id);
+
+      vec_validate_init_empty (dpm->dhcp_server_index_by_rx_fib_index,
+                               rx_fib_index,
+                               ~0);
       dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = 
-        server - dpm->dhcp_servers;
-    }
+          server - dpm->dhcp_servers;
+  }
 
-  return 0;
-}
+  fib_table_unlock (rx_fib_index,
+                    FIB_PROTOCOL_IP4);
 
-/* Old API, manipulates the default server (only) */
-int dhcp_proxy_set_server (ip4_address_t *addr, ip4_address_t *src_address,
-                           u32 fib_id, int insert_option_82, int is_del)
-{
-  return dhcp_proxy_set_server_2 (addr, src_address, 0 /* rx_fib_id */,
-                                  fib_id /* server_fib_id */, 
-                                  insert_option_82, is_del);
+  return 0;
 }
 
-
 static clib_error_t *
 dhcp_proxy_set_command_fn (vlib_main_t * vm,
                            unformat_input_t * input,
@@ -818,7 +821,6 @@ dhcp_proxy_set_command_fn (vlib_main_t * vm,
   ip4_address_t server_addr, src_addr;
   u32 server_fib_id = 0, rx_fib_id = 0;
   int is_del = 0;
-  int add_option_82 = 0;
   int set_src = 0, set_server = 0;
   
   while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) 
@@ -833,9 +835,6 @@ dhcp_proxy_set_command_fn (vlib_main_t * vm,
       else if (unformat(input, "src-address %U", 
 			unformat_ip4_address, &src_addr))
         set_src = 1;
-      else if (unformat (input, "add-option-82")
-               || unformat (input, "insert-option-82"))
-        add_option_82 = 1;
       else if (unformat (input, "delete") ||
                unformat (input, "del"))
         is_del = 1;
@@ -847,8 +846,8 @@ dhcp_proxy_set_command_fn (vlib_main_t * vm,
     {
       int rv;
 
-      rv = dhcp_proxy_set_server_2 (&server_addr, &src_addr, rx_fib_id, 
-                                    server_fib_id, add_option_82, is_del);
+      rv = dhcp_proxy_set_server (&server_addr, &src_addr, rx_fib_id, 
+                                  server_fib_id, is_del);
       switch (rv)
         {
         case 0:
@@ -882,7 +881,7 @@ dhcp_proxy_set_command_fn (vlib_main_t * vm,
 
 VLIB_CLI_COMMAND (dhcp_proxy_set_command, static) = {
   .path = "set dhcp proxy",
-  .short_help = "set dhcp proxy [del] server <ip-addr> src-address <ip-addr> [add-option-82] [server-fib-id <n>] [rx-fib-id <n>]",
+  .short_help = "set dhcp proxy [del] server <ip-addr> src-address <ip-addr> [server-fib-id <n>] [rx-fib-id <n>]",
   .function = dhcp_proxy_set_command_fn,
 };
 
@@ -896,8 +895,8 @@ u8 * format_dhcp_proxy_server (u8 * s, va_list * args)
 
   if (dm == 0)
     {
-      s = format (s, "%=16s%=16s%=14s%=14s%=20s", "Server", "Src Address", 
-                  "Server FIB", "RX FIB", "Insert Option 82");
+      s = format (s, "%=16s%=16s%=14s%=14s", "Server", "Src Address", 
+                  "Server FIB", "RX FIB");
       return s;
     }
 
@@ -911,11 +910,10 @@ u8 * format_dhcp_proxy_server (u8 * s, va_list * args)
   if (rx_fib)
     rx_fib_id = rx_fib->table_id;
 
-  s = format (s, "%=16U%=16U%=14u%=14u%=20s",
+  s = format (s, "%=16U%=16U%=14u%=14u",
               format_ip4_address, &server->dhcp_server,
               format_ip4_address, &server->dhcp_src_address,
-              server_fib_id, rx_fib_id,
-              server->insert_option_82 ? "yes" : "no");
+              server_fib_id, rx_fib_id);
   return s;
 }
 
@@ -925,24 +923,22 @@ dhcp_proxy_show_command_fn (vlib_main_t * vm,
                             vlib_cli_command_t * cmd)
 {
   dhcp_proxy_main_t * dpm = &dhcp_proxy_main;
-  ip4_main_t * im = &ip4_main;
   dhcp_server_t * server;
-  u32 server_index;
-  int i;
+  u32 server_index, i;
 
   vlib_cli_output (vm, "%U", format_dhcp_proxy_server, 0 /* header line */,
                    0, 0);
 
-  for (i = 0; i < vec_len (im->fibs); i++)
-    {
-      if (i < vec_len(dpm->dhcp_server_index_by_rx_fib_index))
-        server_index = dpm->dhcp_server_index_by_rx_fib_index[i];
-      else
-        server_index = 0;
+  vec_foreach_index (i, dpm->dhcp_server_index_by_rx_fib_index)
+  {
+      server_index = dpm->dhcp_server_index_by_rx_fib_index[i];
+      if (~0 == server_index)
+          continue;
+
       server = pool_elt_at_index (dpm->dhcp_servers, server_index);
-      if (server->valid)
-        vlib_cli_output (vm, "%U", format_dhcp_proxy_server, dpm, 
-                         server, i);
+
+      vlib_cli_output (vm, "%U", format_dhcp_proxy_server, dpm, 
+                       server, i);
     }
 
   return 0;
@@ -954,50 +950,104 @@ VLIB_CLI_COMMAND (dhcp_proxy_show_command, static) = {
   .function = dhcp_proxy_show_command_fn,
 };
 
+/* Report every configured proxy server: for each RX FIB index with a
+ * server bound, pass the addresses, table ids and any VSS settings
+ * (0 when absent) to dhcp_send_details(). */
+void
+dhcp_proxy_dump (void *opaque, u32 context)
+{
+  dhcp_proxy_main_t * dpm = &dhcp_proxy_main;
+  u32 rx_fib_index;
+
+  vec_foreach_index (rx_fib_index, dpm->dhcp_server_index_by_rx_fib_index)
+  {
+      u32 server_index = dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index];
+      u32 vss_fib_id = 0, vss_oui = 0;
+      dhcp_server_t * server;
+      vss_info *vss;
+
+      /* ~0 marks an RX table with no server attached */
+      if (~0 == server_index)
+          continue;
+
+      server = pool_elt_at_index (dpm->dhcp_servers, server_index);
+      vss = dhcp_get_vss_info(dpm, rx_fib_index);
+      if (NULL != vss)
+      {
+          vss_fib_id = vss->vpn_id.fib_id;
+          vss_oui = vss->vpn_id.oui;
+      }
+
+      ip46_address_t src_addr = { .ip4 = server->dhcp_src_address, };
+      ip46_address_t server_addr = { .ip4 = server->dhcp_server, };
+
+      dhcp_send_details(opaque, context,
+                        &server_addr, &src_addr,
+                        ip4_fib_get(server->server_fib_index)->table_id,
+                        ip4_fib_get(rx_fib_index)->table_id,
+                        vss_fib_id,
+                        vss_oui);
+  }
+}
 
-int dhcp_proxy_set_option82_vss(  u32 vrf_id,
-                                  u32 oui,
-                                  u32 fib_id, 
-                                  int is_del)
+int dhcp_proxy_set_option82_vss(u32 tbl_id, /* id of the RX table the VSS info applies to */
+                                u32 oui,
+                                u32 fib_id, 
+                                int is_del) /* non-zero: remove the entry */
 {
   dhcp_proxy_main_t *dm = &dhcp_proxy_main;
-  uword *p;
-  vss_info *a;
-  u32 old_oui=0, old_fib_id=0;
+  vss_info *v = NULL;
+  u32  rx_fib_index;
+  int rc = 0; /* VNET_API_ERROR_NO_SUCH_ENTRY when deleting a missing entry */
   
-  p = hash_get (dm->opt82vss_index_by_vrf_id, vrf_id);
+  rx_fib_index = ip4_fib_table_find_or_create_and_lock(tbl_id);
+  v = dhcp_get_vss_info(dm, rx_fib_index); /* NULL if none is bound to this table */
 
-  if (p) 
-    {
-      a = pool_elt_at_index (dm->opt82vss, p[0]);
-      if (!a) 
-        return VNET_API_ERROR_NO_SUCH_FIB;
-      old_oui = a->vpn_id.oui;
-      old_fib_id = a->vpn_id.fib_id;
-          
+  if (NULL != v)
+  {
       if (is_del)
-        {
-          if (old_oui == oui &&
-              old_fib_id == fib_id)
-            {
-              pool_put(dm->opt82vss, a);
-              hash_unset (dm->opt82vss_index_by_vrf_id, vrf_id);
-              return 0;
-            }
-          else
-            return VNET_API_ERROR_NO_SUCH_ENTRY;
-        }
-      pool_put(dm->opt82vss, a);
-      hash_unset (dm->opt82vss_index_by_vrf_id, vrf_id);
-  } else if (is_del)
-    return VNET_API_ERROR_NO_SUCH_ENTRY;
-  pool_get (dm->opt82vss, a);
-  memset (a, ~0, sizeof (a[0]));
-  a->vpn_id.oui = oui;
-  a->vpn_id.fib_id = fib_id;
-  hash_set (dm->opt82vss_index_by_vrf_id, vrf_id, a - dm->opt82vss);
+      {
+          /* release the lock held on the table when the VSS
+           * info was created */
+          fib_table_unlock (rx_fib_index,
+                            FIB_PROTOCOL_IP4);
+
+          pool_put (dm->vss, v);
+          dm->vss_index_by_rx_fib_index[rx_fib_index] = ~0; /* ~0 == no VSS info */
+      }
+      else
+      {
+          /* this is a modify: overwrite the existing entry in place */
+          v->vpn_id.fib_id = fib_id;
+          v->vpn_id.oui = oui;
+      }
+  }
+  else
+  {
+      if (is_del)
+          rc = VNET_API_ERROR_NO_SUCH_ENTRY;
+      else
+      {
+          /* create a new entry */
+          vec_validate_init_empty(dm->vss_index_by_rx_fib_index,
+                                  rx_fib_index, ~0);
+
+          /* hold a lock on the table whilst the VSS info exists */
+          fib_table_lock (rx_fib_index,
+                          FIB_PROTOCOL_IP4);
+
+          pool_get (dm->vss, v);
+          v->vpn_id.fib_id = fib_id;
+          v->vpn_id.oui = oui;
+          dm->vss_index_by_rx_fib_index[rx_fib_index] = v - dm->vss;
+      }
+  }
+
+  /* Release the lock taken during the create_or_lock at the start */
+  fib_table_unlock (rx_fib_index,
+                    FIB_PROTOCOL_IP4);
   
-  return 0;
+  return (rc);
 }
 
 static clib_error_t *
@@ -1065,20 +1115,20 @@ dhcp_vss_show_command_fn (vlib_main_t * vm,
   
 {
   dhcp_proxy_main_t * dm = &dhcp_proxy_main;
+  u32 fib_index;
   vss_info *v;
-  u32 oui;
-  u32 fib_id;
-  u32 tbl_id;
-  uword index;
   
   vlib_cli_output (vm, "%=9s%=11s%=12s","Table", "OUI", "VPN-ID");
-  hash_foreach (tbl_id, index, dm->opt82vss_index_by_vrf_id,
-  ({
-     v = pool_elt_at_index (dm->opt82vss, index);
-     oui = v->vpn_id.oui;
-     fib_id = v->vpn_id.fib_id;
-     vlib_cli_output (vm, "%=9d 0x%08x%=12d",
-                      tbl_id, oui, fib_id);
-  }));
+  /* indexed by RX FIB index; each slot is a VSS pool index, ~0 if none */
+  vec_foreach_index (fib_index, dm->vss_index_by_rx_fib_index)
+  {
+      if (~0 == dm->vss_index_by_rx_fib_index[fib_index])
+          continue;
+      v = pool_elt_at_index (dm->vss,
+                             dm->vss_index_by_rx_fib_index[fib_index]);
+      vlib_cli_output (vm, "%=9d 0x%08x%=12d",
+                       ip4_fib_get (fib_index)->table_id,
+                       v->vpn_id.oui, v->vpn_id.fib_id);
+  }
   
   return 0;
diff --git a/src/vnet/dhcpv6/proxy.h b/src/vnet/dhcpv6/proxy.h
index 9e18913a..77ced361 100644
--- a/src/vnet/dhcpv6/proxy.h
+++ b/src/vnet/dhcpv6/proxy.h
@@ -48,9 +48,7 @@ typedef union {
 typedef struct {
   ip6_address_t dhcp6_server;
   ip6_address_t dhcp6_src_address;
-  u32 insert_vss;
   u32 server_fib6_index;
-  u32 valid;
 } dhcpv6_server_t;
 
 typedef struct {
@@ -70,7 +68,7 @@ typedef struct {
   dhcpv6_vss_info *vss;
 
-  /* hash lookup specific vrf_id -> VSS vector index*/
-  uword  *vss_index_by_vrf_id;
+  /* vector indexed by RX FIB index -> VSS pool index, ~0 if none */
+  u32 *vss_index_by_rx_fib_index;
    
   /* convenience */
   vlib_main_t * vlib_main;
@@ -79,17 +77,18 @@ typedef struct {
 
 dhcpv6_proxy_main_t dhcpv6_proxy_main;
 
-int dhcpv6_proxy_set_server (ip6_address_t *addr, ip6_address_t *src_address,
-                             u32 fib_id, int insert_vss, int is_del);
-
 int dhcpv6_proxy_set_vss(u32 tbl_id,
                          u32 oui,
                          u32 fib_id, 
                          int is_del);
 
-int dhcpv6_proxy_set_server_2 (ip6_address_t *addr, ip6_address_t *src_address,
-                             u32 rx_fib_id,
-                             u32 server_fib_id,
-                             int insert_vss, int is_del);
+int dhcpv6_proxy_set_server(ip6_address_t *addr,
+                            ip6_address_t *src_address,
+                            u32 rx_fib_id,
+                            u32 server_fib_id,
+                            int is_del);
+
+void dhcpv6_proxy_dump(void *opaque,
+                       u32 context);
 
 #endif /* included_dhcpv6_proxy_h */
diff --git a/src/vnet/dhcpv6/proxy_node.c b/src/vnet/dhcpv6/proxy_node.c
index 4137624c..f40798e6 100644
--- a/src/vnet/dhcpv6/proxy_node.c
+++ b/src/vnet/dhcpv6/proxy_node.c
@@ -18,6 +18,7 @@
 #include <vlib/vlib.h>
 #include <vnet/pg/pg.h>
 #include <vnet/dhcpv6/proxy.h>
+#include <vnet/dhcp/proxy.h>
 #include <vnet/fib/ip6_fib.h>
 #include <vnet/mfib/mfib_table.h>
 #include <vnet/mfib/ip6_mfib.h>
@@ -117,6 +118,42 @@ static inline void copy_ip6_address (ip6_address_t *dst, ip6_address_t *src)
   dst->as_u64[1] = src->as_u64[1];
 } 
 
+/* Find the VSS info configured for an RX FIB index. Returns NULL when
+ * the index is beyond the vector or its slot holds ~0 (no VSS info). */
+static inline dhcpv6_vss_info *
+dhcpv6_get_vss_info (dhcpv6_proxy_main_t *dm,
+                     u32 rx_fib_index)
+{
+  u32 vss_index;
+
+  if (rx_fib_index >= vec_len(dm->vss_index_by_rx_fib_index))
+      return (NULL);
+
+  vss_index = dm->vss_index_by_rx_fib_index[rx_fib_index];
+
+  if (~0 == vss_index)
+      return (NULL);
+
+  /* the slot value is an index into the VSS pool */
+  return (pool_elt_at_index (dm->vss, vss_index));
+}
+
+/* Return the server serving rx_fib_index, or NULL when none is configured. */
+static inline dhcpv6_server_t *
+dhcpv6_get_server (dhcpv6_proxy_main_t *dm,
+                   u32 rx_fib_index)
+{
+  u32 server_index;
+
+  if (rx_fib_index >= vec_len(dm->dhcp6_server_index_by_rx_fib_index))
+      return (NULL);
+
+  server_index = dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index];
+
+  return ((~0 == server_index) ? NULL :
+          pool_elt_at_index (dm->dhcp6_servers, server_index));
+}
+
 static uword
 dhcpv6_proxy_to_server_input (vlib_main_t * vm,
                             vlib_node_runtime_t * node,
@@ -132,13 +169,10 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
   u32 pkts_wrong_msg_type=0;
   u32 pkts_too_big=0;
   ip6_main_t * im = &ip6_main;
-  ip6_fib_t * fib;
   ip6_address_t * src;
   int bogus_length;
   dhcpv6_server_t * server;
   u32  rx_fib_idx = 0, server_fib_idx = 0;
-  u32 server_idx;
-  u32 fib_id1 = 0;
 
   next_index = node->cached_next_index;
 
@@ -172,12 +206,8 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
           ethernet_header_t * e_h0;
           u8 client_src_mac[6];
           vlib_buffer_free_list_t *fl;
-
-          uword *p_vss;
-          u32  oui1=0;
           dhcpv6_vss_info *vss;
 
-
 	  bi0 = from[0];
 	  to_next[0] = bi0;
 	  from += 1;
@@ -228,25 +258,15 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
           /* Send to DHCPV6 server via the configured FIB */
           rx_sw_if_index = sw_if_index =  vnet_buffer(b0)->sw_if_index[VLIB_RX];
           rx_fib_idx = im->fib_index_by_sw_if_index [rx_sw_if_index];
+          server = dhcpv6_get_server(dpm, rx_fib_idx);
 
-	  if (vec_len(dpm->dhcp6_server_index_by_rx_fib_index) <= rx_fib_idx)
-	    goto no_server;
-
-	  server_idx = dpm->dhcp6_server_index_by_rx_fib_index[rx_fib_idx];
-
-          if (PREDICT_FALSE (pool_is_free_index (dpm->dhcp6_servers,
-                                                          server_idx)))
-                     {
-                     no_server:
-                       error0 = DHCPV6_PROXY_ERROR_NO_SERVER;
-                       next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP;
-                       pkts_no_server++;
-                       goto do_trace;
-                     }
-
-          server = pool_elt_at_index(dpm->dhcp6_servers, server_idx);
-          if (server->valid == 0)
-            goto no_server;
+          if (PREDICT_FALSE (NULL == server))
+          {
+              error0 = DHCPV6_PROXY_ERROR_NO_SERVER;
+              next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP;
+              pkts_no_server++;
+              goto do_trace;
+          }
 
           server_fib_idx = server->server_fib6_index;
           vnet_buffer(b0)->sw_if_index[VLIB_TX] = server_fib_idx;
@@ -331,19 +351,6 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
           id1 = (dhcpv6_int_id_t *) (((uword) ip1) + b0->current_length);
           b0->current_length += (sizeof (*id1));
 
-
-          fib = ip6_fib_get (rx_fib_idx);
-
-          //TODO: Revisit if hash makes sense here
-          p_vss = hash_get (dpm->vss_index_by_vrf_id,
-                            fib->table_id);
-          if (p_vss)
-            {
-              vss = pool_elt_at_index (dpm->vss, p_vss[0]);
-              oui1 =  vss->vpn_id.oui;
-              fib_id1 =  vss->vpn_id.fib_id;
-            }
-
           id1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_INTERFACE_ID);
           id1->opt.length = clib_host_to_net_u16(sizeof(rx_sw_if_index));
           id1->int_idx = clib_host_to_net_u32(rx_sw_if_index);
@@ -360,20 +367,24 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
                clib_memcpy(cmac->data, client_src_mac, 6);
                u1->length += sizeof(*cmac);
             }
-          if (server->insert_vss !=0 ) {
+
+          /* fetch the VSS info, if any, configured for the RX FIB */
+          vss = dhcpv6_get_vss_info(dpm, rx_fib_idx);
+
+          if (NULL != vss) {
               vss1 = (dhcpv6_vss_t *) (((uword) ip1) + b0->current_length);
               b0->current_length += (sizeof (*vss1));
               vss1->opt.length =clib_host_to_net_u16(sizeof(*vss1) -
 						     sizeof(vss1->opt));
               vss1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_VSS);
               vss1->data[0] = 1;   // type
-              vss1->data[1] = oui1>>16 & 0xff;
-              vss1->data[2] = oui1>>8  & 0xff;
-              vss1->data[3] = oui1 & 0xff;
-              vss1->data[4] = fib_id1>>24 & 0xff;
-              vss1->data[5] = fib_id1>>16 & 0xff;
-              vss1->data[6] = fib_id1>>8 & 0xff;
-              vss1->data[7] = fib_id1 & 0xff;
+              vss1->data[1] = vss->vpn_id.oui >>16 & 0xff;
+              vss1->data[2] = vss->vpn_id.oui >>8  & 0xff;
+              vss1->data[3] = vss->vpn_id.oui & 0xff;
+              vss1->data[4] = vss->vpn_id.fib_id >> 24 & 0xff;
+              vss1->data[5] = vss->vpn_id.fib_id >> 16 & 0xff;
+              vss1->data[6] = vss->vpn_id.fib_id >> 8 & 0xff;
+              vss1->data[7] = vss->vpn_id.fib_id & 0xff;
               u1->length += sizeof(*vss1);
           }
 
@@ -524,9 +535,8 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm,
       u16 len = 0;
       u8 interface_opt_flag = 0;
       u8 relay_msg_opt_flag = 0;
-      ip6_fib_t * svr_fib;
       ip6_main_t * im = &ip6_main;
-      u32 server_fib_idx, svr_fib_id, client_fib_idx, server_idx;
+      u32 server_fib_idx, client_fib_idx;
 
       bi0 = from[0];
       from += 1;
@@ -608,31 +618,18 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm,
       vlib_buffer_advance (b0, sizeof(*r0));
 
       client_fib_idx = im->fib_index_by_sw_if_index[sw_if_index];
-      if (client_fib_idx < vec_len(dm->dhcp6_server_index_by_rx_fib_index))
-    	  server_idx = dm->dhcp6_server_index_by_rx_fib_index[client_fib_idx];
-      else
-    	  server_idx = 0;
-
-      if (PREDICT_FALSE (pool_is_free_index (dm->dhcp6_servers, server_idx)))
-        {
-          error0 = DHCPV6_PROXY_ERROR_WRONG_INTERFACE_ID_OPTION;
-          goto drop_packet;
-        }
+      server = dhcpv6_get_server(dm, client_fib_idx);
 
-      server = pool_elt_at_index (dm->dhcp6_servers, server_idx);
-      if (server->valid == 0)
+      if (NULL == server)
       {
     	  error0 = DHCPV6_PROXY_ERROR_NO_SERVER;
           goto drop_packet;
       }
 
-
       server_fib_idx = im->fib_index_by_sw_if_index
           [vnet_buffer(b0)->sw_if_index[VLIB_RX]];
-      svr_fib = ip6_fib_get (server_fib_idx);
-      svr_fib_id = svr_fib->table_id;
 
-      if (svr_fib_id != server->server_fib6_index ||
+      if (server_fib_idx != server->server_fib6_index ||
           ip0->src_address.as_u64[0] != server->dhcp6_server.as_u64[0] ||
           ip0->src_address.as_u64[1] != server->dhcp6_server.as_u64[1])
         {
@@ -760,7 +757,7 @@ clib_error_t * dhcpv6_proxy_init (vlib_main_t * vm)
   error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
   dm->error_drop_node_index = error_drop_node->index;
 
-  dm->vss_index_by_vrf_id = hash_create (0, sizeof (uword));
+  dm->vss_index_by_rx_fib_index = NULL;
 
   /* RFC says this is the dhcpv6 server address  */
   dm->all_dhcpv6_server_address.as_u64[0] = clib_host_to_net_u64 (0xFF05000000000000);
@@ -785,121 +782,138 @@ clib_error_t * dhcpv6_proxy_init (vlib_main_t * vm)
 
 VLIB_INIT_FUNCTION (dhcpv6_proxy_init);
 
-/* Old API, manipulates a single server (only) shared by all Rx VRFs */
-int dhcpv6_proxy_set_server (ip6_address_t *addr, ip6_address_t *src_address,
-                             u32 fib_id, int insert_vss, int is_del)
-{
-	return dhcpv6_proxy_set_server_2 (addr, src_address,
-			0, fib_id,
-			insert_vss, is_del);
-}
-
-int dhcpv6_proxy_set_server_2 (ip6_address_t *addr, ip6_address_t *src_address,
-                               u32 rx_fib_id, u32 server_fib_id,
-                               int insert_vss, int is_del)
+int dhcpv6_proxy_set_server (ip6_address_t *addr,
+                             ip6_address_t *src_address,
+                             u32 rx_fib_id,
+                             u32 server_fib_id,
+                             int is_del)
 {
   dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main;
   dhcpv6_server_t * server = 0;
-  u32 server_fib_index = 0;
   u32 rx_fib_index = 0;
+  int rc = 0;
 
   rx_fib_index = ip6_mfib_table_find_or_create_and_lock(rx_fib_id);
-  server_fib_index = ip6_fib_table_find_or_create_and_lock(server_fib_id);
-
-  if (is_del)
-      {
-
-	  if (rx_fib_index >= vec_len(dm->dhcp6_server_index_by_rx_fib_index))
-		  return VNET_API_ERROR_NO_SUCH_ENTRY;
 
-	  server_fib_index = dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index];
-
-	  dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] = 0;
-	  server = pool_elt_at_index (dm->dhcp6_servers, server_fib_index);
-	  memset (server, 0, sizeof (*server));
-	  pool_put (dm->dhcp6_servers, server);
-	  return 0;
+  const mfib_prefix_t all_dhcp_servers = {
+      .fp_len = 128,
+      .fp_proto = FIB_PROTOCOL_IP6,
+      .fp_grp_addr = {
+          .ip6 = dm->all_dhcpv6_server_relay_agent_address,
       }
+  };
 
-  if (addr->as_u64[0] == 0 &&
-        addr->as_u64[1] == 0 )
-      return VNET_API_ERROR_INVALID_DST_ADDRESS;
-
-    if (src_address->as_u64[0] == 0 &&
-        src_address->as_u64[1] == 0)
-      return VNET_API_ERROR_INVALID_SRC_ADDRESS;
-
-  if (rx_fib_id == 0)
+  if (is_del)
     {
-      server = pool_elt_at_index (dm->dhcp6_servers, 0);
-      if (server->valid)
-          goto reconfigure_it;
-      else
-          goto initialize_it;
-    }
+      server = dhcpv6_get_server(dm, rx_fib_index);
 
-  if (rx_fib_index < vec_len(dm->dhcp6_server_index_by_rx_fib_index))
-    {
-      server_fib_index = dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index];
-      if (server_fib_index != 0)
+      if (NULL == server)
         {
-          server = pool_elt_at_index (dm->dhcp6_servers, server_fib_index);
-          goto initialize_it;
+          rc = VNET_API_ERROR_NO_SUCH_ENTRY;
+          goto out;
         }
-    }
 
-  /*Allocate a new server*/
-  pool_get (dm->dhcp6_servers, server);
-
-  initialize_it:
-  {
-      const mfib_prefix_t all_dhcp_servers = {
-          .fp_len = 128,
-          .fp_proto = FIB_PROTOCOL_IP6,
-          .fp_grp_addr = {
-              .ip6 = dm->all_dhcpv6_server_relay_agent_address,
-          }
-      };
-      const fib_route_path_t path_for_us = {
-          .frp_proto = FIB_PROTOCOL_IP6,
-          .frp_addr = zero_addr,
-          .frp_sw_if_index = 0xffffffff,
-          .frp_fib_index = ~0,
-          .frp_weight = 0,
-          .frp_flags = FIB_ROUTE_PATH_LOCAL,
-      };
-      mfib_table_entry_path_update(rx_fib_index,
-                                   &all_dhcp_servers,
-                                   MFIB_SOURCE_DHCP,
-                                   &path_for_us,
-                                   MFIB_ITF_FLAG_FORWARD);
       /*
-       * Each interface that is enabled in this table, needs to be added
-       * as an accepting interface, but this is not easily doable in VPP.
-       * So we cheat. Add a flag to the entry that indicates accept form
-       * any interface.
-       * We will still only accept on v6 enabled interfaces, since the input
-       * feature ensures this.
+       * release the locks held on the server fib and rx mfib
        */
-      mfib_table_entry_update(rx_fib_index,
+      mfib_table_entry_delete(rx_fib_index,
                               &all_dhcp_servers,
-                              MFIB_SOURCE_DHCP,
-                              MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF);
-  }
+                              MFIB_SOURCE_DHCP);
+      mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6);
+      fib_table_unlock(server->server_fib6_index, FIB_PROTOCOL_IP6);
 
-reconfigure_it:
+      dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] = ~0;
 
-  copy_ip6_address(&server->dhcp6_server, addr);
-  copy_ip6_address(&server->dhcp6_src_address, src_address);
-  server->server_fib6_index = server_fib_index;
-  server->valid = 1;
-  server->insert_vss = insert_vss;
+      memset (server, 0, sizeof (*server));
+      pool_put (dm->dhcp6_servers, server);
+    }
+  else
+    {
+      if (addr->as_u64[0] == 0 &&
+          addr->as_u64[1] == 0 )
+      {
+          rc = VNET_API_ERROR_INVALID_DST_ADDRESS;
+          goto out;
+      }
+      if (src_address->as_u64[0] == 0 &&
+          src_address->as_u64[1] == 0)
+      {
+          rc = VNET_API_ERROR_INVALID_SRC_ADDRESS;
+          goto out;
+      }
 
-  vec_validate (dm->dhcp6_server_index_by_rx_fib_index, rx_fib_index);
-  dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] =
-		  server - dm->dhcp6_servers;
+      server = dhcpv6_get_server(dm, rx_fib_index);
 
-  return 0;
+      if (NULL != server)
+        {
+          /* modify of an existing entry */
+          ip6_fib_t *fib;
+
+          fib = ip6_fib_get(server->server_fib6_index);
+
+          if (fib->table_id != server_fib_id)
+            {
+              /* swap tables */
+              fib_table_unlock(server->server_fib6_index, FIB_PROTOCOL_IP6);
+              server->server_fib6_index =
+                  ip6_fib_table_find_or_create_and_lock(server_fib_id);
+            }
+        }
+      else
+        {
+          /* Allocate a new server */
+          pool_get (dm->dhcp6_servers, server);
+
+          vec_validate_init_empty (dm->dhcp6_server_index_by_rx_fib_index,
+                                   rx_fib_index, ~0);
+          dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] =
+              server - dm->dhcp6_servers;
+
+          server->server_fib6_index =
+              ip6_fib_table_find_or_create_and_lock(server_fib_id);
+          mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6);
+
+          const mfib_prefix_t all_dhcp_servers = {
+              .fp_len = 128,
+              .fp_proto = FIB_PROTOCOL_IP6,
+              .fp_grp_addr = {
+                  .ip6 = dm->all_dhcpv6_server_relay_agent_address,
+              }
+            };
+          const fib_route_path_t path_for_us = {
+              .frp_proto = FIB_PROTOCOL_IP6,
+              .frp_addr = zero_addr,
+              .frp_sw_if_index = 0xffffffff,
+              .frp_fib_index = ~0,
+              .frp_weight = 0,
+              .frp_flags = FIB_ROUTE_PATH_LOCAL,
+          };
+          mfib_table_entry_path_update(rx_fib_index,
+                                       &all_dhcp_servers,
+                                       MFIB_SOURCE_DHCP,
+                                       &path_for_us,
+                                       MFIB_ITF_FLAG_FORWARD);
+          /*
+           * Each interface that is enabled in this table, needs to be added
+           * as an accepting interface, but this is not easily doable in VPP.
+           * So we cheat. Add a flag to the entry that indicates accept from
+           * any interface.
+           * We will still only accept on v6 enabled interfaces, since the
+           * input feature ensures this.
+           */
+          mfib_table_entry_update(rx_fib_index,
+                                  &all_dhcp_servers,
+                                  MFIB_SOURCE_DHCP,
+                                  MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF);
+        }
+      copy_ip6_address(&server->dhcp6_server, addr);
+      copy_ip6_address(&server->dhcp6_src_address, src_address);
+  }
+
+out:
+  mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6);
+
+  return (rc);
 }
 
 static clib_error_t *
@@ -910,7 +924,7 @@ dhcpv6_proxy_set_command_fn (vlib_main_t * vm,
   ip6_address_t addr, src_addr;
   int set_server = 0, set_src_address = 0;
   u32 rx_fib_id = 0, server_fib_id = 0;
-  int is_del = 0, add_vss = 0;
+  int is_del = 0;
 
   while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
     {
@@ -924,9 +938,6 @@ dhcpv6_proxy_set_command_fn (vlib_main_t * vm,
         ;
        else if (unformat (input, "rx-fib-id %d", &rx_fib_id))
          ;
-       else if (unformat (input, "add-vss-option")
-               || unformat (input, "insert-option"))
-          add_vss = 1;
        else if (unformat (input, "delete") ||
                 unformat (input, "del"))
            is_del = 1;
@@ -938,8 +949,8 @@ dhcpv6_proxy_set_command_fn (vlib_main_t * vm,
   {
       int rv;
 
-      rv = dhcpv6_proxy_set_server_2 (&addr, &src_addr, rx_fib_id,
-    		  server_fib_id, add_vss, is_del);
+      rv = dhcpv6_proxy_set_server (&addr, &src_addr, rx_fib_id,
+                                    server_fib_id, is_del);
 
       //TODO: Complete the errors
       switch (rv)
@@ -962,7 +973,7 @@ dhcpv6_proxy_set_command_fn (vlib_main_t * vm,
 VLIB_CLI_COMMAND (dhcpv6_proxy_set_command, static) = {
   .path = "set dhcpv6 proxy",
   .short_help = "set dhcpv6 proxy [del] server <ipv6-addr> src-address <ipv6-addr> "
-		  "[add-vss-option] [server-fib-id <fib-id>] [rx-fib-id <fib-id>] ",
+		  "[server-fib-id <fib-id>] [rx-fib-id <fib-id>] ",
   .function = dhcpv6_proxy_set_command_fn,
 };
 
@@ -976,8 +987,8 @@ u8 * format_dhcpv6_proxy_server (u8 * s, va_list * args)
 
   if (dm == 0)
     {
-      s = format (s, "%=40s%=40s%=14s%=14s%=20s", "Server Address", "Source Address",
-                  "Server FIB", "RX FIB", "Insert VSS Option");
+      s = format (s, "%=40s%=40s%=14s%=14s", "Server Address", "Source Address",
+                  "Server FIB", "RX FIB");
       return s;
     }
 
@@ -990,11 +1001,10 @@ u8 * format_dhcpv6_proxy_server (u8 * s, va_list * args)
   if (rx_fib)
 	  rx_fib_id = rx_fib->table_id;
 
-  s = format (s, "%=40U%=40U%=14u%=14u%=20s",
+  s = format (s, "%=40U%=40U%=14u%=14u",
               format_ip6_address, &server->dhcp6_server,
               format_ip6_address, &server->dhcp6_src_address,
-			  server_fib_id, rx_fib_id,
-			                server->insert_vss ? "yes" : "no");
+			  server_fib_id, rx_fib_id);
   return s;
 }
 
@@ -1003,25 +1013,25 @@ dhcpv6_proxy_show_command_fn (vlib_main_t * vm,
                             unformat_input_t * input,
                             vlib_cli_command_t * cmd)
 {
-  dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main;
-  ip6_main_t * im = &ip6_main;
+  dhcpv6_proxy_main_t * dpm = &dhcpv6_proxy_main;
   int i;
   u32 server_index;
   dhcpv6_server_t * server;
 
   vlib_cli_output (vm, "%U", format_dhcpv6_proxy_server, 0 /* header line */,
 		  0, 0);
-  for (i = 0; i < vec_len (im->fibs); i++)
-      {
-        if (i < vec_len(dm->dhcp6_server_index_by_rx_fib_index))
-          server_index = dm->dhcp6_server_index_by_rx_fib_index[i];
-        else
-          server_index = 0;
-        server = pool_elt_at_index (dm->dhcp6_servers, server_index);
-        if (server->valid)
-          vlib_cli_output (vm, "%U", format_dhcpv6_proxy_server, dm,
-		  server, i);
-      }
+  vec_foreach_index (i, dpm->dhcp6_server_index_by_rx_fib_index)
+  {
+      server_index = dpm->dhcp6_server_index_by_rx_fib_index[i];
+      if (~0 == server_index)
+          continue;
+
+      server = pool_elt_at_index (dpm->dhcp6_servers, server_index);
+
+      vlib_cli_output (vm, "%U", format_dhcpv6_proxy_server, dpm, 
+                       server, i);
+    }
+
   return 0;
 }
 
@@ -1031,51 +1041,104 @@ VLIB_CLI_COMMAND (dhcpv6_proxy_show_command, static) = {
   .function = dhcpv6_proxy_show_command_fn,
 };
 
+/* Report every configured DHCPv6 proxy server: one dhcp_send_details
+ * call per RX FIB index that has a server, carrying the VSS fib_id and
+ * oui for that table, or 0 for both when no VSS info is configured. */
+void
+dhcpv6_proxy_dump (void *opaque,
+                   u32 context)
+{
+  dhcpv6_proxy_main_t * dpm = &dhcpv6_proxy_main;
+  u32 rx_fib_index;
+
+  vec_foreach_index (rx_fib_index, dpm->dhcp6_server_index_by_rx_fib_index)
+  {
+      dhcpv6_server_t * server = dhcpv6_get_server(dpm, rx_fib_index);
+      dhcpv6_vss_info *vss;
+
+      /* slots holding ~0 have no server and are not reported */
+      if (NULL == server)
+          continue;
+
+      vss = dhcpv6_get_vss_info(dpm, rx_fib_index);
+
+      /* the addresses are handed over wrapped as ip46 addresses */
+      ip46_address_t src_addr = {
+          .ip6 = server->dhcp6_src_address,
+      };
+      ip46_address_t server_addr = {
+          .ip6 = server->dhcp6_server,
+      };
+
+      dhcp_send_details(opaque,
+                        context,
+                        &server_addr,
+                        &src_addr,
+                        ip6_fib_get(server->server_fib6_index)->table_id,
+                        ip6_fib_get(rx_fib_index)->table_id,
+                        (vss ? vss->vpn_id.fib_id : 0),
+                        (vss ? vss->vpn_id.oui : 0));
+  }
+}
+
 int dhcpv6_proxy_set_vss(u32 tbl_id,
                          u32 oui,
                          u32 fib_id,
                          int is_del)
 {
   dhcpv6_proxy_main_t *dm = &dhcpv6_proxy_main;
-  u32 old_oui, old_fib_id;
-  uword *p;
-  dhcpv6_vss_info *v;
+  dhcpv6_vss_info *v = NULL;
+  u32  rx_fib_index;
+  int rc = 0;
 
-  p = hash_get (dm->vss_index_by_vrf_id, tbl_id);
+  rx_fib_index = ip6_fib_table_find_or_create_and_lock(tbl_id);
+  v = dhcpv6_get_vss_info(dm, rx_fib_index);
 
-  if (p) {
-      v = pool_elt_at_index (dm->vss, p[0]);
-      if (!v)
-        return VNET_API_ERROR_NO_SUCH_FIB;
-
-      old_oui = v->vpn_id.oui;
-      old_fib_id = v->vpn_id.fib_id;
+  if (NULL != v)
+  {
+      if (is_del)
+      {
+          /* release the lock held on the table when the VSS
+           * info was created */
+          fib_table_unlock (rx_fib_index,
+                            FIB_PROTOCOL_IP6);
 
+          pool_put (dm->vss, v);
+          dm->vss_index_by_rx_fib_index[rx_fib_index] = ~0;
+      }
+      else
+      {
+          /* this is a modify */
+          v->vpn_id.fib_id = fib_id;
+          v->vpn_id.oui = oui;
+      }
+  }
+  else
+  {
       if (is_del)
+          rc = VNET_API_ERROR_NO_SUCH_ENTRY;
+      else
       {
-          if (old_oui == oui &&
-              old_fib_id == fib_id )
-          {
-              pool_put(dm->vss, v);
-              hash_unset (dm->vss_index_by_vrf_id, tbl_id);
-              return 0;
-          }
-          else
-            return VNET_API_ERROR_NO_SUCH_ENTRY;
+          /* create a new entry */
+          vec_validate_init_empty(dm->vss_index_by_rx_fib_index,
+                                  rx_fib_index, ~0);
+
+          /* hold a lock on the table whilst the VSS info exists */
+          fib_table_lock (rx_fib_index,
+                          FIB_PROTOCOL_IP6);
+
+          pool_get (dm->vss, v);
+          v->vpn_id.fib_id = fib_id;
+          v->vpn_id.oui = oui;
+          dm->vss_index_by_rx_fib_index[rx_fib_index] = v - dm->vss;
       }
+  }
 
-      pool_put(dm->vss, v);
-      hash_unset (dm->vss_index_by_vrf_id, tbl_id);
-  } else if (is_del)
-    return VNET_API_ERROR_NO_SUCH_ENTRY;
-
-  pool_get (dm->vss, v);
-  memset (v, ~0, sizeof (*v));
-  v->vpn_id.fib_id = fib_id;
-  v->vpn_id.oui = oui;
-  hash_set (dm->vss_index_by_vrf_id, tbl_id, v - dm->vss);
+  /* Release the lock taken during the create_or_lock at the start */
+  fib_table_unlock (rx_fib_index,
+                    FIB_PROTOCOL_IP6);
 
-  return 0;
+  return (rc);
 }
 
 
@@ -1147,19 +1210,19 @@ dhcpv6_vss_show_command_fn (vlib_main_t * vm,
 {
   dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main;
   dhcpv6_vss_info *v;
-  u32 oui;
-  u32 fib_id;
-  u32 tbl_id;
-  uword index;
+  u32 fib_index;
 
   vlib_cli_output (vm, "%=6s%=6s%=12s","Table", "OUI", "VPN ID");
-  hash_foreach (tbl_id, index, dm->vss_index_by_vrf_id,
-  ({
-     v = pool_elt_at_index (dm->vss, index);
-     oui = v->vpn_id.oui;
-     fib_id = v->vpn_id.fib_id;
-     vlib_cli_output (vm, "%=6d%=6d%=12d",
-                      tbl_id, oui, fib_id);
-  }));
+  /* indexed by RX FIB index; tables without VSS info are skipped */
+  vec_foreach_index (fib_index, dm->vss_index_by_rx_fib_index)
+  {
+      v = dhcpv6_get_vss_info (dm, fib_index);
+      if (NULL == v)
+          continue;
+      vlib_cli_output (vm, "%=6d%=6d%=12d",
+                       ip6_fib_get (fib_index)->table_id,
+                       v->vpn_id.oui,
+                       v->vpn_id.fib_id);
+  }
 
   return 0;
diff --git a/src/vnet/dpo/receive_dpo.c b/src/vnet/dpo/receive_dpo.c
index 2b2571c6..83e33ed8 100644
--- a/src/vnet/dpo/receive_dpo.c
+++ b/src/vnet/dpo/receive_dpo.c
@@ -102,6 +102,11 @@ format_receive_dpo (u8 *s, va_list *ap)
     vnet_main_t * vnm = vnet_get_main();
     receive_dpo_t *rd;
 
+    if (pool_is_free_index(receive_dpo_pool, index))
+    {
+        return (format(s, "dpo-receive DELETED"));
+    }
+
     rd = receive_dpo_get(index);
 
     if (~0 != rd->rd_sw_if_index)
diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c
index a7dca989..70b4e4c9 100644
--- a/src/vpp/api/custom_dump.c
+++ b/src/vpp/api/custom_dump.c
@@ -772,37 +772,6 @@ static void *vl_api_dhcp_proxy_config_t_print
 {
   u8 *s;
 
-  s = format (0, "SCRIPT: dhcp_proxy_config ");
-
-  s = format (s, "vrf_id %d ", ntohl (mp->vrf_id));
-
-  if (mp->is_ipv6)
-    {
-      s = format (s, "svr %U ", format_ip6_address,
-		  (ip6_address_t *) mp->dhcp_server);
-      s = format (s, "src %U ", format_ip6_address,
-		  (ip6_address_t *) mp->dhcp_src_address);
-    }
-  else
-    {
-      s = format (s, "svr %U ", format_ip4_address,
-		  (ip4_address_t *) mp->dhcp_server);
-      s = format (s, "src %U ", format_ip4_address,
-		  (ip4_address_t *) mp->dhcp_src_address);
-    }
-  if (mp->is_add == 0)
-    s = format (s, "del ");
-
-  s = format (s, "insert-cid %d ", mp->insert_circuit_id);
-
-  FINISH;
-}
-
-static void *vl_api_dhcp_proxy_config_2_t_print
-  (vl_api_dhcp_proxy_config_2_t * mp, void *handle)
-{
-  u8 *s;
-
   s = format (0, "SCRIPT: dhcp_proxy_config_2 ");
 
   s = format (s, "rx_vrf_id %d ", ntohl (mp->rx_vrf_id));
@@ -825,8 +794,6 @@ static void *vl_api_dhcp_proxy_config_2_t_print
   if (mp->is_add == 0)
     s = format (s, "del ");
 
-  s = format (s, "insert-cid %d ", mp->insert_circuit_id);
-
   FINISH;
 }
 
@@ -2954,7 +2921,6 @@ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump)                               \
 _(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table)	\
 _(CLASSIFY_SET_INTERFACE_L2_TABLES, classify_set_interface_l2_tables)	\
 _(ADD_NODE_NEXT, add_node_next)						\
-_(DHCP_PROXY_CONFIG_2, dhcp_proxy_config_2)	                        \
 _(DHCP_CLIENT_CONFIG, dhcp_client_config)	                        \
 _(L2TPV3_CREATE_TUNNEL, l2tpv3_create_tunnel)                           \
 _(L2TPV3_SET_TUNNEL_COOKIES, l2tpv3_set_tunnel_cookies)                 \
diff --git a/test/test_dhcp.py b/test/test_dhcp.py
index 04ab2e11..fbfb8a0c 100644
--- a/test/test_dhcp.py
+++ b/test/test_dhcp.py
@@ -65,7 +65,7 @@ class TestDHCP(VppTestCase):
         for i in self.pg_interfaces:
             i.assert_nothing_captured(remark=remark)
 
-    def validate_option_82(self, pkt, intf, ip_addr):
+    def validate_relay_options(self, pkt, intf, ip_addr, fib_id, oui):
         dhcp = pkt[DHCP]
         found = 0
         data = []
@@ -77,7 +77,10 @@ class TestDHCP(VppTestCase):
                     # There are two sb-options present - each of length 6.
                     #
                     data = i[1]
-                    self.assertEqual(len(data), 12)
+                    if oui != 0:
+                        self.assertEqual(len(data), 24)
+                    else:
+                        self.assertEqual(len(data), 12)
 
                     #
                     # First sub-option is ID 1, len 4, then encoded
@@ -107,12 +110,30 @@ class TestDHCP(VppTestCase):
                     self.assertEqual(data[10], claddr[2])
                     self.assertEqual(data[11], claddr[3])
 
+                    if oui != 0:
+                        # sub-option 151 encodes the 3 byte oui
+                        # and the 4 byte fib_id
+                        self.assertEqual(ord(data[12]), 151)
+                        self.assertEqual(ord(data[13]), 8)
+                        self.assertEqual(ord(data[14]), 1)
+                        self.assertEqual(ord(data[15]), 0)
+                        self.assertEqual(ord(data[16]), 0)
+                        self.assertEqual(ord(data[17]), oui)
+                        self.assertEqual(ord(data[18]), 0)
+                        self.assertEqual(ord(data[19]), 0)
+                        self.assertEqual(ord(data[20]), 0)
+                        self.assertEqual(ord(data[21]), fib_id)
+
+                        # VSS control sub-option
+                        self.assertEqual(ord(data[22]), 152)
+                        self.assertEqual(ord(data[23]), 0)
+
                     found = 1
         self.assertTrue(found)
 
         return data
 
-    def verify_dhcp_offer(self, pkt, intf, check_option_82=True):
+    def verify_dhcp_offer(self, pkt, intf):
         ether = pkt[Ether]
         self.assertEqual(ether.dst, "ff:ff:ff:ff:ff:ff")
         self.assertEqual(ether.src, intf.local_mac)
@@ -134,11 +155,9 @@ class TestDHCP(VppTestCase):
                     is_offer = True
         self.assertTrue(is_offer)
 
-        if check_option_82:
-            data = self.validate_option_82(pkt, intf, intf.local_ip4)
+        data = self.validate_relay_options(pkt, intf, intf.local_ip4, 0, 0)
 
-    def verify_dhcp_discover(self, pkt, intf, src_intf=None,
-                             option_82_present=True):
+    def verify_dhcp_discover(self, pkt, intf, src_intf=None, fib_id=0, oui=0):
         ether = pkt[Ether]
         self.assertEqual(ether.dst, intf.remote_mac)
         self.assertEqual(ether.src, intf.local_mac)
@@ -161,13 +180,10 @@ class TestDHCP(VppTestCase):
                     is_discover = True
         self.assertTrue(is_discover)
 
-        if option_82_present:
-            data = self.validate_option_82(pkt, src_intf, src_intf.local_ip4)
-            return data
-        else:
-            for i in dhcp.options:
-                if type(i) is tuple:
-                    self.assertNotEqual(i[0], "relay_agent_Information")
+        data = self.validate_relay_options(pkt, src_intf,
+                                           src_intf.local_ip4,
+                                           fib_id, oui)
+        return data
 
     def verify_dhcp6_solicit(self, pkt, intf,
                              peer_ip, peer_mac,
@@ -193,18 +209,19 @@ class TestDHCP(VppTestCase):
         self.assertEqual(cll.lltype, 1)
         self.assertEqual(cll.clladdr, peer_mac)
 
-        vss = pkt[DHCP6OptVSS]
-        self.assertEqual(vss.optlen, 8)
-        self.assertEqual(vss.type, 1)
-        # the OUI and FIB-id are really 3 and 4 bytes resp.
-        # but the tested range is small
-        self.assertEqual(ord(vss.data[0]), 0)
-        self.assertEqual(ord(vss.data[1]), 0)
-        self.assertEqual(ord(vss.data[2]), oui)
-        self.assertEqual(ord(vss.data[3]), 0)
-        self.assertEqual(ord(vss.data[4]), 0)
-        self.assertEqual(ord(vss.data[5]), 0)
-        self.assertEqual(ord(vss.data[6]), fib_id)
+        if fib_id != 0:
+            vss = pkt[DHCP6OptVSS]
+            self.assertEqual(vss.optlen, 8)
+            self.assertEqual(vss.type, 1)
+            # the OUI and FIB-id are really 3 and 4 bytes resp.
+            # but the tested range is small
+            self.assertEqual(ord(vss.data[0]), 0)
+            self.assertEqual(ord(vss.data[1]), 0)
+            self.assertEqual(ord(vss.data[2]), oui)
+            self.assertEqual(ord(vss.data[3]), 0)
+            self.assertEqual(ord(vss.data[4]), 0)
+            self.assertEqual(ord(vss.data[5]), 0)
+            self.assertEqual(ord(vss.data[6]), fib_id)
 
         # the relay message should be an encoded Solicit
         msg = pkt[DHCP6OptRelayMsg]
@@ -267,29 +284,16 @@ class TestDHCP(VppTestCase):
                                     rx_table_id=0)
 
         #
-        # Now a DHCP request on pg2, which is in the same VRF
-        # as the DHCP config, will result in a relayed DHCP
-        # message to the [fake] server
-        #
-        self.pg2.add_stream(pkts_disc_vrf0)
-        self.pg_enable_capture(self.pg_interfaces)
-        self.pg_start()
-
-        rx = self.pg0.get_capture(1)
-        rx = rx[0]
-
-        #
-        # Rx'd packet should be to the server address and from the configured
-        # source address
-        # UDP source ports are unchanged
-        # we've no option 82 config so that should be absent
+        # Discover packets from the client are dropped because there is no
+        # IP address configured on the client facing interface
         #
-        self.verify_dhcp_discover(rx, self.pg0, option_82_present=False)
+        self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0,
+                                        "Discover DHCP no relay address")
 
         #
         # Inject a response from the server
-        #  VPP will only relay the offer if option 82 is present.
-        #  so this one is dropped
+        #  dropped, because there is no IP address on the
+        #  client interface to fill in the option.
         #
         p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
              IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) /
@@ -298,24 +302,8 @@ class TestDHCP(VppTestCase):
              DHCP(options=[('message-type', 'offer'), ('end')]))
         pkts = [p]
 
-        self.send_and_assert_no_replies(self.pg0, pkts,
-                                        "DHCP offer no option 82")
-
-        #
-        # Configure sending option 82 in relayed messages
-        #
-        self.vapi.dhcp_proxy_config(server_addr,
-                                    src_addr,
-                                    rx_table_id=0,
-                                    insert_circuit_id=1)
-
-        #
-        # Send a request:
-        #  again dropped, but ths time because there is no IP addrees on the
-        #  clinet interfce to fill in the option.
-        #
-        self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0,
-                                        "DHCP no relay address")
+        self.send_and_assert_no_replies(self.pg2, pkts,
+                                        "Offer DHCP no relay address")
 
         #
         # configure an IP address on the client facing interface
@@ -376,15 +364,8 @@ class TestDHCP(VppTestCase):
                            ('relay_agent_Information', bad_ip),
                            ('end')]))
         pkts = [p]
-
-        self.pg0.add_stream(pkts)
-        self.pg_enable_capture(self.pg_interfaces)
-        self.pg_start()
-        rx = self.pg2.get_capture(1)
-        rx = rx[0]
-
-        self.verify_dhcp_offer(rx, self.pg2, check_option_82=False)
-        self.pg0.assert_nothing_captured(remark="")
+        self.send_and_assert_no_replies(self.pg0, pkts,
+                                        "DHCP offer option 82 bad address")
 
         # 2. Not a sw_if_index VPP knows
         bad_if_index = option_82[0:2] + chr(33) + option_82[3:]
@@ -413,8 +394,7 @@ class TestDHCP(VppTestCase):
         self.vapi.dhcp_proxy_config(server_addr,
                                     src_addr,
                                     rx_table_id=0,
-                                    is_add=0,
-                                    insert_circuit_id=1)
+                                    is_add=0)
 
         self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0,
                                         "DHCP config removed VRF 0")
@@ -429,8 +409,7 @@ class TestDHCP(VppTestCase):
         self.vapi.dhcp_proxy_config(server_addr,
                                     src_addr,
                                     rx_table_id=1,
-                                    server_table_id=1,
-                                    insert_circuit_id=1)
+                                    server_table_id=1)
 
         #
         # Confim DHCP requests ok in VRF 1.
@@ -452,14 +431,41 @@ class TestDHCP(VppTestCase):
         rx = rx[0]
         self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3)
 
+        #
+        # Add VSS config
+        #  table=1, fib_id=1, oui=4
+        self.vapi.dhcp_proxy_set_vss(1, 1, 4)
+
+        self.pg3.add_stream(pkts_disc_vrf1)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg1.get_capture(1)
+        rx = rx[0]
+        self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3,
+                                  fib_id=1, oui=4)
+
+        #
+        # Remove the VSS config
+        #  relayed DHCP has default values in the option.
+        #
+        self.vapi.dhcp_proxy_set_vss(1, 1, 4, is_add=0)
+
+        self.pg3.add_stream(pkts_disc_vrf1)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg1.get_capture(1)
+        rx = rx[0]
+        self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3)
+
         #
         # remove DHCP config to cleanup
         #
         self.vapi.dhcp_proxy_config(server_addr,
                                     src_addr,
                                     rx_table_id=1,
-                                    server_table_id=1,
-                                    insert_circuit_id=1,
+                                    server_table_id=11,
                                     is_add=0)
 
         self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0,
@@ -510,7 +516,6 @@ class TestDHCP(VppTestCase):
                                     src_addr_vrf0,
                                     rx_table_id=0,
                                     server_table_id=0,
-                                    insert_circuit_id=1,
                                     is_ipv6=1)
 
         self.send_and_assert_no_replies(self.pg2, pkts_solicit_vrf0,
@@ -630,7 +635,6 @@ class TestDHCP(VppTestCase):
                                     src_addr_vrf1,
                                     rx_table_id=1,
                                     server_table_id=1,
-                                    insert_circuit_id=1,
                                     is_ipv6=1)
         self.pg3.config_ip6()
 
@@ -708,14 +712,12 @@ class TestDHCP(VppTestCase):
                                     src_addr_vrf1,
                                     rx_table_id=1,
                                     server_table_id=1,
-                                    insert_circuit_id=1,
                                     is_ipv6=1,
                                     is_add=0)
         self.vapi.dhcp_proxy_config(server_addr_vrf1,
                                     src_addr_vrf1,
                                     rx_table_id=0,
                                     server_table_id=0,
-                                    insert_circuit_id=1,
                                     is_ipv6=1,
                                     is_add=0)
 
diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py
index 32680424..59e58ad0 100644
--- a/test/vpp_papi_provider.py
+++ b/test/vpp_papi_provider.py
@@ -1240,16 +1240,14 @@ class VppPapiProvider(object):
                           rx_table_id=0,
                           server_table_id=0,
                           is_add=1,
-                          is_ipv6=0,
-                          insert_circuit_id=0):
+                          is_ipv6=0):
         return self.api(
-            self.papi.dhcp_proxy_config_2,
+            self.papi.dhcp_proxy_config,
             {
                 'rx_vrf_id': rx_table_id,
                 'server_vrf_id': server_table_id,
                 'is_ipv6': is_ipv6,
                 'is_add': is_add,
-                'insert_circuit_id': insert_circuit_id,
                 'dhcp_server': dhcp_server,
                 'dhcp_src_address': dhcp_src_address,
             })
-- 
cgit 1.2.3-korg


From 26cd8c129567b48ed0e3610293251ca78fa67103 Mon Sep 17 00:00:00 2001
From: Dave Barach <dave@barachs.net>
Date: Thu, 23 Feb 2017 17:11:26 -0500
Subject: VPP-650: handle buffer failure in vlib_buffer_copy(...)

Change-Id: I6aac48d780fcd935818221044eae50067f225175
Signed-off-by: Dave Barach <dave@barachs.net>
---
 src/vlib/buffer_funcs.h          | 11 ++++++++++-
 src/vnet/dpo/replicate_dpo.c     | 32 +++++++++++++++++++++++++++++++-
 src/vnet/lawful-intercept/node.c | 18 ++++++++++++++----
 src/vnet/span/node.c             | 12 ++++++++----
 4 files changed, 63 insertions(+), 10 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index fd051de5..0b583a61 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -489,7 +489,15 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b)
 
   vec_validate (new_buffers, n_buffers - 1);
   n_alloc = vlib_buffer_alloc (vm, new_buffers, n_buffers);
-  ASSERT (n_alloc == n_buffers);
+
+  /* No guarantee that we'll get all the buffers we asked for */
+  if (PREDICT_FALSE (n_alloc < n_buffers))
+    {
+      if (n_alloc > 0)
+	vlib_buffer_free (vm, new_buffers, n_alloc);
+      vec_free (new_buffers);
+      return 0;
+    }
 
   /* 1st segment */
   s = b;
@@ -518,6 +526,7 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b)
       d->flags = s->flags & flag_mask;
     }
 
+  vec_free (new_buffers);
   return fd;
 }
 
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index c779516f..a67b19c8 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -34,6 +34,21 @@
 #define REP_DBG(_p, _fmt, _args...)
 #endif
 
+#define foreach_replicate_dpo_error                       \
+_(BUFFER_ALLOCATION_FAILURE, "Buffer Allocation Failure")
+
+typedef enum {
+#define _(sym,str) REPLICATE_DPO_ERROR_##sym,
+  foreach_replicate_dpo_error
+#undef _
+  REPLICATE_DPO_N_ERROR,
+} replicate_dpo_error_t;
+
+static char * replicate_dpo_error_strings[] = {
+#define _(sym,string) string,
+  foreach_replicate_dpo_error
+#undef _
+};
 
 /**
  * Pool of all DPOs. It's not static so the DP can have fast access
@@ -678,8 +693,17 @@ replicate_inline (vlib_main_t * vm,
                     vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
                 }
 
-                /* Make a copy */
+                /* Make a copy. This can fail, so deal with it. */
                 c0 = vlib_buffer_copy(vm, b0);
+                if (PREDICT_FALSE (c0 == 0))
+                  {
+                    vlib_node_increment_counter 
+                      (vm, node->node_index, 
+                       REPLICATE_DPO_ERROR_BUFFER_ALLOCATION_FAILURE,
+                       1);
+                    continue;
+                  }
+                
                 ci0 = vlib_get_buffer_index(vm, c0);
 
                 to_next[0] = ci0;
@@ -738,6 +762,9 @@ VLIB_REGISTER_NODE (ip4_replicate_node) = {
   .name = "ip4-replicate",
   .vector_size = sizeof (u32),
 
+  .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
+  .error_strings = replicate_dpo_error_strings,
+
   .format_trace = format_replicate_trace,
   .n_next_nodes = 1,
   .next_nodes = {
@@ -761,6 +788,9 @@ VLIB_REGISTER_NODE (ip6_replicate_node) = {
   .name = "ip6-replicate",
   .vector_size = sizeof (u32),
 
+  .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
+  .error_strings = replicate_dpo_error_strings,
+
   .format_trace = format_replicate_trace,
   .n_next_nodes = 1,
   .next_nodes = {
diff --git a/src/vnet/lawful-intercept/node.c b/src/vnet/lawful-intercept/node.c
index ea0cd8ef..50c76ec5 100644
--- a/src/vnet/lawful-intercept/node.c
+++ b/src/vnet/lawful-intercept/node.c
@@ -42,9 +42,10 @@ static u8 * format_li_hit_trace (u8 * s, va_list * args)
 
 vlib_node_registration_t li_hit_node;
 
-#define foreach_li_hit_error                    \
-_(HITS, "LI packets processed")                 \
-_(NO_COLLECTOR, "No collector configured")
+#define foreach_li_hit_error                                    \
+_(HITS, "LI packets processed")                                 \
+_(NO_COLLECTOR, "No collector configured")                      \
+_(BUFFER_ALLOCATION_FAILURE, "Buffer allocation failure")
 
 typedef enum {
 #define _(sym,str) LI_HIT_ERROR_##sym,
@@ -197,8 +198,16 @@ li_hit_node_fn (vlib_main_t * vm,
 	  b0 = vlib_get_buffer (vm, bi0);
           if (PREDICT_TRUE(to_int_next != 0))
             {
-              /* Make an intercept copy */
+              /* Make an intercept copy. This can fail. */
               c0 = vlib_buffer_copy (vm, b0);
+
+              if (PREDICT_FALSE (c0 == 0))
+                {
+                  vlib_node_increment_counter 
+                    (vm, node->node_index, 
+                     LI_HIT_ERROR_BUFFER_ALLOCATION_FAILURE, 1);
+                  goto skip;
+                }
               
               vlib_buffer_advance(c0, -sizeof(*iu0));
 
@@ -225,6 +234,7 @@ li_hit_node_fn (vlib_main_t * vm,
               to_int_next++;
             }
 
+        skip:
           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
             {
diff --git a/src/vnet/span/node.c b/src/vnet/span/node.c
index 50d642c2..5037c120 100644
--- a/src/vnet/span/node.c
+++ b/src/vnet/span/node.c
@@ -83,11 +83,15 @@ span_mirror (vlib_main_t * vm, span_interface_t * si0, vlib_buffer_t * b0,
 	mirror_frames[i] = vnet_get_frame_to_sw_interface (vnm, i);
       to_mirror_next = vlib_frame_vector_args (mirror_frames[i]);
       to_mirror_next += mirror_frames[i]->n_vectors;
+      /* This can fail */
       c0 = vlib_buffer_copy (vm, b0);
-      vnet_buffer (c0)->sw_if_index[VLIB_TX] = i;
-      c0->flags |= VNET_BUFFER_SPAN_CLONE;
-      to_mirror_next[0] = vlib_get_buffer_index (vm, c0);
-      mirror_frames[i]->n_vectors++;
+      if (PREDICT_TRUE(c0 != 0))
+        {
+          vnet_buffer (c0)->sw_if_index[VLIB_TX] = i;
+          c0->flags |= VNET_BUFFER_SPAN_CLONE;
+          to_mirror_next[0] = vlib_get_buffer_index (vm, c0);
+          mirror_frames[i]->n_vectors++;
+        }
     }));
   /* *INDENT-ON* */
 }
-- 
cgit 1.2.3-korg


From c47ed032c6d036a9f942fc9ced48874fad55b48c Mon Sep 17 00:00:00 2001
From: Damjan Marion <damarion@cisco.com>
Date: Wed, 25 Jan 2017 14:18:03 +0100
Subject: vlib: add buffer cloning support

Change-Id: I50070611af15b2b4cc29664a8bee4f821ac3c835
Signed-off-by: Damjan Marion <damarion@cisco.com>
---
 src/scripts/vnet/mcast/ip4     |  19 +--
 src/vlib/buffer.c              | 254 ++++++++++-------------------------------
 src/vlib/buffer.h              |   4 +-
 src/vlib/buffer_funcs.h        | 113 +++++++++++++++++-
 src/vnet/devices/dpdk/buffer.c |  41 +++++--
 src/vnet/devices/dpdk/device.c |  11 +-
 src/vnet/dpo/replicate_dpo.c   |  76 ++++++------
 src/vnet/dpo/replicate_dpo.h   |   3 +
 8 files changed, 256 insertions(+), 265 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/scripts/vnet/mcast/ip4 b/src/scripts/vnet/mcast/ip4
index 69f1ee00..eb6bab27 100644
--- a/src/scripts/vnet/mcast/ip4
+++ b/src/scripts/vnet/mcast/ip4
@@ -2,7 +2,7 @@ packet-generator new {
   name x
   limit 1
   node ip4-input
-  size 64-64
+  size 512-512
   no-recycle
   data {
     ICMP: 1.0.0.2 -> 232.1.1.1
@@ -11,12 +11,15 @@ packet-generator new {
   }
 }
 
-trace add pg-input 100
-loop create
-loop create
-set int state loop0 up
-set int state loop1 up
+create packet-generator interface pg1
+create packet-generator interface pg2
+create packet-generator interface pg3
+
+set int state pg1 up
+set int state pg2 up
+set int state pg3 up
 
 ip mroute add 232.1.1.1 via pg0 Accept
-ip mroute add 232.1.1.1 via loop0 Forward
-ip mroute add 232.1.1.1 via loop1 Forward
+ip mroute add 232.1.1.1 via pg1 Forward
+ip mroute add 232.1.1.1 via pg2 Forward
+ip mroute add 232.1.1.1 via pg3 Forward
diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c
index 95b4344f..4f5eb09d 100644
--- a/src/vlib/buffer.c
+++ b/src/vlib/buffer.c
@@ -68,8 +68,9 @@ format_vlib_buffer (u8 * s, va_list * args)
   vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
   uword indent = format_get_indent (s);
 
-  s = format (s, "current data %d, length %d, free-list %d",
-	      b->current_data, b->current_length, b->free_list_index);
+  s = format (s, "current data %d, length %d, free-list %d, clone-count %u",
+	      b->current_data, b->current_length, b->free_list_index,
+	      b->n_add_refs);
 
   if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)
     s = format (s, ", totlen-nifb %d",
@@ -84,8 +85,10 @@ format_vlib_buffer (u8 * s, va_list * args)
       u32 next_buffer = b->next_buffer;
       b = vlib_get_buffer (vm, next_buffer);
 
-      s = format (s, "\n%Unext-buffer 0x%x, segment length %d",
-		  format_white_space, indent, next_buffer, b->current_length);
+      s =
+	format (s, "\n%Unext-buffer 0x%x, segment length %d, clone-count %u",
+		format_white_space, indent, next_buffer, b->current_length,
+		b->n_add_refs);
     }
 
   return s;
@@ -262,7 +265,7 @@ vlib_main_t **vlib_mains;
 
 /* When dubugging validate that given buffers are either known allocated
    or known free. */
-static void __attribute__ ((unused))
+static void
 vlib_buffer_validate_alloc_free (vlib_main_t * vm,
 				 u32 * buffers,
 				 uword n_buffers,
@@ -362,6 +365,7 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm,
 
   /* Setup free buffer template. */
   f->buffer_init_template.free_list_index = f->index;
+  f->buffer_init_template.n_add_refs = 0;
 
   if (is_public)
     {
@@ -620,19 +624,11 @@ vlib_buffer_free_inline (vlib_main_t * vm,
 {
   vlib_buffer_main_t *bm = vm->buffer_main;
   vlib_buffer_free_list_t *fl;
-  static u32 *next_to_free[2];	/* smp bad */
-  u32 i_next_to_free, *b, *n, *f, fi;
-  uword n_left;
+  u32 fi;
   int i;
-  static vlib_buffer_free_list_t **announce_list;
-  vlib_buffer_free_list_t *fl0 = 0, *fl1 = 0;
-  u32 bi0 = (u32) ~ 0, bi1 = (u32) ~ 0, fi0, fi1 = (u32) ~ 0;
-  u8 free0, free1 = 0, free_next0, free_next1;
   u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
 	     u32 follow_buffer_next);
 
-  ASSERT (os_get_cpu_number () == 0);
-
   cb = bm->buffer_free_callback;
 
   if (PREDICT_FALSE (cb != 0))
@@ -641,203 +637,68 @@ vlib_buffer_free_inline (vlib_main_t * vm,
   if (!n_buffers)
     return;
 
-  /* Use first buffer to get default free list. */
-  {
-    u32 bi0 = buffers[0];
-    vlib_buffer_t *b0;
-
-    b0 = vlib_get_buffer (vm, bi0);
-    fl = vlib_buffer_get_buffer_free_list (vm, b0, &fi);
-    if (fl->buffers_added_to_freelist_function)
-      vec_add1 (announce_list, fl);
-  }
-
-  vec_validate (next_to_free[0], n_buffers - 1);
-  vec_validate (next_to_free[1], n_buffers - 1);
-
-  i_next_to_free = 0;
-  n_left = n_buffers;
-  b = buffers;
-
-again:
-  /* Verify that buffers are known allocated. */
-  vlib_buffer_validate_alloc_free (vm, b,
-				   n_left, VLIB_BUFFER_KNOWN_ALLOCATED);
-
-  vec_add2_aligned (fl->buffers, f, n_left, CLIB_CACHE_LINE_BYTES);
-
-  n = next_to_free[i_next_to_free];
-  while (n_left >= 4)
-    {
-      vlib_buffer_t *b0, *b1, *binit0, *binit1, dummy_buffers[2];
-
-      bi0 = b[0];
-      bi1 = b[1];
-
-      f[0] = bi0;
-      f[1] = bi1;
-      f += 2;
-      b += 2;
-      n_left -= 2;
-
-      /* Prefetch buffers for next iteration. */
-      vlib_prefetch_buffer_with_index (vm, b[0], WRITE);
-      vlib_prefetch_buffer_with_index (vm, b[1], WRITE);
-
-      b0 = vlib_get_buffer (vm, bi0);
-      b1 = vlib_get_buffer (vm, bi1);
-
-      free0 = (b0->flags & VLIB_BUFFER_RECYCLE) == 0;
-      free1 = (b1->flags & VLIB_BUFFER_RECYCLE) == 0;
-
-      /* Must be before init which will over-write buffer flags. */
-      if (follow_buffer_next)
-	{
-	  n[0] = b0->next_buffer;
-	  free_next0 = free0 && (b0->flags & VLIB_BUFFER_NEXT_PRESENT) != 0;
-	  n += free_next0;
-
-	  n[0] = b1->next_buffer;
-	  free_next1 = free1 && (b1->flags & VLIB_BUFFER_NEXT_PRESENT) != 0;
-	  n += free_next1;
-	}
-      else
-	free_next0 = free_next1 = 0;
-
-      /* Must be before init which will over-write buffer free list. */
-      fi0 = b0->free_list_index;
-      fi1 = b1->free_list_index;
-
-      if (PREDICT_FALSE (fi0 != fi || fi1 != fi))
-	goto slow_path_x2;
-
-      binit0 = free0 ? b0 : &dummy_buffers[0];
-      binit1 = free1 ? b1 : &dummy_buffers[1];
-
-      vlib_buffer_init_two_for_free_list (binit0, binit1, fl);
-      continue;
-
-    slow_path_x2:
-      /* Backup speculation. */
-      f -= 2;
-      n -= free_next0 + free_next1;
-
-      _vec_len (fl->buffers) = f - fl->buffers;
-
-      fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0);
-      fl1 = pool_elt_at_index (bm->buffer_free_list_pool, fi1);
-
-      vlib_buffer_add_to_free_list (vm, fl0, bi0, free0);
-      if (PREDICT_FALSE (fl0->buffers_added_to_freelist_function != 0))
-	{
-	  int i;
-	  for (i = 0; i < vec_len (announce_list); i++)
-	    if (fl0 == announce_list[i])
-	      goto no_fl0;
-	  vec_add1 (announce_list, fl0);
-	}
-    no_fl0:
-      if (PREDICT_FALSE (fl1->buffers_added_to_freelist_function != 0))
-	{
-	  int i;
-	  for (i = 0; i < vec_len (announce_list); i++)
-	    if (fl1 == announce_list[i])
-	      goto no_fl1;
-	  vec_add1 (announce_list, fl1);
-	}
-
-    no_fl1:
-      vlib_buffer_add_to_free_list (vm, fl1, bi1, free1);
-
-      /* Possibly change current free list. */
-      if (fi0 != fi && fi1 != fi)
-	{
-	  fi = fi1;
-	  fl = pool_elt_at_index (bm->buffer_free_list_pool, fi);
-	}
-
-      vec_add2_aligned (fl->buffers, f, n_left, CLIB_CACHE_LINE_BYTES);
-    }
-
-  while (n_left >= 1)
+  for (i = 0; i < n_buffers; i++)
     {
-      vlib_buffer_t *b0, *binit0, dummy_buffers[1];
+      vlib_buffer_t *b;
+      u32 bi = buffers[i];
 
-      bi0 = b[0];
-      f[0] = bi0;
-      f += 1;
-      b += 1;
-      n_left -= 1;
-
-      b0 = vlib_get_buffer (vm, bi0);
+      b = vlib_get_buffer (vm, bi);
 
-      free0 = (b0->flags & VLIB_BUFFER_RECYCLE) == 0;
+      fl = vlib_buffer_get_buffer_free_list (vm, b, &fi);
 
-      /* Must be before init which will over-write buffer flags. */
-      if (follow_buffer_next)
+      /* The only current use of this callback: multicast recycle */
+      if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0))
 	{
-	  n[0] = b0->next_buffer;
-	  free_next0 = free0 && (b0->flags & VLIB_BUFFER_NEXT_PRESENT) != 0;
-	  n += free_next0;
+	  int j;
+
+	  vlib_buffer_add_to_free_list
+	    (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0);
+
+	  for (j = 0; j < vec_len (bm->announce_list); j++)
+	    {
+	      if (fl == bm->announce_list[j])
+		goto already_announced;
+	    }
+	  vec_add1 (bm->announce_list, fl);
+	already_announced:
+	  ;
 	}
       else
-	free_next0 = 0;
-
-      /* Must be before init which will over-write buffer free list. */
-      fi0 = b0->free_list_index;
-
-      if (PREDICT_FALSE (fi0 != fi))
-	goto slow_path_x1;
-
-      binit0 = free0 ? b0 : &dummy_buffers[0];
-
-      vlib_buffer_init_for_free_list (binit0, fl);
-      continue;
-
-    slow_path_x1:
-      /* Backup speculation. */
-      f -= 1;
-      n -= free_next0;
-
-      _vec_len (fl->buffers) = f - fl->buffers;
-
-      fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0);
-
-      vlib_buffer_add_to_free_list (vm, fl0, bi0, free0);
-      if (PREDICT_FALSE (fl0->buffers_added_to_freelist_function != 0))
 	{
-	  int i;
-	  for (i = 0; i < vec_len (announce_list); i++)
-	    if (fl0 == announce_list[i])
-	      goto no_fl00;
-	  vec_add1 (announce_list, fl0);
+	  if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0))
+	    {
+	      u32 flags, next;
+
+	      do
+		{
+		  vlib_buffer_t *nb = vlib_get_buffer (vm, bi);
+		  flags = nb->flags;
+		  next = nb->next_buffer;
+		  if (nb->n_add_refs)
+		    nb->n_add_refs--;
+		  else
+		    {
+		      vlib_buffer_validate_alloc_free (vm, &bi, 1,
+						       VLIB_BUFFER_KNOWN_ALLOCATED);
+		      vlib_buffer_add_to_free_list (vm, fl, bi, 1);
+		    }
+		  bi = next;
+		}
+	      while (follow_buffer_next
+		     && (flags & VLIB_BUFFER_NEXT_PRESENT));
+
+	    }
 	}
-
-    no_fl00:
-      fi = fi0;
-      fl = pool_elt_at_index (bm->buffer_free_list_pool, fi);
-
-      vec_add2_aligned (fl->buffers, f, n_left, CLIB_CACHE_LINE_BYTES);
     }
-
-  if (follow_buffer_next && ((n_left = n - next_to_free[i_next_to_free]) > 0))
-    {
-      b = next_to_free[i_next_to_free];
-      i_next_to_free ^= 1;
-      goto again;
-    }
-
-  _vec_len (fl->buffers) = f - fl->buffers;
-
-  if (vec_len (announce_list))
+  if (vec_len (bm->announce_list))
     {
       vlib_buffer_free_list_t *fl;
-      for (i = 0; i < vec_len (announce_list); i++)
+      for (i = 0; i < vec_len (bm->announce_list); i++)
 	{
-	  fl = announce_list[i];
+	  fl = bm->announce_list[i];
 	  fl->buffers_added_to_freelist_function (vm, fl);
 	}
-      _vec_len (announce_list) = 0;
+      _vec_len (bm->announce_list) = 0;
     }
 }
 
@@ -922,6 +783,7 @@ vlib_packet_template_init (vlib_main_t * vm,
   fl->buffer_init_template.current_data = 0;
   fl->buffer_init_template.current_length = n_packet_data_bytes;
   fl->buffer_init_template.flags = 0;
+  fl->buffer_init_template.n_add_refs = 0;
   vlib_worker_thread_barrier_release (vm);
 }
 
diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h
index 8ea79502..b4015b30 100644
--- a/src/vlib/buffer.h
+++ b/src/vlib/buffer.h
@@ -119,7 +119,9 @@ typedef struct
                            feature node
                         */
 
-  u8 dont_waste_me[3]; /**< Available space in the (precious)
+  u8 n_add_refs; /**< Number of additional references to this buffer. */
+
+  u8 dont_waste_me[2]; /**< Available space in the (precious)
                           first 32 octets of buffer metadata
                           Before allocating any of it, discussion required!
                        */
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 0b583a61..e0fde5f2 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -530,6 +530,110 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b)
   return fd;
 }
 
+/** \brief Create multiple clones of buffer and store them in the supplied array
+
+    @param vm - (vlib_main_t *) vlib main data structure pointer
+    @param src_buffer - (u32) source buffer index
+    @param buffers - (u32 * ) buffer index array
+    @param n_buffers - (u8) number of buffer clones requested
+    @param head_end_offset - (u16) offset relative to current position
+           where packet head ends
+    @return - (u8) number of buffers actually cloned, may be
+    less than the number requested or zero
+*/
+
+always_inline u8
+vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers,
+		   u8 n_buffers, u16 head_end_offset)
+{
+  u8 i;
+  vlib_buffer_t *s = vlib_get_buffer (vm, src_buffer);
+
+  ASSERT (s->n_add_refs == 0);
+  ASSERT (n_buffers);
+
+  if (s->current_length <= head_end_offset + CLIB_CACHE_LINE_BYTES * 2)
+    {
+      buffers[0] = src_buffer;
+      for (i = 1; i < n_buffers; i++)
+	{
+	  vlib_buffer_t *d;
+	  d = vlib_buffer_copy (vm, s);
+	  if (d == 0)
+	    return i;
+	  buffers[i] = vlib_get_buffer_index (vm, d);
+
+	}
+      return n_buffers;
+    }
+
+  n_buffers = vlib_buffer_alloc_from_free_list (vm, buffers, n_buffers,
+						s->free_list_index);
+  if (PREDICT_FALSE (n_buffers == 0))
+    {
+      buffers[0] = src_buffer;
+      return 1;
+    }
+
+  for (i = 0; i < n_buffers; i++)
+    {
+      vlib_buffer_t *d = vlib_get_buffer (vm, buffers[i]);
+      d->current_data = s->current_data;
+      d->current_length = head_end_offset;
+      d->free_list_index = s->free_list_index;
+      d->total_length_not_including_first_buffer =
+	s->total_length_not_including_first_buffer + s->current_length -
+	head_end_offset;
+      d->flags = s->flags | VLIB_BUFFER_NEXT_PRESENT;
+      d->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
+      clib_memcpy (d->opaque, s->opaque, sizeof (s->opaque));
+      clib_memcpy (vlib_buffer_get_current (d), vlib_buffer_get_current (s),
+		   head_end_offset);
+      d->next_buffer = src_buffer;
+    }
+  vlib_buffer_advance (s, head_end_offset);
+  s->n_add_refs = n_buffers - 1;
+  while (s->flags & VLIB_BUFFER_NEXT_PRESENT)
+    {
+      s = vlib_get_buffer (vm, s->next_buffer);
+      s->n_add_refs = n_buffers - 1;
+    }
+
+  return n_buffers;
+}
+
+/** \brief Attach cloned tail to the buffer
+
+    @param vm - (vlib_main_t *) vlib main data structure pointer
+    @param head - (vlib_buffer_t *) head buffer
+    @param tail - (vlib_buffer_t *) tail buffer to clone and attach to head
+*/
+
+always_inline void
+vlib_buffer_attach_clone (vlib_main_t * vm, vlib_buffer_t * head,
+			  vlib_buffer_t * tail)
+{
+  ASSERT ((head->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
+  ASSERT (head->free_list_index == tail->free_list_index);
+
+  head->flags |= VLIB_BUFFER_NEXT_PRESENT;
+  head->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+  head->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
+  head->flags |= (tail->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID);
+  head->next_buffer = vlib_get_buffer_index (vm, tail);
+  head->total_length_not_including_first_buffer = tail->current_length +
+    tail->total_length_not_including_first_buffer;
+
+next_segment:
+  __sync_add_and_fetch (&tail->n_add_refs, 1);
+
+  if (tail->flags & VLIB_BUFFER_NEXT_PRESENT)
+    {
+      tail = vlib_get_buffer (vm, tail->next_buffer);
+      goto next_segment;
+    }
+}
+
 /* Initializes the buffer as an empty packet with no chained buffers. */
 always_inline void
 vlib_buffer_chain_init (vlib_buffer_t * first)
@@ -695,7 +799,8 @@ vlib_buffer_init_for_free_list (vlib_buffer_t * dst,
   _(flags);
   _(free_list_index);
 #undef _
-  ASSERT (dst->total_length_not_including_first_buffer == 0);
+  dst->total_length_not_including_first_buffer = 0;
+  ASSERT (dst->n_add_refs == 0);
 }
 
 always_inline void
@@ -727,8 +832,10 @@ vlib_buffer_init_two_for_free_list (vlib_buffer_t * dst0,
   _(flags);
   _(free_list_index);
 #undef _
-  ASSERT (dst0->total_length_not_including_first_buffer == 0);
-  ASSERT (dst1->total_length_not_including_first_buffer == 0);
+  dst0->total_length_not_including_first_buffer = 0;
+  dst1->total_length_not_including_first_buffer = 0;
+  ASSERT (dst0->n_add_refs == 0);
+  ASSERT (dst1->n_add_refs == 0);
 }
 
 #if CLIB_DEBUG > 0
diff --git a/src/vnet/devices/dpdk/buffer.c b/src/vnet/devices/dpdk/buffer.c
index 007093e4..f95d4cb5 100644
--- a/src/vnet/devices/dpdk/buffer.c
+++ b/src/vnet/devices/dpdk/buffer.c
@@ -79,20 +79,46 @@
 STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM,
 	       "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM");
 
+static_always_inline void
+dpdk_rte_pktmbuf_free (vlib_main_t * vm, vlib_buffer_t * b)
+{
+  vlib_buffer_t *hb = b;
+  struct rte_mbuf *mb;
+  u32 next, flags;
+  mb = rte_mbuf_from_vlib_buffer (hb);
+
+next:
+  flags = b->flags;
+  next = b->next_buffer;
+  mb = rte_mbuf_from_vlib_buffer (b);
+
+  if (PREDICT_FALSE (b->n_add_refs))
+    {
+      rte_mbuf_refcnt_update (mb, b->n_add_refs);
+      b->n_add_refs = 0;
+    }
+
+  rte_pktmbuf_free_seg (mb);
+
+  if (flags & VLIB_BUFFER_NEXT_PRESENT)
+    {
+      b = vlib_get_buffer (vm, next);
+      goto next;
+    }
+}
+
 static void
 del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f)
 {
   u32 i;
-  struct rte_mbuf *mb;
   vlib_buffer_t *b;
 
   for (i = 0; i < vec_len (f->buffers); i++)
     {
       b = vlib_get_buffer (vm, f->buffers[i]);
-      mb = rte_mbuf_from_vlib_buffer (b);
-      ASSERT (rte_mbuf_refcnt_read (mb) == 1);
-      rte_pktmbuf_free (mb);
+      dpdk_rte_pktmbuf_free (vm, b);
     }
+
   vec_free (f->name);
   vec_free (f->buffers);
 }
@@ -325,7 +351,6 @@ vlib_buffer_free_inline (vlib_main_t * vm,
   for (i = 0; i < n_buffers; i++)
     {
       vlib_buffer_t *b;
-      struct rte_mbuf *mb;
 
       b = vlib_get_buffer (vm, buffers[i]);
 
@@ -351,11 +376,7 @@ vlib_buffer_free_inline (vlib_main_t * vm,
       else
 	{
 	  if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0))
-	    {
-	      mb = rte_mbuf_from_vlib_buffer (b);
-	      ASSERT (rte_mbuf_refcnt_read (mb) == 1);
-	      rte_pktmbuf_free (mb);
-	    }
+	    dpdk_rte_pktmbuf_free (vm, b);
 	}
     }
   if (vec_len (bm->announce_list))
diff --git a/src/vnet/devices/dpdk/device.c b/src/vnet/devices/dpdk/device.c
index c9d9a567..17397900 100644
--- a/src/vnet/devices/dpdk/device.c
+++ b/src/vnet/devices/dpdk/device.c
@@ -168,13 +168,11 @@ dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b,
 	{
 	  b2 = vlib_get_buffer (vm, b2->next_buffer);
 	  mb = rte_mbuf_from_vlib_buffer (b2);
-	  last_mb->next = mb;
-	  last_mb = mb;
 	  rte_pktmbuf_reset (mb);
 	}
     }
 
-  first_mb = mb = rte_mbuf_from_vlib_buffer (b);
+  last_mb = first_mb = mb = rte_mbuf_from_vlib_buffer (b);
   first_mb->nb_segs = 1;
   mb->data_len = b->current_length;
   mb->pkt_len = maybe_multiseg ? vlib_buffer_length_in_chain (vm, b) :
@@ -185,10 +183,17 @@ dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b,
     {
       b = vlib_get_buffer (vm, b->next_buffer);
       mb = rte_mbuf_from_vlib_buffer (b);
+      last_mb->next = mb;
+      last_mb = mb;
       mb->data_len = b->current_length;
       mb->pkt_len = b->current_length;
       mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data;
       first_mb->nb_segs++;
+      if (PREDICT_FALSE (b->n_add_refs))
+	{
+	  rte_mbuf_refcnt_update (mb, b->n_add_refs);
+	  b->n_add_refs = 0;
+	}
     }
 }
 
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index a67b19c8..a9f334be 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -625,6 +625,7 @@ replicate_inline (vlib_main_t * vm,
                   vlib_frame_t * frame)
 {
     vlib_combined_counter_main_t * cm = &replicate_main.repm_counters;
+    replicate_main_t * rm = &replicate_main;
     u32 n_left_from, * from, * to_next, next_index;
     u32 cpu_index = os_get_cpu_number();
 
@@ -645,13 +646,11 @@ replicate_inline (vlib_main_t * vm,
             const replicate_t *rep0;
             vlib_buffer_t * b0, *c0;
             const dpo_id_t *dpo0;
+	    u8 num_cloned;
 
             bi0 = from[0];
-            to_next[0] = bi0;
             from += 1;
-            to_next += 1;
             n_left_from -= 1;
-            n_left_to_next -= 1;
 
             b0 = vlib_get_buffer (vm, bi0);
             repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
@@ -661,50 +660,21 @@ replicate_inline (vlib_main_t * vm,
                 cm, cpu_index, repi0, 1,
                 vlib_buffer_length_in_chain(vm, b0));
 
-            /* ship the original to the first bucket */
-            dpo0 = replicate_get_bucket_i(rep0, 0);
-            next0 = dpo0->dpoi_next_node;
-            vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+	    vec_validate (rm->clones[cpu_index], rep0->rep_n_buckets - 1);
 
-            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
-            {
-                replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
-                t->rep_index = repi0;
-                t->dpo = *dpo0;
-            }
-            vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                             to_next, n_left_to_next,
-                                             bi0, next0);
+	    num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[cpu_index], rep0->rep_n_buckets, 128);
 
-            /* ship copies to the rest of the buckets */
-            for (bucket = 1; bucket < rep0->rep_n_buckets; bucket++)
-            {
-                /*
-                 * After the enqueue of the first buffer, and of all subsequent
-                 * buffers in this loop, it is possible that we over-flow the
-                 * frame of the to-next node. When this happens we need to 'put'
-                 * that full frame to the node and get a fresh empty one.
-                 * Note that these are macros with side effects that change
-                 * to_next & n_left_to_next
-                 */
-                if (PREDICT_FALSE(0 == n_left_to_next))
-                {
-                    vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-                    vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-                }
+	    if (num_cloned != rep0->rep_n_buckets)
+	      {
+		vlib_node_increment_counter
+		  (vm, node->node_index,
+		   REPLICATE_DPO_ERROR_BUFFER_ALLOCATION_FAILURE, 1);
+	      }
 
-                /* Make a copy. This can fail, so deal with it. */
-                c0 = vlib_buffer_copy(vm, b0);
-                if (PREDICT_FALSE (c0 == 0))
-                  {
-                    vlib_node_increment_counter 
-                      (vm, node->node_index, 
-                       REPLICATE_DPO_ERROR_BUFFER_ALLOCATION_FAILURE,
-                       1);
-                    continue;
-                  }
-                
-                ci0 = vlib_get_buffer_index(vm, c0);
+            for (bucket = 0; bucket < num_cloned; bucket++)
+            {
+                ci0 = rm->clones[cpu_index][bucket];
+                c0 = vlib_get_buffer(vm, ci0);
 
                 to_next[0] = ci0;
                 to_next += 1;
@@ -724,7 +694,13 @@ replicate_inline (vlib_main_t * vm,
                 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                                  to_next, n_left_to_next,
                                                  ci0, next0);
+		if (PREDICT_FALSE (n_left_to_next == 0))
+		  {
+		    vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+		    vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+		  }
             }
+	    vec_reset_length (rm->clones[cpu_index]);
         }
 
         vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -797,3 +773,15 @@ VLIB_REGISTER_NODE (ip6_replicate_node) = {
       [0] = "error-drop",
   },
 };
+
+clib_error_t *
+replicate_dpo_init (vlib_main_t * vm)
+{
+  replicate_main_t * rm = &replicate_main;
+
+  vec_validate (rm->clones, vlib_num_workers());
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (replicate_dpo_init);
diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h
index a564739c..77273015 100644
--- a/src/vnet/dpo/replicate_dpo.h
+++ b/src/vnet/dpo/replicate_dpo.h
@@ -32,6 +32,9 @@
 typedef struct replicate_main_t_
 {
     vlib_combined_counter_main_t repm_counters;
+
+    /* per-cpu vector of cloned packets */
+    u32 **clones;
 } replicate_main_t;
 
 extern replicate_main_t replicate_main;
-- 
cgit 1.2.3-korg


From 696e88da9799056036f329676213f3c0c0a1db9c Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Thu, 16 Mar 2017 07:34:55 -0400
Subject: MPLS performance improvements.

 1 - Quad loop lookup and label imposition.
 2 - optimise imposition for the 1 label case
 3 - input gets TTL from header directly (no byte swap)

Change-Id: I59204c9e5d134b0df75d7afa43e360f946d1ffe7
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/vnet.am                   |   2 +-
 src/vnet/dpo/mpls_label_dpo.c | 146 +++++++++++++++----
 src/vnet/dpo/mpls_label_dpo.h |   4 +-
 src/vnet/mpls/error.def       |  10 +-
 src/vnet/mpls/mpls.c          |   8 ++
 src/vnet/mpls/mpls.h          |  46 ------
 src/vnet/mpls/mpls_input.c    | 324 ++++++++++++++++++++++++++++++++++++++++++
 src/vnet/mpls/mpls_lookup.c   | 116 +++++++++++++--
 src/vnet/mpls/mpls_output.c   |  10 ++
 src/vnet/mpls/node.c          | 317 -----------------------------------------
 10 files changed, 571 insertions(+), 412 deletions(-)
 create mode 100644 src/vnet/mpls/mpls_input.c
 delete mode 100644 src/vnet/mpls/node.c

(limited to 'src/vnet/dpo')

diff --git a/src/vnet.am b/src/vnet.am
index 223d5d93..9e099f33 100644
--- a/src/vnet.am
+++ b/src/vnet.am
@@ -531,7 +531,7 @@ libvnet_la_SOURCES +=				\
  vnet/mpls/mpls_lookup.c			\
  vnet/mpls/mpls_output.c			\
  vnet/mpls/mpls_features.c			\
- vnet/mpls/node.c				\
+ vnet/mpls/mpls_input.c				\
  vnet/mpls/interface.c			        \
  vnet/mpls/mpls_tunnel.c		        \
  vnet/mpls/pg.c			        \
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
index bbdc9666..be9b2850 100644
--- a/src/vnet/dpo/mpls_label_dpo.c
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -160,6 +160,33 @@ typedef struct mpls_label_imposition_trace_t_
     mpls_unicast_header_t hdr;
 } mpls_label_imposition_trace_t;
 
+always_inline mpls_unicast_header_t *
+mpls_label_paint (vlib_buffer_t * b0,
+                  mpls_label_dpo_t *mld0,
+                  u8 ttl0)
+{
+    mpls_unicast_header_t *hdr0;
+
+    vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
+
+    hdr0 = vlib_buffer_get_current(b0);
+
+    if (PREDICT_TRUE(1 == mld0->mld_n_labels))
+    {
+        /* optimise for the common case of one label */
+        *hdr0 = mld0->mld_hdr[0];
+    }
+    else
+    {
+        clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
+        hdr0 = hdr0 + (mld0->mld_n_labels - 1);
+    }
+    /* fixup the TTL for the inner most label */
+    ((char*)hdr0)[3] = ttl0;
+
+    return (hdr0);
+}
+
 always_inline uword
 mpls_label_imposition_inline (vlib_main_t * vm,
                               vlib_node_runtime_t * node,
@@ -180,45 +207,59 @@ mpls_label_imposition_inline (vlib_main_t * vm,
 
         vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
 
-        while (n_left_from >= 4 && n_left_to_next >= 2)
+        while (n_left_from >= 8 && n_left_to_next >= 4)
         {
-            mpls_unicast_header_t *hdr0, *hdr1;
-            mpls_label_dpo_t *mld0, *mld1;
-            u32 bi0, mldi0, bi1, mldi1;
-            vlib_buffer_t * b0, *b1;
-            u32 next0, next1;
-            u8 ttl0, ttl1;
+            u32 bi0, mldi0, bi1, mldi1, bi2, mldi2, bi3, mldi3;
+            mpls_unicast_header_t *hdr0, *hdr1, *hdr2, *hdr3;
+            mpls_label_dpo_t *mld0, *mld1, *mld2, *mld3;
+            vlib_buffer_t * b0, *b1, * b2, *b3;
+            u32 next0, next1, next2, next3;
+            u8 ttl0, ttl1,ttl2, ttl3 ;
 
             bi0 = to_next[0] = from[0];
             bi1 = to_next[1] = from[1];
+            bi2 = to_next[2] = from[2];
+            bi3 = to_next[3] = from[3];
 
             /* Prefetch next iteration. */
             {
-                vlib_buffer_t * p2, * p3;
+                vlib_buffer_t * p2, * p3, *p4, *p5;
 
                 p2 = vlib_get_buffer (vm, from[2]);
                 p3 = vlib_get_buffer (vm, from[3]);
+                p4 = vlib_get_buffer (vm, from[4]);
+                p5 = vlib_get_buffer (vm, from[5]);
 
                 vlib_prefetch_buffer_header (p2, STORE);
                 vlib_prefetch_buffer_header (p3, STORE);
+                vlib_prefetch_buffer_header (p4, STORE);
+                vlib_prefetch_buffer_header (p5, STORE);
 
                 CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
                 CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE);
+                CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE);
+                CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE);
             }
 
-            from += 2;
-            to_next += 2;
-            n_left_from -= 2;
-            n_left_to_next -= 2;
+            from += 4;
+            to_next += 4;
+            n_left_from -= 4;
+            n_left_to_next -= 4;
 
             b0 = vlib_get_buffer (vm, bi0);
             b1 = vlib_get_buffer (vm, bi1);
+            b2 = vlib_get_buffer (vm, bi2);
+            b3 = vlib_get_buffer (vm, bi3);
 
             /* dst lookup was done by ip4 lookup */
             mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
             mldi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+            mldi2 = vnet_buffer(b2)->ip.adj_index[VLIB_TX];
+            mldi3 = vnet_buffer(b3)->ip.adj_index[VLIB_TX];
             mld0 = mpls_label_dpo_get(mldi0);
             mld1 = mpls_label_dpo_get(mldi1);
+            mld2 = mpls_label_dpo_get(mldi2);
+            mld3 = mpls_label_dpo_get(mldi3);
 
             if (payload_is_ip4)
             {
@@ -227,23 +268,37 @@ mpls_label_imposition_inline (vlib_main_t * vm,
                  */
                 ip4_header_t * ip0 = vlib_buffer_get_current(b0);
                 ip4_header_t * ip1 = vlib_buffer_get_current(b1);
+                ip4_header_t * ip2 = vlib_buffer_get_current(b2);
+                ip4_header_t * ip3 = vlib_buffer_get_current(b3);
                 u32 checksum0;
                 u32 checksum1;
+                u32 checksum2;
+                u32 checksum3;
 
                 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
                 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
+                checksum2 = ip2->checksum + clib_host_to_net_u16 (0x0100);
+                checksum3 = ip3->checksum + clib_host_to_net_u16 (0x0100);
 
                 checksum0 += checksum0 >= 0xffff;
                 checksum1 += checksum1 >= 0xffff;
+                checksum2 += checksum2 >= 0xffff;
+                checksum3 += checksum3 >= 0xffff;
 
                 ip0->checksum = checksum0;
                 ip1->checksum = checksum1;
+                ip2->checksum = checksum2;
+                ip3->checksum = checksum3;
 
                 ip0->ttl -= 1;
                 ip1->ttl -= 1;
+                ip2->ttl -= 1;
+                ip3->ttl -= 1;
 
                 ttl1 = ip1->ttl;
                 ttl0 = ip0->ttl;
+                ttl3 = ip3->ttl;
+                ttl2 = ip2->ttl;
             }
             else if (payload_is_ip6)
             {
@@ -252,13 +307,18 @@ mpls_label_imposition_inline (vlib_main_t * vm,
                  */
                 ip6_header_t * ip0 = vlib_buffer_get_current(b0);
                 ip6_header_t * ip1 = vlib_buffer_get_current(b1);
-
+                ip6_header_t * ip2 = vlib_buffer_get_current(b2);
+                ip6_header_t * ip3 = vlib_buffer_get_current(b3);
 
                 ip0->hop_limit -= 1;
                 ip1->hop_limit -= 1;
+                ip2->hop_limit -= 1;
+                ip3->hop_limit -= 1;
 
                 ttl0 = ip0->hop_limit;
                 ttl1 = ip1->hop_limit;
+                ttl2 = ip2->hop_limit;
+                ttl3 = ip3->hop_limit;
             }
             else
             {
@@ -294,30 +354,45 @@ mpls_label_imposition_inline (vlib_main_t * vm,
                 {
                     ttl1 = 255;
                 }
+                if (PREDICT_TRUE(vnet_buffer(b2)->mpls.first))
+                {
+                    /* a TTL of 1 would be decremented to 0 just below */
+                    ASSERT(1 != vnet_buffer (b2)->mpls.ttl);
+                    ttl2 = vnet_buffer(b2)->mpls.ttl - 1;
+                }
+                else
+                {
+                    ttl2 = 255;
+                }
+                if (PREDICT_TRUE(vnet_buffer(b3)->mpls.first))
+                {
+                    ASSERT(1 != vnet_buffer (b3)->mpls.ttl);
+                    ttl3 = vnet_buffer(b3)->mpls.ttl - 1;
+                }
+                else
+                {
+                    ttl3 = 255;
+                }
             }
             vnet_buffer(b0)->mpls.first = 0;
             vnet_buffer(b1)->mpls.first = 0;
+            vnet_buffer(b2)->mpls.first = 0;
+            vnet_buffer(b3)->mpls.first = 0;
 
             /* Paint the MPLS header */
-            vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
-            vlib_buffer_advance(b1, -(mld1->mld_n_hdr_bytes));
-
-            hdr0 = vlib_buffer_get_current(b0);
-            hdr1 = vlib_buffer_get_current(b1);
-
-            clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
-            clib_memcpy(hdr1, mld1->mld_hdr, mld1->mld_n_hdr_bytes);
-
-            /* fixup the TTL for the inner most label */
-            hdr0 = hdr0 + (mld0->mld_n_labels - 1);
-            hdr1 = hdr1 + (mld1->mld_n_labels - 1);
-            ((char*)hdr0)[3] = ttl0;
-            ((char*)hdr1)[3] = ttl1;
+            hdr0 = mpls_label_paint(b0, mld0, ttl0);
+            hdr1 = mpls_label_paint(b1, mld1, ttl1);
+            hdr2 = mpls_label_paint(b2, mld2, ttl2);
+            hdr3 = mpls_label_paint(b3, mld3, ttl3);
 
             next0 = mld0->mld_dpo.dpoi_next_node;
             next1 = mld1->mld_dpo.dpoi_next_node;
+            next2 = mld2->mld_dpo.dpoi_next_node;
+            next3 = mld3->mld_dpo.dpoi_next_node;
             vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
             vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mld1->mld_dpo.dpoi_index;
+            vnet_buffer(b2)->ip.adj_index[VLIB_TX] = mld2->mld_dpo.dpoi_index;
+            vnet_buffer(b3)->ip.adj_index[VLIB_TX] = mld3->mld_dpo.dpoi_index;
 
             if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
             {
@@ -331,10 +406,23 @@ mpls_label_imposition_inline (vlib_main_t * vm,
                     vlib_add_trace (vm, node, b1, sizeof (*tr));
                 tr->hdr = *hdr1;
             }
+            if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_label_imposition_trace_t *tr =
+                    vlib_add_trace (vm, node, b2, sizeof (*tr));
+                tr->hdr = *hdr2;
+            }
+            if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_label_imposition_trace_t *tr =
+                    vlib_add_trace (vm, node, b3, sizeof (*tr));
+                tr->hdr = *hdr3;
+            }
 
-            vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+            vlib_validate_buffer_enqueue_x4(vm, node, next_index, to_next,
                                             n_left_to_next,
-                                            bi0, bi1, next0, next1);
+                                            bi0, bi1, bi2, bi3,
+                                            next0, next1, next2, next3);
         }
 
         while (n_left_from > 0 && n_left_to_next > 0)
diff --git a/src/vnet/dpo/mpls_label_dpo.h b/src/vnet/dpo/mpls_label_dpo.h
index 89bcb093..e23f3d26 100644
--- a/src/vnet/dpo/mpls_label_dpo.h
+++ b/src/vnet/dpo/mpls_label_dpo.h
@@ -61,8 +61,8 @@ typedef struct mpls_label_dpo_t
  * Should this get any bigger then we will need to reconsider how many labels
  * can be pushed in one object.
  */
-_Static_assert((sizeof(mpls_label_dpo_t) <= CLIB_CACHE_LINE_BYTES),
-	       "MPLS label DPO is larger than one cache line.");
+STATIC_ASSERT((sizeof(mpls_label_dpo_t) <= CLIB_CACHE_LINE_BYTES),
+              "MPLS label DPO is larger than one cache line.");
 
 /**
  * @brief Create an MPLS label object
diff --git a/src/vnet/mpls/error.def b/src/vnet/mpls/error.def
index de8b9665..34a46522 100644
--- a/src/vnet/mpls/error.def
+++ b/src/vnet/mpls/error.def
@@ -18,11 +18,11 @@
 mpls_error (NONE, "no error")
 mpls_error (UNKNOWN_PROTOCOL, "unknown protocol")
 mpls_error (UNSUPPORTED_VERSION, "unsupported version")
-mpls_error (PKTS_DECAP, "MPLS-GRE input packets decapsulated")
-mpls_error (PKTS_ENCAP, "MPLS-GRE output packets encapsulated")
-mpls_error (NO_LABEL, "MPLS-GRE no label for fib/dst")
-mpls_error (TTL_EXPIRED, "MPLS-GRE ttl expired")
-mpls_error (S_NOT_SET, "MPLS-GRE s-bit not set")
+mpls_error (PKTS_DECAP, "MPLS input packets decapsulated")
+mpls_error (PKTS_ENCAP, "MPLS output packets encapsulated")
+mpls_error (NO_LABEL, "MPLS no label for fib/dst")
+mpls_error (TTL_EXPIRED, "MPLS ttl expired")
+mpls_error (S_NOT_SET, "MPLS s-bit not set")
 mpls_error (BAD_LABEL, "invalid FIB id in label")
 mpls_error (NOT_IP4, "non-ip4 packets dropped")
 mpls_error (DISALLOWED_FIB, "disallowed FIB id")
diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c
index 7ae4aa00..482577b1 100644
--- a/src/vnet/mpls/mpls.c
+++ b/src/vnet/mpls/mpls.c
@@ -161,6 +161,14 @@ u8 * format_mpls_unicast_header_net_byte_order (u8 * s, va_list * args)
                  &h_host);
 }
 
+typedef struct {
+  u32 fib_index;
+  u32 entry_index;
+  u32 dest;
+  u32 s_bit;
+  u32 label;
+} show_mpls_fib_t;
+
 int
 mpls_dest_cmp(void * a1, void * a2)
 {
diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h
index b6fdbce7..300f2cfd 100644
--- a/src/vnet/mpls/mpls.h
+++ b/src/vnet/mpls/mpls.h
@@ -86,16 +86,12 @@ extern mpls_main_t mpls_main;
 
 extern clib_error_t * mpls_feature_init(vlib_main_t * vm);
 
-format_function_t format_mpls_protocol;
-format_function_t format_mpls_encap_index;
-
 format_function_t format_mpls_eos_bit;
 format_function_t format_mpls_unicast_header_net_byte_order;
 format_function_t format_mpls_unicast_label;
 format_function_t format_mpls_header;
 
 extern vlib_node_registration_t mpls_input_node;
-extern vlib_node_registration_t mpls_policy_encap_node;
 extern vlib_node_registration_t mpls_output_node;
 extern vlib_node_registration_t mpls_midchain_node;
 
@@ -118,48 +114,6 @@ u8 mpls_sw_interface_is_enabled (u32 sw_if_index);
 
 int mpls_fib_reset_labels (u32 fib_id);
 
-#define foreach_mpls_input_next			\
-_(DROP, "error-drop")                           \
-_(LOOKUP, "mpls-lookup")
-
-typedef enum {
-#define _(s,n) MPLS_INPUT_NEXT_##s,
-  foreach_mpls_input_next
-#undef _
-  MPLS_INPUT_N_NEXT,
-} mpls_input_next_t;
-
-#define foreach_mpls_lookup_next        	\
-_(DROP, "error-drop")                           \
-_(IP4_INPUT, "ip4-input")                       \
-_(L2_OUTPUT, "l2-output")
-
-// FIXME remove.
-typedef enum {
-#define _(s,n) MPLS_LOOKUP_NEXT_##s,
-  foreach_mpls_lookup_next
-#undef _
-  MPLS_LOOKUP_N_NEXT,
-} mpls_lookup_next_t;
-
-#define foreach_mpls_output_next        	\
-_(DROP, "error-drop")
-
-typedef enum {
-#define _(s,n) MPLS_OUTPUT_NEXT_##s,
-  foreach_mpls_output_next
-#undef _
-  MPLS_OUTPUT_N_NEXT,
-} mpls_output_next_t;
-
-typedef struct {
-  u32 fib_index;
-  u32 entry_index;
-  u32 dest;
-  u32 s_bit;
-  u32 label;
-} show_mpls_fib_t;
-
 int
 mpls_dest_cmp(void * a1, void * a2);
 
diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c
new file mode 100644
index 00000000..893c4511
--- /dev/null
+++ b/src/vnet/mpls/mpls_input.c
@@ -0,0 +1,324 @@
+/*
+ * mpls_input.c: MPLS input
+ *
+ * Copyright (c) 2012-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/feature/feature.h>
+
+typedef struct {
+  u32 next_index;
+  u32 label_net_byte_order;
+} mpls_input_trace_t;
+
+#define foreach_mpls_input_next			\
+_(DROP, "error-drop")                           \
+_(LOOKUP, "mpls-lookup")
+
+typedef enum {
+#define _(s,n) MPLS_INPUT_NEXT_##s,
+  foreach_mpls_input_next
+#undef _
+  MPLS_INPUT_N_NEXT,
+} mpls_input_next_t;
+
+static u8 *
+format_mpls_input_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  mpls_input_trace_t * t = va_arg (*args, mpls_input_trace_t *);
+  char * next_name;
+  u32 label;
+  next_name = "BUG!";
+  label = clib_net_to_host_u32(t->label_net_byte_order);
+
+#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b;
+  foreach_mpls_input_next;
+#undef _
+  
+  s = format (s, "MPLS: next %s[%d]  label %d ttl %d", 
+              next_name, t->next_index,
+	      vnet_mpls_uc_get_label(label),
+	      vnet_mpls_uc_get_ttl(label));
+
+  return s;
+}
+
+vlib_node_registration_t mpls_input_node;
+
+typedef struct {
+  u32 last_label;
+  u32 last_inner_fib_index;
+  u32 last_outer_fib_index;
+  mpls_main_t * mpls_main;
+} mpls_input_runtime_t;
+
+static inline uword
+mpls_input_inline (vlib_main_t * vm,
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * from_frame)
+{
+  u32 n_left_from, next_index, * from, * to_next;
+  mpls_input_runtime_t * rt;
+  mpls_main_t * mm;
+  u32 cpu_index = os_get_cpu_number();
+  vlib_simple_counter_main_t * cm;
+  vnet_main_t * vnm = vnet_get_main();
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+  rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
+  mm = rt->mpls_main;
+  /* 
+   * Force an initial lookup every time, in case the control-plane
+   * changed the label->FIB mapping.
+   */
+  rt->last_label = ~0;
+
+  next_index = node->cached_next_index;
+
+  cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+                         VNET_INTERFACE_COUNTER_MPLS);
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          u32 bi0, next0, sw_if_index0;
+          u32 bi1, next1, sw_if_index1;
+          vlib_buffer_t *b0, *b1;
+          char *h0, *h1;
+
+          /* Prefetch next iteration. */
+          {
+              vlib_buffer_t * p2, * p3;
+
+              p2 = vlib_get_buffer (vm, from[2]);
+              p3 = vlib_get_buffer (vm, from[3]);
+
+              vlib_prefetch_buffer_header (p2, STORE);
+              vlib_prefetch_buffer_header (p3, STORE);
+
+              CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
+              CLIB_PREFETCH (p3->data, sizeof (h1[0]), STORE);
+          }
+
+          bi0 = to_next[0] = from[0];
+          bi1 = to_next[1] = from[1];
+
+          from += 2;
+          to_next += 2;
+          n_left_from -= 2;
+          n_left_to_next -= 2;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+
+          h0 = vlib_buffer_get_current (b0);
+          h1 = vlib_buffer_get_current (b1);
+
+          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+          sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+          /* TTL expired? */
+          if (PREDICT_FALSE(h0[3] == 0))
+          {
+              next0 = MPLS_INPUT_NEXT_DROP;
+              b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+          }
+          else
+          {
+              next0 = MPLS_INPUT_NEXT_LOOKUP;
+              vnet_feature_arc_start(mm->input_feature_arc_index,
+                                     sw_if_index0, &next0, b0);
+              vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+          }
+
+          if (PREDICT_FALSE(h1[3] == 0))
+          {
+              next1 = MPLS_INPUT_NEXT_DROP;
+              b1->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+          }
+          else
+          {
+              next1 = MPLS_INPUT_NEXT_LOOKUP;
+              vnet_feature_arc_start(mm->input_feature_arc_index,
+                                     sw_if_index1, &next1, b1);
+              vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+          }
+
+          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+          {
+              mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+                                                       b0, sizeof (*tr));
+              tr->next_index = next0;
+              tr->label_net_byte_order = *((u32*)h0);
+          }
+          if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+          {
+              mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+                                                       b1, sizeof (*tr));
+              tr->next_index = next1;
+              tr->label_net_byte_order = *((u32*)h1);
+          }
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1,
+                                           next0, next1);
+        }
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+          u32 sw_if_index0, next0, bi0;
+	  vlib_buffer_t * b0;
+	  char * h0;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+          h0 = vlib_buffer_get_current (b0);
+	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+	  /* TTL expired? */
+	  if (PREDICT_FALSE(h0[3] == 0))
+           {
+              next0 = MPLS_INPUT_NEXT_DROP;
+              b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+            }
+	  else
+            {
+              next0 = MPLS_INPUT_NEXT_LOOKUP;
+	      vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0);
+              vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+            }
+
+          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) 
+            {
+              mpls_input_trace_t *tr = vlib_add_trace (vm, node, 
+						       b0, sizeof (*tr));
+              tr->next_index = next0;
+              tr->label_net_byte_order = *(u32*)h0;
+            }
+
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  vlib_node_increment_counter (vm, mpls_input_node.index,
+                               MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors);
+  return from_frame->n_vectors;
+}
+
+static uword
+mpls_input (vlib_main_t * vm,
+            vlib_node_runtime_t * node,
+            vlib_frame_t * from_frame)
+{
+  return mpls_input_inline (vm, node, from_frame);
+}
+
+static char * mpls_error_strings[] = {
+#define mpls_error(n,s) s,
+#include "error.def"
+#undef mpls_error
+};
+
+VLIB_REGISTER_NODE (mpls_input_node) = {
+  .function = mpls_input,
+  .name = "mpls-input",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+
+  .runtime_data_bytes = sizeof(mpls_input_runtime_t),
+
+  .n_errors = MPLS_N_ERROR,
+  .error_strings = mpls_error_strings,
+
+  .n_next_nodes = MPLS_INPUT_N_NEXT,
+  .next_nodes = {
+#define _(s,n) [MPLS_INPUT_NEXT_##s] = n,
+    foreach_mpls_input_next
+#undef _
+  },
+
+  .format_buffer = format_mpls_unicast_header_net_byte_order,
+  .format_trace = format_mpls_input_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_input_node, mpls_input)
+
+static void
+mpls_setup_nodes (vlib_main_t * vm)
+{
+  mpls_input_runtime_t * rt;
+  pg_node_t * pn;
+
+  pn = pg_get_node (mpls_input_node.index);
+  pn->unformat_edit = unformat_pg_mpls_header;
+
+  rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
+  rt->last_label = (u32) ~0;
+  rt->last_inner_fib_index = 0;
+  rt->last_outer_fib_index = 0;
+  rt->mpls_main = &mpls_main;
+
+  ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST,
+                                mpls_input_node.index);
+}
+
+static clib_error_t * mpls_input_init (vlib_main_t * vm)
+{
+  clib_error_t * error; 
+
+  error = vlib_call_init_function (vm, mpls_init);
+  if (error)
+    clib_error_report (error);
+
+  mpls_setup_nodes (vm);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (mpls_input_init);
+
+static clib_error_t * mpls_input_worker_init (vlib_main_t * vm)
+{
+  mpls_input_runtime_t * rt;
+  rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
+  rt->last_label = (u32) ~0;
+  rt->last_inner_fib_index = 0;
+  rt->last_outer_fib_index = 0;
+  rt->mpls_main = &mpls_main;
+  return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (mpls_input_worker_init);
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index 2d34cbde..475bb204 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -80,7 +80,7 @@ mpls_lookup (vlib_main_t * vm,
       vlib_get_next_frame (vm, node, next_index,
                            to_next, n_left_to_next);
 
-      while (n_left_from >= 4 && n_left_to_next >= 2)
+      while (n_left_from >= 8 && n_left_to_next >= 4)
         {
           u32 lbi0, next0, lfib_index0, bi0, hash_c0;
           const mpls_unicast_header_t * h0;
@@ -92,46 +92,79 @@ mpls_lookup (vlib_main_t * vm,
           const load_balance_t *lb1;
           const dpo_id_t *dpo1;
           vlib_buffer_t * b1;
+          u32 lbi2, next2, lfib_index2, bi2, hash_c2;
+          const mpls_unicast_header_t * h2;
+          const load_balance_t *lb2;
+          const dpo_id_t *dpo2;
+          vlib_buffer_t * b2;
+          u32 lbi3, next3, lfib_index3, bi3, hash_c3;
+          const mpls_unicast_header_t * h3;
+          const load_balance_t *lb3;
+          const dpo_id_t *dpo3;
+          vlib_buffer_t * b3;
 
            /* Prefetch next iteration. */
           {
-            vlib_buffer_t * p2, * p3;
+              vlib_buffer_t * p2, * p3, *p4, *p5;
 
             p2 = vlib_get_buffer (vm, from[2]);
             p3 = vlib_get_buffer (vm, from[3]);
+            p4 = vlib_get_buffer (vm, from[4]);
+            p5 = vlib_get_buffer (vm, from[5]);
 
             vlib_prefetch_buffer_header (p2, STORE);
             vlib_prefetch_buffer_header (p3, STORE);
+            vlib_prefetch_buffer_header (p4, STORE);
+            vlib_prefetch_buffer_header (p5, STORE);
 
             CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
             CLIB_PREFETCH (p3->data, sizeof (h0[0]), STORE);
+            CLIB_PREFETCH (p4->data, sizeof (h0[0]), STORE);
+            CLIB_PREFETCH (p5->data, sizeof (h0[0]), STORE);
           }
 
           bi0 = to_next[0] = from[0];
           bi1 = to_next[1] = from[1];
+          bi2 = to_next[2] = from[2];
+          bi3 = to_next[3] = from[3];
 
-          from += 2;
-          n_left_from -= 2;
-          to_next += 2;
-          n_left_to_next -= 2;
+          from += 4;
+          n_left_from -= 4;
+          to_next += 4;
+          n_left_to_next -= 4;
 
           b0 = vlib_get_buffer (vm, bi0);
           b1 = vlib_get_buffer (vm, bi1);
+          b2 = vlib_get_buffer (vm, bi2);
+          b3 = vlib_get_buffer (vm, bi3);
           h0 = vlib_buffer_get_current (b0);
           h1 = vlib_buffer_get_current (b1);
+          h2 = vlib_buffer_get_current (b2);
+          h3 = vlib_buffer_get_current (b3);
 
           lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index,
                                 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
           lfib_index1 = vec_elt(mm->fib_index_by_sw_if_index,
                                 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+          lfib_index2 = vec_elt(mm->fib_index_by_sw_if_index,
+                                vnet_buffer(b2)->sw_if_index[VLIB_RX]);
+          lfib_index3 = vec_elt(mm->fib_index_by_sw_if_index,
+                                vnet_buffer(b3)->sw_if_index[VLIB_RX]);
 
           lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0);
           lbi1 = mpls_fib_table_forwarding_lookup (lfib_index1, h1);
+          lbi2 = mpls_fib_table_forwarding_lookup (lfib_index2, h2);
+          lbi3 = mpls_fib_table_forwarding_lookup (lfib_index3, h3);
+
           lb0 = load_balance_get(lbi0);
           lb1 = load_balance_get(lbi1);
+          lb2 = load_balance_get(lbi2);
+          lb3 = load_balance_get(lbi3);
 
           hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
           hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0;
+          hash_c2 = vnet_buffer(b2)->ip.flow_hash = 0;
+          hash_c3 = vnet_buffer(b3)->ip.flow_hash = 0;
 
           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
           {
@@ -143,11 +176,25 @@ mpls_lookup (vlib_main_t * vm,
               hash_c1 = vnet_buffer (b1)->ip.flow_hash =
                   mpls_compute_flow_hash(h1, lb1->lb_hash_config);
           }
+          if (PREDICT_FALSE(lb2->lb_n_buckets > 1))
+          {
+              hash_c2 = vnet_buffer (b2)->ip.flow_hash =
+                  mpls_compute_flow_hash(h2, lb2->lb_hash_config);
+          }
+          if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
+          {
+              hash_c3 = vnet_buffer (b3)->ip.flow_hash =
+                  mpls_compute_flow_hash(h3, lb3->lb_hash_config);
+          }
 
           ASSERT (lb0->lb_n_buckets > 0);
           ASSERT (is_pow2 (lb0->lb_n_buckets));
           ASSERT (lb1->lb_n_buckets > 0);
           ASSERT (is_pow2 (lb1->lb_n_buckets));
+          ASSERT (lb2->lb_n_buckets > 0);
+          ASSERT (is_pow2 (lb2->lb_n_buckets));
+          ASSERT (lb3->lb_n_buckets > 0);
+          ASSERT (is_pow2 (lb3->lb_n_buckets));
 
           dpo0 = load_balance_get_bucket_i(lb0,
                                            (hash_c0 &
@@ -155,12 +202,22 @@ mpls_lookup (vlib_main_t * vm,
           dpo1 = load_balance_get_bucket_i(lb1,
                                            (hash_c1 &
                                             (lb1->lb_n_buckets_minus_1)));
+          dpo2 = load_balance_get_bucket_i(lb2,
+                                           (hash_c2 &
+                                            (lb2->lb_n_buckets_minus_1)));
+          dpo3 = load_balance_get_bucket_i(lb3,
+                                           (hash_c3 &
+                                            (lb3->lb_n_buckets_minus_1)));
 
           next0 = dpo0->dpoi_next_node;
           next1 = dpo1->dpoi_next_node;
+          next2 = dpo2->dpoi_next_node;
+          next3 = dpo3->dpoi_next_node;
 
           vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
           vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+          vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+          vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
 
           vlib_increment_combined_counter
               (cm, cpu_index, lbi0, 1,
@@ -168,6 +225,12 @@ mpls_lookup (vlib_main_t * vm,
           vlib_increment_combined_counter
               (cm, cpu_index, lbi1, 1,
                vlib_buffer_length_in_chain (vm, b1));
+          vlib_increment_combined_counter
+              (cm, cpu_index, lbi2, 1,
+               vlib_buffer_length_in_chain (vm, b2));
+          vlib_increment_combined_counter
+              (cm, cpu_index, lbi3, 1,
+               vlib_buffer_length_in_chain (vm, b3));
 
           /*
            * before we pop the label copy th values we need to maintain.
@@ -181,12 +244,20 @@ mpls_lookup (vlib_main_t * vm,
           vnet_buffer (b1)->mpls.ttl = ((char*)h1)[3];
           vnet_buffer (b1)->mpls.exp = (((char*)h1)[2] & 0xe) >> 1;
           vnet_buffer (b1)->mpls.first = 1;
+          vnet_buffer (b2)->mpls.ttl = ((char*)h2)[3];
+          vnet_buffer (b2)->mpls.exp = (((char*)h2)[2] & 0xe) >> 1;
+          vnet_buffer (b2)->mpls.first = 1;
+          vnet_buffer (b3)->mpls.ttl = ((char*)h3)[3];
+          vnet_buffer (b3)->mpls.exp = (((char*)h3)[2] & 0xe) >> 1;
+          vnet_buffer (b3)->mpls.first = 1;
 
           /*
            * pop the label that was just used in the lookup
            */
           vlib_buffer_advance(b0, sizeof(*h0));
           vlib_buffer_advance(b1, sizeof(*h1));
+          vlib_buffer_advance(b2, sizeof(*h2));
+          vlib_buffer_advance(b3, sizeof(*h3));
 
           if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
           {
@@ -210,9 +281,32 @@ mpls_lookup (vlib_main_t * vm,
               tr->label_net_byte_order = h1->label_exp_s_ttl;
           }
 
-          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+          if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
+          {
+              mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+                                                        b2, sizeof (*tr));
+              tr->next_index = next2;
+              tr->lb_index = lbi2;
+              tr->lfib_index = lfib_index2;
+              tr->hash = hash_c2;
+              tr->label_net_byte_order = h2->label_exp_s_ttl;
+          }
+
+          if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
+          {
+              mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+                                                        b3, sizeof (*tr));
+              tr->next_index = next3;
+              tr->lb_index = lbi3;
+              tr->lfib_index = lfib_index3;
+              tr->hash = hash_c3;
+              tr->label_net_byte_order = h3->label_exp_s_ttl;
+          }
+
+          vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
                                            to_next, n_left_to_next,
-                                           bi0, bi1, next0, next1);
+                                           bi0, bi1, bi2, bi3,
+                                           next0, next1, next2, next3);
         }
 
       while (n_left_from > 0 && n_left_to_next > 0)
@@ -361,10 +455,9 @@ mpls_load_balance (vlib_main_t * vm,
 
       while (n_left_from >= 4 && n_left_to_next >= 2)
         {
-          mpls_lookup_next_t next0, next1;
           const load_balance_t *lb0, *lb1;
           vlib_buffer_t * p0, *p1;
-          u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
+          u32 pi0, lbi0, hc0, pi1, lbi1, hc1, next0, next1;
           const mpls_unicast_header_t *mpls0, *mpls1;
           const dpo_id_t *dpo0, *dpo1;
 
@@ -465,10 +558,9 @@ mpls_load_balance (vlib_main_t * vm,
 
       while (n_left_from > 0 && n_left_to_next > 0)
         {
-          mpls_lookup_next_t next0;
           const load_balance_t *lb0;
           vlib_buffer_t * p0;
-          u32 pi0, lbi0, hc0;
+          u32 pi0, lbi0, hc0, next0;
           const mpls_unicast_header_t *mpls0;
           const dpo_id_t *dpo0;
 
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index cf354006..2d8bd0c9 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -29,6 +29,16 @@ typedef struct {
   u8 packet_data[64 - 1*sizeof(u32)];
 } mpls_output_trace_t;
 
+#define foreach_mpls_output_next        	\
+_(DROP, "error-drop")
+
+typedef enum {
+#define _(s,n) MPLS_OUTPUT_NEXT_##s,
+  foreach_mpls_output_next
+#undef _
+  MPLS_OUTPUT_N_NEXT,
+} mpls_output_next_t;
+
 static u8 *
 format_mpls_output_trace (u8 * s, va_list * args)
 {
diff --git a/src/vnet/mpls/node.c b/src/vnet/mpls/node.c
deleted file mode 100644
index 5b407fae..00000000
--- a/src/vnet/mpls/node.c
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * node.c: MPLS input
- *
- * Copyright (c) 2012-2014 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/pg/pg.h>
-#include <vnet/mpls/mpls.h>
-#include <vnet/feature/feature.h>
-
-typedef struct {
-  u32 next_index;
-  u32 label_host_byte_order;
-} mpls_input_trace_t;
-
-static u8 *
-format_mpls_input_trace (u8 * s, va_list * args)
-{
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  mpls_input_trace_t * t = va_arg (*args, mpls_input_trace_t *);
-  char * next_name;
-
-  next_name = "BUG!";
-
-#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b;
-  foreach_mpls_input_next;
-#undef _
-  
-  s = format (s, "MPLS: next %s[%d]  label %d ttl %d", 
-              next_name, t->next_index,
-	      vnet_mpls_uc_get_label(t->label_host_byte_order),
-	      vnet_mpls_uc_get_ttl(t->label_host_byte_order));
-
-  return s;
-}
-
-vlib_node_registration_t mpls_input_node;
-
-typedef struct {
-  u32 last_label;
-  u32 last_inner_fib_index;
-  u32 last_outer_fib_index;
-  mpls_main_t * mpls_main;
-} mpls_input_runtime_t;
-
-static inline uword
-mpls_input_inline (vlib_main_t * vm,
-                   vlib_node_runtime_t * node,
-                   vlib_frame_t * from_frame)
-{
-  u32 n_left_from, next_index, * from, * to_next;
-  mpls_input_runtime_t * rt;
-  mpls_main_t * mm;
-  u32 cpu_index = os_get_cpu_number();
-  vlib_simple_counter_main_t * cm;
-  vnet_main_t * vnm = vnet_get_main();
-
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-  rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
-  mm = rt->mpls_main;
-  /* 
-   * Force an initial lookup every time, in case the control-plane
-   * changed the label->FIB mapping.
-   */
-  rt->last_label = ~0;
-
-  next_index = node->cached_next_index;
-
-  cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
-                         VNET_INTERFACE_COUNTER_MPLS);
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index,
-			   to_next, n_left_to_next);
-
-      while (n_left_from >= 4 && n_left_to_next >= 2)
-        {
-          u32 label0, bi0, next0, sw_if_index0;
-          u32 label1, bi1, next1, sw_if_index1;
-          mpls_unicast_header_t *h0, *h1;
-          vlib_buffer_t *b0, *b1;
-
-          /* Prefetch next iteration. */
-          {
-            vlib_buffer_t * p2, * p3;
-
-            p2 = vlib_get_buffer (vm, from[2]);
-            p3 = vlib_get_buffer (vm, from[3]);
-
-            vlib_prefetch_buffer_header (p2, STORE);
-            vlib_prefetch_buffer_header (p3, STORE);
-
-            CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
-            CLIB_PREFETCH (p3->data, sizeof (h1[0]), STORE);
-          }
-
-
-          bi0 = to_next[0] = from[0];
-          bi1 = to_next[1] = from[1];
-
-          from += 2;
-          to_next += 2;
-          n_left_from -= 2;
-          n_left_to_next -= 2;
-
-          b0 = vlib_get_buffer (vm, bi0);
-          b1 = vlib_get_buffer (vm, bi1);
-
-          h0 = vlib_buffer_get_current (b0);
-          h1 = vlib_buffer_get_current (b1);
-
-          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-          sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
-
-          label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl);
-          label1 = clib_net_to_host_u32 (h1->label_exp_s_ttl);
-
-          /* TTL expired? */
-          if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0))
-           {
-              next0 = MPLS_INPUT_NEXT_DROP;
-              b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
-            }
-          else
-            {
-              next0 = MPLS_INPUT_NEXT_LOOKUP;
-              vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0);
-              vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
-            }
-
-          if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label1) == 0))
-           {
-              next1 = MPLS_INPUT_NEXT_DROP;
-              b1->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
-            }
-          else
-            {
-              next1 = MPLS_INPUT_NEXT_LOOKUP;
-              vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index1, &next1, b1);
-              vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
-            }
-
-          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
-            {
-              mpls_input_trace_t *tr = vlib_add_trace (vm, node,
-                                                       b0, sizeof (*tr));
-              tr->next_index = next0;
-              tr->label_host_byte_order = label0;
-            }
-          if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
-            {
-              mpls_input_trace_t *tr = vlib_add_trace (vm, node,
-                                                       b1, sizeof (*tr));
-              tr->next_index = next1;
-              tr->label_host_byte_order = label1;
-            }
-
-          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
-                                           to_next, n_left_to_next,
-                                           bi0, bi1, next0, next1);
-        }
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  u32 bi0;
-	  vlib_buffer_t * b0;
-	  mpls_unicast_header_t * h0;
-          u32 label0;
-	  u32 next0 = 0;
-          u32 sw_if_index0;
-
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-          h0 = vlib_buffer_get_current (b0);
-	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
-	  label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl);
-	  /* TTL expired? */
-	  if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0))
-           {
-              next0 = MPLS_INPUT_NEXT_DROP;
-              b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
-            }
-	  else
-            {
-              next0 = MPLS_INPUT_NEXT_LOOKUP;
-	      vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0);
-              vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
-            }
-
-          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) 
-            {
-              mpls_input_trace_t *tr = vlib_add_trace (vm, node, 
-						       b0, sizeof (*tr));
-              tr->next_index = next0;
-              tr->label_host_byte_order = label0;
-            }
-
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-					   to_next, n_left_to_next,
-					   bi0, next0);
-	}
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-  vlib_node_increment_counter (vm, mpls_input_node.index,
-                               MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors);
-  return from_frame->n_vectors;
-}
-
-static uword
-mpls_input (vlib_main_t * vm,
-            vlib_node_runtime_t * node,
-            vlib_frame_t * from_frame)
-{
-  return mpls_input_inline (vm, node, from_frame);
-}
-
-static char * mpls_error_strings[] = {
-#define mpls_error(n,s) s,
-#include "error.def"
-#undef mpls_error
-};
-
-VLIB_REGISTER_NODE (mpls_input_node) = {
-  .function = mpls_input,
-  .name = "mpls-input",
-  /* Takes a vector of packets. */
-  .vector_size = sizeof (u32),
-
-  .runtime_data_bytes = sizeof(mpls_input_runtime_t),
-
-  .n_errors = MPLS_N_ERROR,
-  .error_strings = mpls_error_strings,
-
-  .n_next_nodes = MPLS_INPUT_N_NEXT,
-  .next_nodes = {
-#define _(s,n) [MPLS_INPUT_NEXT_##s] = n,
-    foreach_mpls_input_next
-#undef _
-  },
-
-  .format_buffer = format_mpls_unicast_header_net_byte_order,
-  .format_trace = format_mpls_input_trace,
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (mpls_input_node, mpls_input)
-
-static void
-mpls_setup_nodes (vlib_main_t * vm)
-{
-  mpls_input_runtime_t * rt;
-  pg_node_t * pn;
-
-  pn = pg_get_node (mpls_input_node.index);
-  pn->unformat_edit = unformat_pg_mpls_header;
-
-  rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
-  rt->last_label = (u32) ~0;
-  rt->last_inner_fib_index = 0;
-  rt->last_outer_fib_index = 0;
-  rt->mpls_main = &mpls_main;
-
-  ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST,
-                                mpls_input_node.index);
-}
-
-static clib_error_t * mpls_input_init (vlib_main_t * vm)
-{
-  clib_error_t * error; 
-
-  error = vlib_call_init_function (vm, mpls_init);
-  if (error)
-    clib_error_report (error);
-
-  mpls_setup_nodes (vm);
-
-  return 0;
-}
-
-VLIB_INIT_FUNCTION (mpls_input_init);
-
-static clib_error_t * mpls_input_worker_init (vlib_main_t * vm)
-{
-  mpls_input_runtime_t * rt;
-  rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
-  rt->last_label = (u32) ~0;
-  rt->last_inner_fib_index = 0;
-  rt->last_outer_fib_index = 0;
-  rt->mpls_main = &mpls_main;
-  return 0;
-}
-
-VLIB_WORKER_INIT_FUNCTION (mpls_input_worker_init);
-- 
cgit 1.2.3-korg


From 04a75e3230ab71248fc29a56b9f64bdaee0c17ac Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Thu, 23 Mar 2017 06:46:01 -0700
Subject: Mtrie optimisations

1 - make the default route non-special, i.e. like any other less specific route. Consequently, all buckets have a valid index of either a leaf or a ply. Checks for special indices in the data-path can thus be removed.
2 - since all leaves are now 'real', i.e. they represent a real load-balance object, telling whether a ply slot is 'empty' requires checking that the prefix length of the leaf occupying the slot is less than the minimum value for that ply.
3 - when removing a leaf find the cover first, then recurse down the ply and replace the old leaf with the cover. This saves us a ply walk.

Change-Id: Idd523019e8bb1b6ef527b1f5279a5e24bcf18332
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/vnet/adj/adj.c             |  12 ---
 src/vnet/adj/adj_l2.c          |   3 -
 src/vnet/cop/ip4_whitelist.c   |  15 +--
 src/vnet/dpo/load_balance.c    |   7 ++
 src/vnet/dpo/lookup_dpo.c      |  16 +---
 src/vnet/fib/ip4_fib.c         |   3 +-
 src/vnet/fib/ip4_fib.h         |   6 +-
 src/vnet/ip/ip4_forward.c      |  65 +++----------
 src/vnet/ip/ip4_mtrie.c        | 204 +++++++++++++++++++++++++----------------
 src/vnet/ip/ip4_mtrie.h        | 125 +++++++++++--------------
 src/vnet/ip/ip4_source_check.c |  13 +--
 src/vnet/ip/ip6_forward.c      |   6 --
 src/vnet/mpls/mpls_output.c    |   8 --
 13 files changed, 208 insertions(+), 275 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
index 9a01e89d..c1d036a0 100644
--- a/src/vnet/adj/adj.c
+++ b/src/vnet/adj/adj.c
@@ -20,13 +20,6 @@
 #include <vnet/adj/adj_mcast.h>
 #include <vnet/fib/fib_node_list.h>
 
-/*
- * Special Adj with index zero. we need to define this since the v4 mtrie
- * assumes an index of 0 implies the ply is empty. therefore all 'real'
- * adjs need a non-zero index.
- */
-static ip_adjacency_t *special_v4_miss_adj_with_index_zero;
-
 /* Adjacency packet/byte counters indexed by adjacency index. */
 vlib_combined_counter_main_t adjacency_counters;
 
@@ -426,11 +419,6 @@ adj_module_init (vlib_main_t * vm)
     adj_midchain_module_init();
     adj_mcast_module_init();
 
-    /*
-     * one special adj to reserve index 0
-     */
-    special_v4_miss_adj_with_index_zero = adj_alloc(FIB_PROTOCOL_IP4);
-
     return (NULL);
 }
 
diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c
index fb64e505..f68e54e0 100644
--- a/src/vnet/adj/adj_l2.c
+++ b/src/vnet/adj/adj_l2.c
@@ -81,9 +81,6 @@ adj_l2_rewrite_inline (vlib_main_t * vm,
 
 	    adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
 
-	    /* We should never rewrite a pkt using the MISS adjacency */
-	    ASSERT(adj_index0);
-
 	    adj0 = adj_get (adj_index0);
 
 	    /* Guess we are only writing on simple Ethernet header. */
diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c
index d5121e72..ccb9dc03 100644
--- a/src/vnet/cop/ip4_whitelist.c
+++ b/src/vnet/cop/ip4_whitelist.c
@@ -125,10 +125,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
 
 	  mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
 
-      	  leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-
-      	  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
-                                             &ip0->src_address, 0);
+          leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
 
       	  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
                                              &ip0->src_address, 1);
@@ -167,10 +164,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
                sizeof (c1[0]));
 	  mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;
 
-      	  leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
-
-      	  leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
-                                             &ip1->src_address, 0);
+          leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
 
       	  leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
                                              &ip1->src_address, 1);
@@ -267,10 +261,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
 
 	  mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
 
-	  leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-
-	  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, 
-                                             &ip0->src_address, 0);
+          leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
 
 	  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, 
                                              &ip0->src_address, 1);
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index e9fb5d9d..d5e98e4e 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -829,6 +829,13 @@ load_balance_module_init (void)
 {
     dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes);
 
+    /*
+     * Special LB with index zero. we need to define this since the v4 mtrie
+     * assumes an index of 0 implies the ply is empty. therefore all 'real'
+     * adjs need a non-zero index.
+     */
+    load_balance_create(0, DPO_PROTO_IP4, 0);
+
     load_balance_map_module_init();
 }
 
diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
index 96fedd27..3726c8fe 100644
--- a/src/vnet/dpo/lookup_dpo.c
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -205,19 +205,16 @@ ip4_src_fib_lookup_one (u32 src_fib_index0,
                         const ip4_address_t * addr0,
                         u32 * src_adj_index0)
 {
-    ip4_fib_mtrie_leaf_t leaf0, leaf1;
+    ip4_fib_mtrie_leaf_t leaf0;
     ip4_fib_mtrie_t * mtrie0;
 
     mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
 
-    leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
-    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0);
+    leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0);
     leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
     leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
     leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
 
-    /* Handle default route. */
-    leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
     src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
 }
 
@@ -235,10 +232,8 @@ ip4_src_fib_lookup_two (u32 src_fib_index0,
     mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
     mtrie1 = &ip4_fib_get (src_fib_index1)->mtrie;
 
-    leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
-
-    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0);
-    leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0);
+    leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0);
+    leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, addr1);
 
     leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
     leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1);
@@ -249,9 +244,6 @@ ip4_src_fib_lookup_two (u32 src_fib_index0,
     leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
     leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3);
 
-    /* Handle default route. */
-    leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
-    leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
     src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
     src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
 }
diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c
index e8211c80..a7915620 100644
--- a/src/vnet/fib/ip4_fib.c
+++ b/src/vnet/fib/ip4_fib.c
@@ -158,8 +158,9 @@ ip4_fib_table_destroy (ip4_fib_t *fib)
 
     /*
      * remove all the specials we added when the table was created.
+     * In reverse order so the default route is last.
      */
-    for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
+    for (ii = ARRAY_LEN(ip4_specials) - 1; ii >= 0; ii--)
     {
 	fib_prefix_t prefix = ip4_specials[ii].ift_prefix;
 
diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h
index a8dc68b5..243fd77f 100644
--- a/src/vnet/fib/ip4_fib.h
+++ b/src/vnet/fib/ip4_fib.h
@@ -133,15 +133,11 @@ ip4_fib_forwarding_lookup (u32 fib_index,
 
     mtrie = &ip4_fib_get(fib_index)->mtrie;
 
-    leaf = IP4_FIB_MTRIE_LEAF_ROOT;
-    leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 0);
+    leaf = ip4_fib_mtrie_lookup_step_one (mtrie, addr);
     leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1);
     leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2);
     leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3);
 
-    /* Handle default route. */
-    leaf = (leaf == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie->default_leaf : leaf);
-    
     return (ip4_fib_mtrie_leaf_get_adj_index(leaf));
 }
 
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index bbba4b70..60e15d41 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -186,12 +186,11 @@ ip4_lookup_inline (vlib_main_t * vm,
 	      mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
 	      mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
 
-	      leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
 
-	      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
-	      leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
-	      leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
-	      leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
+	      leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
+	      leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
+	      leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
+	      leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
 	    }
 
 	  tcp0 = (void *) (ip0 + 1);
@@ -241,25 +240,13 @@ ip4_lookup_inline (vlib_main_t * vm,
 	    }
 	  else
 	    {
-	      /* Handle default route. */
-	      leaf0 =
-		(leaf0 ==
-		 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
-	      leaf1 =
-		(leaf1 ==
-		 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
-	      leaf2 =
-		(leaf2 ==
-		 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
-	      leaf3 =
-		(leaf3 ==
-		 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
 	      lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
 	      lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
 	      lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
 	      lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
 	    }
 
+	  ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
 	  lb0 = load_balance_get (lb_index0);
 	  lb1 = load_balance_get (lb_index1);
 	  lb2 = load_balance_get (lb_index2);
@@ -384,9 +371,7 @@ ip4_lookup_inline (vlib_main_t * vm,
 	    {
 	      mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
 
-	      leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-
-	      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
+	      leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
 	    }
 
 	  tcp0 = (void *) (ip0 + 1);
@@ -408,12 +393,10 @@ ip4_lookup_inline (vlib_main_t * vm,
 	  else
 	    {
 	      /* Handle default route. */
-	      leaf0 =
-		(leaf0 ==
-		 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
 	      lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
 	    }
 
+	  ASSERT (lbi0);
 	  lb0 = load_balance_get (lbi0);
 
 	  /* Use flow hash to compute multipath adjacency. */
@@ -1623,12 +1606,8 @@ ip4_local_inline (vlib_main_t * vm,
 	  mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
 	  mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
 
-	  leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
-
-	  leaf0 =
-	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
-	  leaf1 =
-	    ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
+	  leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+	  leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
 
 	  /* Treat IP frag packets as "experimental" protocol for now
 	     until support of IP frag reassembly is implemented */
@@ -1722,12 +1701,6 @@ ip4_local_inline (vlib_main_t * vm,
 	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
 	  leaf1 =
 	    ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
-	  leaf0 =
-	    (leaf0 ==
-	     IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
-	  leaf1 =
-	    (leaf1 ==
-	     IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
 
 	  vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
 	    ip4_fib_mtrie_leaf_get_adj_index (leaf0);
@@ -1831,10 +1804,7 @@ ip4_local_inline (vlib_main_t * vm,
 
 	  mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
 
-	  leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-
-	  leaf0 =
-	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
+	  leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
 
 	  /* Treat IP frag packets as "experimental" protocol for now
 	     until support of IP frag reassembly is implemented */
@@ -1897,9 +1867,6 @@ ip4_local_inline (vlib_main_t * vm,
 
 	  leaf0 =
 	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
-	  leaf0 =
-	    (leaf0 ==
-	     IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
 
 	  lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
 	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
@@ -2453,9 +2420,6 @@ ip4_rewrite_inline (vlib_main_t * vm,
 					      cpu_index, adj_index1);
 	    }
 
-	  /* We should never rewrite a pkt using the MISS adjacency */
-	  ASSERT (adj_index0 && adj_index1);
-
 	  ip0 = vlib_buffer_get_current (p0);
 	  ip1 = vlib_buffer_get_current (p1);
 
@@ -2643,9 +2607,6 @@ ip4_rewrite_inline (vlib_main_t * vm,
 
 	  adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
 
-	  /* We should never rewrite a pkt using the MISS adjacency */
-	  ASSERT (adj_index0);
-
 	  adj0 = ip_get_adjacency (lm, adj_index0);
 
 	  ip0 = vlib_buffer_get_current (p0);
@@ -2967,15 +2928,11 @@ ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
 
   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
 
-  leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
+  leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
 
-  /* Handle default route. */
-  leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
-
   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
 
   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
index 6e3d0e80..317d8f10 100644
--- a/src/vnet/ip/ip4_mtrie.c
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -40,14 +40,64 @@
 #include <vnet/ip/ip.h>
 #include <vnet/fib/fib_entry.h>
 
+always_inline u32
+ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_ply_t * p, u8 dst_byte)
+{
+  /*
+   * It's 'non-empty' if the length of the leaf stored is greater than the
+   * length of a leaf in the covering ply. i.e. the leaf is more specific
+   * than it's would be cover in the covering ply
+   */
+  if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base)
+    return (1);
+  return (0);
+}
+
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
+{
+  ip4_fib_mtrie_leaf_t l;
+  l = 1 + 2 * adj_index;
+  ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
+  return l;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
+{
+  return (n & 1) == 0;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
+{
+  ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
+  return n >> 1;
+}
+
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
+{
+  ip4_fib_mtrie_leaf_t l;
+  l = 0 + 2 * i;
+  ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
+  return l;
+}
+
 static void
-ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init,
-	  uword prefix_len)
+ply_init (ip4_fib_mtrie_ply_t * p,
+	  ip4_fib_mtrie_leaf_t init, u32 prefix_len, u32 ply_base_len)
 {
-  p->n_non_empty_leafs =
-    ip4_fib_mtrie_leaf_is_empty (init) ? 0 : ARRAY_LEN (p->leaves);
+  /*
+   * A leaf is 'empty' if it represents a leaf from the covering PLY
+   * i.e. if the prefix length of the leaf is less than or equal to
+   * the prefix length of the PLY
+   */
+  p->n_non_empty_leafs = (prefix_len > ply_base_len ?
+			  ARRAY_LEN (p->leaves) : 0);
   memset (p->dst_address_bits_of_leaves, prefix_len,
 	  sizeof (p->dst_address_bits_of_leaves));
+  p->dst_address_bits_base = ply_base_len;
 
   /* Initialize leaves. */
 #ifdef CLIB_HAVE_VEC128
@@ -92,15 +142,16 @@ ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init,
 }
 
 static ip4_fib_mtrie_leaf_t
-ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf,
-	    uword prefix_len)
+ply_create (ip4_fib_mtrie_t * m,
+	    ip4_fib_mtrie_leaf_t init_leaf,
+	    u32 leaf_prefix_len, u32 ply_base_len)
 {
   ip4_fib_mtrie_ply_t *p;
 
   /* Get cache aligned ply. */
   pool_get_aligned (m->ply_pool, p, sizeof (p[0]));
 
-  ply_init (p, init_leaf, prefix_len);
+  ply_init (p, init_leaf, leaf_prefix_len, ply_base_len);
   return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool);
 }
 
@@ -128,7 +179,7 @@ ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
     }
 
   if (is_root)
-    ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0);
+    ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0, 0);
   else
     pool_put (m->ply_pool, p);
 }
@@ -140,38 +191,13 @@ ip4_fib_free (ip4_fib_mtrie_t * m)
   ply_free (m, root_ply);
 }
 
-u32
-ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst)
-{
-  ip4_fib_mtrie_ply_t *p = pool_elt_at_index (m->ply_pool, 0);
-  ip4_fib_mtrie_leaf_t l;
-
-  l = p->leaves[dst.as_u8[0]];
-  if (ip4_fib_mtrie_leaf_is_terminal (l))
-    return ip4_fib_mtrie_leaf_get_adj_index (l);
-
-  p = get_next_ply_for_leaf (m, l);
-  l = p->leaves[dst.as_u8[1]];
-  if (ip4_fib_mtrie_leaf_is_terminal (l))
-    return ip4_fib_mtrie_leaf_get_adj_index (l);
-
-  p = get_next_ply_for_leaf (m, l);
-  l = p->leaves[dst.as_u8[2]];
-  if (ip4_fib_mtrie_leaf_is_terminal (l))
-    return ip4_fib_mtrie_leaf_get_adj_index (l);
-
-  p = get_next_ply_for_leaf (m, l);
-  l = p->leaves[dst.as_u8[3]];
-
-  ASSERT (ip4_fib_mtrie_leaf_is_terminal (l));
-  return ip4_fib_mtrie_leaf_get_adj_index (l);
-}
-
 typedef struct
 {
   ip4_address_t dst_address;
   u32 dst_address_length;
   u32 adj_index;
+  u32 cover_address_length;
+  u32 cover_adj_index;
 } ip4_fib_mtrie_set_unset_leaf_args_t;
 
 static void
@@ -184,7 +210,6 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
   uword i;
 
   ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
-  ASSERT (!ip4_fib_mtrie_leaf_is_empty (new_leaf));
 
   for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
     {
@@ -205,7 +230,7 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
 	  __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf);
 	  ASSERT (ply->leaves[i] == new_leaf);
 	  ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
-	  ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf);
+	  ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i);
 	}
     }
 }
@@ -219,7 +244,7 @@ set_leaf (ip4_fib_mtrie_t * m,
   i32 n_dst_bits_next_plies;
   u8 dst_byte;
 
-  ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
+  ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
   ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
 
   n_dst_bits_next_plies =
@@ -232,7 +257,7 @@ set_leaf (ip4_fib_mtrie_t * m,
     {
       uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
 
-      n_dst_bits_this_ply = -n_dst_bits_next_plies;
+      n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
       ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
 	       pow2_mask (n_dst_bits_this_ply)) == 0);
 
@@ -252,13 +277,16 @@ set_leaf (ip4_fib_mtrie_t * m,
 
 	      if (old_leaf_is_terminal)
 		{
+		  old_ply->n_non_empty_leafs -=
+		    ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
 		  old_ply->dst_address_bits_of_leaves[i] =
 		    a->dst_address_length;
 		  __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
 					       new_leaf);
 		  ASSERT (old_ply->leaves[i] == new_leaf);
+
 		  old_ply->n_non_empty_leafs +=
-		    ip4_fib_mtrie_leaf_is_empty (old_leaf);
+		    ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
 		  ASSERT (old_ply->n_non_empty_leafs <=
 			  ARRAY_LEN (old_ply->leaves));
 		}
@@ -283,14 +311,20 @@ set_leaf (ip4_fib_mtrie_t * m,
   else
     {
       ip4_fib_mtrie_ply_t *old_ply, *new_ply;
+      u8 ply_base_len;
 
+      ply_base_len = 8 * (dst_address_byte_index + 1);
       old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
       old_leaf = old_ply->leaves[dst_byte];
       if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
 	{
-	  new_leaf =
-	    ply_create (m, old_leaf,
-			old_ply->dst_address_bits_of_leaves[dst_byte]);
+	  old_ply->n_non_empty_leafs -=
+	    ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
+
+	  new_leaf = ply_create (m, old_leaf,
+				 clib_max (old_ply->dst_address_bits_of_leaves
+					   [dst_byte], ply_base_len),
+				 ply_base_len);
 	  new_ply = get_next_ply_for_leaf (m, new_leaf);
 
 	  /* Refetch since ply_create may move pool. */
@@ -299,14 +333,11 @@ set_leaf (ip4_fib_mtrie_t * m,
 	  __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
 				       new_leaf);
 	  ASSERT (old_ply->leaves[dst_byte] == new_leaf);
-	  old_ply->dst_address_bits_of_leaves[dst_byte] = 0;
-
-	  old_ply->n_non_empty_leafs -=
-	    ip4_fib_mtrie_leaf_is_non_empty (old_leaf);
-	  ASSERT (old_ply->n_non_empty_leafs >= 0);
+	  old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
 
 	  /* Account for the ply we just created. */
 	  old_ply->n_non_empty_leafs += 1;
+	  ASSERT (old_ply->n_non_empty_leafs >= 0);
 	}
       else
 	new_ply = get_next_ply_for_leaf (m, old_leaf);
@@ -325,7 +356,7 @@ unset_leaf (ip4_fib_mtrie_t * m,
   i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
   u8 dst_byte;
 
-  ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
+  ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
   ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
 
   n_dst_bits_next_plies =
@@ -351,12 +382,17 @@ unset_leaf (ip4_fib_mtrie_t * m,
 	      && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
 			     dst_address_byte_index + 1)))
 	{
-	  old_ply->leaves[i] = IP4_FIB_MTRIE_LEAF_EMPTY;
-	  old_ply->dst_address_bits_of_leaves[i] = 0;
+	  old_ply->n_non_empty_leafs -=
+	    ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+
+	  old_ply->leaves[i] =
+	    ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
+	  old_ply->dst_address_bits_of_leaves[i] =
+	    clib_max (old_ply->dst_address_bits_base,
+		      a->cover_address_length);
 
-	  /* No matter what we just deleted a non-empty leaf. */
-	  ASSERT (!ip4_fib_mtrie_leaf_is_empty (old_leaf));
-	  old_ply->n_non_empty_leafs -= 1;
+	  old_ply->n_non_empty_leafs +=
+	    ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
 
 	  ASSERT (old_ply->n_non_empty_leafs >= 0);
 	  if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
@@ -365,6 +401,17 @@ unset_leaf (ip4_fib_mtrie_t * m,
 	      /* Old ply was deleted. */
 	      return 1;
 	    }
+#if CLIB_DEBUG > 0
+	  else if (dst_address_byte_index)
+	    {
+	      int ii, count = 0;
+	      for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++)
+		{
+		  count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii);
+		}
+	      ASSERT (count);
+	    }
+#endif
 	}
     }
 
@@ -377,9 +424,7 @@ ip4_mtrie_init (ip4_fib_mtrie_t * m)
 {
   ip4_fib_mtrie_leaf_t root;
   memset (m, 0, sizeof (m[0]));
-  m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
-  root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY,	/* dst_address_bits_of_leaves */
-		     0);
+  root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, 0, 0);
   ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0);
 }
 
@@ -406,25 +451,21 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
 
   if (!is_del)
     {
-      if (dst_address_length == 0)
-	m->default_leaf = ip4_fib_mtrie_leaf_set_adj_index (adj_index);
-      else
-	set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
+      set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
     }
   else
     {
-      if (dst_address_length == 0)
-	m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
+      ip4_main_t *im = &ip4_main;
 
-      else
+      if (dst_address_length)
 	{
-	  ip4_main_t *im = &ip4_main;
-	  uword i;
+	  word i;
 
-	  unset_leaf (m, &a, root_ply, 0);
-
-	  /* Find next less specific route and insert into mtrie. */
-	  for (i = dst_address_length - 1; i >= 1; i--)
+	  /* If the ply was not deleted, then we need to fill the
+	   * bucket just reset will the leaf from the less specfic
+	   * cover.
+	   * Find next less specific route and insert into mtrie. */
+	  for (i = dst_address_length - 1; i >= 0; i--)
 	    {
 	      uword *p;
 	      index_t lbi;
@@ -441,16 +482,21 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
 		  if (INDEX_INVALID == lbi)
 		    continue;
 
-		  a.dst_address = key;
-		  a.adj_index = lbi;
-		  a.dst_address_length = i;
+		  a.cover_adj_index = lbi;
+		  a.cover_address_length = i;
 
-		  set_leaf (m, &a, /* ply_index */ 0,
-			    /* dst_address_byte_index */ 0);
 		  break;
 		}
 	    }
 	}
+      else
+	{
+	  a.cover_adj_index = 0;
+	  a.cover_address_length = 0;
+	}
+
+      /* the top level ply is never removed, so we can ignore the return code */
+      unset_leaf (m, &a, root_ply, 0);
     }
 }
 
@@ -483,10 +529,8 @@ format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
 {
   ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
 
-  if (ip4_fib_mtrie_leaf_is_empty (l))
-    s = format (s, "miss");
-  else if (ip4_fib_mtrie_leaf_is_terminal (l))
-    s = format (s, "adj %d", ip4_fib_mtrie_leaf_get_adj_index (l));
+  if (ip4_fib_mtrie_leaf_is_terminal (l))
+    s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l));
   else
     s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
   return s;
@@ -511,7 +555,7 @@ format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
     {
       ip4_fib_mtrie_leaf_t l = p->leaves[i];
 
-      if (!ip4_fib_mtrie_leaf_is_empty (l))
+      if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
 	{
 	  u32 a, ia_length;
 	  ip4_address_t ia;
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
index c0afc2cf..128195d3 100644
--- a/src/vnet/ip/ip4_mtrie.h
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -52,67 +52,15 @@
 typedef u32 ip4_fib_mtrie_leaf_t;
 
 #define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0)
-#define IP4_FIB_MTRIE_LEAF_ROOT  (0 + 2*0)
 
-always_inline u32
-ip4_fib_mtrie_leaf_is_empty (ip4_fib_mtrie_leaf_t n)
-{
-  return n == IP4_FIB_MTRIE_LEAF_EMPTY;
-}
-
-always_inline u32
-ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_leaf_t n)
-{
-  return n != IP4_FIB_MTRIE_LEAF_EMPTY;
-}
-
-always_inline u32
-ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
-{
-  return n & 1;
-}
-
-always_inline u32
-ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
-{
-  ASSERT (ip4_fib_mtrie_leaf_is_terminal (n));
-  return n >> 1;
-}
-
-always_inline ip4_fib_mtrie_leaf_t
-ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
-{
-  ip4_fib_mtrie_leaf_t l;
-  l = 1 + 2 * adj_index;
-  ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
-  return l;
-}
-
-always_inline u32
-ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
-{
-  return (n & 1) == 0;
-}
-
-always_inline u32
-ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
-{
-  ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
-  return n >> 1;
-}
-
-always_inline ip4_fib_mtrie_leaf_t
-ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
-{
-  ip4_fib_mtrie_leaf_t l;
-  l = 0 + 2 * i;
-  ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
-  return l;
-}
-
-/* One ply of the 4 ply mtrie fib. */
+/**
+ * @brief One ply of the 4 ply mtrie fib.
+ */
 typedef struct
 {
+  /**
+   * The leaves/slots/buckets to be filed with leafs
+   */
   union
   {
     ip4_fib_mtrie_leaf_t leaves[256];
@@ -122,14 +70,25 @@ typedef struct
 #endif
   };
 
-  /* Prefix length for terminal leaves. */
+  /**
+   * Prefix length for leaves/ply.
+   */
   u8 dst_address_bits_of_leaves[256];
 
-  /* Number of non-empty leafs (whether terminal or not). */
+  /**
+   * Number of non-empty leafs (whether terminal or not).
+   */
   i32 n_non_empty_leafs;
 
+  /**
+   * The length of the ply's coviering prefix. Also a measure of its depth
+   * If a leaf in a slot has a mask length longer than this then it is
+   * 'non-empty'. Otherwise it is the value of the cover.
+   */
+  i32 dst_address_bits_base;
+
   /* Pad to cache line boundary. */
-  u8 pad[CLIB_CACHE_LINE_BYTES - 1 * sizeof (i32)];
+  u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)];
 }
 ip4_fib_mtrie_ply_t;
 
@@ -140,9 +99,6 @@ typedef struct
 {
   /* Pool of plies.  Index zero is root ply. */
   ip4_fib_mtrie_ply_t *ply_pool;
-
-  /* Special case leaf for default route 0.0.0.0/0. */
-  ip4_fib_mtrie_leaf_t default_leaf;
 } ip4_fib_mtrie_t;
 
 void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m);
@@ -154,25 +110,50 @@ void ip4_fib_mtrie_add_del_route (struct ip4_fib_t *f,
 				  u32 dst_address_length,
 				  u32 adj_index, u32 is_del);
 
-/* Returns adjacency index. */
-u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst);
-
 format_function_t format_ip4_fib_mtrie;
 
+always_inline u32
+ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
+{
+  return n & 1;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
+{
+  ASSERT (ip4_fib_mtrie_leaf_is_terminal (n));
+  return n >> 1;
+}
+
 /* Lookup step.  Processes 1 byte of 4 byte ip4 address. */
 always_inline ip4_fib_mtrie_leaf_t
-ip4_fib_mtrie_lookup_step (ip4_fib_mtrie_t * m,
+ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m,
 			   ip4_fib_mtrie_leaf_t current_leaf,
 			   const ip4_address_t * dst_address,
 			   u32 dst_address_byte_index)
 {
-  ip4_fib_mtrie_leaf_t next_leaf;
   ip4_fib_mtrie_ply_t *ply;
   uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf);
 
-  ply = m->ply_pool + (current_is_terminal ? 0 : (current_leaf >> 1));
-  next_leaf = ply->leaves[dst_address->as_u8[dst_address_byte_index]];
-  next_leaf = current_is_terminal ? current_leaf : next_leaf;
+  if (!current_is_terminal)
+    {
+      ply = m->ply_pool + (current_leaf >> 1);
+      return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]);
+    }
+
+  return current_leaf;
+}
+
+/* Lookup step.  Processes 1 byte of 4 byte ip4 address. */
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m,
+			       const ip4_address_t * dst_address)
+{
+  ip4_fib_mtrie_leaf_t next_leaf;
+  ip4_fib_mtrie_ply_t *ply;
+
+  ply = m->ply_pool;
+  next_leaf = ply->leaves[dst_address->as_u8[0]];
 
   return next_leaf;
 }
diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c
index 3af32f2e..7c2b7be8 100644
--- a/src/vnet/ip/ip4_source_check.c
+++ b/src/vnet/ip/ip4_source_check.c
@@ -162,12 +162,8 @@ ip4_source_check_inline (vlib_main_t * vm,
 	  mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
 	  mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;
 
-	  leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
-
-	  leaf0 =
-	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
-	  leaf1 =
-	    ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
+	  leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+	  leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
 
 	  leaf0 =
 	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
@@ -250,10 +246,7 @@ ip4_source_check_inline (vlib_main_t * vm,
 
 	  mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
 
-	  leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
-
-	  leaf0 =
-	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
+	  leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
 
 	  leaf0 =
 	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index ecc3bd2c..c120f12c 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -1943,9 +1943,6 @@ ip6_rewrite_inline (vlib_main_t * vm,
 	  adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
 	  adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
 
-	  /* We should never rewrite a pkt using the MISS adjacency */
-	  ASSERT (adj_index0 && adj_index1);
-
 	  ip0 = vlib_buffer_get_current (p0);
 	  ip1 = vlib_buffer_get_current (p1);
 
@@ -2111,9 +2108,6 @@ ip6_rewrite_inline (vlib_main_t * vm,
 
 	  adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
 
-	  /* We should never rewrite a pkt using the MISS adjacency */
-	  ASSERT (adj_index0);
-
 	  adj0 = ip_get_adjacency (lm, adj_index0);
 
 	  ip0 = vlib_buffer_get_current (p0);
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index 2d8bd0c9..08018fd1 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -121,10 +121,6 @@ mpls_output_inline (vlib_main_t * vm,
           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
 
-          /* We should never rewrite a pkt using the MISS adjacency */
-          ASSERT(adj_index0);
-          ASSERT(adj_index1);
-
           adj0 = adj_get(adj_index0);
           adj1 = adj_get(adj_index1);
           hdr0 = vlib_buffer_get_current (p0);
@@ -237,9 +233,6 @@ mpls_output_inline (vlib_main_t * vm,
 
 	  adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
 
-          /* We should never rewrite a pkt using the MISS adjacency */
-          ASSERT(adj_index0);
-
 	  adj0 = adj_get(adj_index0);
       	  hdr0 = vlib_buffer_get_current (p0);
 
@@ -431,7 +424,6 @@ mpls_adj_incomplete (vlib_main_t * vm,
 	  n_left_to_next -= 1;
 
           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
-          ASSERT(adj_index0);
 
 	  adj0 = adj_get(adj_index0);
 
-- 
cgit 1.2.3-korg


From a3af337e06a79f7d1dacf42a319f241c907122fc Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Tue, 28 Mar 2017 03:49:52 -0700
Subject: MTRIE Optimisations 2

1) 16-8-8 stride. Reduces the depth of the trie walk, traded against increased memory in the top PLY.
2) Separate the vector of protocol-independent (PI) fib_table_t from the vector of protocol-dependent (PD) FIBs. PD FIBs are large structures; we don't want to burn the memory for each PD type.
3) Go straight to the PD FIB in the data-path thus avoiding an indirection through, e.g., a PLY pool.

Change-Id: I800d1ed0b2049040d5da95213f3ed6b12bdd78b7
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/vnet/cop/ip4_whitelist.c   |   9 -
 src/vnet/dpo/load_balance.c    |   6 +-
 src/vnet/dpo/lookup_dpo.c      |   4 -
 src/vnet/fib/fib.c             |   2 +
 src/vnet/fib/fib_entry.c       |   4 +-
 src/vnet/fib/fib_path.c        |   1 +
 src/vnet/fib/fib_table.c       |  24 +-
 src/vnet/fib/fib_table.h       |  12 -
 src/vnet/fib/fib_test.c        |   6 +-
 src/vnet/fib/ip4_fib.c         |  60 +++-
 src/vnet/fib/ip4_fib.h         |  35 ++-
 src/vnet/fib/ip6_fib.c         |  18 +-
 src/vnet/fib/ip6_fib.h         |   2 +-
 src/vnet/fib/mpls_fib.c        |  21 +-
 src/vnet/fib/mpls_fib.h        |  28 +-
 src/vnet/ip/ip4.h              |  29 +-
 src/vnet/ip/ip4_forward.c      |  21 --
 src/vnet/ip/ip4_mtrie.c        | 611 +++++++++++++++++++++++++++--------------
 src/vnet/ip/ip4_mtrie.h        | 106 +++++--
 src/vnet/ip/ip4_packet.h       |   1 +
 src/vnet/ip/ip4_source_check.c |   8 -
 src/vnet/ip/ip6.h              |   3 +
 src/vnet/ip/ip_api.c           |  43 ++-
 src/vnet/mpls/interface.c      |   1 +
 src/vnet/mpls/mpls.h           |  26 +-
 src/vnet/mpls/mpls_api.c       |  38 ++-
 src/vpp/api/api.c              |   6 +-
 src/vpp/stats/stats.c          |   8 +-
 28 files changed, 720 insertions(+), 413 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c
index ccb9dc03..6ef3d7d7 100644
--- a/src/vnet/cop/ip4_whitelist.c
+++ b/src/vnet/cop/ip4_whitelist.c
@@ -127,9 +127,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
 
           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
 
-      	  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
-                                             &ip0->src_address, 1);
-
       	  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
                                              &ip0->src_address, 2);
 
@@ -166,9 +163,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
 
           leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
 
-      	  leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
-                                             &ip1->src_address, 1);
-
       	  leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
                                              &ip1->src_address, 2);
 
@@ -263,9 +257,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
 
           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
 
-	  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, 
-                                             &ip0->src_address, 1);
-
 	  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, 
                                              &ip0->src_address, 2);
 
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index d5e98e4e..6b0eda0e 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -827,14 +827,18 @@ const static char* const * const load_balance_nodes[DPO_PROTO_NUM] =
 void
 load_balance_module_init (void)
 {
+    index_t lbi;
+
     dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes);
 
     /*
      * Special LB with index zero. we need to define this since the v4 mtrie
      * assumes an index of 0 implies the ply is empty. therefore all 'real'
      * adjs need a non-zero index.
+     * This should never be used, but just in case, stack it on a drop.
      */
-    load_balance_create(0, DPO_PROTO_IP4, 0);
+    lbi = load_balance_create(1, DPO_PROTO_IP4, 0);
+    load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4));
 
     load_balance_map_module_init();
 }
diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
index 3726c8fe..e94e871c 100644
--- a/src/vnet/dpo/lookup_dpo.c
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -211,7 +211,6 @@ ip4_src_fib_lookup_one (u32 src_fib_index0,
     mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
 
     leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0);
-    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
     leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
     leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
 
@@ -235,9 +234,6 @@ ip4_src_fib_lookup_two (u32 src_fib_index0,
     leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0);
     leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, addr1);
 
-    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
-    leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1);
-
     leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
     leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2);
 
diff --git a/src/vnet/fib/fib.c b/src/vnet/fib/fib.c
index 413f93e8..b430e113 100644
--- a/src/vnet/fib/fib.c
+++ b/src/vnet/fib/fib.c
@@ -28,6 +28,8 @@ fib_module_init (vlib_main_t * vm)
 	return (error);
     if ((error = vlib_call_init_function (vm, adj_module_init)))
 	return (error);
+    if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
+	return (error);
 
     fib_entry_module_init();
     fib_entry_src_module_init();
diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c
index 25005e11..6ac5461d 100644
--- a/src/vnet/fib/fib_entry.c
+++ b/src/vnet/fib/fib_entry.c
@@ -924,10 +924,10 @@ fib_entry_path_remove (fib_node_index_t fib_entry_index,
 		/*
 		 * no more sources left. this entry is toast.
 		 */
-		fib_entry_src_action_uninstall(fib_entry);
 		fib_entry = fib_entry_post_flag_update_actions(fib_entry,
                                                                source,
                                                                bflags);
+		fib_entry_src_action_uninstall(fib_entry);
 
 		return (FIB_ENTRY_SRC_FLAG_NONE);
 	    }
@@ -1014,10 +1014,10 @@ fib_entry_special_remove (fib_node_index_t fib_entry_index,
 		/*
 		 * no more sources left. this entry is toast.
 		 */
-		fib_entry_src_action_uninstall(fib_entry);
 		fib_entry = fib_entry_post_flag_update_actions(fib_entry,
                                                                source,
                                                                bflags);
+		fib_entry_src_action_uninstall(fib_entry);
 
 		return (FIB_ENTRY_SRC_FLAG_NONE);
 	    }
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 3ed309f3..928a9d43 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -32,6 +32,7 @@
 #include <vnet/fib/fib_path_list.h>
 #include <vnet/fib/fib_internal.h>
 #include <vnet/fib/fib_urpf_list.h>
+#include <vnet/fib/mpls_fib.h>
 
 /**
  * Enurmeration of path types
diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c
index 7818d02e..6c3162e7 100644
--- a/src/vnet/fib/fib_table.c
+++ b/src/vnet/fib/fib_table.c
@@ -47,7 +47,7 @@ fib_table_lookup_i (fib_table_t *fib_table,
     switch (prefix->fp_proto)
     {
     case FIB_PROTOCOL_IP4:
-	return (ip4_fib_table_lookup(&fib_table->v4,
+	return (ip4_fib_table_lookup(ip4_fib_get(fib_table->ft_index),
 				     &prefix->fp_addr.ip4,
 				     prefix->fp_len));
     case FIB_PROTOCOL_IP6:
@@ -55,7 +55,7 @@ fib_table_lookup_i (fib_table_t *fib_table,
 				     &prefix->fp_addr.ip6,
 				     prefix->fp_len));
     case FIB_PROTOCOL_MPLS:
-	return (mpls_fib_table_lookup(&fib_table->mpls,
+	return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index),
 				      prefix->fp_label,
 				      prefix->fp_eos));
     }
@@ -76,7 +76,7 @@ fib_table_lookup_exact_match_i (const fib_table_t *fib_table,
     switch (prefix->fp_proto)
     {
     case FIB_PROTOCOL_IP4:
-	return (ip4_fib_table_lookup_exact_match(&fib_table->v4,
+	return (ip4_fib_table_lookup_exact_match(ip4_fib_get(fib_table->ft_index),
 						 &prefix->fp_addr.ip4,
 						 prefix->fp_len));
     case FIB_PROTOCOL_IP6:
@@ -84,7 +84,7 @@ fib_table_lookup_exact_match_i (const fib_table_t *fib_table,
 						 &prefix->fp_addr.ip6,
 						 prefix->fp_len));
     case FIB_PROTOCOL_MPLS:
-	return (mpls_fib_table_lookup(&fib_table->mpls,
+	return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index),
 				      prefix->fp_label,
 				      prefix->fp_eos));
     }
@@ -148,7 +148,7 @@ fib_table_entry_remove (fib_table_t *fib_table,
     switch (prefix->fp_proto)
     {
     case FIB_PROTOCOL_IP4:
-	ip4_fib_table_entry_remove(&fib_table->v4,
+	ip4_fib_table_entry_remove(ip4_fib_get(fib_table->ft_index),
 				   &prefix->fp_addr.ip4,
 				   prefix->fp_len);
 	break;
@@ -158,7 +158,7 @@ fib_table_entry_remove (fib_table_t *fib_table,
 				   prefix->fp_len);
 	break;
     case FIB_PROTOCOL_MPLS:
-	mpls_fib_table_entry_remove(&fib_table->mpls,
+	mpls_fib_table_entry_remove(mpls_fib_get(fib_table->ft_index),
 				    prefix->fp_label,
 				    prefix->fp_eos);
 	break;
@@ -208,7 +208,7 @@ fib_table_entry_insert (fib_table_t *fib_table,
     switch (prefix->fp_proto)
     {
     case FIB_PROTOCOL_IP4:
-	ip4_fib_table_entry_insert(&fib_table->v4,
+	ip4_fib_table_entry_insert(ip4_fib_get(fib_table->ft_index),
 				   &prefix->fp_addr.ip4,
 				   prefix->fp_len,
 				   fib_entry_index);
@@ -220,7 +220,7 @@ fib_table_entry_insert (fib_table_t *fib_table,
 				   fib_entry_index);
 	break;
     case FIB_PROTOCOL_MPLS:
-	mpls_fib_table_entry_insert(&fib_table->mpls,
+	mpls_fib_table_entry_insert(mpls_fib_get(fib_table->ft_index),
 				    prefix->fp_label,
 				    prefix->fp_eos,
 				    fib_entry_index);
@@ -270,7 +270,9 @@ fib_table_fwding_dpo_remove (u32 fib_index,
 	return (ip4_fib_table_fwding_dpo_remove(ip4_fib_get(fib_index),
 						&prefix->fp_addr.ip4,
 						prefix->fp_len,
-						dpo));
+						dpo,
+                                                fib_table_get_less_specific(fib_index,
+                                                                            prefix)));
     case FIB_PROTOCOL_IP6:
 	return (ip6_fib_table_fwding_dpo_remove(fib_index,
 						&prefix->fp_addr.ip6,
@@ -1034,13 +1036,13 @@ fib_table_destroy (fib_table_t *fib_table)
     switch (fib_table->ft_proto)
     {
     case FIB_PROTOCOL_IP4:
-	ip4_fib_table_destroy(&fib_table->v4);
+	ip4_fib_table_destroy(fib_table->ft_index);
 	break;
     case FIB_PROTOCOL_IP6:
 	ip6_fib_table_destroy(fib_table->ft_index);
 	break;
     case FIB_PROTOCOL_MPLS:
-	mpls_fib_table_destroy(&fib_table->mpls);
+	mpls_fib_table_destroy(fib_table->ft_index);
 	break;
     }
 }
diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h
index e7e66acb..b310aea6 100644
--- a/src/vnet/fib/fib_table.h
+++ b/src/vnet/fib/fib_table.h
@@ -28,18 +28,6 @@
  */
 typedef struct fib_table_t_
 {
-    /**
-     * A union of the protocol specific FIBs that provide the
-     * underlying LPM mechanism.
-     * This element is first in the struct so that it is in the
-     * first cache line.
-     */
-    union {
-	ip4_fib_t v4;
-	ip6_fib_t v6;
-	mpls_fib_t mpls;
-    };
-
     /**
      * Which protocol this table serves. Used to switch on the union above.
      */
diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c
index 1a9cce24..92141ddf 100644
--- a/src/vnet/fib/fib_test.c
+++ b/src/vnet/fib/fib_test.c
@@ -40,8 +40,6 @@
 	fformat(stderr, "FAIL:%d: " _comment "\n",		\
 		__LINE__, ##_args);				\
     } else {							\
-	fformat(stderr, "PASS:%d: " _comment "\n",		\
-		__LINE__, ##_args);				\
     }								\
     _evald;							\
 })
@@ -5727,7 +5725,7 @@ fib_test_label (void)
 				     &a_o_10_10_11_1,
 				     &adj_o_10_10_11_2),
 	     "1.1.1.1/32 LB 2 buckets via: "
-	     "adj over 10.10.11.1",
+	     "adj over 10.10.11.1, "
 	     "adj-v4 over 10.10.11.2");
 
     fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
@@ -5738,7 +5736,7 @@ fib_test_label (void)
 				     &a_o_10_10_11_1,
 				     &adj_o_10_10_11_2),
 	     "24001/eos LB 2 buckets via: "
-	     "adj over 10.10.11.1",
+	     "adj over 10.10.11.1, "
 	     "adj-v4 over 10.10.11.2");
 
     fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c
index a7915620..98d4e52f 100644
--- a/src/vnet/fib/ip4_fib.c
+++ b/src/vnet/fib/ip4_fib.c
@@ -104,29 +104,35 @@ static u32
 ip4_create_fib_with_table_id (u32 table_id)
 {
     fib_table_t *fib_table;
+    ip4_fib_t *v4_fib;
 
     pool_get_aligned(ip4_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
     memset(fib_table, 0, sizeof(*fib_table));
 
+    pool_get_aligned(ip4_main.v4_fibs, v4_fib, CLIB_CACHE_LINE_BYTES);
+
+    ASSERT((fib_table - ip4_main.fibs) ==
+           (v4_fib - ip4_main.v4_fibs));
+
     fib_table->ft_proto = FIB_PROTOCOL_IP4;
     fib_table->ft_index =
-	fib_table->v4.index =
+	v4_fib->index =
 	    (fib_table - ip4_main.fibs);
 
     hash_set (ip4_main.fib_index_by_table_id, table_id, fib_table->ft_index);
 
     fib_table->ft_table_id =
-	fib_table->v4.table_id =
+	v4_fib->table_id =
 	    table_id;
     fib_table->ft_flow_hash_config = 
-	fib_table->v4.flow_hash_config =
+	v4_fib->flow_hash_config =
 	    IP_FLOW_HASH_DEFAULT;
-    fib_table->v4.fwd_classify_table_index = ~0;
-    fib_table->v4.rev_classify_table_index = ~0;
+    v4_fib->fwd_classify_table_index = ~0;
+    v4_fib->rev_classify_table_index = ~0;
     
     fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4);
 
-    ip4_mtrie_init(&fib_table->v4.mtrie);
+    ip4_mtrie_init(&v4_fib->mtrie);
 
     /*
      * add the special entries into the new FIB
@@ -151,9 +157,10 @@ ip4_create_fib_with_table_id (u32 table_id)
 }
 
 void
-ip4_fib_table_destroy (ip4_fib_t *fib)
+ip4_fib_table_destroy (u32 fib_index)
 {
-    fib_table_t *fib_table = (fib_table_t*)fib;
+    fib_table_t *fib_table = pool_elt_at_index(ip4_main.fibs, fib_index);
+    ip4_fib_t *v4_fib = pool_elt_at_index(ip4_main.v4_fibs, fib_index);
     int ii;
 
     /*
@@ -185,6 +192,10 @@ ip4_fib_table_destroy (ip4_fib_t *fib)
     {
 	hash_unset (ip4_main.fib_index_by_table_id, fib_table->ft_table_id);
     }
+
+    ip4_mtrie_free(&v4_fib->mtrie);
+
+    pool_put(ip4_main.v4_fibs, v4_fib);
     pool_put(ip4_main.fibs, fib_table);
 }
 
@@ -367,16 +378,33 @@ ip4_fib_table_fwding_dpo_update (ip4_fib_t *fib,
 				 u32 len,
 				 const dpo_id_t *dpo)
 {
-    ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 0); // ADD
+    ip4_fib_mtrie_route_add(&fib->mtrie, addr, len, dpo->dpoi_index);
 }
 
 void
 ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib,
 				 const ip4_address_t *addr,
 				 u32 len,
-				 const dpo_id_t *dpo)
+				 const dpo_id_t *dpo,
+                                 u32 cover_index)
 {
-    ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 1); // DELETE
+    fib_prefix_t cover_prefix = {
+        .fp_len = 0,
+    };
+    const dpo_id_t *cover_dpo;
+
+    /*
+     * We need to pass the MTRIE the LB index and address length of the
+     * covering prefix, so it can fill the plys with the correct replacement
+     * for the entry being removed
+     */
+    fib_entry_get_prefix(cover_index, &cover_prefix);
+    cover_dpo = fib_entry_contribute_ip_forwarding(cover_index);
+
+    ip4_fib_mtrie_route_del(&fib->mtrie,
+                            addr, len, dpo->dpoi_index,
+                            cover_prefix.fp_len,
+                            cover_dpo->dpoi_index);
 }
 
 void
@@ -498,7 +526,7 @@ ip4_show_fib (vlib_main_t * vm,
 
     pool_foreach (fib_table, im4->fibs,
     ({
-	ip4_fib_t *fib = &fib_table->v4;
+	ip4_fib_t *fib = pool_elt_at_index(im4->v4_fibs, fib_table->ft_index);
 
 	if (table_id >= 0 && table_id != (int)fib->table_id)
 	    continue;
@@ -523,6 +551,11 @@ ip4_show_fib (vlib_main_t * vm,
 	    }
 	    continue;
 	}
+	if (mtrie)
+        {
+	    vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie);
+            continue;
+        }
 
 	if (!matching)
 	{
@@ -532,9 +565,6 @@ ip4_show_fib (vlib_main_t * vm,
 	{
 	    ip4_fib_table_show_one(fib, vm, &matching_address, matching_mask);
 	}
-
-	if (mtrie)
-	    vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie);
     }));
 
     return 0;
diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h
index 243fd77f..4cf9e58a 100644
--- a/src/vnet/fib/ip4_fib.h
+++ b/src/vnet/fib/ip4_fib.h
@@ -34,6 +34,33 @@
 #include <vnet/ip/ip.h>
 #include <vnet/fib/fib_entry.h>
 #include <vnet/fib/fib_table.h>
+#include <vnet/ip/ip4_mtrie.h>
+
+typedef struct ip4_fib_t_
+{
+  /**
+   * Mtrie for fast lookups. Hash is used to maintain overlapping prefixes.
+   * First member so it's in the first cacheline.
+   */
+  ip4_fib_mtrie_t mtrie;
+
+  /* Hash table for each prefix length mapping. */
+  uword *fib_entry_by_dst_address[33];
+
+  /* Table ID (hash key) for this FIB. */
+  u32 table_id;
+
+  /* Index into FIB vector. */
+  u32 index;
+
+  /* flow hash configuration */
+  flow_hash_config_t flow_hash_config;
+
+  /* N-tuple classifier indices */
+  u32 fwd_classify_table_index;
+  u32 rev_classify_table_index;
+
+} ip4_fib_t;
 
 extern fib_node_index_t ip4_fib_table_lookup(const ip4_fib_t *fib,
 					     const ip4_address_t *addr,
@@ -50,7 +77,7 @@ extern void ip4_fib_table_entry_insert(ip4_fib_t *fib,
 				       const ip4_address_t *addr,
 				       u32 len,
 				       fib_node_index_t fib_entry_index);
-extern void ip4_fib_table_destroy(ip4_fib_t *fib);
+extern void ip4_fib_table_destroy(u32 fib_index);
 
 extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib,
 					    const ip4_address_t *addr,
@@ -60,7 +87,8 @@ extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib,
 extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib,
 					    const ip4_address_t *addr,
 					    u32 len,
-					    const dpo_id_t *dpo);
+					    const dpo_id_t *dpo,
+                                            fib_node_index_t cover_index);
 extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib,
 				    const ip4_address_t * dst);
 
@@ -79,7 +107,7 @@ extern void ip4_fib_table_walk(ip4_fib_t *fib,
 static inline ip4_fib_t *
 ip4_fib_get (u32 index)
 {
-    return (&(pool_elt_at_index(ip4_main.fibs, index)->v4));
+    return (pool_elt_at_index(ip4_main.v4_fibs, index));
 }
 
 always_inline u32
@@ -134,7 +162,6 @@ ip4_fib_forwarding_lookup (u32 fib_index,
     mtrie = &ip4_fib_get(fib_index)->mtrie;
 
     leaf = ip4_fib_mtrie_lookup_step_one (mtrie, addr);
-    leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1);
     leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2);
     leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3);
 
diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c
index 343ff55e..0ee029d3 100644
--- a/src/vnet/fib/ip6_fib.c
+++ b/src/vnet/fib/ip6_fib.c
@@ -55,22 +55,29 @@ static u32
 create_fib_with_table_id (u32 table_id)
 {
     fib_table_t *fib_table;
+    ip6_fib_t *v6_fib;
 
     pool_get_aligned(ip6_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
+    pool_get_aligned(ip6_main.v6_fibs, v6_fib, CLIB_CACHE_LINE_BYTES);
+
     memset(fib_table, 0, sizeof(*fib_table));
+    memset(v6_fib, 0, sizeof(*v6_fib));
 
+    ASSERT((fib_table - ip6_main.fibs) ==
+           (v6_fib - ip6_main.v6_fibs));
+    
     fib_table->ft_proto = FIB_PROTOCOL_IP6;
     fib_table->ft_index =
-	fib_table->v6.index =
-	    (fib_table - ip6_main.fibs);
+	    v6_fib->index =
+                (fib_table - ip6_main.fibs);
 
     hash_set(ip6_main.fib_index_by_table_id, table_id, fib_table->ft_index);
 
     fib_table->ft_table_id =
-	fib_table->v6.table_id =
+	v6_fib->table_id =
 	    table_id;
     fib_table->ft_flow_hash_config = 
-	fib_table->v6.flow_hash_config =
+	v6_fib->flow_hash_config =
 	    IP_FLOW_HASH_DEFAULT;
 
     vnet_ip6_fib_init(fib_table->ft_index);
@@ -188,6 +195,7 @@ ip6_fib_table_destroy (u32 fib_index)
     {
 	hash_unset (ip6_main.fib_index_by_table_id, fib_table->ft_table_id);
     }
+    pool_put_index(ip6_main.v6_fibs, fib_table->ft_index);
     pool_put(ip6_main.fibs, fib_table);
 }
 
@@ -620,7 +628,7 @@ ip6_show_fib (vlib_main_t * vm,
 
     pool_foreach (fib_table, im6->fibs,
     ({
-	fib = &(fib_table->v6);
+	fib = pool_elt_at_index(im6->v6_fibs, fib_table->ft_index);
 	if (table_id >= 0 && table_id != (int)fib->table_id)
 	    continue;
 	if (fib_index != ~0 && fib_index != (int)fib->index)
diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h
index af864a75..e2f28452 100644
--- a/src/vnet/fib/ip6_fib.h
+++ b/src/vnet/fib/ip6_fib.h
@@ -115,7 +115,7 @@ static inline ip6_fib_t *
 ip6_fib_get (fib_node_index_t index)
 {
     ASSERT(!pool_is_free_index(ip6_main.fibs, index));
-    return (&pool_elt_at_index (ip6_main.fibs, index)->v6);
+    return (pool_elt_at_index (ip6_main.v6_fibs, index));
 }
 
 static inline 
diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c
index 5cd0fd23..4b2b76ea 100644
--- a/src/vnet/fib/mpls_fib.c
+++ b/src/vnet/fib/mpls_fib.c
@@ -97,11 +97,15 @@ mpls_fib_create_with_table_id (u32 table_id)
     int i;
 
     pool_get_aligned(mpls_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
+    pool_get_aligned(mpls_main.mpls_fibs, mf, CLIB_CACHE_LINE_BYTES);
+
+    ASSERT((fib_table - mpls_main.fibs) ==
+           (mf - mpls_main.mpls_fibs));
+
     memset(fib_table, 0, sizeof(*fib_table));
 
     fib_table->ft_proto = FIB_PROTOCOL_MPLS;
-    fib_table->ft_index =
-	(fib_table - mpls_main.fibs);
+    fib_table->ft_index = (fib_table - mpls_main.fibs);
 
     hash_set (mpls_main.fib_index_by_table_id, table_id, fib_table->ft_index);
 
@@ -109,8 +113,6 @@ mpls_fib_create_with_table_id (u32 table_id)
 	table_id;
     fib_table->ft_flow_hash_config = 
 	MPLS_FLOW_HASH_DEFAULT;
-    fib_table->v4.fwd_classify_table_index = ~0;
-    fib_table->v4.rev_classify_table_index = ~0;
     
     fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS);
 
@@ -122,7 +124,6 @@ mpls_fib_create_with_table_id (u32 table_id)
                                 drop_dpo_get(DPO_PROTO_MPLS));
     }
 
-    mf = &fib_table->mpls;
     mf->mf_entries = hash_create(0, sizeof(fib_node_index_t));
     for (i = 0; i < MPLS_FIB_DB_SIZE; i++)
     {
@@ -241,9 +242,10 @@ mpls_fib_table_create_and_lock (void)
 }
 
 void
-mpls_fib_table_destroy (mpls_fib_t *mf)
+mpls_fib_table_destroy (u32 fib_index)
 {
-    fib_table_t *fib_table = (fib_table_t*)mf;
+    fib_table_t *fib_table = pool_elt_at_index(mpls_main.fibs, fib_index);
+    mpls_fib_t *mf = pool_elt_at_index(mpls_main.mpls_fibs, fib_index);
     fib_prefix_t prefix = {
 	.fp_proto = FIB_PROTOCOL_MPLS,
     };
@@ -274,6 +276,7 @@ mpls_fib_table_destroy (mpls_fib_t *mf)
     }
     hash_free(mf->mf_entries);
 
+    pool_put(mpls_main.mpls_fibs, mf);
     pool_put(mpls_main.fibs, fib_table);
 }
 
@@ -436,11 +439,11 @@ mpls_fib_show (vlib_main_t * vm,
 
 	if (MPLS_LABEL_INVALID == label)
 	{
-	    mpls_fib_table_show_all(&(fib_table->mpls), vm);
+	    mpls_fib_table_show_all(mpls_fib_get(fib_table->ft_index), vm);
 	}
 	else
 	{
-	    mpls_fib_table_show_one(&(fib_table->mpls), label, vm);
+	    mpls_fib_table_show_one(mpls_fib_get(fib_table->ft_index), label, vm);
 	}
     }));
 
diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h
index 779decaa..78a61a14 100644
--- a/src/vnet/fib/mpls_fib.h
+++ b/src/vnet/fib/mpls_fib.h
@@ -25,10 +25,33 @@
 #include <vnet/mpls/mpls.h>
 #include <vnet/fib/fib_table.h>
 
+#define MPLS_FIB_DEFAULT_TABLE_ID 0
+
+/**
+ * Type exposure is to allow the DP fast/inlined access
+ */
+#define MPLS_FIB_KEY_SIZE 21
+#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1))
+
+typedef struct mpls_fib_t_
+{
+  /**
+   * A hash table of entries. 21 bit key
+   * Hash table for reduced memory footprint
+   */
+  uword * mf_entries;
+
+  /**
+   * The load-balance indices keyed by 21 bit label+eos bit.
+   * A flat array for maximum lookup performance.
+   */
+  index_t mf_lbs[MPLS_FIB_DB_SIZE];
+} mpls_fib_t;
+
 static inline mpls_fib_t*
 mpls_fib_get (fib_node_index_t index)
 {
-    return (&(pool_elt_at_index(mpls_main.fibs, index)->mpls));
+    return (pool_elt_at_index(mpls_main.mpls_fibs, index));
 }
 
 extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id);
@@ -56,8 +79,7 @@ extern void mpls_fib_table_entry_insert(mpls_fib_t *mf,
 					mpls_label_t label,
 					mpls_eos_bit_t eos,
 					fib_node_index_t fei);
-extern void mpls_fib_table_destroy(mpls_fib_t *mf);
-
+extern void mpls_fib_table_destroy(u32 fib_index);
 
 
 extern void mpls_fib_forwarding_table_update(mpls_fib_t *mf,
diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
index 4e075d0f..71640def 100644
--- a/src/vnet/ip/ip4.h
+++ b/src/vnet/ip/ip4.h
@@ -40,34 +40,10 @@
 #ifndef included_ip_ip4_h
 #define included_ip_ip4_h
 
-#include <vnet/ip/ip4_mtrie.h>
 #include <vnet/ip/ip4_packet.h>
 #include <vnet/ip/lookup.h>
 #include <vnet/feature/feature.h>
 
-typedef struct ip4_fib_t
-{
-  /* Hash table for each prefix length mapping. */
-  uword *fib_entry_by_dst_address[33];
-
-  /* Mtrie for fast lookups.  Hash is used to maintain overlapping prefixes. */
-  ip4_fib_mtrie_t mtrie;
-
-  /* Table ID (hash key) for this FIB. */
-  u32 table_id;
-
-  /* Index into FIB vector. */
-  u32 index;
-
-  /* flow hash configuration */
-  flow_hash_config_t flow_hash_config;
-
-  /* N-tuple classifier indices */
-  u32 fwd_classify_table_index;
-  u32 rev_classify_table_index;
-
-} ip4_fib_t;
-
 typedef struct ip4_mfib_t
 {
   /* Hash table for each prefix length mapping. */
@@ -111,6 +87,9 @@ typedef struct ip4_main_t
   /** Vector of FIBs. */
   struct fib_table_t_ *fibs;
 
+  /** Vector of IPv4 FIBs (each embeds its mtrie); same index as fibs. */
+  struct ip4_fib_t_ *v4_fibs;
+
   /** Vector of MFIBs. */
   struct mfib_table_t_ *mfibs;
 
@@ -284,8 +263,6 @@ serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main;
 int vnet_set_ip4_flow_hash (u32 table_id,
 			    flow_hash_config_t flow_hash_config);
 
-void ip4_mtrie_init (ip4_fib_mtrie_t * m);
-
 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
 				 u32 table_index);
 
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index ef6dded5..ee1703e7 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -182,7 +182,6 @@ ip4_lookup_inline (vlib_main_t * vm,
 	      mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
 	      mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
 
-
 	      leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
 	      leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
 	      leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
@@ -194,14 +193,6 @@ ip4_lookup_inline (vlib_main_t * vm,
 	  tcp2 = (void *) (ip2 + 1);
 	  tcp3 = (void *) (ip3 + 1);
 
-	  if (!lookup_for_responses_to_locally_received_packets)
-	    {
-	      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
-	      leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
-	      leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
-	      leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
-	    }
-
 	  if (!lookup_for_responses_to_locally_received_packets)
 	    {
 	      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
@@ -363,9 +354,6 @@ ip4_lookup_inline (vlib_main_t * vm,
 
 	  tcp0 = (void *) (ip0 + 1);
 
-	  if (!lookup_for_responses_to_locally_received_packets)
-	    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
-
 	  if (!lookup_for_responses_to_locally_received_packets)
 	    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
 
@@ -1622,11 +1610,6 @@ ip4_local_inline (vlib_main_t * vm,
 	  good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
 	  good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
 
-	  leaf0 =
-	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
-	  leaf1 =
-	    ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
-
 	  /* Verify UDP length. */
 	  ip_len0 = clib_net_to_host_u16 (ip0->length);
 	  ip_len1 = clib_net_to_host_u16 (ip1->length);
@@ -1812,9 +1795,6 @@ ip4_local_inline (vlib_main_t * vm,
 	  /* Don't verify UDP checksum for packets with explicit zero checksum. */
 	  good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
 
-	  leaf0 =
-	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
-
 	  /* Verify UDP length. */
 	  ip_len0 = clib_net_to_host_u16 (ip0->length);
 	  udp_len0 = clib_net_to_host_u16 (udp0->length);
@@ -2913,7 +2893,6 @@ ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
 
   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
-  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
 
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
index 317d8f10..adc95125 100644
--- a/src/vnet/ip/ip4_mtrie.c
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -38,10 +38,17 @@
  */
 
 #include <vnet/ip/ip.h>
-#include <vnet/fib/fib_entry.h>
+#include <vnet/ip/ip4_mtrie.h>
+#include <vnet/fib/ip4_fib.h>
+
+
+/**
+ * Global pool of IPv4 8bit PLYs
+ */
+ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
 
 always_inline u32
-ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_ply_t * p, u8 dst_byte)
+ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte)
 {
   /*
    * It's 'non-empty' if the length of the leaf stored is greater than the
@@ -84,61 +91,83 @@ ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
   return l;
 }
 
-static void
-ply_init (ip4_fib_mtrie_ply_t * p,
-	  ip4_fib_mtrie_leaf_t init, u32 prefix_len, u32 ply_base_len)
-{
-  /*
-   * A leaf is 'empty' if it represents a leaf from the covering PLY
-   * i.e. if the prefix length of the leaf is less than or equal to
-   * the prefix length of the PLY
-   */
-  p->n_non_empty_leafs = (prefix_len > ply_base_len ?
-			  ARRAY_LEN (p->leaves) : 0);
-  memset (p->dst_address_bits_of_leaves, prefix_len,
-	  sizeof (p->dst_address_bits_of_leaves));
-  p->dst_address_bits_base = ply_base_len;
-
-  /* Initialize leaves. */
-#ifdef CLIB_HAVE_VEC128
-  {
-    u32x4 *l, init_x4;
-
 #ifndef __ALTIVEC__
-    init_x4 = u32x4_splat (init);
+#define PLY_X4_SPLAT_INIT(init_x4, init) \
+  init_x4 = u32x4_splat (init);
 #else
-    {
-      u32x4_union_t y;
-      y.as_u32[0] = init;
-      y.as_u32[1] = init;
-      y.as_u32[2] = init;
-      y.as_u32[3] = init;
-      init_x4 = y.as_u32x4;
-    }
+#define PLY_X4_SPLAT_INIT(init_x4, init)                                \
+{                                                                       \
+  u32x4_union_t y;                                                      \
+  y.as_u32[0] = init;                                                   \
+  y.as_u32[1] = init;                                                   \
+  y.as_u32[2] = init;                                                   \
+  y.as_u32[3] = init;                                                   \
+  init_x4 = y.as_u32x4;                                                 \
+}
 #endif
 
-    for (l = p->leaves_as_u32x4;
-	 l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4)
-      {
-	l[0] = init_x4;
-	l[1] = init_x4;
-	l[2] = init_x4;
-	l[3] = init_x4;
-      }
-  }
+#ifdef CLIB_HAVE_VEC128
+#define PLY_INIT_LEAVES(p)                                              \
+{                                                                       \
+    u32x4 *l, init_x4;                                                  \
+                                                                        \
+    PLY_X4_SPLAT_INIT(init_x4, init);                                   \
+    for (l = p->leaves_as_u32x4;                                        \
+	 l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4);       \
+         l += 4)                                                        \
+      {                                                                 \
+	l[0] = init_x4;                                                 \
+	l[1] = init_x4;                                                 \
+	l[2] = init_x4;                                                 \
+	l[3] = init_x4;                                                 \
+      }                                                                 \
+}
 #else
-  {
-    u32 *l;
-
-    for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4)
-      {
-	l[0] = init;
-	l[1] = init;
-	l[2] = init;
-	l[3] = init;
-      }
-  }
+#define PLY_INIT_LEAVES(p)                                              \
+{                                                                       \
+  u32 *l;                                                               \
+                                                                        \
+  for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4)    \
+    {                                                                   \
+      l[0] = init;                                                      \
+      l[1] = init;                                                      \
+      l[2] = init;                                                      \
+      l[3] = init;                                                      \
+      }                                                                 \
+}
 #endif
+
+#define PLY_INIT(p, init, prefix_len, ply_base_len)                     \
+{                                                                       \
+  /*                                                                    \
+   * A leaf is 'empty' if it represents a leaf from the covering PLY    \
+   * i.e. if the prefix length of the leaf is less than or equal to     \
+   * the prefix length of the PLY                                       \
+   */                                                                   \
+  p->n_non_empty_leafs = (prefix_len > ply_base_len ?                   \
+			  ARRAY_LEN (p->leaves) : 0);                   \
+  memset (p->dst_address_bits_of_leaves, prefix_len,                    \
+	  sizeof (p->dst_address_bits_of_leaves));                      \
+  p->dst_address_bits_base = ply_base_len;                              \
+                                                                        \
+  /* Initialize leaves. */                                              \
+  PLY_INIT_LEAVES(p);                                                   \
+}
+
+static void
+ply_8_init (ip4_fib_mtrie_8_ply_t * p,
+	    ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len)
+{
+  PLY_INIT (p, init, prefix_len, ply_base_len);
+}
+
+static void
+ply_16_init (ip4_fib_mtrie_16_ply_t * p,
+	     ip4_fib_mtrie_leaf_t init, uword prefix_len)
+{
+  memset (p->dst_address_bits_of_leaves, prefix_len,
+	  sizeof (p->dst_address_bits_of_leaves));
+  PLY_INIT_LEAVES (p);
 }
 
 static ip4_fib_mtrie_leaf_t
@@ -146,49 +175,43 @@ ply_create (ip4_fib_mtrie_t * m,
 	    ip4_fib_mtrie_leaf_t init_leaf,
 	    u32 leaf_prefix_len, u32 ply_base_len)
 {
-  ip4_fib_mtrie_ply_t *p;
+  ip4_fib_mtrie_8_ply_t *p;
 
   /* Get cache aligned ply. */
-  pool_get_aligned (m->ply_pool, p, sizeof (p[0]));
+  pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
 
-  ply_init (p, init_leaf, leaf_prefix_len, ply_base_len);
-  return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool);
+  ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
+  return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
 }
 
-always_inline ip4_fib_mtrie_ply_t *
+always_inline ip4_fib_mtrie_8_ply_t *
 get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
 {
   uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
-  /* It better not be the root ply. */
-  ASSERT (n != 0);
-  return pool_elt_at_index (m->ply_pool, n);
+
+  return pool_elt_at_index (ip4_ply_pool, n);
 }
 
-static void
-ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
+void
+ip4_mtrie_free (ip4_fib_mtrie_t * m)
 {
-  uword i, is_root;
-
-  is_root = p - m->ply_pool == 0;
-
-  for (i = 0; i < ARRAY_LEN (p->leaves); i++)
+  /* the root ply is embedded so there is nothing to do,
+   * the assumption being that the IP4 FIB table has emptied the trie
+   * before deletion.
+   */
+#if CLIB_DEBUG > 0
+  int i;
+  for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
     {
-      ip4_fib_mtrie_leaf_t l = p->leaves[i];
-      if (ip4_fib_mtrie_leaf_is_next_ply (l))
-	ply_free (m, get_next_ply_for_leaf (m, l));
+      ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
     }
-
-  if (is_root)
-    ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0, 0);
-  else
-    pool_put (m->ply_pool, p);
+#endif
 }
 
 void
-ip4_fib_free (ip4_fib_mtrie_t * m)
+ip4_mtrie_init (ip4_fib_mtrie_t * m)
 {
-  ip4_fib_mtrie_ply_t *root_ply = pool_elt_at_index (m->ply_pool, 0);
-  ply_free (m, root_ply);
+  ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0);
 }
 
 typedef struct
@@ -202,7 +225,7 @@ typedef struct
 
 static void
 set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
-				 ip4_fib_mtrie_ply_t * ply,
+				 ip4_fib_mtrie_8_ply_t * ply,
 				 ip4_fib_mtrie_leaf_t new_leaf,
 				 uword new_leaf_dst_address_bits)
 {
@@ -218,7 +241,8 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
       /* Recurse into sub plies. */
       if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
 	{
-	  ip4_fib_mtrie_ply_t *sub_ply = get_next_ply_for_leaf (m, old_leaf);
+	  ip4_fib_mtrie_8_ply_t *sub_ply =
+	    get_next_ply_for_leaf (m, old_leaf);
 	  set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
 					   new_leaf_dst_address_bits);
 	}
@@ -237,16 +261,20 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
 
 static void
 set_leaf (ip4_fib_mtrie_t * m,
-	  ip4_fib_mtrie_set_unset_leaf_args_t * a,
+	  const ip4_fib_mtrie_set_unset_leaf_args_t * a,
 	  u32 old_ply_index, u32 dst_address_byte_index)
 {
   ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
   i32 n_dst_bits_next_plies;
   u8 dst_byte;
+  ip4_fib_mtrie_8_ply_t *old_ply;
+
+  old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
 
   ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
   ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
 
+  /* how many bits of the destination address are in the next PLY */
   n_dst_bits_next_plies =
     a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
 
@@ -255,30 +283,36 @@ set_leaf (ip4_fib_mtrie_t * m,
   /* Number of bits next plies <= 0 => insert leaves this ply. */
   if (n_dst_bits_next_plies <= 0)
     {
+      /* The mask length of the address to insert maps to this ply */
       uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
 
+      /* The number of bits, and hence slots/buckets, we will fill */
       n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
       ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
 	       pow2_mask (n_dst_bits_this_ply)) == 0);
 
+      /* Starting at the value of the byte at this section of the v4 address
+       * fill the buckets/slots of the ply */
       for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
 	{
-	  ip4_fib_mtrie_ply_t *old_ply, *new_ply;
-
-	  old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+	  ip4_fib_mtrie_8_ply_t *new_ply;
 
 	  old_leaf = old_ply->leaves[i];
 	  old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
 
-	  /* Is leaf to be inserted more specific? */
 	  if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
 	    {
+	      /* The new leaf is more or equally specific than the one currently
+	       * occupying the slot */
 	      new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
 
 	      if (old_leaf_is_terminal)
 		{
+		  /* The current leaf is terminal, we can replace it with
+		   * the new one */
 		  old_ply->n_non_empty_leafs -=
 		    ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+
 		  old_ply->dst_address_bits_of_leaves[i] =
 		    a->dst_address_length;
 		  __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
@@ -292,32 +326,42 @@ set_leaf (ip4_fib_mtrie_t * m,
 		}
 	      else
 		{
-		  /* Existing leaf points to another ply.  We need to place new_leaf into all
-		     more specific slots. */
+		  /* Existing leaf points to another ply.  We need to place
+		   * new_leaf into all more specific slots. */
 		  new_ply = get_next_ply_for_leaf (m, old_leaf);
 		  set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
 						   a->dst_address_length);
 		}
 	    }
-
 	  else if (!old_leaf_is_terminal)
 	    {
+	      /* The current leaf is less specific and not terminal (i.e. a ply),
+	       * recurse on down the trie */
 	      new_ply = get_next_ply_for_leaf (m, old_leaf);
-	      set_leaf (m, a, new_ply - m->ply_pool,
+	      set_leaf (m, a, new_ply - ip4_ply_pool,
 			dst_address_byte_index + 1);
 	    }
+	  /*
+	   * else
+	   *  the route we are adding is less specific than the leaf currently
+	   *  occupying this slot. leave it there
+	   */
 	}
     }
   else
     {
-      ip4_fib_mtrie_ply_t *old_ply, *new_ply;
+      /* The address to insert requires us to move down at a lower level of
+       * the trie - recurse on down */
+      ip4_fib_mtrie_8_ply_t *new_ply;
       u8 ply_base_len;
 
       ply_base_len = 8 * (dst_address_byte_index + 1);
-      old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+
       old_leaf = old_ply->leaves[dst_byte];
+
       if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
 	{
+	  /* There is a leaf occupying the slot. Replace it with a new ply */
 	  old_ply->n_non_empty_leafs -=
 	    ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
 
@@ -328,28 +372,143 @@ set_leaf (ip4_fib_mtrie_t * m,
 	  new_ply = get_next_ply_for_leaf (m, new_leaf);
 
 	  /* Refetch since ply_create may move pool. */
-	  old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+	  old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
 
 	  __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
 				       new_leaf);
 	  ASSERT (old_ply->leaves[dst_byte] == new_leaf);
 	  old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
 
-	  /* Account for the ply we just created. */
-	  old_ply->n_non_empty_leafs += 1;
+	  old_ply->n_non_empty_leafs +=
+	    ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
 	  ASSERT (old_ply->n_non_empty_leafs >= 0);
 	}
       else
 	new_ply = get_next_ply_for_leaf (m, old_leaf);
 
-      set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1);
+      set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
+    }
+}
+
+static void
+set_root_leaf (ip4_fib_mtrie_t * m,
+	       const ip4_fib_mtrie_set_unset_leaf_args_t * a)
+{
+  ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
+  ip4_fib_mtrie_16_ply_t *old_ply;
+  i32 n_dst_bits_next_plies;
+  u16 dst_byte;
+
+  old_ply = &m->root_ply;
+
+  ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
+
+  /* how many bits of the destination address are in the next PLY */
+  n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
+
+  dst_byte = a->dst_address.as_u16[0];
+
+  /* Number of bits next plies <= 0 => insert leaves this ply. */
+  if (n_dst_bits_next_plies <= 0)
+    {
+      /* The mask length of the address to insert maps to this ply */
+      uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
+
+      /* The number of bits, and hence slots/buckets, we will fill */
+      n_dst_bits_this_ply = 16 - a->dst_address_length;
+      ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) &
+	       pow2_mask (n_dst_bits_this_ply)) == 0);
+
+      /* Starting at the value of the byte at this section of the v4 address
+       * fill the buckets/slots of the ply */
+      for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
+	{
+	  ip4_fib_mtrie_8_ply_t *new_ply;
+	  u16 slot;
+
+	  slot = clib_net_to_host_u16 (dst_byte);
+	  slot += i;
+	  slot = clib_host_to_net_u16 (slot);
+
+	  old_leaf = old_ply->leaves[slot];
+	  old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+	  if (a->dst_address_length >=
+	      old_ply->dst_address_bits_of_leaves[slot])
+	    {
+	      /* The new leaf is more or equally specific than the one currently
+	       * occupying the slot */
+	      new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+	      if (old_leaf_is_terminal)
+		{
+		  /* The current leaf is terminal, we can replace it with
+		   * the new one */
+		  old_ply->dst_address_bits_of_leaves[slot] =
+		    a->dst_address_length;
+		  __sync_val_compare_and_swap (&old_ply->leaves[slot],
+					       old_leaf, new_leaf);
+		  ASSERT (old_ply->leaves[slot] == new_leaf);
+		}
+	      else
+		{
+		  /* Existing leaf points to another ply.  We need to place
+		   * new_leaf into all more specific slots. */
+		  new_ply = get_next_ply_for_leaf (m, old_leaf);
+		  set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+						   a->dst_address_length);
+		}
+	    }
+	  else if (!old_leaf_is_terminal)
+	    {
+	      /* The current leaf is less specific and not termial (i.e. a ply),
+	       * recurse on down the trie */
+	      new_ply = get_next_ply_for_leaf (m, old_leaf);
+	      set_leaf (m, a, new_ply - ip4_ply_pool, 2);
+	    }
+	  /*
+	   * else
+	   *  the route we are adding is less specific than the leaf currently
+	   *  occupying this slot. leave it there
+	   */
+	}
+    }
+  else
+    {
+      /* The address to insert requires us to move down at a lower level of
+       * the trie - recurse on down */
+      ip4_fib_mtrie_8_ply_t *new_ply;
+      u8 ply_base_len;
+
+      ply_base_len = 16;
+
+      old_leaf = old_ply->leaves[dst_byte];
+
+      if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+	{
+	  /* There is a leaf occupying the slot. Replace it with a new ply */
+	  new_leaf = ply_create (m, old_leaf,
+				 clib_max (old_ply->dst_address_bits_of_leaves
+					   [dst_byte], ply_base_len),
+				 ply_base_len);
+	  new_ply = get_next_ply_for_leaf (m, new_leaf);
+
+	  __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
+				       new_leaf);
+	  ASSERT (old_ply->leaves[dst_byte] == new_leaf);
+	  old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
+	}
+      else
+	new_ply = get_next_ply_for_leaf (m, old_leaf);
+
+      set_leaf (m, a, new_ply - ip4_ply_pool, 2);
     }
 }
 
 static uword
 unset_leaf (ip4_fib_mtrie_t * m,
-	    ip4_fib_mtrie_set_unset_leaf_args_t * a,
-	    ip4_fib_mtrie_ply_t * old_ply, u32 dst_address_byte_index)
+	    const ip4_fib_mtrie_set_unset_leaf_args_t * a,
+	    ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index)
 {
   ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
   i32 n_dst_bits_next_plies;
@@ -397,7 +556,7 @@ unset_leaf (ip4_fib_mtrie_t * m,
 	  ASSERT (old_ply->n_non_empty_leafs >= 0);
 	  if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
 	    {
-	      pool_put (m->ply_pool, old_ply);
+	      pool_put (ip4_ply_pool, old_ply);
 	      /* Old ply was deleted. */
 	      return 1;
 	    }
@@ -419,106 +578,120 @@ unset_leaf (ip4_fib_mtrie_t * m,
   return 0;
 }
 
-void
-ip4_mtrie_init (ip4_fib_mtrie_t * m)
+static void
+unset_root_leaf (ip4_fib_mtrie_t * m,
+		 const ip4_fib_mtrie_set_unset_leaf_args_t * a)
 {
-  ip4_fib_mtrie_leaf_t root;
-  memset (m, 0, sizeof (m[0]));
-  root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, 0, 0);
-  ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0);
-}
+  ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
+  i32 n_dst_bits_next_plies;
+  i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
+  u16 dst_byte;
+  ip4_fib_mtrie_16_ply_t *old_ply;
 
-void
-ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
-			     ip4_address_t dst_address,
-			     u32 dst_address_length,
-			     u32 adj_index, u32 is_del)
-{
-  ip4_fib_mtrie_t *m = &fib->mtrie;
-  ip4_fib_mtrie_ply_t *root_ply;
-  ip4_fib_mtrie_set_unset_leaf_args_t a;
-  ip4_main_t *im = &ip4_main;
+  ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
 
-  ASSERT (m->ply_pool != 0);
+  old_ply = &m->root_ply;
+  n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
 
-  root_ply = pool_elt_at_index (m->ply_pool, 0);
+  dst_byte = a->dst_address.as_u16[0];
 
-  /* Honor dst_address_length. Fib masks are in network byte order */
-  dst_address.as_u32 &= im->fib_masks[dst_address_length];
-  a.dst_address = dst_address;
-  a.dst_address_length = dst_address_length;
-  a.adj_index = adj_index;
+  n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ?
+			 (16 - a->dst_address_length) : 0);
 
-  if (!is_del)
-    {
-      set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
-    }
-  else
+  del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+  /* Starting at the value of the byte at this section of the v4 address
+   * fill the buckets/slots of the ply */
+  for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
     {
-      ip4_main_t *im = &ip4_main;
+      u16 slot;
+
+      slot = clib_net_to_host_u16 (dst_byte);
+      slot += i;
+      slot = clib_host_to_net_u16 (slot);
 
-      if (dst_address_length)
+      old_leaf = old_ply->leaves[slot];
+      old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+      if (old_leaf == del_leaf
+	  || (!old_leaf_is_terminal
+	      && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2)))
 	{
-	  word i;
+	  old_ply->leaves[slot] =
+	    ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
+	  old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length;
+	}
+    }
+}
 
-	  /* If the ply was not deleted, then we need to fill the
-	   * bucket just reset will the leaf from the less specfic
-	   * cover.
-	   * Find next less specific route and insert into mtrie. */
-	  for (i = dst_address_length - 1; i >= 0; i--)
-	    {
-	      uword *p;
-	      index_t lbi;
-	      ip4_address_t key;
+void
+ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
+			 const ip4_address_t * dst_address,
+			 u32 dst_address_length, u32 adj_index)
+{
+  ip4_fib_mtrie_set_unset_leaf_args_t a;
+  ip4_main_t *im = &ip4_main;
 
-	      if (!fib->fib_entry_by_dst_address[i])
-		continue;
+  /* Honor dst_address_length. Fib masks are in network byte order */
+  a.dst_address.as_u32 = (dst_address->as_u32 &
+			  im->fib_masks[dst_address_length]);
+  a.dst_address_length = dst_address_length;
+  a.adj_index = adj_index;
 
-	      key.as_u32 = dst_address.as_u32 & im->fib_masks[i];
-	      p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32);
-	      if (p)
-		{
-		  lbi = fib_entry_contribute_ip_forwarding (p[0])->dpoi_index;
-		  if (INDEX_INVALID == lbi)
-		    continue;
+  set_root_leaf (m, &a);
+}
 
-		  a.cover_adj_index = lbi;
-		  a.cover_address_length = i;
+void
+ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
+			 const ip4_address_t * dst_address,
+			 u32 dst_address_length,
+			 u32 adj_index,
+			 u32 cover_address_length, u32 cover_adj_index)
+{
+  ip4_fib_mtrie_set_unset_leaf_args_t a;
+  ip4_main_t *im = &ip4_main;
 
-		  break;
-		}
-	    }
-	}
-      else
-	{
-	  a.cover_adj_index = 0;
-	  a.cover_address_length = 0;
-	}
+  /* Honor dst_address_length. Fib masks are in network byte order */
+  a.dst_address.as_u32 = (dst_address->as_u32 &
+			  im->fib_masks[dst_address_length]);
+  a.dst_address_length = dst_address_length;
+  a.adj_index = adj_index;
+  a.cover_adj_index = cover_adj_index;
+  a.cover_address_length = cover_address_length;
 
-      /* the top level ply is never removed, so we can ignore the return code */
-      unset_leaf (m, &a, root_ply, 0);
-    }
+  /* the top level ply is never removed */
+  unset_root_leaf (m, &a);
 }
 
 /* Returns number of bytes of memory used by mtrie. */
 static uword
-mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
+mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p)
 {
   uword bytes, i;
 
-  if (!p)
-    {
-      if (pool_is_free_index (m->ply_pool, 0))
-	return 0;
-      p = pool_elt_at_index (m->ply_pool, 0);
-    }
-
   bytes = sizeof (p[0]);
   for (i = 0; i < ARRAY_LEN (p->leaves); i++)
     {
       ip4_fib_mtrie_leaf_t l = p->leaves[i];
       if (ip4_fib_mtrie_leaf_is_next_ply (l))
-	bytes += mtrie_memory_usage (m, get_next_ply_for_leaf (m, l));
+	bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
+    }
+
+  return bytes;
+}
+
+/* Returns number of bytes of memory used by mtrie. */
+static uword
+mtrie_memory_usage (ip4_fib_mtrie_t * m)
+{
+  uword bytes, i;
+
+  bytes = sizeof (*m);
+  for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
+    {
+      ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i];
+      if (ip4_fib_mtrie_leaf_is_next_ply (l))
+	bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
     }
 
   return bytes;
@@ -536,47 +709,49 @@ format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
   return s;
 }
 
+#define FORMAT_PLY(s, _p, _i, _base_address, _ply_max_len, _indent)     \
+({                                                                      \
+  u32 a, ia_length;                                                     \
+  ip4_address_t ia;                                                     \
+  ip4_fib_mtrie_leaf_t _l = p->leaves[(_i)];                            \
+                                                                        \
+  a = (_base_address) + ((_i) << (32 - (_ply_max_len)));                \
+  ia.as_u32 = clib_host_to_net_u32 (a);                                 \
+  ia_length = (_p)->dst_address_bits_of_leaves[(_i)];                   \
+  s = format (s, "\n%U%20U %U",                                         \
+              format_white_space, (_indent) + 2,                        \
+              format_ip4_address_and_length, &ia, ia_length,            \
+              format_ip4_fib_mtrie_leaf, _l);                           \
+                                                                        \
+  if (ip4_fib_mtrie_leaf_is_next_ply (_l))                              \
+    s = format (s, "\n%U%U",                                            \
+                format_white_space, (_indent) + 2,                      \
+                format_ip4_fib_mtrie_ply, m, a,                         \
+                ip4_fib_mtrie_leaf_get_next_ply_index (_l));            \
+  s;                                                                    \
+})
+
 static u8 *
 format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
 {
   ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
   u32 base_address = va_arg (*va, u32);
   u32 ply_index = va_arg (*va, u32);
-  u32 dst_address_byte_index = va_arg (*va, u32);
-  ip4_fib_mtrie_ply_t *p;
-  uword i, indent;
+  ip4_fib_mtrie_8_ply_t *p;
+  uword indent;
+  int i;
 
-  p = pool_elt_at_index (m->ply_pool, ply_index);
+  p = pool_elt_at_index (ip4_ply_pool, ply_index);
   indent = format_get_indent (s);
-  s =
-    format (s, "ply index %d, %d non-empty leaves", ply_index,
-	    p->n_non_empty_leafs);
+  s = format (s, "ply index %d, %d non-empty leaves", ply_index,
+	      p->n_non_empty_leafs);
+
   for (i = 0; i < ARRAY_LEN (p->leaves); i++)
     {
-      ip4_fib_mtrie_leaf_t l = p->leaves[i];
-
       if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
 	{
-	  u32 a, ia_length;
-	  ip4_address_t ia;
-
-	  a = base_address + (i << (24 - 8 * dst_address_byte_index));
-	  ia.as_u32 = clib_host_to_net_u32 (a);
-	  if (ip4_fib_mtrie_leaf_is_terminal (l))
-	    ia_length = p->dst_address_bits_of_leaves[i];
-	  else
-	    ia_length = 8 * (1 + dst_address_byte_index);
-	  s = format (s, "\n%U%20U %U",
-		      format_white_space, indent + 2,
-		      format_ip4_address_and_length, &ia, ia_length,
-		      format_ip4_fib_mtrie_leaf, l);
-
-	  if (ip4_fib_mtrie_leaf_is_next_ply (l))
-	    s = format (s, "\n%U%U",
-			format_white_space, indent + 2,
-			format_ip4_fib_mtrie_ply, m, a,
-			ip4_fib_mtrie_leaf_get_next_ply_index (l),
-			dst_address_byte_index + 1);
+	  FORMAT_PLY (s, p, i, base_address,
+		      p->dst_address_bits_base + 8, indent);
 	}
     }
 
@@ -587,22 +762,44 @@ u8 *
 format_ip4_fib_mtrie (u8 * s, va_list * va)
 {
   ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+  ip4_fib_mtrie_16_ply_t *p;
+  u32 base_address = 0;
+  int i;
 
-  s = format (s, "%d plies, memory usage %U",
-	      pool_elts (m->ply_pool),
-	      format_memory_size, mtrie_memory_usage (m, 0));
+  s = format (s, "%d plies, memory usage %U\n",
+	      pool_elts (ip4_ply_pool),
+	      format_memory_size, mtrie_memory_usage (m));
+  s = format (s, "root-ply");
+  p = &m->root_ply;
 
-  if (pool_elts (m->ply_pool) > 0)
+  for (i = 0; i < ARRAY_LEN (p->leaves); i++)
     {
-      ip4_address_t base_address;
-      base_address.as_u32 = 0;
-      s =
-	format (s, "\n  %U", format_ip4_fib_mtrie_ply, m, base_address, 0, 0);
+      u16 slot;
+
+      slot = clib_host_to_net_u16 (i);
+
+      if (p->dst_address_bits_of_leaves[slot] > 0)
+	{
+	  FORMAT_PLY (s, p, slot, base_address, 16, 2);
+	}
     }
 
   return s;
 }
 
+static clib_error_t *
+ip4_mtrie_module_init (vlib_main_t * vm)
+{
+  /* Burn one ply so index 0 is taken */
+  CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p);
+
+  pool_get (ip4_ply_pool, p);
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
index 128195d3..be262c2c 100644
--- a/src/vnet/ip/ip4_mtrie.h
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -47,16 +47,43 @@
 
 /* ip4 fib leafs: 4 ply 8-8-8-8 mtrie.
    1 + 2*adj_index for terminal leaves.
-   0 + 2*next_ply_index for non-terminals.
+   0 + 2*next_ply_index for non-terminals, i.e. PLYs
    1 => empty (adjacency index of zero is special miss adjacency). */
 typedef u32 ip4_fib_mtrie_leaf_t;
 
 #define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0)
 
+/**
+ * @brief the 16 way stride that is the top PLY of the mtrie
+ * We do not maintain the count of 'real' leaves in this PLY, since
+ * it is never removed. The FIB will destroy the mtrie and the ply once
+ * the FIB is destroyed.
+ */
+#define PLY_16_SIZE (1<<16)
+typedef struct ip4_fib_mtrie_16_ply_t_
+{
+  /**
+   * The leaves/slots/buckets to be filed with leafs
+   */
+  union
+  {
+    ip4_fib_mtrie_leaf_t leaves[PLY_16_SIZE];
+
+#ifdef CLIB_HAVE_VEC128
+    u32x4 leaves_as_u32x4[PLY_16_SIZE / 4];
+#endif
+  };
+
+  /**
+   * Prefix length for terminal leaves.
+   */
+  u8 dst_address_bits_of_leaves[PLY_16_SIZE];
+} ip4_fib_mtrie_16_ply_t;
+
 /**
  * @brief One ply of the 4 ply mtrie fib.
  */
-typedef struct
+typedef struct ip4_fib_mtrie_8_ply_t_
 {
   /**
    * The leaves/slots/buckets to be filed with leafs
@@ -90,34 +117,72 @@ typedef struct
   /* Pad to cache line boundary. */
   u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)];
 }
-ip4_fib_mtrie_ply_t;
+ip4_fib_mtrie_8_ply_t;
 
-STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_ply_t) % CLIB_CACHE_LINE_BYTES,
+STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES,
 	       "IP4 Mtrie ply cache line");
 
+/**
+ * @brief The mutiway-TRIE.
+ * There is no data associated with the mtrie apart from the top PLY
+ */
 typedef struct
 {
-  /* Pool of plies.  Index zero is root ply. */
-  ip4_fib_mtrie_ply_t *ply_pool;
+  /**
+   * Embed the PLY with the mtrie struct. This means that the Data-plane
+   * 'get me the mtrie' returns the first ply, and not an indirect 'pointer'
+   * to it. therefore no cachline misses in the data-path.
+   */
+  ip4_fib_mtrie_16_ply_t root_ply;
 } ip4_fib_mtrie_t;
 
-void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m);
+/**
+ * @brief Initialise an mtrie
+ */
+void ip4_mtrie_init (ip4_fib_mtrie_t * m);
 
-struct ip4_fib_t;
+/**
+ * @brief Free an mtrie, It must be emty when free'd
+ */
+void ip4_mtrie_free (ip4_fib_mtrie_t * m);
 
-void ip4_fib_mtrie_add_del_route (struct ip4_fib_t *f,
-				  ip4_address_t dst_address,
-				  u32 dst_address_length,
-				  u32 adj_index, u32 is_del);
+/**
+ * @brief Add a route/rntry to the mtrie
+ */
+void ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
+			      const ip4_address_t * dst_address,
+			      u32 dst_address_length, u32 adj_index);
+/**
+ * @brief remove a route/rntry to the mtrie
+ */
+void ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
+			      const ip4_address_t * dst_address,
+			      u32 dst_address_length,
+			      u32 adj_index,
+			      u32 cover_address_length, u32 cover_adj_index);
 
+/**
+ * @brief Format/display the contents of the mtrie
+ */
 format_function_t format_ip4_fib_mtrie;
 
+/**
+ * @brief A global pool of 8bit stride plys
+ */
+extern ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
+
+/**
+ * Is the leaf terminal (i.e. an LB index) or non-terminak (i.e. a PLY index)
+ */
 always_inline u32
 ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
 {
   return n & 1;
 }
 
+/**
+ * From the stored slot value extract the LB index value
+ */
 always_inline u32
 ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
 {
@@ -125,35 +190,38 @@ ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
   return n >> 1;
 }
 
-/* Lookup step.  Processes 1 byte of 4 byte ip4 address. */
+/**
+ * @brief Lookup step.  Processes 1 byte of 4 byte ip4 address.
+ */
 always_inline ip4_fib_mtrie_leaf_t
 ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m,
 			   ip4_fib_mtrie_leaf_t current_leaf,
 			   const ip4_address_t * dst_address,
 			   u32 dst_address_byte_index)
 {
-  ip4_fib_mtrie_ply_t *ply;
+  ip4_fib_mtrie_8_ply_t *ply;
+
   uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf);
 
   if (!current_is_terminal)
     {
-      ply = m->ply_pool + (current_leaf >> 1);
+      ply = ip4_ply_pool + (current_leaf >> 1);
       return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]);
     }
 
   return current_leaf;
 }
 
-/* Lookup step.  Processes 1 byte of 4 byte ip4 address. */
+/**
+ * @brief Lookup step number 1.  Processes 2 bytes of 4 byte ip4 address.
+ */
 always_inline ip4_fib_mtrie_leaf_t
 ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m,
 			       const ip4_address_t * dst_address)
 {
   ip4_fib_mtrie_leaf_t next_leaf;
-  ip4_fib_mtrie_ply_t *ply;
 
-  ply = m->ply_pool;
-  next_leaf = ply->leaves[dst_address->as_u8[0]];
+  next_leaf = m->root_ply.leaves[dst_address->as_u16[0]];
 
   return next_leaf;
 }
diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h
index b2c1fcd4..1ff9fbdb 100644
--- a/src/vnet/ip/ip4_packet.h
+++ b/src/vnet/ip/ip4_packet.h
@@ -52,6 +52,7 @@ typedef union
   u32 data_u32;
   /* Aliases. */
   u8 as_u8[4];
+  u16 as_u16[2];
   u32 as_u32;
 } ip4_address_t;
 
diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c
index 7c2b7be8..6831066e 100644
--- a/src/vnet/ip/ip4_source_check.c
+++ b/src/vnet/ip/ip4_source_check.c
@@ -165,11 +165,6 @@ ip4_source_check_inline (vlib_main_t * vm,
 	  leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
 	  leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
 
-	  leaf0 =
-	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
-	  leaf1 =
-	    ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
-
 	  leaf0 =
 	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
 	  leaf1 =
@@ -248,9 +243,6 @@ ip4_source_check_inline (vlib_main_t * vm,
 
 	  leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
 
-	  leaf0 =
-	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
-
 	  leaf0 =
 	    ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
 
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
index 8fa9a479..bf7ec7d5 100644
--- a/src/vnet/ip/ip6.h
+++ b/src/vnet/ip/ip6.h
@@ -153,6 +153,9 @@ typedef struct ip6_main_t
   /* Pool of FIBs. */
   struct fib_table_t_ *fibs;
 
+  /* Pool of V6 FIBs. */
+  ip6_fib_t *v6_fibs;
+
   /** Vector of MFIBs. */
   struct mfib_table_t_ *mfibs;
 
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index e3a1fee8..b9f1782b 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -240,6 +240,21 @@ send_ip_fib_details (vpe_api_main_t * am,
   vl_msg_api_send_shmem (q, (u8 *) & mp);
 }
 
+typedef struct vl_api_ip_fib_dump_walk_ctx_t_
+{
+  fib_node_index_t *feis;
+} vl_api_ip_fib_dump_walk_ctx_t;
+
+static int
+vl_api_ip_fib_dump_walk (fib_node_index_t fei, void *arg)
+{
+  vl_api_ip_fib_dump_walk_ctx_t *ctx = arg;
+
+  vec_add1 (ctx->feis, fei);
+
+  return (1);
+}
+
 static void
 vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
 {
@@ -247,12 +262,13 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
   unix_shared_memory_queue_t *q;
   ip4_main_t *im = &ip4_main;
   fib_table_t *fib_table;
-  fib_node_index_t lfei, *lfeip, *lfeis = NULL;
-  mpls_label_t key;
+  fib_node_index_t *lfeip;
   fib_prefix_t pfx;
   u32 fib_index;
   fib_route_path_encode_t *api_rpaths;
-  int i;
+  vl_api_ip_fib_dump_walk_ctx_t ctx = {
+    .feis = NULL,
+  };
 
   q = vl_api_client_index_to_input_queue (mp->client_index);
   if (q == 0)
@@ -261,19 +277,16 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
   /* *INDENT-OFF* */
   pool_foreach (fib_table, im->fibs,
   ({
-    for (i = 0; i < ARRAY_LEN (fib_table->v4.fib_entry_by_dst_address); i++)
-      {
-        hash_foreach(key, lfei, fib_table->v4.fib_entry_by_dst_address[i],
-        ({
-          vec_add1(lfeis, lfei);
-        }));
-      }
+    fib_table_walk(fib_table->ft_index,
+                   FIB_PROTOCOL_IP4,
+                   vl_api_ip_fib_dump_walk,
+                   &ctx);
   }));
   /* *INDENT-ON* */
 
-  vec_sort_with_function (lfeis, fib_entry_cmp_for_sort);
+  vec_sort_with_function (ctx.feis, fib_entry_cmp_for_sort);
 
-  vec_foreach (lfeip, lfeis)
+  vec_foreach (lfeip, ctx.feis)
   {
     fib_entry_get_prefix (*lfeip, &pfx);
     fib_index = fib_entry_get_fib_index (*lfeip);
@@ -286,7 +299,7 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
     vec_free (api_rpaths);
   }
 
-  vec_free (lfeis);
+  vec_free (ctx.feis);
 }
 
 static void
@@ -377,10 +390,10 @@ api_ip6_fib_table_get_all (unix_shared_memory_queue_t * q,
 {
   vpe_api_main_t *am = &vpe_api_main;
   ip6_main_t *im6 = &ip6_main;
-  ip6_fib_t *fib = &fib_table->v6;
   fib_node_index_t *fib_entry_index;
   api_ip6_fib_show_ctx_t ctx = {
-    .fib_index = fib->index,.entries = NULL,
+    .fib_index = fib_table->ft_index,
+    .entries = NULL,
   };
   fib_route_path_encode_t *api_rpaths;
   fib_prefix_t pfx;
diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c
index f631dc76..a085aaa2 100644
--- a/src/vnet/mpls/interface.c
+++ b/src/vnet/mpls/interface.c
@@ -18,6 +18,7 @@
 #include <vnet/vnet.h>
 #include <vnet/pg/pg.h>
 #include <vnet/mpls/mpls.h>
+#include <vnet/fib/mpls_fib.h>
 #include <vnet/fib/ip4_fib.h>
 #include <vnet/adj/adj_midchain.h>
 #include <vnet/dpo/classify_dpo.h>
diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h
index 300f2cfd..b0125e60 100644
--- a/src/vnet/mpls/mpls.h
+++ b/src/vnet/mpls/mpls.h
@@ -30,29 +30,6 @@ typedef enum {
   MPLS_N_ERROR,
 } mpls_error_t;
 
-#define MPLS_FIB_DEFAULT_TABLE_ID 0
-
-/**
- * Type exposure is to allow the DP fast/inlined access
- */
-#define MPLS_FIB_KEY_SIZE 21
-#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1))
-
-typedef struct mpls_fib_t_
-{
-  /**
-   * A hash table of entries. 21 bit key
-   * Hash table for reduced memory footprint
-   */
-  uword * mf_entries;
-
-  /**
-   * The load-balance indeices keyed by 21 bit label+eos bit.
-   * A flat array for maximum lookup performace.
-   */
-  index_t mf_lbs[MPLS_FIB_DB_SIZE];
-} mpls_fib_t;
-
 /**
  * @brief Definition of a callback for receiving MPLS interface state change
  * notifications
@@ -67,6 +44,9 @@ typedef struct {
   /**  A pool of all the MPLS FIBs */
   struct fib_table_t_ *fibs;
 
+  /**  A pool of all the MPLS FIBs */
+  struct mpls_fib_t_ *mpls_fibs;
+
   /** A hash table to lookup the mpls_fib by table ID */
   uword *fib_index_by_table_id;
 
diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c
index a36a5046..f1aef6c9 100644
--- a/src/vnet/mpls/mpls_api.c
+++ b/src/vnet/mpls/mpls_api.c
@@ -26,6 +26,7 @@
 #include <vnet/mpls/mpls_tunnel.h>
 #include <vnet/fib/fib_table.h>
 #include <vnet/fib/fib_api.h>
+#include <vnet/fib/mpls_fib.h>
 
 #include <vnet/vnet_msg_enum.h>
 
@@ -369,6 +370,21 @@ send_mpls_fib_details (vpe_api_main_t * am,
   vl_msg_api_send_shmem (q, (u8 *) & mp);
 }
 
+typedef struct vl_api_mpls_fib_dump_table_walk_ctx_t_
+{
+  fib_node_index_t *lfeis;
+} vl_api_mpls_fib_dump_table_walk_ctx_t;
+
+static int
+vl_api_mpls_fib_dump_table_walk (fib_node_index_t fei, void *arg)
+{
+  vl_api_mpls_fib_dump_table_walk_ctx_t *ctx = arg;
+
+  vec_add1 (ctx->lfeis, fei);
+
+  return (1);
+}
+
 static void
 vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp)
 {
@@ -376,28 +392,30 @@ vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp)
   unix_shared_memory_queue_t *q;
   mpls_main_t *mm = &mpls_main;
   fib_table_t *fib_table;
-  fib_node_index_t lfei, *lfeip, *lfeis = NULL;
-  mpls_label_t key;
+  mpls_fib_t *mpls_fib;
+  fib_node_index_t *lfeip = NULL;
   fib_prefix_t pfx;
   u32 fib_index;
   fib_route_path_encode_t *api_rpaths;
+  vl_api_mpls_fib_dump_table_walk_ctx_t ctx = {
+    .lfeis = NULL,
+  };
 
   q = vl_api_client_index_to_input_queue (mp->client_index);
   if (q == 0)
     return;
 
   /* *INDENT-OFF* */
-  pool_foreach (fib_table, mm->fibs,
+  pool_foreach (mpls_fib, mm->mpls_fibs,
   ({
-    hash_foreach(key, lfei, fib_table->mpls.mf_entries,
-    ({
-  vec_add1(lfeis, lfei);
-    }));
+    mpls_fib_table_walk (mpls_fib,
+                         vl_api_mpls_fib_dump_table_walk,
+                         &ctx);
   }));
   /* *INDENT-ON* */
-  vec_sort_with_function (lfeis, fib_entry_cmp_for_sort);
+  vec_sort_with_function (ctx.lfeis, fib_entry_cmp_for_sort);
 
-  vec_foreach (lfeip, lfeis)
+  vec_foreach (lfeip, ctx.lfeis)
   {
     fib_entry_get_prefix (*lfeip, &pfx);
     fib_index = fib_entry_get_fib_index (*lfeip);
@@ -410,7 +428,7 @@ vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp)
     vec_free (api_rpaths);
   }
 
-  vec_free (lfeis);
+  vec_free (ctx.lfeis);
 }
 
 /*
diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c
index 14ccd864..09ae8b8f 100644
--- a/src/vpp/api/api.c
+++ b/src/vpp/api/api.c
@@ -896,9 +896,10 @@ ip4_reset_fib_t_handler (vl_api_reset_fib_t * mp)
   /* *INDENT-OFF* */
   pool_foreach (fib_table, im4->fibs,
   ({
-    fib = &fib_table->v4;
     vnet_sw_interface_t * si;
 
+    fib = pool_elt_at_index (im4->v4_fibs, fib_table->ft_index);
+
     if (fib->table_id != target_fib_id)
       continue;
 
@@ -964,7 +965,8 @@ ip6_reset_fib_t_handler (vl_api_reset_fib_t * mp)
   pool_foreach (fib_table, im6->fibs,
   ({
     vnet_sw_interface_t * si;
-    fib = &(fib_table->v6);
+
+    fib = pool_elt_at_index (im6->v6_fibs, fib_table->ft_index);
 
     if (fib->table_id != target_fib_id)
       continue;
diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c
index 1927da0b..042d02e2 100644
--- a/src/vpp/stats/stats.c
+++ b/src/vpp/stats/stats.c
@@ -17,6 +17,7 @@
 #include <vlib/threads.h>
 #include <vnet/fib/fib_entry.h>
 #include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
 #include <vnet/dpo/load_balance.h>
 
 #define STATS_DEBUG 0
@@ -576,6 +577,7 @@ do_ip4_fibs (stats_main_t * sm)
   static ip4_route_t *routes;
   ip4_route_t *r;
   fib_table_t *fib;
+  ip4_fib_t *v4_fib;
   ip_lookup_main_t *lm = &im4->lookup_main;
   static uword *results;
   vl_api_vnet_ip4_fib_counters_t *mp = 0;
@@ -592,6 +594,8 @@ again:
     while ((fib - im4->fibs) < start_at_fib_index)
       continue;
 
+    v4_fib = pool_elt_at_index (im4->v4_fibs, fib->ft_index);
+
     if (mp == 0)
       {
 	items_this_message = IP4_FIB_COUNTER_BATCH_SIZE;
@@ -615,9 +619,9 @@ again:
     vec_reset_length (routes);
     vec_reset_length (results);
 
-    for (i = 0; i < ARRAY_LEN (fib->v4.fib_entry_by_dst_address); i++)
+    for (i = 0; i < ARRAY_LEN (v4_fib->fib_entry_by_dst_address); i++)
       {
-	uword *hash = fib->v4.fib_entry_by_dst_address[i];
+	uword *hash = v4_fib->fib_entry_by_dst_address[i];
 	hash_pair_t *p;
 	ip4_route_t x;
 
-- 
cgit 1.2.3-korg


From 586afd762bfa149f5ca167bd5fd5a0cd59ce94fe Mon Sep 17 00:00:00 2001
From: Damjan Marion <damarion@cisco.com>
Date: Wed, 5 Apr 2017 19:18:20 +0200
Subject: Use thread local storage for thread index

This patch deprecates stack-based thread identification.
It also removes the requirement that thread stacks are adjacent.

Finally, possibly annoying for some folks, it renames
all occurrences of cpu_index and cpu_number to thread
index. Using the word "cpu" is misleading here, as a thread
can be migrated to a different CPU, and it is also not
related to the Linux CPU index.

Change-Id: I68cdaf661e701d2336fc953dcb9978d10a70f7c1
Signed-off-by: Damjan Marion <damarion@cisco.com>
---
 src/examples/srv6-sample-localsid/node.c           |   4 +-
 src/plugins/dpdk/buffer.c                          |   2 +-
 src/plugins/dpdk/device/device.c                   |   8 +-
 src/plugins/dpdk/device/dpdk_priv.h                |   8 +-
 src/plugins/dpdk/device/init.c                     |   2 +-
 src/plugins/dpdk/device/node.c                     |  32 +++---
 src/plugins/dpdk/hqos/hqos.c                       |  16 +--
 src/plugins/dpdk/ipsec/cli.c                       |   8 +-
 src/plugins/dpdk/ipsec/crypto_node.c               |   4 +-
 src/plugins/dpdk/ipsec/esp.h                       |   4 +-
 src/plugins/dpdk/ipsec/esp_decrypt.c               |   4 +-
 src/plugins/dpdk/ipsec/esp_encrypt.c               |   5 +-
 src/plugins/dpdk/ipsec/ipsec.c                     |   2 +-
 src/plugins/dpdk/ipsec/ipsec.h                     |   4 +-
 src/plugins/dpdk/main.c                            |   2 +-
 src/plugins/flowperpkt/l2_node.c                   |   2 +-
 src/plugins/flowperpkt/node.c                      |   2 +-
 src/plugins/ioam/export-common/ioam_export.h       |   6 +-
 .../ioam/ip6/ioam_cache_tunnel_select_node.c       |  16 +--
 src/plugins/ixge/ixge.c                            |   2 +-
 src/plugins/lb/lb.c                                |   8 +-
 src/plugins/lb/node.c                              |  22 ++--
 src/plugins/lb/refcount.c                          |   8 +-
 src/plugins/lb/refcount.h                          |   4 +-
 src/plugins/memif/node.c                           |  35 +++---
 src/plugins/snat/in2out.c                          | 110 +++++++++---------
 src/plugins/snat/out2in.c                          | 102 ++++++++---------
 src/plugins/snat/snat.h                            |  10 +-
 src/vlib/buffer.c                                  |   6 +-
 src/vlib/buffer_funcs.h                            |   4 +-
 src/vlib/cli.c                                     |   6 +-
 src/vlib/counter.h                                 |  16 +--
 src/vlib/error.c                                   |   2 +-
 src/vlib/global_funcs.h                            |   2 +-
 src/vlib/main.c                                    |  14 +--
 src/vlib/main.h                                    |   2 +-
 src/vlib/node.c                                    |   2 +-
 src/vlib/node.h                                    |   6 +-
 src/vlib/node_funcs.h                              |   8 +-
 src/vlib/threads.c                                 |  69 ++++-------
 src/vlib/threads.h                                 |  21 ++--
 src/vlib/unix/cj.c                                 |   7 +-
 src/vlib/unix/cj.h                                 |   2 +-
 src/vlib/unix/main.c                               |  43 +++----
 src/vnet/adj/adj_l2.c                              |   4 +-
 src/vnet/adj/adj_midchain.c                        |   8 +-
 src/vnet/adj/adj_nsh.c                             |   4 +-
 src/vnet/classify/vnet_classify.c                  |  16 +--
 src/vnet/cop/ip4_whitelist.c                       |   8 +-
 src/vnet/cop/ip6_whitelist.c                       |   8 +-
 src/vnet/devices/af_packet/node.c                  |  20 ++--
 src/vnet/devices/devices.c                         |  61 +++++-----
 src/vnet/devices/devices.h                         |  18 +--
 src/vnet/devices/netmap/node.c                     |  24 ++--
 src/vnet/devices/ssvm/node.c                       |   6 +-
 src/vnet/devices/virtio/vhost-user.c               | 127 +++++++++++----------
 src/vnet/dpo/lookup_dpo.c                          |  20 ++--
 src/vnet/dpo/replicate_dpo.c                       |  12 +-
 src/vnet/ethernet/arp.c                            |   2 +-
 src/vnet/ethernet/interface.c                      |   7 +-
 src/vnet/ethernet/node.c                           |  14 +--
 src/vnet/gre/node.c                                |   8 +-
 src/vnet/interface.h                               |   2 +-
 src/vnet/interface_output.c                        |  53 ++++-----
 src/vnet/ip/ip4_forward.c                          |  34 +++---
 src/vnet/ip/ip4_input.c                            |   8 +-
 src/vnet/ip/ip6_forward.c                          |  24 ++--
 src/vnet/ip/ip6_input.c                            |   8 +-
 src/vnet/ip/ip6_neighbor.c                         |   4 +-
 src/vnet/ipsec/esp.h                               |   8 +-
 src/vnet/ipsec/esp_decrypt.c                       |  13 ++-
 src/vnet/ipsec/esp_encrypt.c                       |  13 ++-
 src/vnet/ipsec/ikev2.c                             |  64 ++++++-----
 src/vnet/ipsec/ipsec.h                             |  12 +-
 src/vnet/ipsec/ipsec_if.c                          |   2 +-
 src/vnet/l2/l2_bvi.h                               |   2 +-
 src/vnet/l2/l2_input.c                             |  14 +--
 src/vnet/l2/l2_output.c                            |   6 +-
 src/vnet/l2tp/decap.c                              |   2 +-
 src/vnet/l2tp/encap.c                              |   2 +-
 src/vnet/l2tp/l2tp.c                               |   6 +-
 src/vnet/lisp-gpe/decap.c                          |  16 +--
 src/vnet/lldp/lldp_input.c                         |   2 +-
 src/vnet/map/ip4_map.c                             |  14 +--
 src/vnet/map/ip4_map_t.c                           |  12 +-
 src/vnet/map/ip6_map.c                             |  19 +--
 src/vnet/map/ip6_map_t.c                           |  12 +-
 src/vnet/mpls/mpls_input.c                         |   8 +-
 src/vnet/mpls/mpls_lookup.c                        |  20 ++--
 src/vnet/mpls/mpls_output.c                        |  10 +-
 src/vnet/pg/input.c                                |   4 +-
 src/vnet/replication.c                             |  20 ++--
 src/vnet/replication.h                             |   2 +-
 src/vnet/session/node.c                            |   2 +-
 src/vnet/sr/sr_localsid.c                          |  44 +++----
 src/vnet/tcp/builtin_client.c                      |   2 +-
 src/vnet/tcp/tcp.c                                 |   8 +-
 src/vnet/tcp/tcp_debug.h                           |   2 +-
 src/vnet/tcp/tcp_input.c                           |  10 +-
 src/vnet/tcp/tcp_output.c                          |  20 ++--
 src/vnet/udp/udp_input.c                           |   2 +-
 src/vnet/unix/tapcli.c                             |   2 +-
 src/vnet/unix/tuntap.c                             |   4 +-
 src/vnet/vxlan-gpe/decap.c                         |  10 +-
 src/vnet/vxlan-gpe/encap.c                         |  12 +-
 src/vnet/vxlan/decap.c                             |  10 +-
 src/vnet/vxlan/encap.c                             |  12 +-
 src/vpp/stats/stats.c                              |  14 +--
 src/vpp/stats/stats.h                              |   2 +-
 109 files changed, 790 insertions(+), 791 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/examples/srv6-sample-localsid/node.c b/src/examples/srv6-sample-localsid/node.c
index 7bae9cd7..e83e2352 100644
--- a/src/examples/srv6-sample-localsid/node.c
+++ b/src/examples/srv6-sample-localsid/node.c
@@ -114,7 +114,7 @@ srv6_localsid_sample_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_fram
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
   {
@@ -168,7 +168,7 @@ srv6_localsid_sample_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_fram
       /* This increments the SRv6 per LocalSID counters.*/
       vlib_increment_combined_counter
         (((next0 == SRV6_SAMPLE_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : &(sm->sr_ls_valid_counters)),
-        cpu_index,
+        thread_index,
         ls0 - sm->localsids,
         1, vlib_buffer_length_in_chain (vm, b0));
 
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c
index 2765c292..c80b3fa8 100644
--- a/src/plugins/dpdk/buffer.c
+++ b/src/plugins/dpdk/buffer.c
@@ -132,7 +132,7 @@ dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index)
   u32 merge_index;
   int i;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   f = vlib_buffer_get_free_list (vm, free_list_index);
 
diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c
index 50b26689..91661246 100644
--- a/src/plugins/dpdk/device/device.c
+++ b/src/plugins/dpdk/device/device.c
@@ -243,7 +243,7 @@ static_always_inline
   ASSERT (ring->tx_tail == 0);
 
   n_retry = 16;
-  queue_id = vm->cpu_index;
+  queue_id = vm->thread_index;
 
   do
     {
@@ -266,7 +266,7 @@ static_always_inline
 	{
 	  /* no wrap, transmit in one burst */
 	  dpdk_device_hqos_per_worker_thread_t *hqos =
-	    &xd->hqos_wt[vm->cpu_index];
+	    &xd->hqos_wt[vm->thread_index];
 
 	  ASSERT (hqos->swq != NULL);
 
@@ -332,7 +332,7 @@ dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node,
 		     vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp)
 {
   dpdk_main_t *dm = &dpdk_main;
-  u32 my_cpu = vm->cpu_index;
+  u32 my_cpu = vm->thread_index;
   struct rte_mbuf *mb_new;
 
   if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0)
@@ -376,7 +376,7 @@ dpdk_interface_tx (vlib_main_t * vm,
   tx_ring_hdr_t *ring;
   u32 n_on_ring;
 
-  my_cpu = vm->cpu_index;
+  my_cpu = vm->thread_index;
 
   queue_id = my_cpu;
 
diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h
index dd40ff48..52b4ca4b 100644
--- a/src/plugins/dpdk/device/dpdk_priv.h
+++ b/src/plugins/dpdk/device/dpdk_priv.h
@@ -79,7 +79,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now)
 {
   vlib_simple_counter_main_t *cm;
   vnet_main_t *vnm = vnet_get_main ();
-  u32 my_cpu = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u64 rxerrors, last_rxerrors;
 
   /* only update counters for PMD interfaces */
@@ -96,7 +96,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now)
       cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
 			     VNET_INTERFACE_COUNTER_RX_NO_BUF);
 
-      vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+      vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index,
 				     xd->stats.rx_nombuf -
 				     xd->last_stats.rx_nombuf);
     }
@@ -107,7 +107,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now)
       cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
 			     VNET_INTERFACE_COUNTER_RX_MISS);
 
-      vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+      vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index,
 				     xd->stats.imissed -
 				     xd->last_stats.imissed);
     }
@@ -119,7 +119,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now)
       cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
 			     VNET_INTERFACE_COUNTER_RX_ERROR);
 
-      vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+      vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index,
 				     rxerrors - last_rxerrors);
     }
 
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index 538db6cb..7eaf8da7 100755
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -324,7 +324,7 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd)
   int rv;
   int j;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
     {
diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
index e740fd18..b10e0fad 100644
--- a/src/plugins/dpdk/device/node.c
+++ b/src/plugins/dpdk/device/node.c
@@ -283,7 +283,7 @@ dpdk_buffer_init_from_template (void *d0, void *d1, void *d2, void *d3,
  */
 static_always_inline u32
 dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
-		   vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id,
+		   vlib_node_runtime_t * node, u32 thread_index, u16 queue_id,
 		   int maybe_multiseg)
 {
   u32 n_buffers;
@@ -294,7 +294,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
   uword n_rx_bytes = 0;
   u32 n_trace, trace_cnt __attribute__ ((unused));
   vlib_buffer_free_list_t *fl;
-  vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, cpu_index);
+  vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, thread_index);
 
   if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
     return 0;
@@ -306,7 +306,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
       return 0;
     }
 
-  vec_reset_length (xd->d_trace_buffers[cpu_index]);
+  vec_reset_length (xd->d_trace_buffers[thread_index]);
   trace_cnt = n_trace = vlib_get_trace_count (vm, node);
 
   if (n_trace > 0)
@@ -318,7 +318,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
 	{
 	  struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++];
 	  vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
-	  vec_add1 (xd->d_trace_buffers[cpu_index],
+	  vec_add1 (xd->d_trace_buffers[thread_index],
 		    vlib_get_buffer_index (vm, b));
 	}
     }
@@ -546,20 +546,22 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
 
-  if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0))
+  if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[thread_index]) > 0))
     {
-      dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index],
-		     vec_len (xd->d_trace_buffers[cpu_index]));
-      vlib_set_trace_count (vm, node, n_trace -
-			    vec_len (xd->d_trace_buffers[cpu_index]));
+      dpdk_rx_trace (dm, node, xd, queue_id,
+		     xd->d_trace_buffers[thread_index],
+		     vec_len (xd->d_trace_buffers[thread_index]));
+      vlib_set_trace_count (vm, node,
+			    n_trace -
+			    vec_len (xd->d_trace_buffers[thread_index]));
     }
 
   vlib_increment_combined_counter
     (vnet_get_main ()->interface_main.combined_sw_if_counters
      + VNET_INTERFACE_COUNTER_RX,
-     cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes);
+     thread_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes);
 
-  vnet_device_increment_rx_packets (cpu_index, mb_index);
+  vnet_device_increment_rx_packets (thread_index, mb_index);
 
   return mb_index;
 }
@@ -630,19 +632,19 @@ dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
   dpdk_device_t *xd;
   uword n_rx_packets = 0;
   dpdk_device_and_queue_t *dq;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   /*
    * Poll all devices on this cpu for input/interrupts.
    */
   /* *INDENT-OFF* */
-  vec_foreach (dq, dm->devices_by_cpu[cpu_index])
+  vec_foreach (dq, dm->devices_by_cpu[thread_index])
     {
       xd = vec_elt_at_index(dm->devices, dq->device);
       if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG)
-        n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 1);
+        n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 1);
       else
-        n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 0);
+        n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 0);
     }
   /* *INDENT-ON* */
 
diff --git a/src/plugins/dpdk/hqos/hqos.c b/src/plugins/dpdk/hqos/hqos.c
index a288fca7..8b251beb 100644
--- a/src/plugins/dpdk/hqos/hqos.c
+++ b/src/plugins/dpdk/hqos/hqos.c
@@ -397,7 +397,7 @@ static_always_inline void
 dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm)
 {
   dpdk_main_t *dm = &dpdk_main;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
   u32 dev_pos;
 
   dev_pos = 0;
@@ -405,12 +405,12 @@ dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm)
     {
       vlib_worker_thread_barrier_check ();
 
-      u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]);
+      u32 n_devs = vec_len (dm->devices_by_hqos_cpu[thread_index]);
       if (dev_pos >= n_devs)
 	dev_pos = 0;
 
       dpdk_device_and_queue_t *dq =
-	vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos);
+	vec_elt_at_index (dm->devices_by_hqos_cpu[thread_index], dev_pos);
       dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device);
 
       dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht;
@@ -479,7 +479,7 @@ static_always_inline void
 dpdk_hqos_thread_internal (vlib_main_t * vm)
 {
   dpdk_main_t *dm = &dpdk_main;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
   u32 dev_pos;
 
   dev_pos = 0;
@@ -487,7 +487,7 @@ dpdk_hqos_thread_internal (vlib_main_t * vm)
     {
       vlib_worker_thread_barrier_check ();
 
-      u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]);
+      u32 n_devs = vec_len (dm->devices_by_hqos_cpu[thread_index]);
       if (PREDICT_FALSE (n_devs == 0))
 	{
 	  dev_pos = 0;
@@ -497,7 +497,7 @@ dpdk_hqos_thread_internal (vlib_main_t * vm)
 	dev_pos = 0;
 
       dpdk_device_and_queue_t *dq =
-	vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos);
+	vec_elt_at_index (dm->devices_by_hqos_cpu[thread_index], dev_pos);
       dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device);
 
       dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht;
@@ -586,7 +586,7 @@ dpdk_hqos_thread (vlib_worker_thread_t * w)
 
   vm = vlib_get_main ();
 
-  ASSERT (vm->cpu_index == os_get_cpu_number ());
+  ASSERT (vm->thread_index == vlib_get_thread_index ());
 
   clib_time_init (&vm->clib_time);
   clib_mem_set_heap (w->thread_mheap);
@@ -595,7 +595,7 @@ dpdk_hqos_thread (vlib_worker_thread_t * w)
   while (tm->worker_thread_release == 0)
     vlib_worker_thread_barrier_check ();
 
-  if (vec_len (dm->devices_by_hqos_cpu[vm->cpu_index]) == 0)
+  if (vec_len (dm->devices_by_hqos_cpu[vm->thread_index]) == 0)
     return
       clib_error
       ("current I/O TX thread does not have any devices assigned to it");
diff --git a/src/plugins/dpdk/ipsec/cli.c b/src/plugins/dpdk/ipsec/cli.c
index cd0a6037..3ae8c9b8 100644
--- a/src/plugins/dpdk/ipsec/cli.c
+++ b/src/plugins/dpdk/ipsec/cli.c
@@ -42,8 +42,8 @@ dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display)
   for (i = 0; i < tm->n_vlib_mains; i++)
     {
       uword key, data;
-      u32 cpu_index = vlib_mains[i]->cpu_index;
-      crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
+      u32 thread_index = vlib_mains[i]->thread_index;
+      crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
       u8 *s = 0;
 
       if (skip_master)
@@ -57,7 +57,7 @@ dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display)
 	  i32 last_cdev = -1;
 	  crypto_qp_data_t *qpd;
 
-	  s = format (s, "%u\t", cpu_index);
+	  s = format (s, "%u\t", thread_index);
 
 	  /* *INDENT-OFF* */
 	  vec_foreach (qpd, cwm->qp_data)
@@ -95,7 +95,7 @@ dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display)
 	    cap.sym.auth.algo = p_key->auth_algo;
 	    check_algo_is_supported (&cap, auth_str);
 	    vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n",
-			     vlib_mains[i]->cpu_index, cipher_str, auth_str,
+			     vlib_mains[i]->thread_index, cipher_str, auth_str,
 			     p_key->is_outbound ? "out" : "in",
 			     cwm->qp_data[data].dev_id,
 			     cwm->qp_data[data].qp_id);
diff --git a/src/plugins/dpdk/ipsec/crypto_node.c b/src/plugins/dpdk/ipsec/crypto_node.c
index dc3452b2..a3c45902 100644
--- a/src/plugins/dpdk/ipsec/crypto_node.c
+++ b/src/plugins/dpdk/ipsec/crypto_node.c
@@ -171,9 +171,9 @@ static uword
 dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 		      vlib_frame_t * frame)
 {
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
-  crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
+  crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
   crypto_qp_data_t *qpd;
   u32 n_deq = 0;
 
diff --git a/src/plugins/dpdk/ipsec/esp.h b/src/plugins/dpdk/ipsec/esp.h
index 320295b1..56f0c756 100644
--- a/src/plugins/dpdk/ipsec/esp.h
+++ b/src/plugins/dpdk/ipsec/esp.h
@@ -170,9 +170,9 @@ static_always_inline int
 create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess,
 		 u8 is_outbound)
 {
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
-  crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
+  crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
   struct rte_crypto_sym_xform cipher_xform = { 0 };
   struct rte_crypto_sym_xform auth_xform = { 0 };
   struct rte_crypto_sym_xform *xfs;
diff --git a/src/plugins/dpdk/ipsec/esp_decrypt.c b/src/plugins/dpdk/ipsec/esp_decrypt.c
index 286e03f8..bab76e3b 100644
--- a/src/plugins/dpdk/ipsec/esp_decrypt.c
+++ b/src/plugins/dpdk/ipsec/esp_decrypt.c
@@ -88,7 +88,7 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm,
 {
   u32 n_left_from, *from, *to_next, next_index;
   ipsec_main_t *im = &ipsec_main;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   dpdk_crypto_main_t * dcm = &dpdk_crypto_main;
   dpdk_esp_main_t * em = &dpdk_esp_main;
   u32 i;
@@ -104,7 +104,7 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm,
       return n_left_from;
     }
 
-  crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, cpu_index);
+  crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, thread_index);
   u32 n_qps = vec_len(cwm->qp_data);
   struct rte_crypto_op ** cops_to_enq[n_qps];
   u32 n_cop_qp[n_qps], * bi_to_enq[n_qps];
diff --git a/src/plugins/dpdk/ipsec/esp_encrypt.c b/src/plugins/dpdk/ipsec/esp_encrypt.c
index 5b03de73..f996d7df 100644
--- a/src/plugins/dpdk/ipsec/esp_encrypt.c
+++ b/src/plugins/dpdk/ipsec/esp_encrypt.c
@@ -93,7 +93,7 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm,
 {
   u32 n_left_from, *from, *to_next, next_index;
   ipsec_main_t *im = &ipsec_main;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
   dpdk_esp_main_t *em = &dpdk_esp_main;
   u32 i;
@@ -111,7 +111,8 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm,
       return n_left_from;
     }
 
-  crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, cpu_index);
+  crypto_worker_main_t *cwm =
+    vec_elt_at_index (dcm->workers_main, thread_index);
   u32 n_qps = vec_len (cwm->qp_data);
   struct rte_crypto_op **cops_to_enq[n_qps];
   u32 n_cop_qp[n_qps], *bi_to_enq[n_qps];
diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c
index b0aaaaec..5d8f4fba 100644
--- a/src/plugins/dpdk/ipsec/ipsec.c
+++ b/src/plugins/dpdk/ipsec/ipsec.c
@@ -289,7 +289,7 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
 	      if (!map)
 		{
 		  clib_warning ("unable to create hash table for worker %u",
-				vlib_mains[i]->cpu_index);
+				vlib_mains[i]->thread_index);
 		  goto error;
 		}
 	      cwm->algo_qp_map = map;
diff --git a/src/plugins/dpdk/ipsec/ipsec.h b/src/plugins/dpdk/ipsec/ipsec.h
index 28bffc80..f0f793c0 100644
--- a/src/plugins/dpdk/ipsec/ipsec.h
+++ b/src/plugins/dpdk/ipsec/ipsec.h
@@ -95,8 +95,8 @@ static_always_inline void
 crypto_alloc_cops ()
 {
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
-  u32 cpu_index = os_get_cpu_number ();
-  crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
+  u32 thread_index = vlib_get_thread_index ();
+  crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
   unsigned socket_id = rte_socket_id ();
   crypto_qp_data_t *qpd;
 
diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c
index 7ee2a785..942b8b2d 100644
--- a/src/plugins/dpdk/main.c
+++ b/src/plugins/dpdk/main.c
@@ -39,7 +39,7 @@ rte_delay_us_override (unsigned us)
    * thread then do not intercept. (Must not be called from an
    * independent pthread).
    */
-  if (os_get_cpu_number () == 0)
+  if (vlib_get_thread_index () == 0)
     {
       /*
        * We're in the vlib main thread or a vlib process. Make sure
diff --git a/src/plugins/flowperpkt/l2_node.c b/src/plugins/flowperpkt/l2_node.c
index 1c2f681e..fdaf81d1 100644
--- a/src/plugins/flowperpkt/l2_node.c
+++ b/src/plugins/flowperpkt/l2_node.c
@@ -102,7 +102,7 @@ add_to_flow_record_l2 (vlib_main_t * vm,
 		       u8 * src_mac, u8 * dst_mac,
 		       u16 ethertype, u64 timestamp, u16 length, int do_flush)
 {
-  u32 my_cpu_number = vm->cpu_index;
+  u32 my_cpu_number = vm->thread_index;
   flow_report_main_t *frm = &flow_report_main;
   ip4_header_t *ip;
   udp_header_t *udp;
diff --git a/src/plugins/flowperpkt/node.c b/src/plugins/flowperpkt/node.c
index f77f087d..0277682d 100644
--- a/src/plugins/flowperpkt/node.c
+++ b/src/plugins/flowperpkt/node.c
@@ -101,7 +101,7 @@ add_to_flow_record_ipv4 (vlib_main_t * vm,
 			 u32 src_address, u32 dst_address,
 			 u8 tos, u64 timestamp, u16 length, int do_flush)
 {
-  u32 my_cpu_number = vm->cpu_index;
+  u32 my_cpu_number = vm->thread_index;
   flow_report_main_t *frm = &flow_report_main;
   ip4_header_t *ip;
   udp_header_t *udp;
diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h
index 2bf3fd54..9de0d13b 100644
--- a/src/plugins/ioam/export-common/ioam_export.h
+++ b/src/plugins/ioam/export-common/ioam_export.h
@@ -477,8 +477,8 @@ do {                                                                           \
   from = vlib_frame_vector_args (F);                                           \
   n_left_from = (F)->n_vectors;                                                \
   next_index = (N)->cached_next_index;                                         \
-  while (__sync_lock_test_and_set ((EM)->lockp[(VM)->cpu_index], 1));          \
-  my_buf = ioam_export_get_my_buffer (EM, (VM)->cpu_index);                    \
+  while (__sync_lock_test_and_set ((EM)->lockp[(VM)->thread_index], 1));       \
+  my_buf = ioam_export_get_my_buffer (EM, (VM)->thread_index);                 \
   my_buf->touched_at = vlib_time_now (VM);                                     \
   while (n_left_from > 0)                                                      \
     {                                                                          \
@@ -620,7 +620,7 @@ do {                                                                           \
     }                                                                          \
   vlib_node_increment_counter (VM, export_node.index,                          \
 			       EXPORT_ERROR_RECORDED, pkts_recorded);          \
-  *(EM)->lockp[(VM)->cpu_index] = 0;                                           \
+  *(EM)->lockp[(VM)->thread_index] = 0;                                        \
 } while(0)
 
 #endif /* __included_ioam_export_h__ */
diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
index a56dc040..0cf742c9 100644
--- a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
+++ b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
@@ -396,7 +396,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm,
 					      clib_net_to_host_u32
 					      (tcp0->seq_number) + 1,
 					      no_of_responses, now,
-					      vm->cpu_index, &pool_index0))
+					      vm->thread_index, &pool_index0))
 		    {
 		      cache_ts_added++;
 		    }
@@ -419,7 +419,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm,
 	      e2e =
 		(ioam_e2e_cache_option_t *) ((u8 *) hbh0 +
 					     cm->rewrite_pool_index_offset);
-	      e2e->pool_id = (u8) vm->cpu_index;
+	      e2e->pool_id = (u8) vm->thread_index;
 	      e2e->pool_index = pool_index0;
 	      ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
 					   ((u8 *) e2e +
@@ -455,7 +455,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm,
 					      clib_net_to_host_u32
 					      (tcp1->seq_number) + 1,
 					      no_of_responses, now,
-					      vm->cpu_index, &pool_index1))
+					      vm->thread_index, &pool_index1))
 		    {
 		      cache_ts_added++;
 		    }
@@ -479,7 +479,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm,
 	      e2e =
 		(ioam_e2e_cache_option_t *) ((u8 *) hbh1 +
 					     cm->rewrite_pool_index_offset);
-	      e2e->pool_id = (u8) vm->cpu_index;
+	      e2e->pool_id = (u8) vm->thread_index;
 	      e2e->pool_index = pool_index1;
 	      ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
 					   ((u8 *) e2e +
@@ -562,7 +562,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm,
 					      clib_net_to_host_u32
 					      (tcp0->seq_number) + 1,
 					      no_of_responses, now,
-					      vm->cpu_index, &pool_index0))
+					      vm->thread_index, &pool_index0))
 		    {
 		      cache_ts_added++;
 		    }
@@ -585,7 +585,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm,
 	      e2e =
 		(ioam_e2e_cache_option_t *) ((u8 *) hbh0 +
 					     cm->rewrite_pool_index_offset);
-	      e2e->pool_id = (u8) vm->cpu_index;
+	      e2e->pool_id = (u8) vm->thread_index;
 	      e2e->pool_index = pool_index0;
 	      ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
 					   ((u8 *) e2e +
@@ -701,7 +701,7 @@ expired_cache_ts_timer_callback (u32 * expired_timers)
   ioam_cache_main_t *cm = &ioam_cache_main;
   int i;
   u32 pool_index;
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 count = 0;
 
   for (i = 0; i < vec_len (expired_timers); i++)
@@ -724,7 +724,7 @@ ioam_cache_ts_timer_tick_node_fn (vlib_main_t * vm,
 				  vlib_frame_t * f)
 {
   ioam_cache_main_t *cm = &ioam_cache_main;
-  u32 my_thread_index = os_get_cpu_number ();
+  u32 my_thread_index = vlib_get_thread_index ();
   struct timespec ts, tsrem;
 
   tw_timer_expire_timers_16t_2w_512sl (&cm->timer_wheels[my_thread_index],
diff --git a/src/plugins/ixge/ixge.c b/src/plugins/ixge/ixge.c
index f3c5cc09..08f5b692 100644
--- a/src/plugins/ixge/ixge.c
+++ b/src/plugins/ixge/ixge.c
@@ -1887,7 +1887,7 @@ done:
   vlib_increment_combined_counter (vnet_main.
 				   interface_main.combined_sw_if_counters +
 				   VNET_INTERFACE_COUNTER_RX,
-				   0 /* cpu_index */ ,
+				   0 /* thread_index */ ,
 				   xd->vlib_sw_if_index, n_packets,
 				   dq->rx.n_bytes);
 
diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c
index add81236..addc2a42 100644
--- a/src/plugins/lb/lb.c
+++ b/src/plugins/lb/lb.c
@@ -63,11 +63,11 @@ u8 *format_lb_main (u8 * s, va_list * args)
   s = format(s, " #vips: %u\n", pool_elts(lbm->vips));
   s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1);
 
-  u32 cpu_index;
-  for(cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++ ) {
-    lb_hash_t *h = lbm->per_cpu[cpu_index].sticky_ht;
+  u32 thread_index;
+  for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
+    lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
     if (h) {
-      s = format(s, "core %d\n", cpu_index);
+      s = format(s, "core %d\n", thread_index);
       s = format(s, "  timeout: %ds\n", h->timeout);
       s = format(s, "  usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())),  lb_hash_size(h));
     }
diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c
index 8b763c53..3171148b 100644
--- a/src/plugins/lb/node.c
+++ b/src/plugins/lb/node.c
@@ -60,10 +60,10 @@ format_lb_trace (u8 * s, va_list * args)
   return s;
 }
 
-lb_hash_t *lb_get_sticky_table(u32 cpu_index)
+lb_hash_t *lb_get_sticky_table(u32 thread_index)
 {
   lb_main_t *lbm = &lb_main;
-  lb_hash_t *sticky_ht = lbm->per_cpu[cpu_index].sticky_ht;
+  lb_hash_t *sticky_ht = lbm->per_cpu[thread_index].sticky_ht;
   //Check if size changed
   if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht))))
     {
@@ -71,8 +71,8 @@ lb_hash_t *lb_get_sticky_table(u32 cpu_index)
       lb_hash_bucket_t *b;
       u32 i;
       lb_hash_foreach_entry(sticky_ht, b, i) {
-	vlib_refcount_add(&lbm->as_refcount, cpu_index, b->value[i], -1);
-	vlib_refcount_add(&lbm->as_refcount, cpu_index, 0, 1);
+	vlib_refcount_add(&lbm->as_refcount, thread_index, b->value[i], -1);
+	vlib_refcount_add(&lbm->as_refcount, thread_index, 0, 1);
       }
 
       lb_hash_free(sticky_ht);
@@ -81,8 +81,8 @@ lb_hash_t *lb_get_sticky_table(u32 cpu_index)
 
   //Create if necessary
   if (PREDICT_FALSE(sticky_ht == NULL)) {
-    lbm->per_cpu[cpu_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout);
-    sticky_ht = lbm->per_cpu[cpu_index].sticky_ht;
+    lbm->per_cpu[thread_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout);
+    sticky_ht = lbm->per_cpu[thread_index].sticky_ht;
     clib_warning("Regenerated sticky table %p", sticky_ht);
   }
 
@@ -153,10 +153,10 @@ lb_node_fn (vlib_main_t * vm,
 {
   lb_main_t *lbm = &lb_main;
   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   u32 lb_time = lb_hash_time_now(vm);
 
-  lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index);
+  lb_hash_t *sticky_ht = lb_get_sticky_table(thread_index);
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
@@ -240,9 +240,9 @@ lb_node_fn (vlib_main_t * vm,
 	  //Configuration may be changed, vectors resized, etc...
 
 	  //Dereference previously used
-	  vlib_refcount_add(&lbm->as_refcount, cpu_index,
+	  vlib_refcount_add(&lbm->as_refcount, thread_index,
 			    lb_hash_available_value(sticky_ht, hash0, available_index0), -1);
-	  vlib_refcount_add(&lbm->as_refcount, cpu_index,
+	  vlib_refcount_add(&lbm->as_refcount, thread_index,
 			    asindex0, 1);
 
 	  //Add sticky entry
@@ -260,7 +260,7 @@ lb_node_fn (vlib_main_t * vm,
 	}
 
       vlib_increment_simple_counter(&lbm->vip_counters[counter],
-				    cpu_index,
+				    thread_index,
 				    vnet_buffer (p0)->ip.adj_index[VLIB_TX],
 				    1);
 
diff --git a/src/plugins/lb/refcount.c b/src/plugins/lb/refcount.c
index 22415c88..6f01ab5a 100644
--- a/src/plugins/lb/refcount.c
+++ b/src/plugins/lb/refcount.c
@@ -31,10 +31,10 @@ u64 vlib_refcount_get(vlib_refcount_t *r, u32 index)
 {
   u64 count = 0;
   vlib_thread_main_t *tm = vlib_get_thread_main ();
-  u32 cpu_index;
-  for (cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++) {
-    if (r->per_cpu[cpu_index].length > index)
-      count += r->per_cpu[cpu_index].counters[index];
+  u32 thread_index;
+  for (thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++) {
+    if (r->per_cpu[thread_index].length > index)
+      count += r->per_cpu[thread_index].counters[index];
   }
   return count;
 }
diff --git a/src/plugins/lb/refcount.h b/src/plugins/lb/refcount.h
index 8c26e7be..dcfcb3fe 100644
--- a/src/plugins/lb/refcount.h
+++ b/src/plugins/lb/refcount.h
@@ -45,9 +45,9 @@ typedef struct {
 void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size);
 
 static_always_inline
-void vlib_refcount_add(vlib_refcount_t *r, u32 cpu_index, u32 counter_index, i32 v)
+void vlib_refcount_add(vlib_refcount_t *r, u32 thread_index, u32 counter_index, i32 v)
 {
-  vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[cpu_index];
+  vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[thread_index];
   if (PREDICT_FALSE(counter_index >= per_cpu->length))
     __vlib_refcount_resize(per_cpu, clib_max(counter_index + 16, per_cpu->length * 2));
 
diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c
index 659d5dfb..cee1f3d1 100644
--- a/src/plugins/memif/node.c
+++ b/src/plugins/memif/node.c
@@ -94,7 +94,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
   u32 n_rx_bytes = 0;
   u32 *to_next = 0;
   u32 n_free_bufs;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 bi0, bi1;
   vlib_buffer_t *b0, *b1;
   u16 ring_size = 1 << mif->log2_ring_size;
@@ -105,14 +105,15 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
   if (mif->per_interface_next_index != ~0)
     next_index = mif->per_interface_next_index;
 
-  n_free_bufs = vec_len (nm->rx_buffers[cpu_index]);
+  n_free_bufs = vec_len (nm->rx_buffers[thread_index]);
   if (PREDICT_FALSE (n_free_bufs < ring_size))
     {
-      vec_validate (nm->rx_buffers[cpu_index], ring_size + n_free_bufs - 1);
+      vec_validate (nm->rx_buffers[thread_index],
+		    ring_size + n_free_bufs - 1);
       n_free_bufs +=
-	vlib_buffer_alloc (vm, &nm->rx_buffers[cpu_index][n_free_bufs],
+	vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs],
 			   ring_size);
-      _vec_len (nm->rx_buffers[cpu_index]) = n_free_bufs;
+      _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs;
     }
 
   head = ring->head;
@@ -158,15 +159,15 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 			     CLIB_CACHE_LINE_BYTES, LOAD);
 	    }
 	  /* get empty buffer */
-	  u32 last_buf = vec_len (nm->rx_buffers[cpu_index]) - 1;
-	  bi0 = nm->rx_buffers[cpu_index][last_buf];
-	  bi1 = nm->rx_buffers[cpu_index][last_buf - 1];
-	  _vec_len (nm->rx_buffers[cpu_index]) -= 2;
+	  u32 last_buf = vec_len (nm->rx_buffers[thread_index]) - 1;
+	  bi0 = nm->rx_buffers[thread_index][last_buf];
+	  bi1 = nm->rx_buffers[thread_index][last_buf - 1];
+	  _vec_len (nm->rx_buffers[thread_index]) -= 2;
 
 	  if (last_buf > 4)
 	    {
-	      memif_prefetch (vm, nm->rx_buffers[cpu_index][last_buf - 2]);
-	      memif_prefetch (vm, nm->rx_buffers[cpu_index][last_buf - 3]);
+	      memif_prefetch (vm, nm->rx_buffers[thread_index][last_buf - 2]);
+	      memif_prefetch (vm, nm->rx_buffers[thread_index][last_buf - 3]);
 	    }
 
 	  /* enqueue buffer */
@@ -256,9 +257,9 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
       while (num_slots && n_left_to_next)
 	{
 	  /* get empty buffer */
-	  u32 last_buf = vec_len (nm->rx_buffers[cpu_index]) - 1;
-	  bi0 = nm->rx_buffers[cpu_index][last_buf];
-	  _vec_len (nm->rx_buffers[cpu_index]) = last_buf;
+	  u32 last_buf = vec_len (nm->rx_buffers[thread_index]) - 1;
+	  bi0 = nm->rx_buffers[thread_index][last_buf];
+	  _vec_len (nm->rx_buffers[thread_index]) = last_buf;
 
 	  /* enqueue buffer */
 	  to_next[0] = bi0;
@@ -315,7 +316,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
   ring->tail = head;
 
   vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
-				   + VNET_INTERFACE_COUNTER_RX, cpu_index,
+				   + VNET_INTERFACE_COUNTER_RX, thread_index,
 				   mif->hw_if_index, n_rx_packets,
 				   n_rx_bytes);
 
@@ -327,7 +328,7 @@ memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 		vlib_frame_t * frame)
 {
   u32 n_rx_packets = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   memif_main_t *nm = &memif_main;
   memif_if_t *mif;
 
@@ -337,7 +338,7 @@ memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
       if (mif->flags & MEMIF_IF_FLAG_ADMIN_UP &&
 	  mif->flags & MEMIF_IF_FLAG_CONNECTED &&
 	  (mif->if_index % nm->input_cpu_count) ==
-	  (cpu_index - nm->input_cpu_first_index))
+	  (thread_index - nm->input_cpu_first_index))
 	{
 	  if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
 	    n_rx_packets +=
diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c
index b4961365..e5ee965f 100644
--- a/src/plugins/snat/in2out.c
+++ b/src/plugins/snat/in2out.c
@@ -212,7 +212,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
                       snat_session_t ** sessionp,
                       vlib_node_runtime_t * node,
                       u32 next0,
-                      u32 cpu_index)
+                      u32 thread_index)
 {
   snat_user_t *u;
   snat_user_key_t user_key;
@@ -246,27 +246,27 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
     {
       /* no, make a new one */
-      pool_get (sm->per_thread_data[cpu_index].users, u);
+      pool_get (sm->per_thread_data[thread_index].users, u);
       memset (u, 0, sizeof (*u));
       u->addr = ip0->src_address;
       u->fib_index = rx_fib_index0;
 
-      pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
+      pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
 
       u->sessions_per_user_list_head_index = per_user_list_head_elt -
-        sm->per_thread_data[cpu_index].list_pool;
+        sm->per_thread_data[thread_index].list_pool;
 
-      clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                        u->sessions_per_user_list_head_index);
 
-      kv0.value = u - sm->per_thread_data[cpu_index].users;
+      kv0.value = u - sm->per_thread_data[thread_index].users;
 
       /* add user */
       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
     }
   else
     {
-      u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
+      u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
                              value0.value);
     }
 
@@ -276,25 +276,25 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
       /* Remove the oldest dynamic translation */
       do {
           oldest_per_user_translation_list_index =
-            clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
+            clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
                                     u->sessions_per_user_list_head_index);
 
           ASSERT (oldest_per_user_translation_list_index != ~0);
 
           /* add it back to the end of the LRU list */
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                               u->sessions_per_user_list_head_index,
                               oldest_per_user_translation_list_index);
           /* Get the list element */
           oldest_per_user_translation_list_elt =
-            pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
+            pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
                                oldest_per_user_translation_list_index);
 
           /* Get the session index from the list element */
           session_index = oldest_per_user_translation_list_elt->value;
 
           /* Get the session */
-          s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+          s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                  session_index);
       } while (snat_is_session_static (s));
 
@@ -346,7 +346,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
         }
 
       /* Create a new session */
-      pool_get (sm->per_thread_data[cpu_index].sessions, s);
+      pool_get (sm->per_thread_data[thread_index].sessions, s);
       memset (s, 0, sizeof (*s));
       
       s->outside_address_index = address_index;
@@ -362,22 +362,22 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
         }
 
       /* Create list elts */
-      pool_get (sm->per_thread_data[cpu_index].list_pool,
+      pool_get (sm->per_thread_data[thread_index].list_pool,
                 per_user_translation_list_elt);
-      clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                        per_user_translation_list_elt -
-                       sm->per_thread_data[cpu_index].list_pool);
+                       sm->per_thread_data[thread_index].list_pool);
 
       per_user_translation_list_elt->value =
-        s - sm->per_thread_data[cpu_index].sessions;
+        s - sm->per_thread_data[thread_index].sessions;
       s->per_user_index = per_user_translation_list_elt -
-                          sm->per_thread_data[cpu_index].list_pool;
+                          sm->per_thread_data[thread_index].list_pool;
       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
 
-      clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+      clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                           s->per_user_list_head_index,
                           per_user_translation_list_elt -
-                          sm->per_thread_data[cpu_index].list_pool);
+                          sm->per_thread_data[thread_index].list_pool);
    }
   
   s->in2out = *key0;
@@ -388,12 +388,12 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
 
   /* Add to translation hashes */
   kv0.key = s->in2out.as_u64;
-  kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+  kv0.value = s - sm->per_thread_data[thread_index].sessions;
   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
       clib_warning ("in2out key add failed");
   
   kv0.key = s->out2in.as_u64;
-  kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+  kv0.value = s - sm->per_thread_data[thread_index].sessions;
   
   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
       clib_warning ("out2in key add failed");
@@ -403,7 +403,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
   worker_by_out_key.port = s->out2in.port;
   worker_by_out_key.fib_index = s->out2in.fib_index;
   kv0.key = worker_by_out_key.as_u64;
-  kv0.value = cpu_index;
+  kv0.value = thread_index;
   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
 
   /* log NAT event */
@@ -465,7 +465,7 @@ snat_in2out_error_t icmp_get_key(icmp46_header_t *icmp0,
  *
  * @param[in,out] sm             SNAT main
  * @param[in,out] node           SNAT node runtime
- * @param[in] cpu_index          CPU index
+ * @param[in] thread_index       thread index
  * @param[in,out] b0             buffer containing packet to be translated
  * @param[out] p_key             address and port before NAT translation
  * @param[out] p_value           address and port after NAT translation
@@ -473,7 +473,7 @@ snat_in2out_error_t icmp_get_key(icmp46_header_t *icmp0,
  * @param d                      optional parameter
  */
 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d)
@@ -524,13 +524,13 @@ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
         }
 
       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
-                         &s0, node, next0, cpu_index);
+                         &s0, node, next0, thread_index);
 
       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
         goto out;
     }
   else
-    s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+    s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                             value0.value);
 
 out:
@@ -548,7 +548,7 @@ out:
  *
  * @param[in] sm                 SNAT main
  * @param[in,out] node           SNAT node runtime
- * @param[in] cpu_index          CPU index
+ * @param[in] thread_index       thread index
  * @param[in,out] b0             buffer containing packet to be translated
  * @param[out] p_key             address and port before NAT translation
  * @param[out] p_value           address and port after NAT translation
@@ -556,7 +556,7 @@ out:
  * @param d                      optional parameter
  */
 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d)
@@ -624,7 +624,7 @@ static inline u32 icmp_in2out (snat_main_t *sm,
                                u32 rx_fib_index0,
                                vlib_node_runtime_t * node,
                                u32 next0,
-                               u32 cpu_index,
+                               u32 thread_index,
                                void *d)
 {
   snat_session_key_t key0, sm0;
@@ -641,7 +641,7 @@ static inline u32 icmp_in2out (snat_main_t *sm,
 
   echo0 = (icmp_echo_header_t *)(icmp0+1);
 
-  next0_tmp = sm->icmp_match_in2out_cb(sm, node, cpu_index, b0,
+  next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
                                        &key0, &sm0, &dont_translate, d);
   if (next0_tmp != ~0)
     next0 = next0_tmp;
@@ -847,11 +847,11 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
                                          vlib_node_runtime_t * node,
                                          u32 next0,
                                          f64 now,
-                                         u32 cpu_index,
+                                         u32 thread_index,
                                          snat_session_t ** p_s0)
 {
   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                      next0, cpu_index, p_s0);
+                      next0, thread_index, p_s0);
   snat_session_t * s0 = *p_s0;
   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
     {
@@ -862,9 +862,9 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
       /* Per-user LRU list maintenance for dynamic translations */
       if (!snat_is_session_static (s0))
         {
-          clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                              s0->per_user_index);
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                               s0->per_user_list_head_index,
                               s0->per_user_index);
         }
@@ -884,7 +884,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
   f64 now = vlib_time_now (vm);
   u32 stats_node_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
     snat_in2out_node.index;
@@ -977,7 +977,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                 {
                   next0 = icmp_in2out_slow_path 
                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
-                     node, next0, now, cpu_index, &s0);
+                     node, next0, now, thread_index, &s0);
                   goto trace00;
                 }
             }
@@ -1006,7 +1006,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                     goto trace00;
 
                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
-                                     &s0, node, next0, cpu_index);
+                                     &s0, node, next0, thread_index);
                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
                     goto trace00;
                 }
@@ -1017,7 +1017,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                 }
             }
           else
-            s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+            s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                     value0.value);
 
           old_addr0 = ip0->src_address.as_u32;
@@ -1063,9 +1063,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
           /* Per-user LRU list maintenance for dynamic translation */
           if (!snat_is_session_static (s0))
             {
-              clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                                  s0->per_user_index);
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   s0->per_user_list_head_index,
                                   s0->per_user_index);
             }
@@ -1081,7 +1081,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
               t->next_index = next0;
                   t->session_index = ~0;
               if (s0)
-                t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+                t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
             }
 
           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
@@ -1117,7 +1117,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                 {
                   next1 = icmp_in2out_slow_path 
                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
-                     next1, now, cpu_index, &s1);
+                     next1, now, thread_index, &s1);
                   goto trace01;
                 }
             }
@@ -1146,7 +1146,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                     goto trace01;
 
                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
-                                     &s1, node, next1, cpu_index);
+                                     &s1, node, next1, thread_index);
                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
                     goto trace01;
                 }
@@ -1157,7 +1157,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                 }
             }
           else
-            s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+            s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                     value1.value);
 
           old_addr1 = ip1->src_address.as_u32;
@@ -1203,9 +1203,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
           /* Per-user LRU list maintenance for dynamic translation */
           if (!snat_is_session_static (s1))
             {
-              clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                                  s1->per_user_index);
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   s1->per_user_list_head_index,
                                   s1->per_user_index);
             }
@@ -1220,7 +1220,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
               t->next_index = next1;
               t->session_index = ~0;
               if (s1)
-                t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
+                t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
             }
 
           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
@@ -1292,7 +1292,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                 {
                   next0 = icmp_in2out_slow_path 
                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                     next0, now, cpu_index, &s0);
+                     next0, now, thread_index, &s0);
                   goto trace0;
                 }
             }
@@ -1321,7 +1321,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                     goto trace0;
 
                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
-                                     &s0, node, next0, cpu_index);
+                                     &s0, node, next0, thread_index);
 
                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
                     goto trace0;
@@ -1333,7 +1333,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                 }
             }
           else
-            s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+            s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                     value0.value);
 
           old_addr0 = ip0->src_address.as_u32;
@@ -1379,9 +1379,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
           /* Per-user LRU list maintenance for dynamic translation */
           if (!snat_is_session_static (s0))
             {
-              clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                                  s0->per_user_index);
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   s0->per_user_list_head_index,
                                   s0->per_user_index);
             }
@@ -1397,7 +1397,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
               t->next_index = next0;
                   t->session_index = ~0;
               if (s0)
-                t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+                t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
             }
 
           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
@@ -2010,7 +2010,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm,
   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
   u32 next_worker_index = 0;
   u32 current_worker_index = ~0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   ASSERT (vec_len (sm->workers));
 
@@ -2048,7 +2048,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm,
 
       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
 
-      if (PREDICT_FALSE (next_worker_index != cpu_index))
+      if (PREDICT_FALSE (next_worker_index != thread_index))
         {
           do_handoff = 1;
 
diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c
index 656e42db..5d308d78 100644
--- a/src/plugins/snat/out2in.c
+++ b/src/plugins/snat/out2in.c
@@ -129,7 +129,7 @@ create_session_for_static_mapping (snat_main_t *sm,
                                    snat_session_key_t in2out,
                                    snat_session_key_t out2in,
                                    vlib_node_runtime_t * node,
-                                   u32 cpu_index)
+                                   u32 thread_index)
 {
   snat_user_t *u;
   snat_user_key_t user_key;
@@ -146,36 +146,36 @@ create_session_for_static_mapping (snat_main_t *sm,
   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
     {
       /* no, make a new one */
-      pool_get (sm->per_thread_data[cpu_index].users, u);
+      pool_get (sm->per_thread_data[thread_index].users, u);
       memset (u, 0, sizeof (*u));
       u->addr = in2out.addr;
       u->fib_index = in2out.fib_index;
 
-      pool_get (sm->per_thread_data[cpu_index].list_pool,
+      pool_get (sm->per_thread_data[thread_index].list_pool,
                 per_user_list_head_elt);
 
       u->sessions_per_user_list_head_index = per_user_list_head_elt -
-        sm->per_thread_data[cpu_index].list_pool;
+        sm->per_thread_data[thread_index].list_pool;
 
-      clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                        u->sessions_per_user_list_head_index);
 
-      kv0.value = u - sm->per_thread_data[cpu_index].users;
+      kv0.value = u - sm->per_thread_data[thread_index].users;
 
       /* add user */
       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
 
       /* add non-traslated packets worker lookup */
-      kv0.value = cpu_index;
+      kv0.value = thread_index;
       clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
     }
   else
     {
-      u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
+      u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
                              value0.value);
     }
 
-  pool_get (sm->per_thread_data[cpu_index].sessions, s);
+  pool_get (sm->per_thread_data[thread_index].sessions, s);
   memset (s, 0, sizeof (*s));
 
   s->outside_address_index = ~0;
@@ -183,22 +183,22 @@ create_session_for_static_mapping (snat_main_t *sm,
   u->nstaticsessions++;
 
   /* Create list elts */
-  pool_get (sm->per_thread_data[cpu_index].list_pool,
+  pool_get (sm->per_thread_data[thread_index].list_pool,
             per_user_translation_list_elt);
-  clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+  clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                    per_user_translation_list_elt -
-                   sm->per_thread_data[cpu_index].list_pool);
+                   sm->per_thread_data[thread_index].list_pool);
 
   per_user_translation_list_elt->value =
-    s - sm->per_thread_data[cpu_index].sessions;
+    s - sm->per_thread_data[thread_index].sessions;
   s->per_user_index =
-    per_user_translation_list_elt - sm->per_thread_data[cpu_index].list_pool;
+    per_user_translation_list_elt - sm->per_thread_data[thread_index].list_pool;
   s->per_user_list_head_index = u->sessions_per_user_list_head_index;
 
-  clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+  clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                       s->per_user_list_head_index,
                       per_user_translation_list_elt -
-                      sm->per_thread_data[cpu_index].list_pool);
+                      sm->per_thread_data[thread_index].list_pool);
 
   s->in2out = in2out;
   s->out2in = out2in;
@@ -206,12 +206,12 @@ create_session_for_static_mapping (snat_main_t *sm,
 
   /* Add to translation hashes */
   kv0.key = s->in2out.as_u64;
-  kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+  kv0.value = s - sm->per_thread_data[thread_index].sessions;
   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
       clib_warning ("in2out key add failed");
 
   kv0.key = s->out2in.as_u64;
-  kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+  kv0.value = s - sm->per_thread_data[thread_index].sessions;
 
   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
       clib_warning ("out2in key add failed");
@@ -298,7 +298,7 @@ is_interface_addr(snat_main_t *sm, vlib_node_runtime_t *node, u32 sw_if_index0,
  *
  * @param[in,out] sm             SNAT main
  * @param[in,out] node           SNAT node runtime
- * @param[in] cpu_index          CPU index
+ * @param[in] thread_index       thread index
  * @param[in,out] b0             buffer containing packet to be translated
  * @param[out] p_key             address and port before NAT translation
  * @param[out] p_value           address and port after NAT translation
@@ -306,7 +306,7 @@ is_interface_addr(snat_main_t *sm, vlib_node_runtime_t *node, u32 sw_if_index0,
  * @param d                      optional parameter
  */
 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d)
@@ -366,7 +366,7 @@ u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
 
       /* Create session initiated by host from external network */
       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
-                                             node, cpu_index);
+                                             node, thread_index);
 
       if (!s0)
         {
@@ -375,7 +375,7 @@ u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
         }
     }
   else
-    s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+    s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                             value0.value);
 
 out:
@@ -393,7 +393,7 @@ out:
  *
  * @param[in] sm                 SNAT main
  * @param[in,out] node           SNAT node runtime
- * @param[in] cpu_index          CPU index
+ * @param[in] thread_index       thread index
  * @param[in,out] b0             buffer containing packet to be translated
  * @param[out] p_key             address and port before NAT translation
  * @param[out] p_value           address and port after NAT translation
@@ -401,7 +401,7 @@ out:
  * @param d                      optional parameter
  */
 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d)
@@ -460,7 +460,7 @@ static inline u32 icmp_out2in (snat_main_t *sm,
                                u32 rx_fib_index0,
                                vlib_node_runtime_t * node,
                                u32 next0,
-                               u32 cpu_index,
+                               u32 thread_index,
                                void *d)
 {
   snat_session_key_t key0, sm0;
@@ -477,7 +477,7 @@ static inline u32 icmp_out2in (snat_main_t *sm,
 
   echo0 = (icmp_echo_header_t *)(icmp0+1);
 
-  next0_tmp = sm->icmp_match_out2in_cb(sm, node, cpu_index, b0,
+  next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0,
                                        &key0, &sm0, &dont_translate, d);
   if (next0_tmp != ~0)
     next0 = next0_tmp;
@@ -589,11 +589,11 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
                                          u32 rx_fib_index0,
                                          vlib_node_runtime_t * node,
                                          u32 next0, f64 now,
-                                         u32 cpu_index,
+                                         u32 thread_index,
                                          snat_session_t ** p_s0)
 {
   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                      next0, cpu_index, p_s0);
+                      next0, thread_index, p_s0);
   snat_session_t * s0 = *p_s0;
   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
     {
@@ -604,9 +604,9 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
       /* Per-user LRU list maintenance for dynamic translation */
       if (!snat_is_session_static (s0))
         {
-          clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                              s0->per_user_index);
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                               s0->per_user_list_head_index,
                               s0->per_user_index);
         }
@@ -624,7 +624,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
   u32 pkts_processed = 0;
   snat_main_t * sm = &snat_main;
   f64 now = vlib_time_now (vm);
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -712,7 +712,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
             {
               next0 = icmp_out2in_slow_path 
                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, 
-                 next0, now, cpu_index, &s0);
+                 next0, now, thread_index, &s0);
               goto trace0;
             }
 
@@ -743,7 +743,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
 
               /* Create session initiated by host from external network */
               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
-                                                     cpu_index);
+                                                     thread_index);
               if (!s0)
                 {
                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
@@ -752,7 +752,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
                 }
             }
           else
-            s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+            s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                     value0.value);
 
           old_addr0 = ip0->dst_address.as_u32;
@@ -796,9 +796,9 @@ snat_out2in_node_fn (vlib_main_t * vm,
           /* Per-user LRU list maintenance for dynamic translation */
           if (!snat_is_session_static (s0))
             {
-              clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                                  s0->per_user_index);
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   s0->per_user_list_head_index,
                                   s0->per_user_index);
             }
@@ -813,7 +813,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
               t->next_index = next0;
               t->session_index = ~0;
               if (s0)
-                t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+                t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
             }
 
           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
@@ -847,7 +847,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
             {
               next1 = icmp_out2in_slow_path 
                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, 
-                 next1, now, cpu_index, &s1);
+                 next1, now, thread_index, &s1);
               goto trace1;
             }
 
@@ -878,7 +878,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
 
               /* Create session initiated by host from external network */
               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
-                                                     cpu_index);
+                                                     thread_index);
               if (!s1)
                 {
                   b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
@@ -887,7 +887,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
                 }
             }
           else
-            s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+            s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                     value1.value);
 
           old_addr1 = ip1->dst_address.as_u32;
@@ -931,9 +931,9 @@ snat_out2in_node_fn (vlib_main_t * vm,
           /* Per-user LRU list maintenance for dynamic translation */
           if (!snat_is_session_static (s1))
             {
-              clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                                  s1->per_user_index);
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   s1->per_user_list_head_index,
                                   s1->per_user_index);
             }
@@ -948,7 +948,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
               t->next_index = next1;
               t->session_index = ~0;
               if (s1)
-                t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
+                t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
             }
 
           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
@@ -1016,7 +1016,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
             {
               next0 = icmp_out2in_slow_path 
                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, 
-                 next0, now, cpu_index, &s0);
+                 next0, now, thread_index, &s0);
               goto trace00;
             }
 
@@ -1048,7 +1048,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
 
               /* Create session initiated by host from external network */
               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
-                                                     cpu_index);
+                                                     thread_index);
               if (!s0)
                 {
                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
@@ -1057,7 +1057,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
                 }
             }
           else
-            s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+            s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                     value0.value);
 
           old_addr0 = ip0->dst_address.as_u32;
@@ -1101,9 +1101,9 @@ snat_out2in_node_fn (vlib_main_t * vm,
           /* Per-user LRU list maintenance for dynamic translation */
           if (!snat_is_session_static (s0))
             {
-              clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                                  s0->per_user_index);
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   s0->per_user_list_head_index,
                                   s0->per_user_index);
             }
@@ -1118,7 +1118,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
               t->next_index = next0;
               t->session_index = ~0;
               if (s0)
-                t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+                t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
             }
 
           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
@@ -1599,7 +1599,7 @@ snat_out2in_worker_handoff_fn (vlib_main_t * vm,
   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
   u32 next_worker_index = 0;
   u32 current_worker_index = ~0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   ASSERT (vec_len (sm->workers));
 
@@ -1637,7 +1637,7 @@ snat_out2in_worker_handoff_fn (vlib_main_t * vm,
 
       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
 
-      if (PREDICT_FALSE (next_worker_index != cpu_index))
+      if (PREDICT_FALSE (next_worker_index != thread_index))
         {
           do_handoff = 1;
 
diff --git a/src/plugins/snat/snat.h b/src/plugins/snat/snat.h
index 017825c0..f4e1c5c0 100644
--- a/src/plugins/snat/snat.h
+++ b/src/plugins/snat/snat.h
@@ -221,7 +221,7 @@ struct snat_main_s;
 
 typedef u32 snat_icmp_match_function_t (struct snat_main_s *sm,
                                         vlib_node_runtime_t *node,
-                                        u32 cpu_index,
+                                        u32 thread_index,
                                         vlib_buffer_t *b0,
                                         snat_session_key_t *p_key,
                                         snat_session_key_t *p_value,
@@ -402,22 +402,22 @@ typedef struct {
 } tcp_udp_header_t;
 
 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d);
 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d);
 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d);
 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d);
diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c
index a517a597..be3b41ef 100644
--- a/src/vlib/buffer.c
+++ b/src/vlib/buffer.c
@@ -299,7 +299,7 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm,
   if (CLIB_DEBUG == 0)
     return;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   /* smp disaster check */
   if (vec_len (vlib_mains) > 1)
@@ -355,7 +355,7 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm,
   vlib_buffer_free_list_t *f;
   int i;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   if (!is_default && pool_elts (bm->buffer_free_list_pool) == 0)
     {
@@ -474,7 +474,7 @@ vlib_buffer_delete_free_list_internal (vlib_main_t * vm, u32 free_list_index)
   u32 merge_index;
   int i;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   f = vlib_buffer_get_free_list (vm, free_list_index);
 
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 394c336a..328660a3 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -209,7 +209,7 @@ always_inline vlib_buffer_known_state_t
 vlib_buffer_is_known (vlib_main_t * vm, u32 buffer_index)
 {
   vlib_buffer_main_t *bm = vm->buffer_main;
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   uword *p = hash_get (bm->buffer_known_hash, buffer_index);
   return p ? p[0] : VLIB_BUFFER_UNKNOWN;
@@ -221,7 +221,7 @@ vlib_buffer_set_known_state (vlib_main_t * vm,
 			     vlib_buffer_known_state_t state)
 {
   vlib_buffer_main_t *bm = vm->buffer_main;
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
   hash_set (bm->buffer_known_hash, buffer_index, state);
 }
 
diff --git a/src/vlib/cli.c b/src/vlib/cli.c
index f853f655..3cc95076 100644
--- a/src/vlib/cli.c
+++ b/src/vlib/cli.c
@@ -709,7 +709,7 @@ test_heap_validate (vlib_main_t * vm, unformat_input_t * input,
     {
         /* *INDENT-OFF* */
         foreach_vlib_main({
-          heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+          heap = clib_per_cpu_mheaps[this_vlib_main->thread_index];
           mheap = mheap_header(heap);
           mheap->flags |= MHEAP_FLAG_VALIDATE;
           // Turn off small object cache because it delays detection of errors
@@ -722,7 +722,7 @@ test_heap_validate (vlib_main_t * vm, unformat_input_t * input,
     {
         /* *INDENT-OFF* */
         foreach_vlib_main({
-          heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+          heap = clib_per_cpu_mheaps[this_vlib_main->thread_index];
           mheap = mheap_header(heap);
           mheap->flags &= ~MHEAP_FLAG_VALIDATE;
           mheap->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
@@ -733,7 +733,7 @@ test_heap_validate (vlib_main_t * vm, unformat_input_t * input,
     {
         /* *INDENT-OFF* */
         foreach_vlib_main({
-          heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+          heap = clib_per_cpu_mheaps[this_vlib_main->thread_index];
           mheap = mheap_header(heap);
           mheap_validate(heap);
         });
diff --git a/src/vlib/counter.h b/src/vlib/counter.h
index 17a85217..60e2055d 100644
--- a/src/vlib/counter.h
+++ b/src/vlib/counter.h
@@ -70,17 +70,17 @@ u32 vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm);
 
 /** Increment a simple counter
     @param cm - (vlib_simple_counter_main_t *) simple counter main pointer
-    @param cpu_index - (u32) the current cpu index
+    @param thread_index - (u32) the current thread index
     @param index - (u32) index of the counter to increment
     @param increment - (u64) quantitiy to add to the counter
 */
 always_inline void
 vlib_increment_simple_counter (vlib_simple_counter_main_t * cm,
-			       u32 cpu_index, u32 index, u64 increment)
+			       u32 thread_index, u32 index, u64 increment)
 {
   counter_t *my_counters;
 
-  my_counters = cm->counters[cpu_index];
+  my_counters = cm->counters[thread_index];
   my_counters[index] += increment;
 }
 
@@ -201,7 +201,7 @@ void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm);
 
 /** Increment a combined counter
     @param cm - (vlib_combined_counter_main_t *) comined counter main pointer
-    @param cpu_index - (u32) the current cpu index
+    @param thread_index - (u32) the current thread index
     @param index - (u32) index of the counter to increment
     @param packet_increment - (u64) number of packets to add to the counter
     @param byte_increment - (u64) number of bytes to add to the counter
@@ -209,13 +209,13 @@ void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm);
 
 always_inline void
 vlib_increment_combined_counter (vlib_combined_counter_main_t * cm,
-				 u32 cpu_index,
+				 u32 thread_index,
 				 u32 index, u64 n_packets, u64 n_bytes)
 {
   vlib_counter_t *my_counters;
 
   /* Use this CPU's counter array */
-  my_counters = cm->counters[cpu_index];
+  my_counters = cm->counters[thread_index];
 
   my_counters[index].packets += n_packets;
   my_counters[index].bytes += n_bytes;
@@ -224,14 +224,14 @@ vlib_increment_combined_counter (vlib_combined_counter_main_t * cm,
 /** Pre-fetch a per-thread combined counter for the given object index */
 always_inline void
 vlib_prefetch_combined_counter (const vlib_combined_counter_main_t * cm,
-				u32 cpu_index, u32 index)
+				u32 thread_index, u32 index)
 {
   vlib_counter_t *cpu_counters;
 
   /*
    * This CPU's index is assumed to already be in cache
    */
-  cpu_counters = cm->counters[cpu_index];
+  cpu_counters = cm->counters[thread_index];
   CLIB_PREFETCH (cpu_counters + index, CLIB_CACHE_LINE_BYTES, STORE);
 }
 
diff --git a/src/vlib/error.c b/src/vlib/error.c
index a2c23176..e4ed4ee3 100644
--- a/src/vlib/error.c
+++ b/src/vlib/error.c
@@ -149,7 +149,7 @@ vlib_register_errors (vlib_main_t * vm,
   vlib_node_t *n = vlib_get_node (vm, node_index);
   uword l;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   /* Free up any previous error strings. */
   if (n->n_errors > 0)
diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h
index f51ec381..9dd01fbf 100644
--- a/src/vlib/global_funcs.h
+++ b/src/vlib/global_funcs.h
@@ -23,7 +23,7 @@ always_inline vlib_main_t *
 vlib_get_main (void)
 {
   vlib_main_t *vm;
-  vm = vlib_mains[os_get_cpu_number ()];
+  vm = vlib_mains[vlib_get_thread_index ()];
   ASSERT (vm);
   return vm;
 }
diff --git a/src/vlib/main.c b/src/vlib/main.c
index b22203f0..422d3e26 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -136,18 +136,18 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
   else
     {
       f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN);
-      f->cpu_index = vm->cpu_index;
+      f->thread_index = vm->thread_index;
       fi = vlib_frame_index_no_check (vm, f);
     }
 
   /* Poison frame when debugging. */
   if (CLIB_DEBUG > 0)
     {
-      u32 save_cpu_index = f->cpu_index;
+      u32 save_thread_index = f->thread_index;
 
       memset (f, 0xfe, n);
 
-      f->cpu_index = save_cpu_index;
+      f->thread_index = save_thread_index;
     }
 
   /* Insert magic number. */
@@ -517,7 +517,7 @@ vlib_put_next_frame (vlib_main_t * vm,
 	   * a dangling frame reference. Each thread has its own copy of
 	   * the next_frames vector.
 	   */
-	  if (0 && r->cpu_index != next_runtime->cpu_index)
+	  if (0 && r->thread_index != next_runtime->thread_index)
 	    {
 	      nf->frame_index = ~0;
 	      nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED);
@@ -866,7 +866,7 @@ vlib_elog_main_loop_event (vlib_main_t * vm,
 				  : evm->node_call_elog_event_types,
 				  node_index),
 		/* track */
-		(vm->cpu_index ? &vlib_worker_threads[vm->cpu_index].
+		(vm->thread_index ? &vlib_worker_threads[vm->thread_index].
 		 elog_track : &em->default_track),
 		/* data to log */ n_vectors);
 }
@@ -963,7 +963,7 @@ dispatch_node (vlib_main_t * vm,
 
   vm->cpu_time_last_node_dispatch = last_time_stamp;
 
-  if (1 /* || vm->cpu_index == node->cpu_index */ )
+  if (1 /* || vm->thread_index == node->thread_index */ )
     {
       vlib_main_t *stat_vm;
 
@@ -1029,7 +1029,7 @@ dispatch_node (vlib_main_t * vm,
 	  {
 	    u32 node_name, vector_length, is_polling;
 	  } *ed;
-	  vlib_worker_thread_t *w = vlib_worker_threads + vm->cpu_index;
+	  vlib_worker_thread_t *w = vlib_worker_threads + vm->thread_index;
 #endif
 
 	  if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT
diff --git a/src/vlib/main.h b/src/vlib/main.h
index 0197b4f3..329bf073 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -156,7 +156,7 @@ typedef struct vlib_main_t
   uword *init_functions_called;
 
   /* to compare with node runtime */
-  u32 cpu_index;
+  u32 thread_index;
 
   void **mbuf_alloc_list;
 
diff --git a/src/vlib/node.c b/src/vlib/node.c
index dc0a4de5..bbd3a42e 100644
--- a/src/vlib/node.c
+++ b/src/vlib/node.c
@@ -99,7 +99,7 @@ vlib_node_runtime_update (vlib_main_t * vm, u32 node_index, u32 next_index)
   vlib_pending_frame_t *pf;
   i32 i, j, n_insert;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   vlib_worker_thread_barrier_sync (vm);
 
diff --git a/src/vlib/node.h b/src/vlib/node.h
index fc7e7da2..1e2f4c38 100644
--- a/src/vlib/node.h
+++ b/src/vlib/node.h
@@ -344,8 +344,8 @@ typedef struct vlib_frame_t
   /* Number of vector elements currently in frame. */
   u16 n_vectors;
 
-  /* Owner cpuid / heap id */
-  u16 cpu_index;
+  /* Owner thread / heap id */
+  u16 thread_index;
 
   /* Scalar and vector arguments to next node. */
   u8 arguments[0];
@@ -459,7 +459,7 @@ typedef struct vlib_node_runtime_t
 					  zero before first run of this
 					  node. */
 
-  u16 cpu_index;			/**< CPU this node runs on */
+  u16 thread_index;			/**< thread this node runs on */
 
   u8 runtime_data[0];			/**< Function dependent
 					  node-runtime data. This data is
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
index 1f7d94e1..54e36874 100644
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -201,9 +201,9 @@ always_inline vlib_frame_t *
 vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index)
 {
   vlib_frame_t *f;
-  u32 cpu_index = frame_index & VLIB_CPU_MASK;
+  u32 thread_index = frame_index & VLIB_CPU_MASK;
   u32 offset = frame_index & VLIB_OFFSET_MASK;
-  vm = vlib_mains[cpu_index];
+  vm = vlib_mains[thread_index];
   f = vm->heap_base + offset;
   return f;
 }
@@ -215,10 +215,10 @@ vlib_frame_index_no_check (vlib_main_t * vm, vlib_frame_t * f)
 
   ASSERT (((uword) f & VLIB_CPU_MASK) == 0);
 
-  vm = vlib_mains[f->cpu_index];
+  vm = vlib_mains[f->thread_index];
 
   i = ((u8 *) f - (u8 *) vm->heap_base);
-  return i | f->cpu_index;
+  return i | f->thread_index;
 }
 
 always_inline vlib_frame_t *
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index ef3a24d3..4a111f8d 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -35,27 +35,12 @@ vl (void *p)
 vlib_worker_thread_t *vlib_worker_threads;
 vlib_thread_main_t vlib_thread_main;
 
+__thread uword vlib_thread_index = 0;
+
 uword
 os_get_cpu_number (void)
 {
-  void *sp;
-  uword n;
-  u32 len;
-
-  len = vec_len (vlib_thread_stacks);
-  if (len == 0)
-    return 0;
-
-  /* Get any old stack address. */
-  sp = &sp;
-
-  n = ((uword) sp - (uword) vlib_thread_stacks[0])
-    >> VLIB_LOG2_THREAD_STACK_SIZE;
-
-  /* "processes" have their own stacks, and they always run in thread 0 */
-  n = n >= len ? 0 : n;
-
-  return n;
+  return vlib_thread_index;
 }
 
 uword
@@ -275,21 +260,6 @@ vlib_thread_init (vlib_main_t * vm)
   return 0;
 }
 
-vlib_worker_thread_t *
-vlib_alloc_thread (vlib_main_t * vm)
-{
-  vlib_worker_thread_t *w;
-
-  if (vec_len (vlib_worker_threads) >= vec_len (vlib_thread_stacks))
-    {
-      clib_warning ("out of worker threads... Quitting...");
-      exit (1);
-    }
-  vec_add2 (vlib_worker_threads, w, 1);
-  w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
-  return w;
-}
-
 vlib_frame_queue_t *
 vlib_frame_queue_alloc (int nelts)
 {
@@ -427,7 +397,7 @@ vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
       f64 b4 = vlib_time_now_ticks (vm, before);
       vlib_worker_thread_barrier_check (vm, b4);
       /* Bad idea. Dequeue -> enqueue -> dequeue -> trouble */
-      // vlib_frame_queue_dequeue (vm->cpu_index, vm, nm);
+      // vlib_frame_queue_dequeue (vm->thread_index, vm, nm);
     }
 
   elt = fq->elts + (new_tail & (fq->nelts - 1));
@@ -497,6 +467,8 @@ vlib_worker_thread_bootstrap_fn (void *arg)
   w->lwp = syscall (SYS_gettid);
   w->thread_id = pthread_self ();
 
+  vlib_thread_index = w - vlib_worker_threads;
+
   rv = (void *) clib_calljmp
     ((uword (*)(uword)) w->thread_function,
      (uword) arg, w->thread_stack + VLIB_THREAD_STACK_SIZE);
@@ -610,7 +582,9 @@ start_workers (vlib_main_t * vm)
 		  mheap_alloc (0 /* use VM */ , tr->mheap_size);
 	      else
 		w->thread_mheap = main_heap;
-	      w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+
+	      w->thread_stack =
+		vlib_thread_stack_init (w - vlib_worker_threads);
 	      w->thread_function = tr->function;
 	      w->thread_function_arg = w;
 	      w->instance_id = k;
@@ -630,7 +604,7 @@ start_workers (vlib_main_t * vm)
 	      vm_clone = clib_mem_alloc (sizeof (*vm_clone));
 	      clib_memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone));
 
-	      vm_clone->cpu_index = worker_thread_index;
+	      vm_clone->thread_index = worker_thread_index;
 	      vm_clone->heap_base = w->thread_mheap;
 	      vm_clone->mbuf_alloc_list = 0;
 	      vm_clone->init_functions_called =
@@ -679,7 +653,7 @@ start_workers (vlib_main_t * vm)
 	      vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
 	      {
 		vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-		rt->cpu_index = vm_clone->cpu_index;
+		rt->thread_index = vm_clone->thread_index;
 		/* copy initial runtime_data from node */
 		if (n->runtime_data && n->runtime_data_bytes > 0)
 		  clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -692,7 +666,7 @@ start_workers (vlib_main_t * vm)
 	      vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
 	      {
 		vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-		rt->cpu_index = vm_clone->cpu_index;
+		rt->thread_index = vm_clone->thread_index;
 		/* copy initial runtime_data from node */
 		if (n->runtime_data && n->runtime_data_bytes > 0)
 		  clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -756,7 +730,8 @@ start_workers (vlib_main_t * vm)
 		  mheap_alloc (0 /* use VM */ , tr->mheap_size);
 	      else
 		w->thread_mheap = main_heap;
-	      w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+	      w->thread_stack =
+		vlib_thread_stack_init (w - vlib_worker_threads);
 	      w->thread_function = tr->function;
 	      w->thread_function_arg = w;
 	      w->instance_id = j;
@@ -827,7 +802,7 @@ vlib_worker_thread_node_runtime_update (void)
 				  uword n_calls,
 				  uword n_vectors, uword n_clocks);
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   if (vec_len (vlib_mains) == 1)
     return;
@@ -835,7 +810,7 @@ vlib_worker_thread_node_runtime_update (void)
   vm = vlib_mains[0];
   nm = &vm->node_main;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
   ASSERT (*vlib_worker_threads->wait_at_barrier == 1);
 
   /*
@@ -955,7 +930,7 @@ vlib_worker_thread_node_runtime_update (void)
       vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
       {
 	vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-	rt->cpu_index = vm_clone->cpu_index;
+	rt->thread_index = vm_clone->thread_index;
 	/* copy runtime_data, will be overwritten later for existing rt */
 	if (n->runtime_data && n->runtime_data_bytes > 0)
 	  clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -981,7 +956,7 @@ vlib_worker_thread_node_runtime_update (void)
       vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
       {
 	vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-	rt->cpu_index = vm_clone->cpu_index;
+	rt->thread_index = vm_clone->thread_index;
 	/* copy runtime_data, will be overwritten later for existing rt */
 	if (n->runtime_data && n->runtime_data_bytes > 0)
 	  clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -1180,7 +1155,7 @@ vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which)
   if (vlib_mains == 0)
     return;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
   vlib_worker_thread_barrier_sync (vm);
 
   switch (which)
@@ -1212,7 +1187,7 @@ vlib_worker_thread_barrier_sync (vlib_main_t * vm)
 
   vlib_worker_threads[0].barrier_sync_count++;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
 
@@ -1260,7 +1235,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
 int
 vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm)
 {
-  u32 thread_id = vm->cpu_index;
+  u32 thread_id = vm->thread_index;
   vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
   vlib_frame_queue_elt_t *elt;
   u32 *from, *to;
@@ -1393,7 +1368,7 @@ vlib_worker_thread_fn (void *arg)
   vlib_main_t *vm = vlib_get_main ();
   clib_error_t *e;
 
-  ASSERT (vm->cpu_index == os_get_cpu_number ());
+  ASSERT (vm->thread_index == vlib_get_thread_index ());
 
   vlib_worker_thread_init (w);
   clib_time_init (&vm->clib_time);
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index eca4fc26..101d3d4a 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -153,8 +153,6 @@ typedef struct
 /* Called early, in thread 0's context */
 clib_error_t *vlib_thread_init (vlib_main_t * vm);
 
-vlib_worker_thread_t *vlib_alloc_thread (vlib_main_t * vm);
-
 int vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
 			      u32 frame_queue_index, vlib_frame_t * frame,
 			      vlib_frame_queue_msg_type_t type);
@@ -183,12 +181,19 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts);
 void vlib_worker_thread_barrier_sync (vlib_main_t * vm);
 void vlib_worker_thread_barrier_release (vlib_main_t * vm);
 
+extern __thread uword vlib_thread_index;
+static_always_inline uword
+vlib_get_thread_index (void)
+{
+  return vlib_thread_index;
+}
+
 always_inline void
 vlib_smp_unsafe_warning (void)
 {
   if (CLIB_DEBUG > 0)
     {
-      if (os_get_cpu_number ())
+      if (vlib_get_thread_index ())
 	fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__);
     }
 }
@@ -331,21 +336,21 @@ vlib_num_workers ()
 }
 
 always_inline u32
-vlib_get_worker_cpu_index (u32 worker_index)
+vlib_get_worker_thread_index (u32 worker_index)
 {
   return worker_index + 1;
 }
 
 always_inline u32
-vlib_get_worker_index (u32 cpu_index)
+vlib_get_worker_index (u32 thread_index)
 {
-  return cpu_index - 1;
+  return thread_index - 1;
 }
 
 always_inline u32
 vlib_get_current_worker_index ()
 {
-  return os_get_cpu_number () - 1;
+  return vlib_get_thread_index () - 1;
 }
 
 static inline void
@@ -467,6 +472,8 @@ vlib_get_worker_handoff_queue_elt (u32 frame_queue_index,
   return elt;
 }
 
+u8 *vlib_thread_stack_init (uword thread_index);
+
 int vlib_thread_cb_register (struct vlib_main_t *vm,
 			     vlib_thread_callbacks_t * cb);
 
diff --git a/src/vlib/unix/cj.c b/src/vlib/unix/cj.c
index 33ba163a..7c1e9475 100644
--- a/src/vlib/unix/cj.c
+++ b/src/vlib/unix/cj.c
@@ -48,7 +48,7 @@ cj_log (u32 type, void *data0, void *data1)
 
   r = (cj_record_t *) & (cjm->records[new_tail & (cjm->num_records - 1)]);
   r->time = vlib_time_now (cjm->vlib_main);
-  r->cpu = os_get_cpu_number ();
+  r->thread_index = vlib_get_thread_index ();
   r->type = type;
   r->data[0] = pointer_to_uword (data0);
   r->data[1] = pointer_to_uword (data1);
@@ -133,7 +133,8 @@ static inline void
 cj_dump_one_record (cj_record_t * r)
 {
   fprintf (stderr, "[%d]: %10.6f T%02d %llx %llx\n",
-	   r->cpu, r->time, r->type, (long long unsigned int) r->data[0],
+	   r->thread_index, r->time, r->type,
+	   (long long unsigned int) r->data[0],
 	   (long long unsigned int) r->data[1]);
 }
 
@@ -161,7 +162,7 @@ cj_dump_internal (u8 filter0_enable, u64 filter0,
   index = (cjm->tail + 1) & (cjm->num_records - 1);
   r = &(cjm->records[index]);
 
-  if (r->cpu != (u32) ~ 0)
+  if (r->thread_index != (u32) ~ 0)
     {
       /* Yes, dump from tail + 1 to the end */
       for (i = index; i < cjm->num_records; i++)
diff --git a/src/vlib/unix/cj.h b/src/vlib/unix/cj.h
index 67626afe..d0a1d46e 100644
--- a/src/vlib/unix/cj.h
+++ b/src/vlib/unix/cj.h
@@ -23,7 +23,7 @@
 typedef struct
 {
   f64 time;
-  u32 cpu;
+  u32 thread_index;
   u32 type;
   u64 data[2];
 } cj_record_t;
diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c
index 6b96cc0d..db5ddd64 100644
--- a/src/vlib/unix/main.c
+++ b/src/vlib/unix/main.c
@@ -510,13 +510,28 @@ thread0 (uword arg)
   return i;
 }
 
+u8 *
+vlib_thread_stack_init (uword thread_index)
+{
+  vec_validate (vlib_thread_stacks, thread_index);
+  vlib_thread_stacks[thread_index] = clib_mem_alloc_aligned
+    (VLIB_THREAD_STACK_SIZE, VLIB_THREAD_STACK_SIZE);
+
+  /*
+   * Disallow writes to the bottom page of the stack, to
+   * catch stack overflows.
+   */
+  if (mprotect (vlib_thread_stacks[thread_index],
+		clib_mem_get_page_size (), PROT_READ) < 0)
+    clib_unix_warning ("thread stack");
+  return vlib_thread_stacks[thread_index];
+}
+
 int
 vlib_unix_main (int argc, char *argv[])
 {
   vlib_main_t *vm = &vlib_global_main;	/* one and only time for this! */
-  vlib_thread_main_t *tm = &vlib_thread_main;
   unformat_input_t input;
-  u8 *thread_stacks;
   clib_error_t *e;
   int i;
 
@@ -548,29 +563,9 @@ vlib_unix_main (int argc, char *argv[])
     }
   unformat_free (&input);
 
-  /*
-   * allocate n x VLIB_THREAD_STACK_SIZE stacks, aligned to a
-   * VLIB_THREAD_STACK_SIZE boundary
-   * See also: os_get_cpu_number() in vlib/vlib/threads.c
-   */
-  thread_stacks = clib_mem_alloc_aligned
-    ((uword) tm->n_thread_stacks * VLIB_THREAD_STACK_SIZE,
-     VLIB_THREAD_STACK_SIZE);
-
-  vec_validate (vlib_thread_stacks, tm->n_thread_stacks - 1);
-  for (i = 0; i < vec_len (vlib_thread_stacks); i++)
-    {
-      vlib_thread_stacks[i] = thread_stacks;
-
-      /*
-       * Disallow writes to the bottom page of the stack, to
-       * catch stack overflows.
-       */
-      if (mprotect (thread_stacks, clib_mem_get_page_size (), PROT_READ) < 0)
-	clib_unix_warning ("thread stack");
+  vlib_thread_stack_init (0);
 
-      thread_stacks += VLIB_THREAD_STACK_SIZE;
-    }
+  vlib_thread_index = 0;
 
   i = clib_calljmp (thread0, (uword) vm,
 		    (void *) (vlib_thread_stacks[0] +
diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c
index f68e54e0..20d70dd4 100644
--- a/src/vnet/adj/adj_l2.c
+++ b/src/vnet/adj/adj_l2.c
@@ -52,7 +52,7 @@ adj_l2_rewrite_inline (vlib_main_t * vm,
 {
     u32 * from = vlib_frame_vector_args (frame);
     u32 n_left_from, n_left_to_next, * to_next, next_index;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
     ethernet_main_t * em = &ethernet_main;
 
     n_left_from = frame->n_vectors;
@@ -93,7 +93,7 @@ adj_l2_rewrite_inline (vlib_main_t * vm,
             vnet_buffer(p0)->sw_if_index[VLIB_TX] = adj0->rewrite_header.sw_if_index;
 
 	    vlib_increment_combined_counter(&adjacency_counters,
-                                            cpu_index,
+                                            thread_index,
                                             adj_index0,
                                             /* packet increment */ 0,
                                             /* byte increment */ rw_len0);
diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
index e8087f08..5756de43 100644
--- a/src/vnet/adj/adj_midchain.c
+++ b/src/vnet/adj/adj_midchain.c
@@ -49,7 +49,7 @@ adj_midchain_tx_inline (vlib_main_t * vm,
     u32 next_index;
     vnet_main_t *vnm = vnet_get_main ();
     vnet_interface_main_t *im = &vnm->interface_main;
-    u32 cpu_index = vm->cpu_index;
+    u32 thread_index = vm->thread_index;
 
     /* Vector of buffer / pkt indices we're supposed to process */
     from = vlib_frame_vector_args (frame);
@@ -124,13 +124,13 @@ adj_midchain_tx_inline (vlib_main_t * vm,
 	    {
 		vlib_increment_combined_counter (im->combined_sw_if_counters
 						 + VNET_INTERFACE_COUNTER_TX,
-						 cpu_index,
+						 thread_index,
 						 adj0->rewrite_header.sw_if_index,
 						 1,
 						 vlib_buffer_length_in_chain (vm, b0));
 		vlib_increment_combined_counter (im->combined_sw_if_counters
 						 + VNET_INTERFACE_COUNTER_TX,
-						 cpu_index,
+						 thread_index,
 						 adj1->rewrite_header.sw_if_index,
 						 1,
 						 vlib_buffer_length_in_chain (vm, b1));
@@ -181,7 +181,7 @@ adj_midchain_tx_inline (vlib_main_t * vm,
 	    {
 		vlib_increment_combined_counter (im->combined_sw_if_counters
 						 + VNET_INTERFACE_COUNTER_TX,
-						 cpu_index,
+						 thread_index,
 						 adj0->rewrite_header.sw_if_index,
 						 1,
 						 vlib_buffer_length_in_chain (vm, b0));
diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c
index 9a0f9d8b..128570b0 100644
--- a/src/vnet/adj/adj_nsh.c
+++ b/src/vnet/adj/adj_nsh.c
@@ -53,7 +53,7 @@ adj_nsh_rewrite_inline (vlib_main_t * vm,
 {
     u32 * from = vlib_frame_vector_args (frame);
     u32 n_left_from, n_left_to_next, * to_next, next_index;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
 
     n_left_from = frame->n_vectors;
     next_index = node->cached_next_index;
@@ -94,7 +94,7 @@ adj_nsh_rewrite_inline (vlib_main_t * vm,
             vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
 
             vlib_increment_combined_counter(&adjacency_counters,
-                                            cpu_index,
+                                            thread_index,
                                             adj_index0,
                                             /* packet increment */ 0,
                                             /* byte increment */ rw_len0);
diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c
index 98842a48..70a189b0 100644
--- a/src/vnet/classify/vnet_classify.c
+++ b/src/vnet/classify/vnet_classify.c
@@ -251,12 +251,12 @@ static inline void make_working_copy
   vnet_classify_entry_##size##_t * working_copy##size = 0;
   foreach_size_in_u32x4;
 #undef _
-  u32 cpu_number = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
 
-  if (cpu_number >= vec_len (t->working_copies))
+  if (thread_index >= vec_len (t->working_copies))
     {
       oldheap = clib_mem_set_heap (t->mheap);
-      vec_validate (t->working_copies, cpu_number);
+      vec_validate (t->working_copies, thread_index);
       clib_mem_set_heap (oldheap);
     }
 
@@ -265,7 +265,7 @@ static inline void make_working_copy
    * updates from multiple threads will not result in sporadic, spurious
    * lookup failures. 
    */
-  working_copy = t->working_copies[cpu_number];
+  working_copy = t->working_copies[thread_index];
 
   t->saved_bucket.as_u64 = b->as_u64;
   oldheap = clib_mem_set_heap (t->mheap);
@@ -290,7 +290,7 @@ static inline void make_working_copy
         default:
           abort();
         }
-      t->working_copies[cpu_number] = working_copy;
+      t->working_copies[thread_index] = working_copy;
     }
 
   _vec_len(working_copy) = (1<<b->log2_pages)*t->entries_per_page;
@@ -318,7 +318,7 @@ static inline void make_working_copy
   working_bucket.offset = vnet_classify_get_offset (t, working_copy);
   CLIB_MEMORY_BARRIER();
   b->as_u64 = working_bucket.as_u64;
-  t->working_copies[cpu_number] = working_copy;
+  t->working_copies[thread_index] = working_copy;
 }
 
 static vnet_classify_entry_t *
@@ -387,7 +387,7 @@ int vnet_classify_add_del (vnet_classify_table_t * t,
   int i;
   u64 hash, new_hash;
   u32 new_log2_pages;
-  u32 cpu_number = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   u8 * key_minus_skip;
 
   ASSERT ((add_v->flags & VNET_CLASSIFY_ENTRY_FREE) == 0);
@@ -498,7 +498,7 @@ int vnet_classify_add_del (vnet_classify_table_t * t,
   new_log2_pages = t->saved_bucket.log2_pages + 1;
 
  expand_again:
-  working_copy = t->working_copies[cpu_number];
+  working_copy = t->working_copies[thread_index];
   new_v = split_and_rehash (t, working_copy, new_log2_pages);
 
   if (new_v == 0)
diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c
index 6ef3d7d7..1b5e336b 100644
--- a/src/vnet/cop/ip4_whitelist.c
+++ b/src/vnet/cop/ip4_whitelist.c
@@ -60,7 +60,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
   cop_feature_type_t next_index;
   cop_main_t *cm = &cop_main;
   vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -177,12 +177,12 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
           dpo1 = load_balance_get_bucket_i(lb1, 0);
 
           vlib_increment_combined_counter
-              (vcm, cpu_index, lb_index0, 1,
+              (vcm, thread_index, lb_index0, 1,
                vlib_buffer_length_in_chain (vm, b0)
                + sizeof(ethernet_header_t));
 
           vlib_increment_combined_counter
-              (vcm, cpu_index, lb_index1, 1,
+              (vcm, thread_index, lb_index1, 1,
                vlib_buffer_length_in_chain (vm, b1)
                + sizeof(ethernet_header_t));
 
@@ -273,7 +273,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
           dpo0 = load_balance_get_bucket_i(lb0, 0);
 
           vlib_increment_combined_counter 
-              (vcm, cpu_index, lb_index0, 1,
+              (vcm, thread_index, lb_index0, 1,
                vlib_buffer_length_in_chain (vm, b0) 
                + sizeof(ethernet_header_t));
 
diff --git a/src/vnet/cop/ip6_whitelist.c b/src/vnet/cop/ip6_whitelist.c
index c2e16ccf..f3fe62e3 100644
--- a/src/vnet/cop/ip6_whitelist.c
+++ b/src/vnet/cop/ip6_whitelist.c
@@ -61,7 +61,7 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm,
   cop_main_t *cm = &cop_main;
   ip6_main_t * im6 = &ip6_main;
   vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -153,12 +153,12 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm,
           dpo1 = load_balance_get_bucket_i(lb1, 0);
 
           vlib_increment_combined_counter 
-              (vcm, cpu_index, lb_index0, 1,
+              (vcm, thread_index, lb_index0, 1,
                vlib_buffer_length_in_chain (vm, b0) 
                + sizeof(ethernet_header_t));
 
           vlib_increment_combined_counter 
-              (vcm, cpu_index, lb_index1, 1,
+              (vcm, thread_index, lb_index1, 1,
                vlib_buffer_length_in_chain (vm, b1)
                + sizeof(ethernet_header_t));
 
@@ -233,7 +233,7 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm,
           dpo0 = load_balance_get_bucket_i(lb0, 0);
 
           vlib_increment_combined_counter 
-              (vcm, cpu_index, lb_index0, 1,
+              (vcm, thread_index, lb_index0, 1,
                vlib_buffer_length_in_chain (vm, b0) 
                + sizeof(ethernet_header_t));
 
diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c
index ba337f3f..76980102 100644
--- a/src/vnet/devices/af_packet/node.c
+++ b/src/vnet/devices/af_packet/node.c
@@ -124,7 +124,7 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   u32 frame_num = apif->rx_req->tp_frame_nr;
   u8 *block_start = apif->rx_ring + block * block_size;
   uword n_trace = vlib_get_trace_count (vm, node);
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm,
 							  VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
   u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes;
@@ -132,15 +132,15 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   if (apif->per_interface_next_index != ~0)
     next_index = apif->per_interface_next_index;
 
-  n_free_bufs = vec_len (apm->rx_buffers[cpu_index]);
+  n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
   if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
     {
-      vec_validate (apm->rx_buffers[cpu_index],
+      vec_validate (apm->rx_buffers[thread_index],
 		    VLIB_FRAME_SIZE + n_free_bufs - 1);
       n_free_bufs +=
-	vlib_buffer_alloc (vm, &apm->rx_buffers[cpu_index][n_free_bufs],
+	vlib_buffer_alloc (vm, &apm->rx_buffers[thread_index][n_free_bufs],
 			   VLIB_FRAME_SIZE);
-      _vec_len (apm->rx_buffers[cpu_index]) = n_free_bufs;
+      _vec_len (apm->rx_buffers[thread_index]) = n_free_bufs;
     }
 
   rx_frame = apif->next_rx_frame;
@@ -163,11 +163,11 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 	    {
 	      /* grab free buffer */
 	      u32 last_empty_buffer =
-		vec_len (apm->rx_buffers[cpu_index]) - 1;
+		vec_len (apm->rx_buffers[thread_index]) - 1;
 	      prev_bi0 = bi0;
-	      bi0 = apm->rx_buffers[cpu_index][last_empty_buffer];
+	      bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
 	      b0 = vlib_get_buffer (vm, bi0);
-	      _vec_len (apm->rx_buffers[cpu_index]) = last_empty_buffer;
+	      _vec_len (apm->rx_buffers[thread_index]) = last_empty_buffer;
 	      n_free_bufs--;
 
 	      /* copy data */
@@ -236,9 +236,9 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   vlib_increment_combined_counter
     (vnet_get_main ()->interface_main.combined_sw_if_counters
      + VNET_INTERFACE_COUNTER_RX,
-     os_get_cpu_number (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
+     vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
 
-  vnet_device_increment_rx_packets (cpu_index, n_rx_packets);
+  vnet_device_increment_rx_packets (thread_index, n_rx_packets);
   return n_rx_packets;
 }
 
diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c
index 41645220..5e5e812c 100644
--- a/src/vnet/devices/devices.c
+++ b/src/vnet/devices/devices.c
@@ -104,7 +104,7 @@ vnet_device_queue_sort (void *a1, void *a2)
 
 void
 vnet_device_input_assign_thread (u32 hw_if_index,
-				 u16 queue_id, uword cpu_index)
+				 u16 queue_id, uword thread_index)
 {
   vnet_main_t *vnm = vnet_get_main ();
   vnet_device_main_t *vdm = &vnet_device_main;
@@ -115,19 +115,19 @@ vnet_device_input_assign_thread (u32 hw_if_index,
 
   ASSERT (hw->input_node_index > 0);
 
-  if (vdm->first_worker_cpu_index == 0)
-    cpu_index = 0;
+  if (vdm->first_worker_thread_index == 0)
+    thread_index = 0;
 
-  if (cpu_index != 0 &&
-      (cpu_index < vdm->first_worker_cpu_index ||
-       cpu_index > vdm->last_worker_cpu_index))
+  if (thread_index != 0 &&
+      (thread_index < vdm->first_worker_thread_index ||
+       thread_index > vdm->last_worker_thread_index))
     {
-      cpu_index = vdm->next_worker_cpu_index++;
-      if (vdm->next_worker_cpu_index > vdm->last_worker_cpu_index)
-	vdm->next_worker_cpu_index = vdm->first_worker_cpu_index;
+      thread_index = vdm->next_worker_thread_index++;
+      if (vdm->next_worker_thread_index > vdm->last_worker_thread_index)
+	vdm->next_worker_thread_index = vdm->first_worker_thread_index;
     }
 
-  vm = vlib_mains[cpu_index];
+  vm = vlib_mains[thread_index];
   rt = vlib_node_get_runtime_data (vm, hw->input_node_index);
 
   vec_add2 (rt->devices_and_queues, dq, 1);
@@ -136,33 +136,33 @@ vnet_device_input_assign_thread (u32 hw_if_index,
   dq->queue_id = queue_id;
 
   vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort);
-  vec_validate (hw->input_node_cpu_index_by_queue, queue_id);
-  hw->input_node_cpu_index_by_queue[queue_id] = cpu_index;
+  vec_validate (hw->input_node_thread_index_by_queue, queue_id);
+  hw->input_node_thread_index_by_queue[queue_id] = thread_index;
 }
 
 static int
 vnet_device_input_unassign_thread (u32 hw_if_index, u16 queue_id,
-				   uword cpu_index)
+				   uword thread_index)
 {
   vnet_main_t *vnm = vnet_get_main ();
   vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
   vnet_device_input_runtime_t *rt;
   vnet_device_and_queue_t *dq;
-  uword old_cpu_index;
+  uword old_thread_index;
 
-  if (hw->input_node_cpu_index_by_queue == 0)
+  if (hw->input_node_thread_index_by_queue == 0)
     return VNET_API_ERROR_INVALID_INTERFACE;
 
-  if (vec_len (hw->input_node_cpu_index_by_queue) < queue_id + 1)
+  if (vec_len (hw->input_node_thread_index_by_queue) < queue_id + 1)
     return VNET_API_ERROR_INVALID_INTERFACE;
 
-  old_cpu_index = hw->input_node_cpu_index_by_queue[queue_id];
+  old_thread_index = hw->input_node_thread_index_by_queue[queue_id];
 
-  if (old_cpu_index == cpu_index)
+  if (old_thread_index == thread_index)
     return 0;
 
   rt =
-    vlib_node_get_runtime_data (vlib_mains[old_cpu_index],
+    vlib_node_get_runtime_data (vlib_mains[old_thread_index],
 				hw->input_node_index);
 
   vec_foreach (dq, rt->devices_and_queues)
@@ -240,7 +240,7 @@ set_device_placement (vlib_main_t * vm, unformat_input_t * input,
   vnet_device_main_t *vdm = &vnet_device_main;
   u32 hw_if_index = (u32) ~ 0;
   u32 queue_id = (u32) 0;
-  u32 cpu_index = (u32) ~ 0;
+  u32 thread_index = (u32) ~ 0;
   int rv;
 
   if (!unformat_user (input, unformat_line_input, line_input))
@@ -253,10 +253,10 @@ set_device_placement (vlib_main_t * vm, unformat_input_t * input,
 	;
       else if (unformat (line_input, "queue %d", &queue_id))
 	;
-      else if (unformat (line_input, "main", &cpu_index))
-	cpu_index = 0;
-      else if (unformat (line_input, "worker %d", &cpu_index))
-	cpu_index += vdm->first_worker_cpu_index;
+      else if (unformat (line_input, "main", &thread_index))
+	thread_index = 0;
+      else if (unformat (line_input, "worker %d", &thread_index))
+	thread_index += vdm->first_worker_thread_index;
       else
 	{
 	  error = clib_error_return (0, "parse error: '%U'",
@@ -271,16 +271,17 @@ set_device_placement (vlib_main_t * vm, unformat_input_t * input,
   if (hw_if_index == (u32) ~ 0)
     return clib_error_return (0, "please specify valid interface name");
 
-  if (cpu_index > vdm->last_worker_cpu_index)
+  if (thread_index > vdm->last_worker_thread_index)
     return clib_error_return (0,
 			      "please specify valid worker thread or main");
 
-  rv = vnet_device_input_unassign_thread (hw_if_index, queue_id, cpu_index);
+  rv =
+    vnet_device_input_unassign_thread (hw_if_index, queue_id, thread_index);
 
   if (rv)
     return clib_error_return (0, "not found");
 
-  vnet_device_input_assign_thread (hw_if_index, queue_id, cpu_index);
+  vnet_device_input_assign_thread (hw_if_index, queue_id, thread_index);
 
   return 0;
 }
@@ -326,9 +327,9 @@ vnet_device_init (vlib_main_t * vm)
   tr = p ? (vlib_thread_registration_t *) p[0] : 0;
   if (tr && tr->count > 0)
     {
-      vdm->first_worker_cpu_index = tr->first_index;
-      vdm->next_worker_cpu_index = tr->first_index;
-      vdm->last_worker_cpu_index = tr->first_index + tr->count - 1;
+      vdm->first_worker_thread_index = tr->first_index;
+      vdm->next_worker_thread_index = tr->first_index;
+      vdm->last_worker_thread_index = tr->first_index + tr->count - 1;
     }
   return 0;
 }
diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h
index bbb29fe3..966f8302 100644
--- a/src/vnet/devices/devices.h
+++ b/src/vnet/devices/devices.h
@@ -50,9 +50,9 @@ typedef struct
 typedef struct
 {
   vnet_device_per_worker_data_t *workers;
-  uword first_worker_cpu_index;
-  uword last_worker_cpu_index;
-  uword next_worker_cpu_index;
+  uword first_worker_thread_index;
+  uword last_worker_thread_index;
+  uword next_worker_thread_index;
 } vnet_device_main_t;
 
 typedef struct
@@ -80,7 +80,7 @@ vnet_set_device_input_node (u32 hw_if_index, u32 node_index)
 }
 
 void vnet_device_input_assign_thread (u32 hw_if_index, u16 queue_id,
-				      uword cpu_index);
+				      uword thread_index);
 
 static inline u64
 vnet_get_aggregate_rx_packets (void)
@@ -95,12 +95,12 @@ vnet_get_aggregate_rx_packets (void)
 }
 
 static inline void
-vnet_device_increment_rx_packets (u32 cpu_index, u64 count)
+vnet_device_increment_rx_packets (u32 thread_index, u64 count)
 {
   vnet_device_main_t *vdm = &vnet_device_main;
   vnet_device_per_worker_data_t *pwd;
 
-  pwd = vec_elt_at_index (vdm->workers, cpu_index);
+  pwd = vec_elt_at_index (vdm->workers, thread_index);
   pwd->aggregate_rx_packets += count;
 }
 
@@ -117,9 +117,9 @@ vnet_device_input_set_interrupt_pending (vnet_main_t * vnm, u32 hw_if_index,
 {
   vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
 
-  ASSERT (queue_id < vec_len (hw->input_node_cpu_index_by_queue));
-  u32 cpu_index = hw->input_node_cpu_index_by_queue[queue_id];
-  vlib_node_set_interrupt_pending (vlib_mains[cpu_index],
+  ASSERT (queue_id < vec_len (hw->input_node_thread_index_by_queue));
+  u32 thread_index = hw->input_node_thread_index_by_queue[queue_id];
+  vlib_node_set_interrupt_pending (vlib_mains[thread_index],
 				   hw->input_node_index);
 }
 
diff --git a/src/vnet/devices/netmap/node.c b/src/vnet/devices/netmap/node.c
index 68ea7832..e120eeae 100644
--- a/src/vnet/devices/netmap/node.c
+++ b/src/vnet/devices/netmap/node.c
@@ -98,22 +98,22 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   u32 n_free_bufs;
   struct netmap_ring *ring;
   int cur_ring;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm,
 							  VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
 
   if (nif->per_interface_next_index != ~0)
     next_index = nif->per_interface_next_index;
 
-  n_free_bufs = vec_len (nm->rx_buffers[cpu_index]);
+  n_free_bufs = vec_len (nm->rx_buffers[thread_index]);
   if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
     {
-      vec_validate (nm->rx_buffers[cpu_index],
+      vec_validate (nm->rx_buffers[thread_index],
 		    VLIB_FRAME_SIZE + n_free_bufs - 1);
       n_free_bufs +=
-	vlib_buffer_alloc (vm, &nm->rx_buffers[cpu_index][n_free_bufs],
+	vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs],
 			   VLIB_FRAME_SIZE);
-      _vec_len (nm->rx_buffers[cpu_index]) = n_free_bufs;
+      _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs;
     }
 
   cur_ring = nif->first_rx_ring;
@@ -163,11 +163,11 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 		  vlib_buffer_t *b0;
 		  /* grab free buffer */
 		  u32 last_empty_buffer =
-		    vec_len (nm->rx_buffers[cpu_index]) - 1;
+		    vec_len (nm->rx_buffers[thread_index]) - 1;
 		  prev_bi0 = bi0;
-		  bi0 = nm->rx_buffers[cpu_index][last_empty_buffer];
+		  bi0 = nm->rx_buffers[thread_index][last_empty_buffer];
 		  b0 = vlib_get_buffer (vm, bi0);
-		  _vec_len (nm->rx_buffers[cpu_index]) = last_empty_buffer;
+		  _vec_len (nm->rx_buffers[thread_index]) = last_empty_buffer;
 		  n_free_bufs--;
 
 		  /* copy data */
@@ -247,9 +247,9 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   vlib_increment_combined_counter
     (vnet_get_main ()->interface_main.combined_sw_if_counters
      + VNET_INTERFACE_COUNTER_RX,
-     os_get_cpu_number (), nif->hw_if_index, n_rx_packets, n_rx_bytes);
+     vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes);
 
-  vnet_device_increment_rx_packets (cpu_index, n_rx_packets);
+  vnet_device_increment_rx_packets (thread_index, n_rx_packets);
 
   return n_rx_packets;
 }
@@ -260,7 +260,7 @@ netmap_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 {
   int i;
   u32 n_rx_packets = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   netmap_main_t *nm = &netmap_main;
   netmap_if_t *nmi;
 
@@ -269,7 +269,7 @@ netmap_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
       nmi = vec_elt_at_index (nm->interfaces, i);
       if (nmi->is_admin_up &&
 	  (i % nm->input_cpu_count) ==
-	  (cpu_index - nm->input_cpu_first_index))
+	  (thread_index - nm->input_cpu_first_index))
 	n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi);
     }
 
diff --git a/src/vnet/devices/ssvm/node.c b/src/vnet/devices/ssvm/node.c
index a6c9dfd7..539b4161 100644
--- a/src/vnet/devices/ssvm/node.c
+++ b/src/vnet/devices/ssvm/node.c
@@ -89,7 +89,7 @@ ssvm_eth_device_input (ssvm_eth_main_t * em,
   ethernet_header_t *eh0;
   u16 type0;
   u32 n_rx_bytes = 0, l3_offset0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 trace_cnt __attribute__ ((unused)) = vlib_get_trace_count (vm, node);
   volatile u32 *lock;
   u32 *elt_indices;
@@ -284,10 +284,10 @@ out:
 
   vlib_increment_combined_counter
     (vnet_get_main ()->interface_main.combined_sw_if_counters
-     + VNET_INTERFACE_COUNTER_RX, cpu_index,
+     + VNET_INTERFACE_COUNTER_RX, thread_index,
      intfc->vlib_hw_if_index, rx_queue_index, n_rx_bytes);
 
-  vnet_device_increment_rx_packets (cpu_index, rx_queue_index);
+  vnet_device_increment_rx_packets (thread_index, rx_queue_index);
 
   return rx_queue_index;
 }
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
index 00807dc0..5e720f65 100644
--- a/src/vnet/devices/virtio/vhost-user.c
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -331,7 +331,7 @@ vhost_user_tx_thread_placement (vhost_user_intf_t * vui)
 {
   //Let's try to assign one queue to each thread
   u32 qid = 0;
-  u32 cpu_index = 0;
+  u32 thread_index = 0;
   vui->use_tx_spinlock = 0;
   while (1)
     {
@@ -341,20 +341,21 @@ vhost_user_tx_thread_placement (vhost_user_intf_t * vui)
 	  if (!rxvq->started || !rxvq->enabled)
 	    continue;
 
-	  vui->per_cpu_tx_qid[cpu_index] = qid;
-	  cpu_index++;
-	  if (cpu_index == vlib_get_thread_main ()->n_vlib_mains)
+	  vui->per_cpu_tx_qid[thread_index] = qid;
+	  thread_index++;
+	  if (thread_index == vlib_get_thread_main ()->n_vlib_mains)
 	    return;
 	}
       //We need to loop, meaning the spinlock has to be used
       vui->use_tx_spinlock = 1;
-      if (cpu_index == 0)
+      if (thread_index == 0)
 	{
 	  //Could not find a single valid one
-	  for (cpu_index = 0;
-	       cpu_index < vlib_get_thread_main ()->n_vlib_mains; cpu_index++)
+	  for (thread_index = 0;
+	       thread_index < vlib_get_thread_main ()->n_vlib_mains;
+	       thread_index++)
 	    {
-	      vui->per_cpu_tx_qid[cpu_index] = 0;
+	      vui->per_cpu_tx_qid[thread_index] = 0;
 	    }
 	  return;
 	}
@@ -368,7 +369,7 @@ vhost_user_rx_thread_placement ()
   vhost_user_intf_t *vui;
   vhost_cpu_t *vhc;
   u32 *workers = 0;
-  u32 cpu_index;
+  u32 thread_index;
   vlib_main_t *vm;
 
   //Let's list all workers cpu indexes
@@ -400,9 +401,9 @@ vhost_user_rx_thread_placement ()
 	    continue;
 
 	  i %= vec_len (vui_workers);
-	  cpu_index = vui_workers[i];
+	  thread_index = vui_workers[i];
 	  i++;
-	  vhc = &vum->cpus[cpu_index];
+	  vhc = &vum->cpus[thread_index];
 
 	  iaq.qid = qid;
 	  iaq.vhost_iface_index = vui - vum->vhost_user_interfaces;
@@ -429,14 +430,14 @@ vhost_user_rx_thread_placement ()
     vhc->operation_mode = mode;
   }
 
-  for (cpu_index = vum->input_cpu_first_index;
-       cpu_index < vum->input_cpu_first_index + vum->input_cpu_count;
-       cpu_index++)
+  for (thread_index = vum->input_cpu_first_index;
+       thread_index < vum->input_cpu_first_index + vum->input_cpu_count;
+       thread_index++)
     {
       vlib_node_state_t state = VLIB_NODE_STATE_POLLING;
 
-      vhc = &vum->cpus[cpu_index];
-      vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+      vhc = &vum->cpus[thread_index];
+      vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
       switch (vhc->operation_mode)
 	{
 	case VHOST_USER_INTERRUPT_MODE:
@@ -532,7 +533,7 @@ vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
 {
   vhost_user_main_t *vum = &vhost_user_main;
   vhost_cpu_t *vhc;
-  u32 cpu_index;
+  u32 thread_index;
   vhost_iface_and_queue_t *vhiq;
   vlib_main_t *vm;
   u32 ifq2;
@@ -553,8 +554,8 @@ vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
 	  if ((vhiq->vhost_iface_index == (ifq >> 8)) &&
 	      (VHOST_VRING_IDX_TX (vhiq->qid) == (ifq & 0xff)))
 	    {
-	      cpu_index = vhc - vum->cpus;
-	      vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+	      thread_index = vhc - vum->cpus;
+	      vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
 	      /*
 	       * Convert RX virtqueue number in the lower byte to vring
 	       * queue index for the input node process. Top bytes contain
@@ -1592,7 +1593,7 @@ vhost_user_if_input (vlib_main_t * vm,
   u32 n_trace = vlib_get_trace_count (vm, node);
   u16 qsz_mask;
   u32 map_hint = 0;
-  u16 cpu_index = os_get_cpu_number ();
+  u16 thread_index = vlib_get_thread_index ();
   u16 copy_len = 0;
 
   {
@@ -1651,32 +1652,32 @@ vhost_user_if_input (vlib_main_t * vm,
    * in the loop and come back later. This is not an issue as for big packet,
    * processing cost really comes from the memory copy.
    */
-  if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len < n_left + 1))
+  if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1))
     {
-      u32 curr_len = vum->cpus[cpu_index].rx_buffers_len;
-      vum->cpus[cpu_index].rx_buffers_len +=
+      u32 curr_len = vum->cpus[thread_index].rx_buffers_len;
+      vum->cpus[thread_index].rx_buffers_len +=
 	vlib_buffer_alloc_from_free_list (vm,
-					  vum->cpus[cpu_index].rx_buffers +
+					  vum->cpus[thread_index].rx_buffers +
 					  curr_len,
 					  VHOST_USER_RX_BUFFERS_N - curr_len,
 					  VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
 
       if (PREDICT_FALSE
-	  (vum->cpus[cpu_index].rx_buffers_len <
+	  (vum->cpus[thread_index].rx_buffers_len <
 	   VHOST_USER_RX_BUFFER_STARVATION))
 	{
 	  /* In case of buffer starvation, discard some packets from the queue
 	   * and log the event.
 	   * We keep doing best effort for the remaining packets. */
-	  u32 flush = (n_left + 1 > vum->cpus[cpu_index].rx_buffers_len) ?
-	    n_left + 1 - vum->cpus[cpu_index].rx_buffers_len : 1;
+	  u32 flush = (n_left + 1 > vum->cpus[thread_index].rx_buffers_len) ?
+	    n_left + 1 - vum->cpus[thread_index].rx_buffers_len : 1;
 	  flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);
 
 	  n_left -= flush;
 	  vlib_increment_simple_counter (vnet_main.
 					 interface_main.sw_if_counters +
 					 VNET_INTERFACE_COUNTER_DROP,
-					 os_get_cpu_number (),
+					 vlib_get_thread_index (),
 					 vui->sw_if_index, flush);
 
 	  vlib_error_count (vm, vhost_user_input_node.index,
@@ -1696,7 +1697,7 @@ vhost_user_if_input (vlib_main_t * vm,
 	  u32 desc_data_offset;
 	  vring_desc_t *desc_table = txvq->desc;
 
-	  if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len <= 1))
+	  if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len <= 1))
 	    {
 	      /* Not enough rx_buffers
 	       * Note: We yeld on 1 so we don't need to do an additional
@@ -1707,17 +1708,18 @@ vhost_user_if_input (vlib_main_t * vm,
 	    }
 
 	  desc_current = txvq->avail->ring[txvq->last_avail_idx & qsz_mask];
-	  vum->cpus[cpu_index].rx_buffers_len--;
-	  bi_current = (vum->cpus[cpu_index].rx_buffers)
-	    [vum->cpus[cpu_index].rx_buffers_len];
+	  vum->cpus[thread_index].rx_buffers_len--;
+	  bi_current = (vum->cpus[thread_index].rx_buffers)
+	    [vum->cpus[thread_index].rx_buffers_len];
 	  b_head = b_current = vlib_get_buffer (vm, bi_current);
 	  to_next[0] = bi_current;	//We do that now so we can forget about bi_current
 	  to_next++;
 	  n_left_to_next--;
 
 	  vlib_prefetch_buffer_with_index (vm,
-					   (vum->cpus[cpu_index].rx_buffers)
-					   [vum->cpus[cpu_index].
+					   (vum->
+					    cpus[thread_index].rx_buffers)
+					   [vum->cpus[thread_index].
 					    rx_buffers_len - 1], LOAD);
 
 	  /* Just preset the used descriptor id and length for later */
@@ -1791,7 +1793,7 @@ vhost_user_if_input (vlib_main_t * vm,
 		  (b_current->current_length == VLIB_BUFFER_DATA_SIZE))
 		{
 		  if (PREDICT_FALSE
-		      (vum->cpus[cpu_index].rx_buffers_len == 0))
+		      (vum->cpus[thread_index].rx_buffers_len == 0))
 		    {
 		      /* Cancel speculation */
 		      to_next--;
@@ -1805,17 +1807,18 @@ vhost_user_if_input (vlib_main_t * vm,
 		       * but valid.
 		       */
 		      vhost_user_input_rewind_buffers (vm,
-						       &vum->cpus[cpu_index],
+						       &vum->cpus
+						       [thread_index],
 						       b_head);
 		      n_left = 0;
 		      goto stop;
 		    }
 
 		  /* Get next output */
-		  vum->cpus[cpu_index].rx_buffers_len--;
+		  vum->cpus[thread_index].rx_buffers_len--;
 		  u32 bi_next =
-		    (vum->cpus[cpu_index].rx_buffers)[vum->cpus
-						      [cpu_index].rx_buffers_len];
+		    (vum->cpus[thread_index].rx_buffers)[vum->cpus
+							 [thread_index].rx_buffers_len];
 		  b_current->next_buffer = bi_next;
 		  b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
 		  bi_current = bi_next;
@@ -1823,7 +1826,7 @@ vhost_user_if_input (vlib_main_t * vm,
 		}
 
 	      /* Prepare a copy order executed later for the data */
-	      vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len];
+	      vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
 	      copy_len++;
 	      u32 desc_data_l =
 		desc_table[desc_current].len - desc_data_offset;
@@ -1880,7 +1883,7 @@ vhost_user_if_input (vlib_main_t * vm,
 	  if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
 	    {
 	      if (PREDICT_FALSE
-		  (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy,
+		  (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
 					  copy_len, &map_hint)))
 		{
 		  clib_warning
@@ -1905,7 +1908,7 @@ vhost_user_if_input (vlib_main_t * vm,
 
   /* Do the memory copies */
   if (PREDICT_FALSE
-      (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy,
+      (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
 			      copy_len, &map_hint)))
     {
       clib_warning ("Memory mapping error on interface hw_if_index=%d "
@@ -1933,9 +1936,9 @@ vhost_user_if_input (vlib_main_t * vm,
   vlib_increment_combined_counter
     (vnet_main.interface_main.combined_sw_if_counters
      + VNET_INTERFACE_COUNTER_RX,
-     os_get_cpu_number (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
+     vlib_get_thread_index (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
 
-  vnet_device_increment_rx_packets (cpu_index, n_rx_packets);
+  vnet_device_increment_rx_packets (thread_index, n_rx_packets);
 
   return n_rx_packets;
 }
@@ -1946,15 +1949,15 @@ vhost_user_input (vlib_main_t * vm,
 {
   vhost_user_main_t *vum = &vhost_user_main;
   uword n_rx_packets = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   vhost_iface_and_queue_t *vhiq;
   vhost_user_intf_t *vui;
   vhost_cpu_t *vhc;
 
-  vhc = &vum->cpus[cpu_index];
+  vhc = &vum->cpus[thread_index];
   if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE))
     {
-      vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
+      vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
       {
 	vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
 	n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node);
@@ -2096,7 +2099,7 @@ vhost_user_tx (vlib_main_t * vm,
   vhost_user_vring_t *rxvq;
   u16 qsz_mask;
   u8 error;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 map_hint = 0;
   u8 retry = 8;
   u16 copy_len;
@@ -2116,7 +2119,7 @@ vhost_user_tx (vlib_main_t * vm,
 
   qid =
     VHOST_VRING_IDX_RX (*vec_elt_at_index
-			(vui->per_cpu_tx_qid, os_get_cpu_number ()));
+			(vui->per_cpu_tx_qid, vlib_get_thread_index ()));
   rxvq = &vui->vrings[qid];
   if (PREDICT_FALSE (vui->use_tx_spinlock))
     vhost_user_vring_lock (vui, qid);
@@ -2143,10 +2146,10 @@ retry:
 
       if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
 	{
-	  vum->cpus[cpu_index].current_trace =
+	  vum->cpus[thread_index].current_trace =
 	    vlib_add_trace (vm, node, b0,
-			    sizeof (*vum->cpus[cpu_index].current_trace));
-	  vhost_user_tx_trace (vum->cpus[cpu_index].current_trace,
+			    sizeof (*vum->cpus[thread_index].current_trace));
+	  vhost_user_tx_trace (vum->cpus[thread_index].current_trace,
 			       vui, qid / 2, b0, rxvq);
 	}
 
@@ -2188,14 +2191,14 @@ retry:
       {
 	// Get a header from the header array
 	virtio_net_hdr_mrg_rxbuf_t *hdr =
-	  &vum->cpus[cpu_index].tx_headers[tx_headers_len];
+	  &vum->cpus[thread_index].tx_headers[tx_headers_len];
 	tx_headers_len++;
 	hdr->hdr.flags = 0;
 	hdr->hdr.gso_type = 0;
 	hdr->num_buffers = 1;	//This is local, no need to check
 
 	// Prepare a copy order executed later for the header
-	vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len];
+	vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
 	copy_len++;
 	cpy->len = vui->virtio_net_hdr_sz;
 	cpy->dst = buffer_map_addr;
@@ -2220,7 +2223,7 @@ retry:
 	      else if (vui->virtio_net_hdr_sz == 12)	//MRG is available
 		{
 		  virtio_net_hdr_mrg_rxbuf_t *hdr =
-		    &vum->cpus[cpu_index].tx_headers[tx_headers_len - 1];
+		    &vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
 
 		  //Move from available to used buffer
 		  rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id =
@@ -2282,7 +2285,7 @@ retry:
 	    }
 
 	  {
-	    vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len];
+	    vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
 	    copy_len++;
 	    cpy->len = bytes_left;
 	    cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
@@ -2325,8 +2328,8 @@ retry:
 
       if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
 	{
-	  vum->cpus[cpu_index].current_trace->hdr =
-	    vum->cpus[cpu_index].tx_headers[tx_headers_len - 1];
+	  vum->cpus[thread_index].current_trace->hdr =
+	    vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
 	}
 
       n_left--;			//At the end for error counting when 'goto done' is invoked
@@ -2336,7 +2339,7 @@ retry:
 done:
   //Do the memory copies
   if (PREDICT_FALSE
-      (vhost_user_tx_copy (vui, vum->cpus[cpu_index].copy,
+      (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy,
 			   copy_len, &map_hint)))
     {
       clib_warning ("Memory mapping error on interface hw_if_index=%d "
@@ -2386,7 +2389,7 @@ done3:
       vlib_increment_simple_counter
 	(vnet_main.interface_main.sw_if_counters
 	 + VNET_INTERFACE_COUNTER_DROP,
-	 os_get_cpu_number (), vui->sw_if_index, n_left);
+	 vlib_get_thread_index (), vui->sw_if_index, n_left);
     }
 
   vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
@@ -2773,11 +2776,11 @@ vhost_user_send_interrupt_process (vlib_main_t * vm,
 	case ~0:
 	  vec_foreach (vhc, vum->cpus)
 	  {
-	    u32 cpu_index = vhc - vum->cpus;
+	    u32 thread_index = vhc - vum->cpus;
 	    f64 next_timeout;
 
 	    next_timeout = timeout;
-	    vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
+	    vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
 	    {
 	      vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
 	      vhost_user_vring_t *rxvq =
diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
index e94e871c..97ad0a44 100644
--- a/src/vnet/dpo/lookup_dpo.c
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -266,7 +266,7 @@ lookup_dpo_ip4_inline (vlib_main_t * vm,
                        int table_from_interface)
 {
     u32 n_left_from, next_index, * from, * to_next;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
     vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
 
     from = vlib_frame_vector_args (from_frame);
@@ -407,10 +407,10 @@ lookup_dpo_ip4_inline (vlib_main_t * vm,
 	    vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
 	    vlib_increment_combined_counter
-		(cm, cpu_index, lbi0, 1,
+		(cm, thread_index, lbi0, 1,
 		 vlib_buffer_length_in_chain (vm, b0));
 	    vlib_increment_combined_counter
-		(cm, cpu_index, lbi1, 1,
+		(cm, thread_index, lbi1, 1,
 		 vlib_buffer_length_in_chain (vm, b1));
 
 	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -511,7 +511,7 @@ lookup_dpo_ip4_inline (vlib_main_t * vm,
 	    vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
 	    vlib_increment_combined_counter
-		(cm, cpu_index, lbi0, 1,
+		(cm, thread_index, lbi0, 1,
 		 vlib_buffer_length_in_chain (vm, b0));
 
 	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -606,7 +606,7 @@ lookup_dpo_ip6_inline (vlib_main_t * vm,
 {
     vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
     u32 n_left_from, next_index, * from, * to_next;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
 
     from = vlib_frame_vector_args (from_frame);
     n_left_from = from_frame->n_vectors;
@@ -749,10 +749,10 @@ lookup_dpo_ip6_inline (vlib_main_t * vm,
 	    vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
 	    vlib_increment_combined_counter
-		(cm, cpu_index, lbi0, 1,
+		(cm, thread_index, lbi0, 1,
 		 vlib_buffer_length_in_chain (vm, b0));
 	    vlib_increment_combined_counter
-		(cm, cpu_index, lbi1, 1,
+		(cm, thread_index, lbi1, 1,
 		 vlib_buffer_length_in_chain (vm, b1));
 
 	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -853,7 +853,7 @@ lookup_dpo_ip6_inline (vlib_main_t * vm,
 	    vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
 	    vlib_increment_combined_counter
-		(cm, cpu_index, lbi0, 1,
+		(cm, thread_index, lbi0, 1,
 		 vlib_buffer_length_in_chain (vm, b0));
 
 	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -930,7 +930,7 @@ lookup_dpo_mpls_inline (vlib_main_t * vm,
                        int table_from_interface)
 {
     u32 n_left_from, next_index, * from, * to_next;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
     vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
 
     from = vlib_frame_vector_args (from_frame);
@@ -994,7 +994,7 @@ lookup_dpo_mpls_inline (vlib_main_t * vm,
             vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
             vlib_increment_combined_counter
-                (cm, cpu_index, lbi0, 1,
+                (cm, thread_index, lbi0, 1,
                  vlib_buffer_length_in_chain (vm, b0));
 
 	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) 
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index a9f334be..e25ceae9 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -627,7 +627,7 @@ replicate_inline (vlib_main_t * vm,
     vlib_combined_counter_main_t * cm = &replicate_main.repm_counters;
     replicate_main_t * rm = &replicate_main;
     u32 n_left_from, * from, * to_next, next_index;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
 
     from = vlib_frame_vector_args (frame);
     n_left_from = frame->n_vectors;
@@ -657,12 +657,12 @@ replicate_inline (vlib_main_t * vm,
             rep0 = replicate_get(repi0);
 
             vlib_increment_combined_counter(
-                cm, cpu_index, repi0, 1,
+                cm, thread_index, repi0, 1,
                 vlib_buffer_length_in_chain(vm, b0));
 
-	    vec_validate (rm->clones[cpu_index], rep0->rep_n_buckets - 1);
+	    vec_validate (rm->clones[thread_index], rep0->rep_n_buckets - 1);
 
-	    num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[cpu_index], rep0->rep_n_buckets, 128);
+	    num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[thread_index], rep0->rep_n_buckets, 128);
 
 	    if (num_cloned != rep0->rep_n_buckets)
 	      {
@@ -673,7 +673,7 @@ replicate_inline (vlib_main_t * vm,
 
             for (bucket = 0; bucket < num_cloned; bucket++)
             {
-                ci0 = rm->clones[cpu_index][bucket];
+                ci0 = rm->clones[thread_index][bucket];
                 c0 = vlib_get_buffer(vm, ci0);
 
                 to_next[0] = ci0;
@@ -700,7 +700,7 @@ replicate_inline (vlib_main_t * vm,
 		    vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 		  }
             }
-	    vec_reset_length (rm->clones[cpu_index]);
+	    vec_reset_length (rm->clones[thread_index]);
         }
 
         vlib_put_next_frame (vm, node, next_index, n_left_to_next);
diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c
index ee757505..c74a097e 100644
--- a/src/vnet/ethernet/arp.c
+++ b/src/vnet/ethernet/arp.c
@@ -1771,7 +1771,7 @@ set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
 				    * a)
 {
   vnet_main_t *vm = vnet_get_main ();
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   if (a->flags & ETHERNET_ARP_ARGS_REMOVE)
     vnet_arp_unset_ip4_over_ethernet_internal (vm, a);
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index 9894e3c8..335e3f9f 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -362,7 +362,7 @@ simulated_ethernet_interface_tx (vlib_main_t * vm,
   u32 next_index = VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT;
   u32 i, next_node_index, bvi_flag, sw_if_index;
   u32 n_pkts = 0, n_bytes = 0;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
   vnet_main_t *vnm = vnet_get_main ();
   vnet_interface_main_t *im = &vnm->interface_main;
   vlib_node_main_t *nm = &vm->node_main;
@@ -420,8 +420,9 @@ simulated_ethernet_interface_tx (vlib_main_t * vm,
 
       /* increment TX interface stat */
       vlib_increment_combined_counter (im->combined_sw_if_counters +
-				       VNET_INTERFACE_COUNTER_TX, cpu_index,
-				       sw_if_index, n_pkts, n_bytes);
+				       VNET_INTERFACE_COUNTER_TX,
+				       thread_index, sw_if_index, n_pkts,
+				       n_bytes);
     }
 
   return n_left_from;
diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c
index b699e381..f7787ed2 100755
--- a/src/vnet/ethernet/node.c
+++ b/src/vnet/ethernet/node.c
@@ -291,7 +291,7 @@ ethernet_input_inline (vlib_main_t * vm,
   vlib_node_runtime_t *error_node;
   u32 n_left_from, next_index, *from, *to_next;
   u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 cached_sw_if_index = ~0;
   u32 cached_is_l2 = 0;		/* shut up gcc */
   vnet_hw_interface_t *hi = NULL;	/* used for main interface only */
@@ -510,7 +510,7 @@ ethernet_input_inline (vlib_main_t * vm,
 						     interface_main.combined_sw_if_counters
 						     +
 						     VNET_INTERFACE_COUNTER_RX,
-						     cpu_index,
+						     thread_index,
 						     new_sw_if_index0, 1,
 						     len0);
 		  if (new_sw_if_index1 != old_sw_if_index1
@@ -519,7 +519,7 @@ ethernet_input_inline (vlib_main_t * vm,
 						     interface_main.combined_sw_if_counters
 						     +
 						     VNET_INTERFACE_COUNTER_RX,
-						     cpu_index,
+						     thread_index,
 						     new_sw_if_index1, 1,
 						     len1);
 
@@ -530,7 +530,7 @@ ethernet_input_inline (vlib_main_t * vm,
 			  vlib_increment_combined_counter
 			    (vnm->interface_main.combined_sw_if_counters
 			     + VNET_INTERFACE_COUNTER_RX,
-			     cpu_index,
+			     thread_index,
 			     stats_sw_if_index,
 			     stats_n_packets, stats_n_bytes);
 			  stats_n_packets = stats_n_bytes = 0;
@@ -696,13 +696,13 @@ ethernet_input_inline (vlib_main_t * vm,
 		    vlib_increment_combined_counter
 		      (vnm->interface_main.combined_sw_if_counters
 		       + VNET_INTERFACE_COUNTER_RX,
-		       cpu_index, new_sw_if_index0, 1, len0);
+		       thread_index, new_sw_if_index0, 1, len0);
 		  if (stats_n_packets > 0)
 		    {
 		      vlib_increment_combined_counter
 			(vnm->interface_main.combined_sw_if_counters
 			 + VNET_INTERFACE_COUNTER_RX,
-			 cpu_index,
+			 thread_index,
 			 stats_sw_if_index, stats_n_packets, stats_n_bytes);
 		      stats_n_packets = stats_n_bytes = 0;
 		    }
@@ -734,7 +734,7 @@ ethernet_input_inline (vlib_main_t * vm,
       vlib_increment_combined_counter
 	(vnm->interface_main.combined_sw_if_counters
 	 + VNET_INTERFACE_COUNTER_RX,
-	 cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+	 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
       node->runtime_data[0] = stats_sw_if_index;
     }
 
diff --git a/src/vnet/gre/node.c b/src/vnet/gre/node.c
index 2683586e..acf15f24 100644
--- a/src/vnet/gre/node.c
+++ b/src/vnet/gre/node.c
@@ -75,7 +75,7 @@ gre_input (vlib_main_t * vm,
   u64 cached_tunnel_key6[4];
   u32 cached_tunnel_sw_if_index = 0, tunnel_sw_if_index = 0;
 
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   u32 len;
   vnet_interface_main_t *im = &gm->vnet_main->interface_main;
 
@@ -257,7 +257,7 @@ gre_input (vlib_main_t * vm,
           len = vlib_buffer_length_in_chain (vm, b0);
           vlib_increment_combined_counter (im->combined_sw_if_counters
                                            + VNET_INTERFACE_COUNTER_RX,
-                                           cpu_index,
+                                           thread_index,
                                            tunnel_sw_if_index,
                                            1 /* packets */,
                                            len /* bytes */);
@@ -324,7 +324,7 @@ drop0:
           len = vlib_buffer_length_in_chain (vm, b1);
           vlib_increment_combined_counter (im->combined_sw_if_counters
                                            + VNET_INTERFACE_COUNTER_RX,
-                                           cpu_index,
+                                           thread_index,
                                            tunnel_sw_if_index,
                                            1 /* packets */,
                                            len /* bytes */);
@@ -502,7 +502,7 @@ drop1:
           len = vlib_buffer_length_in_chain (vm, b0);
           vlib_increment_combined_counter (im->combined_sw_if_counters
                                            + VNET_INTERFACE_COUNTER_RX,
-                                           cpu_index,
+                                           thread_index,
                                            tunnel_sw_if_index,
                                            1 /* packets */,
                                            len /* bytes */);
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index a1ea2d61..08f08b10 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -468,7 +468,7 @@ typedef struct vnet_hw_interface_t
   u32 input_node_index;
 
-  /* input node cpu index by queue */
-  u32 *input_node_cpu_index_by_queue;
+  /* input node thread index by queue */
+  u32 *input_node_thread_index_by_queue;
 
 } vnet_hw_interface_t;
 
diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c
index 03f2cdca..663dc309 100644
--- a/src/vnet/interface_output.c
+++ b/src/vnet/interface_output.c
@@ -196,7 +196,7 @@ slow_path (vlib_main_t * vm,
  */
 static_always_inline void
 incr_output_stats (vnet_main_t * vnm,
-		   u32 cpu_index,
+		   u32 thread_index,
 		   u32 length,
 		   u32 sw_if_index,
 		   u32 * last_sw_if_index, u32 * n_packets, u32 * n_bytes)
@@ -216,7 +216,7 @@ incr_output_stats (vnet_main_t * vnm,
 
 	  vlib_increment_combined_counter (im->combined_sw_if_counters
 					   + VNET_INTERFACE_COUNTER_TX,
-					   cpu_index,
+					   thread_index,
 					   *last_sw_if_index,
 					   *n_packets, *n_bytes);
 	}
@@ -240,7 +240,7 @@ vnet_interface_output_node_flatten (vlib_main_t * vm,
   u32 n_left_to_tx, *from, *from_end, *to_tx;
   u32 n_bytes, n_buffers, n_packets;
   u32 last_sw_if_index;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
 
   n_buffers = frame->n_vectors;
 
@@ -266,7 +266,7 @@ vnet_interface_output_node_flatten (vlib_main_t * vm,
 
       cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
 			     VNET_INTERFACE_COUNTER_TX_ERROR);
-      vlib_increment_simple_counter (cm, cpu_index,
+      vlib_increment_simple_counter (cm, thread_index,
 				     rt->sw_if_index, n_buffers);
       return vlib_error_drop_buffers (vm, node, from,
 				      /* buffer stride */ 1,
@@ -341,18 +341,18 @@ vnet_interface_output_node_flatten (vlib_main_t * vm,
 		  from += 1;
 		  to_tx += n_buffers;
 		  n_left_to_tx -= n_buffers;
-		  incr_output_stats (vnm, cpu_index, n_slow_bytes,
+		  incr_output_stats (vnm, thread_index, n_slow_bytes,
 				     vnet_buffer (b)->sw_if_index[VLIB_TX],
 				     &last_sw_if_index, &n_packets, &n_bytes);
 		}
 	    }
 	  else
 	    {
-	      incr_output_stats (vnm, cpu_index,
+	      incr_output_stats (vnm, thread_index,
 				 vlib_buffer_length_in_chain (vm, b0),
 				 vnet_buffer (b0)->sw_if_index[VLIB_TX],
 				 &last_sw_if_index, &n_packets, &n_bytes);
-	      incr_output_stats (vnm, cpu_index,
+	      incr_output_stats (vnm, thread_index,
 				 vlib_buffer_length_in_chain (vm, b0),
 				 vnet_buffer (b1)->sw_if_index[VLIB_TX],
 				 &last_sw_if_index, &n_packets, &n_bytes);
@@ -396,7 +396,7 @@ vnet_interface_output_node_flatten (vlib_main_t * vm,
 	      to_tx += n_buffers;
 	      n_left_to_tx -= n_buffers;
 	    }
-	  incr_output_stats (vnm, cpu_index,
+	  incr_output_stats (vnm, thread_index,
 			     vlib_buffer_length_in_chain (vm, b0),
 			     vnet_buffer (b0)->sw_if_index[VLIB_TX],
 			     &last_sw_if_index, &n_packets, &n_bytes);
@@ -408,7 +408,7 @@ vnet_interface_output_node_flatten (vlib_main_t * vm,
     }
 
   /* Final update of interface stats. */
-  incr_output_stats (vnm, cpu_index, 0, ~0,	/* ~0 will flush stats */
+  incr_output_stats (vnm, thread_index, 0, ~0,	/* ~0 will flush stats */
 		     &last_sw_if_index, &n_packets, &n_bytes);
 
   return n_buffers;
@@ -428,7 +428,7 @@ vnet_interface_output_node (vlib_main_t * vm,
   u32 n_left_to_tx, *from, *from_end, *to_tx;
   u32 n_bytes, n_buffers, n_packets;
   u32 n_bytes_b0, n_bytes_b1, n_bytes_b2, n_bytes_b3;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
   vnet_interface_main_t *im = &vnm->interface_main;
   u32 next_index = VNET_INTERFACE_OUTPUT_NEXT_TX;
   u32 current_config_index = ~0;
@@ -458,7 +458,7 @@ vnet_interface_output_node (vlib_main_t * vm,
 
       cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
 			     VNET_INTERFACE_COUNTER_TX_ERROR);
-      vlib_increment_simple_counter (cm, cpu_index,
+      vlib_increment_simple_counter (cm, thread_index,
 				     rt->sw_if_index, n_buffers);
 
       return vlib_error_drop_buffers (vm, node, from,
@@ -558,7 +558,7 @@ vnet_interface_output_node (vlib_main_t * vm,
 	    {
 	      vlib_increment_combined_counter (im->combined_sw_if_counters +
 					       VNET_INTERFACE_COUNTER_TX,
-					       cpu_index, tx_swif0, 1,
+					       thread_index, tx_swif0, 1,
 					       n_bytes_b0);
 	    }
 
@@ -567,7 +567,7 @@ vnet_interface_output_node (vlib_main_t * vm,
 
 	      vlib_increment_combined_counter (im->combined_sw_if_counters +
 					       VNET_INTERFACE_COUNTER_TX,
-					       cpu_index, tx_swif1, 1,
+					       thread_index, tx_swif1, 1,
 					       n_bytes_b1);
 	    }
 
@@ -576,7 +576,7 @@ vnet_interface_output_node (vlib_main_t * vm,
 
 	      vlib_increment_combined_counter (im->combined_sw_if_counters +
 					       VNET_INTERFACE_COUNTER_TX,
-					       cpu_index, tx_swif2, 1,
+					       thread_index, tx_swif2, 1,
 					       n_bytes_b2);
 	    }
 	  if (PREDICT_FALSE (tx_swif3 != rt->sw_if_index))
@@ -584,7 +584,7 @@ vnet_interface_output_node (vlib_main_t * vm,
 
 	      vlib_increment_combined_counter (im->combined_sw_if_counters +
 					       VNET_INTERFACE_COUNTER_TX,
-					       cpu_index, tx_swif3, 1,
+					       thread_index, tx_swif3, 1,
 					       n_bytes_b3);
 	    }
 	}
@@ -623,7 +623,7 @@ vnet_interface_output_node (vlib_main_t * vm,
 
 	      vlib_increment_combined_counter (im->combined_sw_if_counters +
 					       VNET_INTERFACE_COUNTER_TX,
-					       cpu_index, tx_swif0, 1,
+					       thread_index, tx_swif0, 1,
 					       n_bytes_b0);
 	    }
 	}
@@ -634,7 +634,7 @@ vnet_interface_output_node (vlib_main_t * vm,
   /* Update main interface stats. */
   vlib_increment_combined_counter (im->combined_sw_if_counters
 				   + VNET_INTERFACE_COUNTER_TX,
-				   cpu_index,
+				   thread_index,
 				   rt->sw_if_index, n_packets, n_bytes);
   return n_buffers;
 }
@@ -893,7 +893,7 @@ process_drop_punt (vlib_main_t * vm,
   u32 current_sw_if_index, n_errors_current_sw_if_index;
   u64 current_counter;
   vlib_simple_counter_main_t *cm;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
 
   static vlib_error_t memory[VNET_ERROR_N_DISPOSITION];
   static char memory_init[VNET_ERROR_N_DISPOSITION];
@@ -965,19 +965,19 @@ process_drop_punt (vlib_main_t * vm,
 	  current_counter -= 2;
 	  n_errors_current_sw_if_index -= 2;
 
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
 
 	  /* Increment super-interface drop/punt counters for
 	     sub-interfaces. */
 	  sw_if0 = vnet_get_sw_interface (vnm, sw_if_index0);
 	  vlib_increment_simple_counter
-	    (cm, cpu_index, sw_if0->sup_sw_if_index,
+	    (cm, thread_index, sw_if0->sup_sw_if_index,
 	     sw_if0->sup_sw_if_index != sw_if_index0);
 
 	  sw_if1 = vnet_get_sw_interface (vnm, sw_if_index1);
 	  vlib_increment_simple_counter
-	    (cm, cpu_index, sw_if1->sup_sw_if_index,
+	    (cm, thread_index, sw_if1->sup_sw_if_index,
 	     sw_if1->sup_sw_if_index != sw_if_index1);
 
 	  em->counters[current_counter_index] = current_counter;
@@ -1013,11 +1013,12 @@ process_drop_punt (vlib_main_t * vm,
       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
 
       /* Increment drop/punt counters. */
-      vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+      vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
 
       /* Increment super-interface drop/punt counters for sub-interfaces. */
       sw_if0 = vnet_get_sw_interface (vnm, sw_if_index0);
-      vlib_increment_simple_counter (cm, cpu_index, sw_if0->sup_sw_if_index,
+      vlib_increment_simple_counter (cm, thread_index,
+				     sw_if0->sup_sw_if_index,
 				     sw_if0->sup_sw_if_index != sw_if_index0);
 
       if (PREDICT_FALSE (e0 != current_error))
@@ -1041,12 +1042,12 @@ process_drop_punt (vlib_main_t * vm,
     {
       vnet_sw_interface_t *si;
 
-      vlib_increment_simple_counter (cm, cpu_index, current_sw_if_index,
+      vlib_increment_simple_counter (cm, thread_index, current_sw_if_index,
 				     n_errors_current_sw_if_index);
 
       si = vnet_get_sw_interface (vnm, current_sw_if_index);
       if (si->sup_sw_if_index != current_sw_if_index)
-	vlib_increment_simple_counter (cm, cpu_index, si->sup_sw_if_index,
+	vlib_increment_simple_counter (cm, thread_index, si->sup_sw_if_index,
 				       n_errors_current_sw_if_index);
     }
 
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index ee1703e7..fdfe7f63 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -75,7 +75,7 @@ ip4_lookup_inline (vlib_main_t * vm,
   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
   u32 n_left_from, n_left_to_next, *from, *to_next;
   ip_lookup_next_t next;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -292,19 +292,19 @@ ip4_lookup_inline (vlib_main_t * vm,
 	  vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lb_index0, 1,
+	    (cm, thread_index, lb_index0, 1,
 	     vlib_buffer_length_in_chain (vm, p0)
 	     + sizeof (ethernet_header_t));
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lb_index1, 1,
+	    (cm, thread_index, lb_index1, 1,
 	     vlib_buffer_length_in_chain (vm, p1)
 	     + sizeof (ethernet_header_t));
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lb_index2, 1,
+	    (cm, thread_index, lb_index2, 1,
 	     vlib_buffer_length_in_chain (vm, p2)
 	     + sizeof (ethernet_header_t));
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lb_index3, 1,
+	    (cm, thread_index, lb_index3, 1,
 	     vlib_buffer_length_in_chain (vm, p3)
 	     + sizeof (ethernet_header_t));
 
@@ -392,7 +392,7 @@ ip4_lookup_inline (vlib_main_t * vm,
 	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 
 	  from += 1;
 	  to_next += 1;
@@ -479,7 +479,7 @@ ip4_load_balance (vlib_main_t * vm,
   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
   u32 n_left_from, n_left_to_next, *from, *to_next;
   ip_lookup_next_t next;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -584,9 +584,9 @@ ip4_load_balance (vlib_main_t * vm,
 	  vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+	    (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
 
 	  vlib_validate_buffer_enqueue_x2 (vm, node, next,
 					   to_next, n_left_to_next,
@@ -639,7 +639,7 @@ ip4_load_balance (vlib_main_t * vm,
 	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next,
 					   to_next, n_left_to_next,
@@ -2330,7 +2330,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
 
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -2379,9 +2379,9 @@ ip4_rewrite_inline (vlib_main_t * vm,
 	  if (do_counters)
 	    {
 	      vlib_prefetch_combined_counter (&adjacency_counters,
-					      cpu_index, adj_index0);
+					      thread_index, adj_index0);
 	      vlib_prefetch_combined_counter (&adjacency_counters,
-					      cpu_index, adj_index1);
+					      thread_index, adj_index1);
 	    }
 
 	  ip0 = vlib_buffer_get_current (p0);
@@ -2527,13 +2527,13 @@ ip4_rewrite_inline (vlib_main_t * vm,
 	    {
 	      vlib_increment_combined_counter
 		(&adjacency_counters,
-		 cpu_index,
+		 thread_index,
 		 adj_index0, 1,
 		 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
 
 	      vlib_increment_combined_counter
 		(&adjacency_counters,
-		 cpu_index,
+		 thread_index,
 		 adj_index1, 1,
 		 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
 	    }
@@ -2618,7 +2618,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
 
 	  if (do_counters)
 	    vlib_prefetch_combined_counter (&adjacency_counters,
-					    cpu_index, adj_index0);
+					    thread_index, adj_index0);
 
 	  /* Guess we are only writing on simple Ethernet header. */
 	  vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
@@ -2637,7 +2637,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
 	  if (do_counters)
 	    vlib_increment_combined_counter
 	      (&adjacency_counters,
-	       cpu_index, adj_index0, 1,
+	       thread_index, adj_index0, 1,
 	       vlib_buffer_length_in_chain (vm, p0) + rw_len0);
 
 	  /* Check MTU of outgoing interface. */
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c
index ba200a9f..3b08f4b0 100644
--- a/src/vnet/ip/ip4_input.c
+++ b/src/vnet/ip/ip4_input.c
@@ -85,7 +85,7 @@ ip4_input_inline (vlib_main_t * vm,
   vlib_node_runtime_t *error_node =
     vlib_node_get_runtime (vm, ip4_input_node.index);
   vlib_simple_counter_main_t *cm;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -178,8 +178,8 @@ ip4_input_inline (vlib_main_t * vm,
 	  vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
 	  vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1);
 
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
 
 	  /* Punt packets with options or wrong version. */
 	  if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45))
@@ -299,7 +299,7 @@ ip4_input_inline (vlib_main_t * vm,
 	  vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
 	  vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
 
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
 
 	  /* Punt packets with options or wrong version. */
 	  if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45))
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index c120f12c..c2fc4f87 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -74,7 +74,7 @@ ip6_lookup_inline (vlib_main_t * vm,
   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
   u32 n_left_from, n_left_to_next, *from, *to_next;
   ip_lookup_next_t next;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -185,9 +185,9 @@ ip6_lookup_inline (vlib_main_t * vm,
 	  vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+	    (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
 
 	  from += 2;
 	  to_next += 2;
@@ -291,7 +291,7 @@ ip6_lookup_inline (vlib_main_t * vm,
 	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 
 	  from += 1;
 	  to_next += 1;
@@ -703,7 +703,7 @@ ip6_load_balance (vlib_main_t * vm,
   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
   u32 n_left_from, n_left_to_next, *from, *to_next;
   ip_lookup_next_t next;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   ip6_main_t *im = &ip6_main;
 
   from = vlib_frame_vector_args (frame);
@@ -824,9 +824,9 @@ ip6_load_balance (vlib_main_t * vm,
 	  vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+	    (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
 
 	  vlib_validate_buffer_enqueue_x2 (vm, node, next,
 					   to_next, n_left_to_next,
@@ -886,7 +886,7 @@ ip6_load_balance (vlib_main_t * vm,
 	    }
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next,
 					   to_next, n_left_to_next,
@@ -1897,7 +1897,7 @@ ip6_rewrite_inline (vlib_main_t * vm,
 
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -2019,11 +2019,11 @@ ip6_rewrite_inline (vlib_main_t * vm,
 	    {
 	      vlib_increment_combined_counter
 		(&adjacency_counters,
-		 cpu_index, adj_index0, 1,
+		 thread_index, adj_index0, 1,
 		 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
 	      vlib_increment_combined_counter
 		(&adjacency_counters,
-		 cpu_index, adj_index1, 1,
+		 thread_index, adj_index1, 1,
 		 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
 	    }
 
@@ -2156,7 +2156,7 @@ ip6_rewrite_inline (vlib_main_t * vm,
 	    {
 	      vlib_increment_combined_counter
 		(&adjacency_counters,
-		 cpu_index, adj_index0, 1,
+		 thread_index, adj_index0, 1,
 		 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
 	    }
 
diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c
index 20306088..ffdc4727 100644
--- a/src/vnet/ip/ip6_input.c
+++ b/src/vnet/ip/ip6_input.c
@@ -82,7 +82,7 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
   vlib_node_runtime_t *error_node =
     vlib_node_get_runtime (vm, ip6_input_node.index);
   vlib_simple_counter_main_t *cm;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -171,8 +171,8 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 	  vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
 	  vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1);
 
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
 
 	  error0 = error1 = IP6_ERROR_NONE;
 
@@ -270,7 +270,7 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 	  vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
 	  vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
 
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
 	  error0 = IP6_ERROR_NONE;
 
 	  /* Version != 6?  Drop it. */
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
index 5d1fb6f8..2af546df 100644
--- a/src/vnet/ip/ip6_neighbor.c
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -581,7 +581,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
   u32 next_index;
   pending_resolution_t *pr, *mc;
 
-  if (os_get_cpu_number ())
+  if (vlib_get_thread_index ())
     {
       set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address,
 				  1 /* set new neighbor */ , is_static,
@@ -722,7 +722,7 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
   uword *p;
   int rv = 0;
 
-  if (os_get_cpu_number ())
+  if (vlib_get_thread_index ())
     {
       set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address,
 				  0 /* unset */ , 0, 0);
diff --git a/src/vnet/ipsec/esp.h b/src/vnet/ipsec/esp.h
index 50cac806..799003b9 100644
--- a/src/vnet/ipsec/esp.h
+++ b/src/vnet/ipsec/esp.h
@@ -282,8 +282,8 @@ hmac_calc (ipsec_integ_alg_t alg,
 	   u8 * data, int data_len, u8 * signature, u8 use_esn, u32 seq_hi)
 {
   esp_main_t *em = &esp_main;
-  u32 cpu_index = os_get_cpu_number ();
-  HMAC_CTX *ctx = &(em->per_thread_data[cpu_index].hmac_ctx);
+  u32 thread_index = vlib_get_thread_index ();
+  HMAC_CTX *ctx = &(em->per_thread_data[thread_index].hmac_ctx);
   const EVP_MD *md = NULL;
   unsigned int len;
 
@@ -292,10 +292,10 @@ hmac_calc (ipsec_integ_alg_t alg,
   if (PREDICT_FALSE (em->esp_integ_algs[alg].md == 0))
     return 0;
 
-  if (PREDICT_FALSE (alg != em->per_thread_data[cpu_index].last_integ_alg))
+  if (PREDICT_FALSE (alg != em->per_thread_data[thread_index].last_integ_alg))
     {
       md = em->esp_integ_algs[alg].md;
-      em->per_thread_data[cpu_index].last_integ_alg = alg;
+      em->per_thread_data[thread_index].last_integ_alg = alg;
     }
 
   HMAC_Init (ctx, key, key_len, md);
diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c
index 7289b260..925d2b45 100644
--- a/src/vnet/ipsec/esp_decrypt.c
+++ b/src/vnet/ipsec/esp_decrypt.c
@@ -85,8 +85,8 @@ esp_decrypt_aes_cbc (ipsec_crypto_alg_t alg,
 		     u8 * in, u8 * out, size_t in_len, u8 * key, u8 * iv)
 {
   esp_main_t *em = &esp_main;
-  u32 cpu_index = os_get_cpu_number ();
-  EVP_CIPHER_CTX *ctx = &(em->per_thread_data[cpu_index].decrypt_ctx);
+  u32 thread_index = vlib_get_thread_index ();
+  EVP_CIPHER_CTX *ctx = &(em->per_thread_data[thread_index].decrypt_ctx);
   const EVP_CIPHER *cipher = NULL;
   int out_len;
 
@@ -95,10 +95,11 @@ esp_decrypt_aes_cbc (ipsec_crypto_alg_t alg,
   if (PREDICT_FALSE (em->esp_crypto_algs[alg].type == 0))
     return;
 
-  if (PREDICT_FALSE (alg != em->per_thread_data[cpu_index].last_decrypt_alg))
+  if (PREDICT_FALSE
+      (alg != em->per_thread_data[thread_index].last_decrypt_alg))
     {
       cipher = em->esp_crypto_algs[alg].type;
-      em->per_thread_data[cpu_index].last_decrypt_alg = alg;
+      em->per_thread_data[thread_index].last_decrypt_alg = alg;
     }
 
   EVP_DecryptInit_ex (ctx, cipher, NULL, key, iv);
@@ -117,11 +118,11 @@ esp_decrypt_node_fn (vlib_main_t * vm,
   u32 *recycle = 0;
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   ipsec_alloc_empty_buffers (vm, im);
 
-  u32 *empty_buffers = im->empty_buffers[cpu_index];
+  u32 *empty_buffers = im->empty_buffers[thread_index];
 
   if (PREDICT_FALSE (vec_len (empty_buffers) < n_left_from))
     {
diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c
index 44ae2297..b2bc4e0b 100644
--- a/src/vnet/ipsec/esp_encrypt.c
+++ b/src/vnet/ipsec/esp_encrypt.c
@@ -88,8 +88,8 @@ esp_encrypt_aes_cbc (ipsec_crypto_alg_t alg,
 		     u8 * in, u8 * out, size_t in_len, u8 * key, u8 * iv)
 {
   esp_main_t *em = &esp_main;
-  u32 cpu_index = os_get_cpu_number ();
-  EVP_CIPHER_CTX *ctx = &(em->per_thread_data[cpu_index].encrypt_ctx);
+  u32 thread_index = vlib_get_thread_index ();
+  EVP_CIPHER_CTX *ctx = &(em->per_thread_data[thread_index].encrypt_ctx);
   const EVP_CIPHER *cipher = NULL;
   int out_len;
 
@@ -98,10 +98,11 @@ esp_encrypt_aes_cbc (ipsec_crypto_alg_t alg,
   if (PREDICT_FALSE (em->esp_crypto_algs[alg].type == IPSEC_CRYPTO_ALG_NONE))
     return;
 
-  if (PREDICT_FALSE (alg != em->per_thread_data[cpu_index].last_encrypt_alg))
+  if (PREDICT_FALSE
+      (alg != em->per_thread_data[thread_index].last_encrypt_alg))
     {
       cipher = em->esp_crypto_algs[alg].type;
-      em->per_thread_data[cpu_index].last_encrypt_alg = alg;
+      em->per_thread_data[thread_index].last_encrypt_alg = alg;
     }
 
   EVP_EncryptInit_ex (ctx, cipher, NULL, key, iv);
@@ -119,11 +120,11 @@ esp_encrypt_node_fn (vlib_main_t * vm,
   n_left_from = from_frame->n_vectors;
   ipsec_main_t *im = &ipsec_main;
   u32 *recycle = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   ipsec_alloc_empty_buffers (vm, im);
 
-  u32 *empty_buffers = im->empty_buffers[cpu_index];
+  u32 *empty_buffers = im->empty_buffers[thread_index];
 
   if (PREDICT_FALSE (vec_len (empty_buffers) < n_left_from))
     {
diff --git a/src/vnet/ipsec/ikev2.c b/src/vnet/ipsec/ikev2.c
index 2c1074d8..3f9978a7 100644
--- a/src/vnet/ipsec/ikev2.c
+++ b/src/vnet/ipsec/ikev2.c
@@ -303,16 +303,16 @@ static void
 ikev2_delete_sa (ikev2_sa_t * sa)
 {
   ikev2_main_t *km = &ikev2_main;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   uword *p;
 
   ikev2_sa_free_all_vec (sa);
 
-  p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi, sa->rspi);
+  p = hash_get (km->per_thread_data[thread_index].sa_by_rspi, sa->rspi);
   if (p)
     {
-      hash_unset (km->per_thread_data[cpu_index].sa_by_rspi, sa->rspi);
-      pool_put (km->per_thread_data[cpu_index].sas, sa);
+      hash_unset (km->per_thread_data[thread_index].sa_by_rspi, sa->rspi);
+      pool_put (km->per_thread_data[thread_index].sas, sa);
     }
 }
 
@@ -776,29 +776,31 @@ ikev2_initial_contact_cleanup (ikev2_sa_t * sa)
   ikev2_sa_t *tmp;
   u32 i, *delete = 0;
   ikev2_child_sa_t *c;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   if (!sa->initial_contact)
     return;
 
   /* find old IKE SAs with the same authenticated identity */
   /* *INDENT-OFF* */
-  pool_foreach (tmp, km->per_thread_data[cpu_index].sas, ({
+  pool_foreach (tmp, km->per_thread_data[thread_index].sas, ({
         if (tmp->i_id.type != sa->i_id.type ||
             vec_len(tmp->i_id.data) != vec_len(sa->i_id.data) ||
             memcmp(sa->i_id.data, tmp->i_id.data, vec_len(sa->i_id.data)))
           continue;
 
         if (sa->rspi != tmp->rspi)
-          vec_add1(delete, tmp - km->per_thread_data[cpu_index].sas);
+          vec_add1(delete, tmp - km->per_thread_data[thread_index].sas);
   }));
   /* *INDENT-ON* */
 
   for (i = 0; i < vec_len (delete); i++)
     {
-      tmp = pool_elt_at_index (km->per_thread_data[cpu_index].sas, delete[i]);
-      vec_foreach (c, tmp->childs)
-	ikev2_delete_tunnel_interface (km->vnet_main, tmp, c);
+      tmp =
+	pool_elt_at_index (km->per_thread_data[thread_index].sas, delete[i]);
+      vec_foreach (c,
+		   tmp->childs) ikev2_delete_tunnel_interface (km->vnet_main,
+							       tmp, c);
       ikev2_delete_sa (tmp);
     }
 
@@ -1922,10 +1924,10 @@ ikev2_retransmit_sa_init (ike_header_t * ike,
 {
   ikev2_main_t *km = &ikev2_main;
   ikev2_sa_t *sa;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   /* *INDENT-OFF* */
-  pool_foreach (sa, km->per_thread_data[cpu_index].sas, ({
+  pool_foreach (sa, km->per_thread_data[thread_index].sas, ({
     if (sa->ispi == clib_net_to_host_u64(ike->ispi) &&
         sa->iaddr.as_u32 == iaddr.as_u32 &&
         sa->raddr.as_u32 == raddr.as_u32)
@@ -2036,7 +2038,7 @@ ikev2_node_fn (vlib_main_t * vm,
   u32 n_left_from, *from, *to_next;
   ikev2_next_t next_index;
   ikev2_main_t *km = &ikev2_main;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -2134,11 +2136,14 @@ ikev2_node_fn (vlib_main_t * vm,
 		      if (sa0->state == IKEV2_STATE_SA_INIT)
 			{
 			  /* add SA to the pool */
-			  pool_get (km->per_thread_data[cpu_index].sas, sa0);
+			  pool_get (km->per_thread_data[thread_index].sas,
+				    sa0);
 			  clib_memcpy (sa0, &sa, sizeof (*sa0));
-			  hash_set (km->per_thread_data[cpu_index].sa_by_rspi,
+			  hash_set (km->
+				    per_thread_data[thread_index].sa_by_rspi,
 				    sa0->rspi,
-				    sa0 - km->per_thread_data[cpu_index].sas);
+				    sa0 -
+				    km->per_thread_data[thread_index].sas);
 			}
 		      else
 			{
@@ -2169,11 +2174,11 @@ ikev2_node_fn (vlib_main_t * vm,
 		  if (sa0->state == IKEV2_STATE_SA_INIT)
 		    {
 		      /* add SA to the pool */
-		      pool_get (km->per_thread_data[cpu_index].sas, sa0);
+		      pool_get (km->per_thread_data[thread_index].sas, sa0);
 		      clib_memcpy (sa0, &sa, sizeof (*sa0));
-		      hash_set (km->per_thread_data[cpu_index].sa_by_rspi,
+		      hash_set (km->per_thread_data[thread_index].sa_by_rspi,
 				sa0->rspi,
-				sa0 - km->per_thread_data[cpu_index].sas);
+				sa0 - km->per_thread_data[thread_index].sas);
 		    }
 		  else
 		    {
@@ -2184,12 +2189,13 @@ ikev2_node_fn (vlib_main_t * vm,
 	  else if (ike0->exchange == IKEV2_EXCHANGE_IKE_AUTH)
 	    {
 	      uword *p;
-	      p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi,
+	      p = hash_get (km->per_thread_data[thread_index].sa_by_rspi,
 			    clib_net_to_host_u64 (ike0->rspi));
 	      if (p)
 		{
-		  sa0 = pool_elt_at_index (km->per_thread_data[cpu_index].sas,
-					   p[0]);
+		  sa0 =
+		    pool_elt_at_index (km->per_thread_data[thread_index].sas,
+				       p[0]);
 
 		  r = ikev2_retransmit_resp (sa0, ike0);
 		  if (r == 1)
@@ -2240,12 +2246,13 @@ ikev2_node_fn (vlib_main_t * vm,
 	  else if (ike0->exchange == IKEV2_EXCHANGE_INFORMATIONAL)
 	    {
 	      uword *p;
-	      p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi,
+	      p = hash_get (km->per_thread_data[thread_index].sa_by_rspi,
 			    clib_net_to_host_u64 (ike0->rspi));
 	      if (p)
 		{
-		  sa0 = pool_elt_at_index (km->per_thread_data[cpu_index].sas,
-					   p[0]);
+		  sa0 =
+		    pool_elt_at_index (km->per_thread_data[thread_index].sas,
+				       p[0]);
 
 		  r = ikev2_retransmit_resp (sa0, ike0);
 		  if (r == 1)
@@ -2305,12 +2312,13 @@ ikev2_node_fn (vlib_main_t * vm,
 	  else if (ike0->exchange == IKEV2_EXCHANGE_CREATE_CHILD_SA)
 	    {
 	      uword *p;
-	      p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi,
+	      p = hash_get (km->per_thread_data[thread_index].sa_by_rspi,
 			    clib_net_to_host_u64 (ike0->rspi));
 	      if (p)
 		{
-		  sa0 = pool_elt_at_index (km->per_thread_data[cpu_index].sas,
-					   p[0]);
+		  sa0 =
+		    pool_elt_at_index (km->per_thread_data[thread_index].sas,
+				       p[0]);
 
 		  r = ikev2_retransmit_resp (sa0, ike0);
 		  if (r == 1)
diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h
index 58f0f145..c884e360 100644
--- a/src/vnet/ipsec/ipsec.h
+++ b/src/vnet/ipsec/ipsec.h
@@ -324,21 +324,21 @@ int ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index,
 always_inline void
 ipsec_alloc_empty_buffers (vlib_main_t * vm, ipsec_main_t * im)
 {
-  u32 cpu_index = os_get_cpu_number ();
-  uword l = vec_len (im->empty_buffers[cpu_index]);
+  u32 thread_index = vlib_get_thread_index ();
+  uword l = vec_len (im->empty_buffers[thread_index]);
   uword n_alloc = 0;
 
   if (PREDICT_FALSE (l < VLIB_FRAME_SIZE))
     {
-      if (!im->empty_buffers[cpu_index])
+      if (!im->empty_buffers[thread_index])
 	{
-	  vec_alloc (im->empty_buffers[cpu_index], 2 * VLIB_FRAME_SIZE);
+	  vec_alloc (im->empty_buffers[thread_index], 2 * VLIB_FRAME_SIZE);
 	}
 
-      n_alloc = vlib_buffer_alloc (vm, im->empty_buffers[cpu_index] + l,
+      n_alloc = vlib_buffer_alloc (vm, im->empty_buffers[thread_index] + l,
 				   2 * VLIB_FRAME_SIZE - l);
 
-      _vec_len (im->empty_buffers[cpu_index]) = l + n_alloc;
+      _vec_len (im->empty_buffers[thread_index]) = l + n_alloc;
     }
 }
 
diff --git a/src/vnet/ipsec/ipsec_if.c b/src/vnet/ipsec/ipsec_if.c
index dc882004..ed124894 100644
--- a/src/vnet/ipsec/ipsec_if.c
+++ b/src/vnet/ipsec/ipsec_if.c
@@ -99,7 +99,7 @@ static int
 ipsec_add_del_tunnel_if_rpc_callback (ipsec_add_del_tunnel_args_t * a)
 {
   vnet_main_t *vnm = vnet_get_main ();
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   return ipsec_add_del_tunnel_if_internal (vnm, a);
 }
diff --git a/src/vnet/l2/l2_bvi.h b/src/vnet/l2/l2_bvi.h
index dd1130a6..e21a1616 100644
--- a/src/vnet/l2/l2_bvi.h
+++ b/src/vnet/l2/l2_bvi.h
@@ -97,7 +97,7 @@ l2_to_bvi (vlib_main_t * vlib_main,
   vlib_increment_combined_counter
     (vnet_main->interface_main.combined_sw_if_counters
      + VNET_INTERFACE_COUNTER_RX,
-     vlib_main->cpu_index,
+     vlib_main->thread_index,
      vnet_buffer (b0)->sw_if_index[VLIB_RX],
      1, vlib_buffer_length_in_chain (vlib_main, b0));
   return TO_BVI_ERR_OK;
diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c
index 041ff38d..e5d6878a 100644
--- a/src/vnet/l2/l2_input.c
+++ b/src/vnet/l2/l2_input.c
@@ -117,7 +117,7 @@ typedef enum
 static_always_inline void
 classify_and_dispatch (vlib_main_t * vm,
 		       vlib_node_runtime_t * node,
-		       u32 cpu_index,
+		       u32 thread_index,
 		       l2input_main_t * msm, vlib_buffer_t * b0, u32 * next0)
 {
   /*
@@ -237,7 +237,7 @@ l2input_node_inline (vlib_main_t * vm,
   u32 n_left_from, *from, *to_next;
   l2input_next_t next_index;
   l2input_main_t *msm = &l2input_main;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;	/* number of packets to process */
@@ -350,10 +350,10 @@ l2input_node_inline (vlib_main_t * vm,
 	  vlib_node_increment_counter (vm, l2input_node.index,
 				       L2INPUT_ERROR_L2INPUT, 4);
 
-	  classify_and_dispatch (vm, node, cpu_index, msm, b0, &next0);
-	  classify_and_dispatch (vm, node, cpu_index, msm, b1, &next1);
-	  classify_and_dispatch (vm, node, cpu_index, msm, b2, &next2);
-	  classify_and_dispatch (vm, node, cpu_index, msm, b3, &next3);
+	  classify_and_dispatch (vm, node, thread_index, msm, b0, &next0);
+	  classify_and_dispatch (vm, node, thread_index, msm, b1, &next1);
+	  classify_and_dispatch (vm, node, thread_index, msm, b2, &next2);
+	  classify_and_dispatch (vm, node, thread_index, msm, b3, &next3);
 
 	  /* verify speculative enqueues, maybe switch current next frame */
 	  /* if next0==next1==next_index then nothing special needs to be done */
@@ -393,7 +393,7 @@ l2input_node_inline (vlib_main_t * vm,
 	  vlib_node_increment_counter (vm, l2input_node.index,
 				       L2INPUT_ERROR_L2INPUT, 1);
 
-	  classify_and_dispatch (vm, node, cpu_index, msm, b0, &next0);
+	  classify_and_dispatch (vm, node, thread_index, msm, b0, &next0);
 
 	  /* verify speculative enqueue, maybe switch current next frame */
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c
index 00f22571..e17b2a16 100644
--- a/src/vnet/l2/l2_output.c
+++ b/src/vnet/l2/l2_output.c
@@ -643,11 +643,11 @@ l2output_create_output_node_mapping (vlib_main_t * vlib_main, vnet_main_t * vnet
 
   hw0 = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
 
-  uword cpu_number;
+  uword thread_index;
 
-  cpu_number = os_get_cpu_number ();
+  thread_index = vlib_get_thread_index ();
 
-  if (cpu_number)
+  if (thread_index)
     {
       u32 oldflags;
 
diff --git a/src/vnet/l2tp/decap.c b/src/vnet/l2tp/decap.c
index e8986935..46104129 100644
--- a/src/vnet/l2tp/decap.c
+++ b/src/vnet/l2tp/decap.c
@@ -149,7 +149,7 @@ last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi)
 
   /* per-mapping byte stats include the ethernet header */
   vlib_increment_combined_counter (&lm->counter_main,
-				   os_get_cpu_number (),
+				   vlib_get_thread_index (),
 				   counter_index, 1 /* packet_increment */ ,
 				   vlib_buffer_length_in_chain (vm, b) +
 				   sizeof (ethernet_header_t));
diff --git a/src/vnet/l2tp/encap.c b/src/vnet/l2tp/encap.c
index ed7a9580..dcdfde4b 100644
--- a/src/vnet/l2tp/encap.c
+++ b/src/vnet/l2tp/encap.c
@@ -124,7 +124,7 @@ last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi)
 
   /* per-mapping byte stats include the ethernet header */
   vlib_increment_combined_counter (&lm->counter_main,
-				   os_get_cpu_number (),
+				   vlib_get_thread_index (),
 				   counter_index, 1 /* packet_increment */ ,
 				   vlib_buffer_length_in_chain (vm, b));
 
diff --git a/src/vnet/l2tp/l2tp.c b/src/vnet/l2tp/l2tp.c
index cb94d7e7..3dedc447 100644
--- a/src/vnet/l2tp/l2tp.c
+++ b/src/vnet/l2tp/l2tp.c
@@ -157,7 +157,7 @@ test_counters_command_fn (vlib_main_t * vm,
   u32 session_index;
   u32 counter_index;
   u32 nincr = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   /* *INDENT-OFF* */
   pool_foreach (session, lm->sessions,
@@ -167,11 +167,11 @@ test_counters_command_fn (vlib_main_t * vm,
       session_index_to_counter_index (session_index,
                                       SESSION_COUNTER_USER_TO_NETWORK);
     vlib_increment_combined_counter (&lm->counter_main,
-                                     cpu_index,
+                                     thread_index,
                                      counter_index,
                                      1/*pkt*/, 1111 /*bytes*/);
     vlib_increment_combined_counter (&lm->counter_main,
-                                     cpu_index,
+                                     thread_index,
                                      counter_index+1,
                                      1/*pkt*/, 2222 /*bytes*/);
     nincr++;
diff --git a/src/vnet/lisp-gpe/decap.c b/src/vnet/lisp-gpe/decap.c
index d887a95f..68769710 100644
--- a/src/vnet/lisp-gpe/decap.c
+++ b/src/vnet/lisp-gpe/decap.c
@@ -103,7 +103,7 @@ next_index_to_iface (lisp_gpe_main_t * lgm, u32 next_index)
 }
 
 static_always_inline void
-incr_decap_stats (vnet_main_t * vnm, u32 cpu_index, u32 length,
+incr_decap_stats (vnet_main_t * vnm, u32 thread_index, u32 length,
 		  u32 sw_if_index, u32 * last_sw_if_index, u32 * n_packets,
 		  u32 * n_bytes)
 {
@@ -122,7 +122,7 @@ incr_decap_stats (vnet_main_t * vnm, u32 cpu_index, u32 length,
 
 	  vlib_increment_combined_counter (im->combined_sw_if_counters +
 					   VNET_INTERFACE_COUNTER_RX,
-					   cpu_index, *last_sw_if_index,
+					   thread_index, *last_sw_if_index,
 					   *n_packets, *n_bytes);
 	}
       *last_sw_if_index = sw_if_index;
@@ -150,11 +150,11 @@ static uword
 lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 		       vlib_frame_t * from_frame, u8 is_v4)
 {
-  u32 n_left_from, next_index, *from, *to_next, cpu_index;
+  u32 n_left_from, next_index, *from, *to_next, thread_index;
   u32 n_bytes = 0, n_packets = 0, last_sw_if_index = ~0, drops = 0;
   lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
 
-  cpu_index = os_get_cpu_number ();
+  thread_index = vlib_get_thread_index ();
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
 
@@ -267,7 +267,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 
 	  if (si0)
 	    {
-	      incr_decap_stats (lgm->vnet_main, cpu_index,
+	      incr_decap_stats (lgm->vnet_main, thread_index,
 				vlib_buffer_length_in_chain (vm, b0), si0[0],
 				&last_sw_if_index, &n_packets, &n_bytes);
 	      vnet_buffer (b0)->sw_if_index[VLIB_RX] = si0[0];
@@ -282,7 +282,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 
 	  if (si1)
 	    {
-	      incr_decap_stats (lgm->vnet_main, cpu_index,
+	      incr_decap_stats (lgm->vnet_main, thread_index,
 				vlib_buffer_length_in_chain (vm, b1), si1[0],
 				&last_sw_if_index, &n_packets, &n_bytes);
 	      vnet_buffer (b1)->sw_if_index[VLIB_RX] = si1[0];
@@ -397,7 +397,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 
 	  if (si0)
 	    {
-	      incr_decap_stats (lgm->vnet_main, cpu_index,
+	      incr_decap_stats (lgm->vnet_main, thread_index,
 				vlib_buffer_length_in_chain (vm, b0), si0[0],
 				&last_sw_if_index, &n_packets, &n_bytes);
 	      vnet_buffer (b0)->sw_if_index[VLIB_RX] = si0[0];
@@ -430,7 +430,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
     }
 
   /* flush iface stats */
-  incr_decap_stats (lgm->vnet_main, cpu_index, 0, ~0, &last_sw_if_index,
+  incr_decap_stats (lgm->vnet_main, thread_index, 0, ~0, &last_sw_if_index,
 		    &n_packets, &n_bytes);
   vlib_node_increment_counter (vm, lisp_gpe_ip4_input_node.index,
 			       LISP_GPE_ERROR_NO_TUNNEL, drops);
diff --git a/src/vnet/lldp/lldp_input.c b/src/vnet/lldp/lldp_input.c
index 762743d0..e88f6fdb 100644
--- a/src/vnet/lldp/lldp_input.c
+++ b/src/vnet/lldp/lldp_input.c
@@ -35,7 +35,7 @@ typedef struct
 static void
 lldp_rpc_update_peer_cb (const lldp_intf_update_t * a)
 {
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   lldp_intf_t *n = lldp_get_intf (&lldp_main, a->hw_if_index);
   if (!n)
diff --git a/src/vnet/map/ip4_map.c b/src/vnet/map/ip4_map.c
index 1a20d704..e39b6f14 100644
--- a/src/vnet/map/ip4_map.c
+++ b/src/vnet/map/ip4_map.c
@@ -248,7 +248,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
   next_index = node->cached_next_index;
   map_main_t *mm = &map_main;
   vlib_combined_counter_main_t *cm = mm->domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -377,7 +377,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 					       ip40) ?
 		    IP4_MAP_NEXT_IP6_REWRITE : next0;
 		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-						   cpu_index,
+						   thread_index,
 						   map_domain_index0, 1,
 						   clib_net_to_host_u16
 						   (ip6h0->payload_length) +
@@ -409,7 +409,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 					       ip41) ?
 		    IP4_MAP_NEXT_IP6_REWRITE : next1;
 		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-						   cpu_index,
+						   thread_index,
 						   map_domain_index1, 1,
 						   clib_net_to_host_u16
 						   (ip6h1->payload_length) +
@@ -520,7 +520,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 					       ip40) ?
 		    IP4_MAP_NEXT_IP6_REWRITE : next0;
 		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-						   cpu_index,
+						   thread_index,
 						   map_domain_index0, 1,
 						   clib_net_to_host_u16
 						   (ip6h0->payload_length) +
@@ -564,7 +564,7 @@ ip4_map_reass (vlib_main_t * vm,
   next_index = node->cached_next_index;
   map_main_t *mm = &map_main;
   vlib_combined_counter_main_t *cm = mm->domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 *fragments_to_drop = NULL;
   u32 *fragments_to_loopback = NULL;
 
@@ -694,8 +694,8 @@ ip4_map_reass (vlib_main_t * vm,
 	    {
 	      if (error0 == MAP_ERROR_NONE)
 		vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-						 cpu_index, map_domain_index0,
-						 1,
+						 thread_index,
+						 map_domain_index0, 1,
 						 clib_net_to_host_u16
 						 (ip60->payload_length) + 40);
 	      next0 =
diff --git a/src/vnet/map/ip4_map_t.c b/src/vnet/map/ip4_map_t.c
index b63d76bf..5f2bcbf9 100644
--- a/src/vnet/map/ip4_map_t.c
+++ b/src/vnet/map/ip4_map_t.c
@@ -477,7 +477,7 @@ ip4_map_t_icmp (vlib_main_t * vm,
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
   vlib_combined_counter_main_t *cm = map_main.domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -520,7 +520,7 @@ ip4_map_t_icmp (vlib_main_t * vm,
 	  if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->map_t.
 					       map_domain_index, 1, len0);
 	    }
@@ -1051,7 +1051,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
   vlib_combined_counter_main_t *cm = map_main.domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -1158,7 +1158,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 	      (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->map_t.
 					       map_domain_index, 1,
 					       clib_net_to_host_u16 (ip40->
@@ -1169,7 +1169,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 	      (error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p1)->map_t.
 					       map_domain_index, 1,
 					       clib_net_to_host_u16 (ip41->
@@ -1252,7 +1252,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 	      (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->map_t.
 					       map_domain_index, 1,
 					       clib_net_to_host_u16 (ip40->
diff --git a/src/vnet/map/ip6_map.c b/src/vnet/map/ip6_map.c
index f7eb768f..63ada962 100644
--- a/src/vnet/map/ip6_map.c
+++ b/src/vnet/map/ip6_map.c
@@ -172,7 +172,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
     vlib_node_get_runtime (vm, ip6_map_node.index);
   map_main_t *mm = &map_main;
   vlib_combined_counter_main_t *cm = mm->domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -319,7 +319,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 			IP6_MAP_NEXT_IP4_REWRITE : next0;
 		    }
 		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-						   cpu_index,
+						   thread_index,
 						   map_domain_index0, 1,
 						   clib_net_to_host_u16
 						   (ip40->length));
@@ -352,7 +352,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 			IP6_MAP_NEXT_IP4_REWRITE : next1;
 		    }
 		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-						   cpu_index,
+						   thread_index,
 						   map_domain_index1, 1,
 						   clib_net_to_host_u16
 						   (ip41->length));
@@ -505,7 +505,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 			IP6_MAP_NEXT_IP4_REWRITE : next0;
 		    }
 		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-						   cpu_index,
+						   thread_index,
 						   map_domain_index0, 1,
 						   clib_net_to_host_u16
 						   (ip40->length));
@@ -820,7 +820,7 @@ ip6_map_ip4_reass (vlib_main_t * vm,
     vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index);
   map_main_t *mm = &map_main;
   vlib_combined_counter_main_t *cm = mm->domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 *fragments_to_drop = NULL;
   u32 *fragments_to_loopback = NULL;
 
@@ -958,8 +958,8 @@ ip6_map_ip4_reass (vlib_main_t * vm,
 	    {
 	      if (error0 == MAP_ERROR_NONE)
 		vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-						 cpu_index, map_domain_index0,
-						 1,
+						 thread_index,
+						 map_domain_index0, 1,
 						 clib_net_to_host_u16
 						 (ip40->length));
 	      next0 =
@@ -1015,7 +1015,7 @@ ip6_map_icmp_relay (vlib_main_t * vm,
   vlib_node_runtime_t *error_node =
     vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index);
   map_main_t *mm = &map_main;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u16 *fragment_ids, *fid;
 
   from = vlib_frame_vector_args (frame);
@@ -1143,7 +1143,8 @@ ip6_map_icmp_relay (vlib_main_t * vm,
 	  ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20);
 	  new_icmp40->checksum = ~ip_csum_fold (sum);
 
-	  vlib_increment_simple_counter (&mm->icmp_relayed, cpu_index, 0, 1);
+	  vlib_increment_simple_counter (&mm->icmp_relayed, thread_index, 0,
+					 1);
 
 	error:
 	  if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
diff --git a/src/vnet/map/ip6_map_t.c b/src/vnet/map/ip6_map_t.c
index eb3996c2..99151678 100644
--- a/src/vnet/map/ip6_map_t.c
+++ b/src/vnet/map/ip6_map_t.c
@@ -448,7 +448,7 @@ ip6_map_t_icmp (vlib_main_t * vm,
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
   vlib_combined_counter_main_t *cm = map_main.domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -493,7 +493,7 @@ ip6_map_t_icmp (vlib_main_t * vm,
 	  if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->
 					       map_t.map_domain_index, 1,
 					       len0);
@@ -1051,7 +1051,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
   vlib_node_runtime_t *error_node =
     vlib_node_get_runtime (vm, ip6_map_t_node.index);
   vlib_combined_counter_main_t *cm = map_main.domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -1218,7 +1218,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 	      (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->
 					       map_t.map_domain_index, 1,
 					       clib_net_to_host_u16
@@ -1229,7 +1229,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 	      (error1 == MAP_ERROR_NONE && next1 != IP6_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p1)->
 					       map_t.map_domain_index, 1,
 					       clib_net_to_host_u16
@@ -1403,7 +1403,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 	      (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->
 					       map_t.map_domain_index, 1,
 					       clib_net_to_host_u16
diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c
index 893c4511..1b9bdd05 100644
--- a/src/vnet/mpls/mpls_input.c
+++ b/src/vnet/mpls/mpls_input.c
@@ -76,7 +76,7 @@ mpls_input_inline (vlib_main_t * vm,
   u32 n_left_from, next_index, * from, * to_next;
   mpls_input_runtime_t * rt;
   mpls_main_t * mm;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   vlib_simple_counter_main_t * cm;
   vnet_main_t * vnm = vnet_get_main();
 
@@ -151,7 +151,7 @@ mpls_input_inline (vlib_main_t * vm,
               next0 = MPLS_INPUT_NEXT_LOOKUP;
               vnet_feature_arc_start(mm->input_feature_arc_index,
                                      sw_if_index0, &next0, b0);
-              vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+              vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
           }
 
           if (PREDICT_FALSE(h1[3] == 0))
@@ -164,7 +164,7 @@ mpls_input_inline (vlib_main_t * vm,
               next1 = MPLS_INPUT_NEXT_LOOKUP;
               vnet_feature_arc_start(mm->input_feature_arc_index,
                                      sw_if_index1, &next1, b1);
-              vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+              vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
           }
 
           if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -215,7 +215,7 @@ mpls_input_inline (vlib_main_t * vm,
             {
               next0 = MPLS_INPUT_NEXT_LOOKUP;
 	      vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0);
-              vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+              vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
             }
 
           if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) 
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index 475bb204..ace6a70f 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -67,7 +67,7 @@ mpls_lookup (vlib_main_t * vm,
   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
   u32 n_left_from, next_index, * from, * to_next;
   mpls_main_t * mm = &mpls_main;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -220,16 +220,16 @@ mpls_lookup (vlib_main_t * vm,
           vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
 
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi0, 1,
+              (cm, thread_index, lbi0, 1,
                vlib_buffer_length_in_chain (vm, b0));
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi1, 1,
+              (cm, thread_index, lbi1, 1,
                vlib_buffer_length_in_chain (vm, b1));
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi2, 1,
+              (cm, thread_index, lbi2, 1,
                vlib_buffer_length_in_chain (vm, b2));
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi3, 1,
+              (cm, thread_index, lbi3, 1,
                vlib_buffer_length_in_chain (vm, b3));
 
           /*
@@ -351,7 +351,7 @@ mpls_lookup (vlib_main_t * vm,
           vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi0, 1,
+              (cm, thread_index, lbi0, 1,
                vlib_buffer_length_in_chain (vm, b0));
 
           /*
@@ -440,7 +440,7 @@ mpls_load_balance (vlib_main_t * vm,
 {
   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
   u32 n_left_from, n_left_to_next, * from, * to_next;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   u32 next;
 
   from = vlib_frame_vector_args (frame);
@@ -536,10 +536,10 @@ mpls_load_balance (vlib_main_t * vm,
           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi0, 1,
+              (cm, thread_index, lbi0, 1,
                vlib_buffer_length_in_chain (vm, p0));
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi1, 1,
+              (cm, thread_index, lbi1, 1,
                vlib_buffer_length_in_chain (vm, p1));
 
           if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
@@ -597,7 +597,7 @@ mpls_load_balance (vlib_main_t * vm,
           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi0, 1,
+              (cm, thread_index, lbi0, 1,
                vlib_buffer_length_in_chain (vm, p0));
 
           vlib_validate_buffer_enqueue_x1 (vm, node, next,
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index 08018fd1..d90dec21 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -64,12 +64,12 @@ mpls_output_inline (vlib_main_t * vm,
                     vlib_frame_t * from_frame,
 		    int is_midchain)
 {
-  u32 n_left_from, next_index, * from, * to_next, cpu_index;
+  u32 n_left_from, next_index, * from, * to_next, thread_index;
   vlib_node_runtime_t * error_node;
   u32 n_left_to_next;
   mpls_main_t *mm;
 
-  cpu_index = os_get_cpu_number();
+  thread_index = vlib_get_thread_index();
   error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -137,13 +137,13 @@ mpls_output_inline (vlib_main_t * vm,
           /* Bump the adj counters for packet and bytes */
           vlib_increment_combined_counter
               (&adjacency_counters,
-               cpu_index,
+               thread_index,
                adj_index0,
                1,
                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
           vlib_increment_combined_counter
               (&adjacency_counters,
-               cpu_index,
+               thread_index,
                adj_index1,
                1,
                vlib_buffer_length_in_chain (vm, p1) + rw_len1);
@@ -245,7 +245,7 @@ mpls_output_inline (vlib_main_t * vm,
           
           vlib_increment_combined_counter
               (&adjacency_counters,
-               cpu_index,
+               thread_index,
                adj_index0,
                1,
                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c
index 2649798b..597ae060 100644
--- a/src/vnet/pg/input.c
+++ b/src/vnet/pg/input.c
@@ -893,7 +893,7 @@ pg_generate_set_lengths (pg_main_t * pg,
 
     vlib_increment_combined_counter (im->combined_sw_if_counters
 				     + VNET_INTERFACE_COUNTER_RX,
-				     os_get_cpu_number (),
+				     vlib_get_thread_index (),
 				     si->sw_if_index, n_buffers, length_sum);
   }
 
@@ -1266,7 +1266,7 @@ pg_stream_fill_helper (pg_main_t * pg,
 	    l += vlib_buffer_index_length_in_chain (vm, buffers[i]);
 	  vlib_increment_combined_counter (im->combined_sw_if_counters
 					   + VNET_INTERFACE_COUNTER_RX,
-					   os_get_cpu_number (),
+					   vlib_get_thread_index (),
 					   si->sw_if_index, n_alloc, l);
 	  s->current_replay_packet_index += n_alloc;
 	  s->current_replay_packet_index %=
diff --git a/src/vnet/replication.c b/src/vnet/replication.c
index 86d922b5..233a8c2f 100644
--- a/src/vnet/replication.c
+++ b/src/vnet/replication.c
@@ -31,16 +31,16 @@ replication_prep (vlib_main_t * vm,
 {
   replication_main_t *rm = &replication_main;
   replication_context_t *ctx;
-  uword cpu_number = vm->cpu_index;
+  uword thread_index = vm->thread_index;
   ip4_header_t *ip;
   u32 ctx_id;
 
   /* Allocate a context, reserve context 0 */
-  if (PREDICT_FALSE (rm->contexts[cpu_number] == 0))
-    pool_get_aligned (rm->contexts[cpu_number], ctx, CLIB_CACHE_LINE_BYTES);
+  if (PREDICT_FALSE (rm->contexts[thread_index] == 0))
+    pool_get_aligned (rm->contexts[thread_index], ctx, CLIB_CACHE_LINE_BYTES);
 
-  pool_get_aligned (rm->contexts[cpu_number], ctx, CLIB_CACHE_LINE_BYTES);
-  ctx_id = ctx - rm->contexts[cpu_number];
+  pool_get_aligned (rm->contexts[thread_index], ctx, CLIB_CACHE_LINE_BYTES);
+  ctx_id = ctx - rm->contexts[thread_index];
 
   /* Save state from vlib buffer */
   ctx->saved_free_list_index = b0->free_list_index;
@@ -94,11 +94,11 @@ replication_recycle (vlib_main_t * vm, vlib_buffer_t * b0, u32 is_last)
 {
   replication_main_t *rm = &replication_main;
   replication_context_t *ctx;
-  uword cpu_number = vm->cpu_index;
+  uword thread_index = vm->thread_index;
   ip4_header_t *ip;
 
   /* Get access to the replication context */
-  ctx = pool_elt_at_index (rm->contexts[cpu_number], b0->recycle_count);
+  ctx = pool_elt_at_index (rm->contexts[thread_index], b0->recycle_count);
 
   /* Restore vnet buffer state */
   clib_memcpy (vnet_buffer (b0), ctx->vnet_buffer,
@@ -133,7 +133,7 @@ replication_recycle (vlib_main_t * vm, vlib_buffer_t * b0, u32 is_last)
       b0->flags &= ~VLIB_BUFFER_RECYCLE;
 
       /* Free context back to its pool */
-      pool_put (rm->contexts[cpu_number], ctx);
+      pool_put (rm->contexts[thread_index], ctx);
     }
 
   return ctx;
@@ -160,7 +160,7 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl)
   replication_main_t *rm = &replication_main;
   replication_context_t *ctx;
   u32 feature_node_index = 0;
-  uword cpu_number = vm->cpu_index;
+  uword thread_index = vm->thread_index;
 
   /*
    * All buffers in the list are destined to the same recycle node.
@@ -172,7 +172,7 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl)
     {
       bi0 = fl->buffers[0];
       b0 = vlib_get_buffer (vm, bi0);
-      ctx = pool_elt_at_index (rm->contexts[cpu_number], b0->recycle_count);
+      ctx = pool_elt_at_index (rm->contexts[thread_index], b0->recycle_count);
       feature_node_index = ctx->recycle_node_index;
     }
 
diff --git a/src/vnet/replication.h b/src/vnet/replication.h
index 5dc554c9..ce4b3ff1 100644
--- a/src/vnet/replication.h
+++ b/src/vnet/replication.h
@@ -100,7 +100,7 @@ replication_get_ctx (vlib_buffer_t * b0)
   replication_main_t *rm = &replication_main;
 
   return replication_is_recycled (b0) ?
-    pool_elt_at_index (rm->contexts[os_get_cpu_number ()],
+    pool_elt_at_index (rm->contexts[vlib_get_thread_index ()],
 		       b0->recycle_count) : 0;
 }
 
diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c
index b86e87d9..dd211c51 100644
--- a/src/vnet/session/node.c
+++ b/src/vnet/session/node.c
@@ -311,7 +311,7 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   unix_shared_memory_queue_t *q;
   application_t *app;
   int n_tx_packets = 0;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
   int i, rv;
   f64 now = vlib_time_now (vm);
 
diff --git a/src/vnet/sr/sr_localsid.c b/src/vnet/sr/sr_localsid.c
index 2e3d56de..6d72a506 100755
--- a/src/vnet/sr/sr_localsid.c
+++ b/src/vnet/sr/sr_localsid.c
@@ -887,7 +887,7 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
   next_index = node->cached_next_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -974,26 +974,26 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 	  vlib_increment_combined_counter
 	    (((next0 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b0));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b0));
 
 	  vlib_increment_combined_counter
 	    (((next1 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls1 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b1));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b1));
 
 	  vlib_increment_combined_counter
 	    (((next2 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls2 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b2));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b2));
 
 	  vlib_increment_combined_counter
 	    (((next3 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls3 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b3));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b3));
 
 	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
 					   n_left_to_next, bi0, bi1, bi2, bi3,
@@ -1062,8 +1062,8 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 	  vlib_increment_combined_counter
 	    (((next0 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b0));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b0));
 
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
 					   n_left_to_next, bi0, next0);
@@ -1103,7 +1103,7 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
   next_index = node->cached_next_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -1205,26 +1205,26 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 	  vlib_increment_combined_counter
 	    (((next0 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b0));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b0));
 
 	  vlib_increment_combined_counter
 	    (((next1 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls1 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b1));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b1));
 
 	  vlib_increment_combined_counter
 	    (((next2 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls2 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b2));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b2));
 
 	  vlib_increment_combined_counter
 	    (((next3 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls3 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b3));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b3));
 
 	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
 					   n_left_to_next, bi0, bi1, bi2, bi3,
@@ -1295,8 +1295,8 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 	  vlib_increment_combined_counter
 	    (((next0 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b0));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b0));
 
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
 					   n_left_to_next, bi0, next0);
diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c
index e3705060..c1567aa0 100644
--- a/src/vnet/tcp/builtin_client.c
+++ b/src/vnet/tcp/builtin_client.c
@@ -174,7 +174,7 @@ tclient_thread_fn (void *arg)
     pthread_sigmask (SIG_SETMASK, &s, 0);
   }
 
-  clib_per_cpu_mheaps[os_get_cpu_number ()] = clib_per_cpu_mheaps[0];
+  clib_per_cpu_mheaps[vlib_get_thread_index ()] = clib_per_cpu_mheaps[0];
 
   while (1)
     {
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index b2a371e2..b6c34828 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -646,10 +646,10 @@ const static transport_proto_vft_t tcp6_proto = {
 void
 tcp_timer_keep_handler (u32 conn_index)
 {
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
 
-  tc = tcp_connection_get (conn_index, cpu_index);
+  tc = tcp_connection_get (conn_index, thread_index);
   tc->timers[TCP_TIMER_KEEP] = TCP_TIMER_HANDLE_INVALID;
 
   tcp_connection_close (tc);
@@ -675,10 +675,10 @@ tcp_timer_establish_handler (u32 conn_index)
 void
 tcp_timer_waitclose_handler (u32 conn_index)
 {
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
 
-  tc = tcp_connection_get (conn_index, cpu_index);
+  tc = tcp_connection_get (conn_index, thread_index);
   tc->timers[TCP_TIMER_WAITCLOSE] = TCP_TIMER_HANDLE_INVALID;
 
   /* Session didn't come back with a close(). Send FIN either way
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index 0090e15e..eaca672c 100644
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -343,7 +343,7 @@ typedef enum _tcp_dbg_evt
     }                                                           	\
   else                                                          	\
     {                                                           	\
-      u32 _thread_index = os_get_cpu_number ();                 	\
+      u32 _thread_index = vlib_get_thread_index ();                 	\
       _tc = tcp_connection_get (_tc_index, _thread_index);      	\
     }                                                           	\
   ELOG_TYPE_DECLARE (_e) =                                      	\
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index a8224dc2..7e9fa47b 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -1142,7 +1142,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 			  vlib_frame_t * from_frame, int is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index, errors = 0;
+  u32 my_thread_index = vm->thread_index, errors = 0;
   tcp_main_t *tm = vnet_get_tcp_main ();
 
   from = vlib_frame_vector_args (from_frame);
@@ -1332,7 +1332,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index, errors = 0;
+  u32 my_thread_index = vm->thread_index, errors = 0;
   u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
 
   from = vlib_frame_vector_args (from_frame);
@@ -1634,7 +1634,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index, errors = 0;
+  u32 my_thread_index = vm->thread_index, errors = 0;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -1989,7 +1989,7 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 		     vlib_frame_t * from_frame, int is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
   tcp_main_t *tm = vnet_get_tcp_main ();
   u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
 
@@ -2243,7 +2243,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 		    vlib_frame_t * from_frame, int is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
   tcp_main_t *tm = vnet_get_tcp_main ();
 
   from = vlib_frame_vector_args (from_frame);
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index ea157bd7..e18bfad7 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -387,8 +387,8 @@ tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts,
 #define tcp_get_free_buffer_index(tm, bidx)                             \
 do {                                                                    \
   u32 *my_tx_buffers, n_free_buffers;                                   \
-  u32 cpu_index = os_get_cpu_number();                             	\
-  my_tx_buffers = tm->tx_buffers[cpu_index];                            \
+  u32 thread_index = vlib_get_thread_index();                             	\
+  my_tx_buffers = tm->tx_buffers[thread_index];                            \
   if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0))                      \
     {                                                                   \
       n_free_buffers = 32;      /* TODO config or macro */              \
@@ -396,7 +396,7 @@ do {                                                                    \
       _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list (      \
           tm->vlib_main, my_tx_buffers, n_free_buffers,                 \
           VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);                         \
-      tm->tx_buffers[cpu_index] = my_tx_buffers;                        \
+      tm->tx_buffers[thread_index] = my_tx_buffers;                        \
     }                                                                   \
   /* buffer shortage */                                                 \
   if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0))                     \
@@ -408,8 +408,8 @@ do {                                                                    \
 #define tcp_return_buffer(tm)						\
 do {									\
   u32 *my_tx_buffers;							\
-  u32 cpu_index = os_get_cpu_number();                             	\
-  my_tx_buffers = tm->tx_buffers[cpu_index];                          	\
+  u32 thread_index = vlib_get_thread_index();                             	\
+  my_tx_buffers = tm->tx_buffers[thread_index];                          	\
   _vec_len (my_tx_buffers) +=1;						\
 } while (0)
 
@@ -942,7 +942,7 @@ tcp_send_ack (tcp_connection_t * tc)
 void
 tcp_timer_delack_handler (u32 index)
 {
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
 
   tc = tcp_connection_get (index, thread_index);
@@ -1022,7 +1022,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   vlib_main_t *vm = vlib_get_main ();
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
   vlib_buffer_t *b;
   u32 bi, snd_space, n_bytes;
@@ -1152,7 +1152,7 @@ tcp_timer_persist_handler (u32 index)
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   vlib_main_t *vm = vlib_get_main ();
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
   vlib_buffer_t *b;
   u32 bi, n_bytes;
@@ -1313,7 +1313,7 @@ tcp46_output_inline (vlib_main_t * vm,
 		     vlib_frame_t * from_frame, int is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -1524,7 +1524,7 @@ tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 			 vlib_frame_t * from_frame, u8 is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c
index 4b22109b..810278e6 100644
--- a/src/vnet/udp/udp_input.c
+++ b/src/vnet/udp/udp_input.c
@@ -70,7 +70,7 @@ udp4_uri_input_node_fn (vlib_main_t * vm,
   udp4_uri_input_next_t next_index;
   udp_uri_main_t *um = vnet_get_udp_main ();
   session_manager_main_t *smm = vnet_get_session_manager_main ();
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
   u8 my_enqueue_epoch;
   u32 *session_indices_to_enqueue;
   static u32 serial_number;
diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c
index fb1a8bac..0fc62f6c 100644
--- a/src/vnet/unix/tapcli.c
+++ b/src/vnet/unix/tapcli.c
@@ -366,7 +366,7 @@ static uword tapcli_rx_iface(vlib_main_t * vm,
       vlib_increment_combined_counter (
           vnet_main.interface_main.combined_sw_if_counters
           + VNET_INTERFACE_COUNTER_RX,
-          os_get_cpu_number(), ti->sw_if_index,
+          vlib_get_thread_index(), ti->sw_if_index,
           1, n_bytes_in_packet);
 
       if (PREDICT_FALSE(n_trace > 0)) {
diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c
index 2cfcc92f..ac674653 100644
--- a/src/vnet/unix/tuntap.c
+++ b/src/vnet/unix/tuntap.c
@@ -189,7 +189,7 @@ tuntap_tx (vlib_main_t * vm,
   /* Update tuntap interface output stats. */
   vlib_increment_combined_counter (im->combined_sw_if_counters
 				   + VNET_INTERFACE_COUNTER_TX,
-				   vm->cpu_index,
+				   vm->thread_index,
 				   tm->sw_if_index, n_packets, n_bytes);
 
 
@@ -297,7 +297,7 @@ tuntap_rx (vlib_main_t * vm,
     vlib_increment_combined_counter
         (vnet_main.interface_main.combined_sw_if_counters
          + VNET_INTERFACE_COUNTER_RX,
-         os_get_cpu_number(),
+         vlib_get_thread_index(),
          tm->sw_if_index,
          1, n_bytes_in_packet);
 
diff --git a/src/vnet/vxlan-gpe/decap.c b/src/vnet/vxlan-gpe/decap.c
index 22ab4b62..d4fe4231 100644
--- a/src/vnet/vxlan-gpe/decap.c
+++ b/src/vnet/vxlan-gpe/decap.c
@@ -115,7 +115,7 @@ vxlan_gpe_input (vlib_main_t * vm,
   vxlan4_gpe_tunnel_key_t last_key4;
   vxlan6_gpe_tunnel_key_t last_key6;
   u32 pkts_decapsulated = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
 
   if (is_ip4)
@@ -342,7 +342,7 @@ vxlan_gpe_input (vlib_main_t * vm,
         if (stats_n_packets)
           vlib_increment_combined_counter (
               im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-              cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+              thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
         stats_n_packets = 1;
         stats_n_bytes = len0;
         stats_sw_if_index = sw_if_index0;
@@ -427,7 +427,7 @@ vxlan_gpe_input (vlib_main_t * vm,
         if (stats_n_packets)
           vlib_increment_combined_counter (
               im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-              cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+              thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
         stats_n_packets = 1;
         stats_n_bytes = len1;
         stats_sw_if_index = sw_if_index1;
@@ -588,7 +588,7 @@ vxlan_gpe_input (vlib_main_t * vm,
         if (stats_n_packets)
           vlib_increment_combined_counter (
               im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-              cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+              thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
         stats_n_packets = 1;
         stats_n_bytes = len0;
         stats_sw_if_index = sw_if_index0;
@@ -615,7 +615,7 @@ vxlan_gpe_input (vlib_main_t * vm,
   if (stats_n_packets)
   {
     vlib_increment_combined_counter (
-        im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, cpu_index,
+        im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, thread_index,
         stats_sw_if_index, stats_n_packets, stats_n_bytes);
     node->runtime_data[0] = stats_sw_if_index;
   }
diff --git a/src/vnet/vxlan-gpe/encap.c b/src/vnet/vxlan-gpe/encap.c
index 3a486e56..67ed94b4 100644
--- a/src/vnet/vxlan-gpe/encap.c
+++ b/src/vnet/vxlan-gpe/encap.c
@@ -151,7 +151,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
   vnet_main_t * vnm = ngm->vnet_main;
   vnet_interface_main_t * im = &vnm->interface_main;
   u32 pkts_encapsulated = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
 
   from = vlib_frame_vector_args (from_frame);
@@ -253,7 +253,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
           if (stats_n_packets)
             vlib_increment_combined_counter (
                 im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-                cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+                thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
           stats_sw_if_index = sw_if_index0;
           stats_n_packets = 2;
           stats_n_bytes = len0 + len1;
@@ -262,10 +262,10 @@ vxlan_gpe_encap (vlib_main_t * vm,
         {
           vlib_increment_combined_counter (
               im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-              cpu_index, sw_if_index0, 1, len0);
+              thread_index, sw_if_index0, 1, len0);
           vlib_increment_combined_counter (
               im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-              cpu_index, sw_if_index1, 1, len1);
+              thread_index, sw_if_index1, 1, len1);
         }
       }
 
@@ -335,7 +335,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
         if (stats_n_packets)
           vlib_increment_combined_counter (
               im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-              cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+              thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
         stats_n_packets = 1;
         stats_n_bytes = len0;
         stats_sw_if_index = sw_if_index0;
@@ -359,7 +359,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
   if (stats_n_packets)
   {
     vlib_increment_combined_counter (
-        im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, cpu_index,
+        im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, thread_index,
         stats_sw_if_index, stats_n_packets, stats_n_bytes);
     node->runtime_data[0] = stats_sw_if_index;
   }
diff --git a/src/vnet/vxlan/decap.c b/src/vnet/vxlan/decap.c
index 514b2c99..2acb1f6f 100644
--- a/src/vnet/vxlan/decap.c
+++ b/src/vnet/vxlan/decap.c
@@ -81,7 +81,7 @@ vxlan_input (vlib_main_t * vm,
   vxlan4_tunnel_key_t last_key4;
   vxlan6_tunnel_key_t last_key6;
   u32 pkts_decapsulated = 0;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
 
   if (is_ip4)
@@ -314,7 +314,7 @@ vxlan_input (vlib_main_t * vm,
 	      if (stats_n_packets)
 		vlib_increment_combined_counter 
 		  (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-		   cpu_index, stats_sw_if_index, 
+		   thread_index, stats_sw_if_index, 
 		   stats_n_packets, stats_n_bytes);
 	      stats_n_packets = 1;
 	      stats_n_bytes = len0;
@@ -468,7 +468,7 @@ vxlan_input (vlib_main_t * vm,
 	      if (stats_n_packets)
 		vlib_increment_combined_counter 
 		  (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-		   cpu_index, stats_sw_if_index, 
+		   thread_index, stats_sw_if_index, 
 		   stats_n_packets, stats_n_bytes);
 	      stats_n_packets = 1;
 	      stats_n_bytes = len1;
@@ -674,7 +674,7 @@ vxlan_input (vlib_main_t * vm,
 	      if (stats_n_packets)
 		vlib_increment_combined_counter 
 		  (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-		   cpu_index, stats_sw_if_index, 
+		   thread_index, stats_sw_if_index, 
 		   stats_n_packets, stats_n_bytes);
 	      stats_n_packets = 1;
 	      stats_n_bytes = len0;
@@ -711,7 +711,7 @@ vxlan_input (vlib_main_t * vm,
     {
       vlib_increment_combined_counter 
 	(im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-	 cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+	 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
       node->runtime_data[0] = stats_sw_if_index;
     }
 
diff --git a/src/vnet/vxlan/encap.c b/src/vnet/vxlan/encap.c
index 5b63064a..4cfbbc23 100644
--- a/src/vnet/vxlan/encap.c
+++ b/src/vnet/vxlan/encap.c
@@ -77,7 +77,7 @@ vxlan_encap_inline (vlib_main_t * vm,
   vnet_interface_main_t * im = &vnm->interface_main;
   u32 pkts_encapsulated = 0;
   u16 old_l0 = 0, old_l1 = 0;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
   u32 sw_if_index0 = 0, sw_if_index1 = 0;
   u32 next0 = 0, next1 = 0;
@@ -301,7 +301,7 @@ vxlan_encap_inline (vlib_main_t * vm,
 		  if (stats_n_packets) 
 		    vlib_increment_combined_counter 
 		      (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-		       cpu_index, stats_sw_if_index, 
+		       thread_index, stats_sw_if_index, 
 		       stats_n_packets, stats_n_bytes);
 		  stats_sw_if_index = sw_if_index0;
 		  stats_n_packets = 2;
@@ -311,10 +311,10 @@ vxlan_encap_inline (vlib_main_t * vm,
 	        {
 		  vlib_increment_combined_counter 
 		      (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-		       cpu_index, sw_if_index0, 1, len0);
+		       thread_index, sw_if_index0, 1, len0);
 		  vlib_increment_combined_counter 
 		      (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-		       cpu_index, sw_if_index1, 1, len1);
+		       thread_index, sw_if_index1, 1, len1);
 		}
 	    }
 
@@ -464,7 +464,7 @@ vxlan_encap_inline (vlib_main_t * vm,
 	      if (stats_n_packets)
 		vlib_increment_combined_counter 
 		  (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-		   cpu_index, stats_sw_if_index, 
+		   thread_index, stats_sw_if_index, 
 		   stats_n_packets, stats_n_bytes);
 	      stats_n_packets = 1;
 	      stats_n_bytes = len0;
@@ -496,7 +496,7 @@ vxlan_encap_inline (vlib_main_t * vm,
     {
       vlib_increment_combined_counter 
 	(im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-	 cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+	 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
       node->runtime_data[0] = stats_sw_if_index;
     }
 
diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c
index 042d02e2..4309cd51 100644
--- a/src/vpp/stats/stats.c
+++ b/src/vpp/stats/stats.c
@@ -66,14 +66,14 @@ _(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters)
 void
 dslock (stats_main_t * sm, int release_hint, int tag)
 {
-  u32 thread_id;
+  u32 thread_index;
   data_structure_lock_t *l = sm->data_structure_lock;
 
   if (PREDICT_FALSE (l == 0))
     return;
 
-  thread_id = os_get_cpu_number ();
-  if (l->lock && l->thread_id == thread_id)
+  thread_index = vlib_get_thread_index ();
+  if (l->lock && l->thread_index == thread_index)
     {
       l->count++;
       return;
@@ -85,7 +85,7 @@ dslock (stats_main_t * sm, int release_hint, int tag)
   while (__sync_lock_test_and_set (&l->lock, 1))
     /* zzzz */ ;
   l->tag = tag;
-  l->thread_id = thread_id;
+  l->thread_index = thread_index;
   l->count = 1;
 }
 
@@ -99,14 +99,14 @@ stats_dslock_with_hint (int hint, int tag)
 void
 dsunlock (stats_main_t * sm)
 {
-  u32 thread_id;
+  u32 thread_index;
   data_structure_lock_t *l = sm->data_structure_lock;
 
   if (PREDICT_FALSE (l == 0))
     return;
 
-  thread_id = os_get_cpu_number ();
-  ASSERT (l->lock && l->thread_id == thread_id);
+  thread_index = vlib_get_thread_index ();
+  ASSERT (l->lock && l->thread_index == thread_index);
   l->count--;
   if (l->count == 0)
     {
diff --git a/src/vpp/stats/stats.h b/src/vpp/stats/stats.h
index 118115be..024dc78e 100644
--- a/src/vpp/stats/stats.h
+++ b/src/vpp/stats/stats.h
@@ -30,7 +30,7 @@ typedef struct
 {
   volatile u32 lock;
   volatile u32 release_hint;
-  u32 thread_id;
+  u32 thread_index;
   u32 count;
   int tag;
 } data_structure_lock_t;
-- 
cgit 1.2.3-korg


From 0f26c5a0138ac86d7ebd197c31a09d8d624c35fe Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Wed, 1 Mar 2017 15:12:11 -0800
Subject: MPLS Mcast

 1 - interface-DPO
        Used in the Data-plane to change a packet's input interface
 2 - MPLS multicast FIB entry
        Same as a unicast entry but it links to a replicate not a load-balance DPO
 3 - Multicast MPLS tunnel
        Update MPLS tunnels to use a FIB path-list to describe the endpoint[s]. Use the path-list to generate the forwarding chain (DPOs) to link to.
 4 - Resolve a path via a local label (of an mLDP LSP)
        For IP multicast entries to use an LSP in the replication list, we need to describe the 'resolve-via-label' where the label is that of a multicast LSP.
 5 - MPLS disposition path sets RPF-ID
        For an interface-less LSP (i.e. mLDP not RSVP-TE) at the tail of the LSP we still need to perform an RPF check. An MPLS disposition DPO performs the MPLS pop validation checks and sets the RPF-ID in the packet.
 6 - RPF check with per-entry RPF-ID
       An RPF-ID is used instead of a real interface SW if index in the case the IP traffic arrives from an LSP that does not have an associated interface.

Change-Id: Ib92e177be919147bafeb599729abf3d1abc2f4b3
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/plugins/dpdk/device/node.c         |   2 +-
 src/vat/api_format.c                   | 133 ++---
 src/vnet.am                            |   2 +
 src/vnet/adj/adj.c                     |  13 +-
 src/vnet/adj/adj.h                     |   6 +
 src/vnet/adj/adj_internal.h            |  14 +-
 src/vnet/adj/adj_mcast.c               | 134 ++++-
 src/vnet/adj/adj_mcast.h               |  27 +
 src/vnet/adj/adj_midchain.c            |  62 ++-
 src/vnet/adj/adj_nbr.c                 |   2 -
 src/vnet/buffer.h                      |   3 +
 src/vnet/devices/ssvm/node.c           |   2 +-
 src/vnet/dhcp/dhcp6_proxy_node.c       |   1 +
 src/vnet/dpo/dpo.c                     |  10 +
 src/vnet/dpo/dpo.h                     |   8 +-
 src/vnet/dpo/interface_dpo.c           | 416 ++++++++++++++++
 src/vnet/dpo/interface_dpo.h           |  67 +++
 src/vnet/dpo/lookup_dpo.c              | 211 +++++++-
 src/vnet/dpo/lookup_dpo.h              |  20 +
 src/vnet/dpo/mpls_disposition.c        | 364 ++++++++++++++
 src/vnet/dpo/mpls_disposition.h        |  85 ++++
 src/vnet/dpo/mpls_label_dpo.c          |   6 +-
 src/vnet/dpo/replicate_dpo.c           |  48 +-
 src/vnet/dpo/replicate_dpo.h           |   2 +
 src/vnet/ethernet/arp.c                |   1 +
 src/vnet/ethernet/interface.c          |   2 +-
 src/vnet/ethernet/node.c               |   4 +-
 src/vnet/ethernet/types.def            |   4 +-
 src/vnet/fib/fib_api.h                 |   4 +
 src/vnet/fib/fib_entry.c               |  47 +-
 src/vnet/fib/fib_entry.h               |  13 +-
 src/vnet/fib/fib_entry_src.c           | 154 +++---
 src/vnet/fib/fib_internal.h            |   1 +
 src/vnet/fib/fib_path.c                | 222 +++++++--
 src/vnet/fib/fib_path.h                |  17 +-
 src/vnet/fib/fib_path_ext.c            |   4 +-
 src/vnet/fib/fib_path_ext.h            |   3 +-
 src/vnet/fib/fib_path_list.c           | 270 ++++++----
 src/vnet/fib/fib_path_list.h           |  22 +-
 src/vnet/fib/fib_table.c               |  47 +-
 src/vnet/fib/fib_test.c                | 345 +++++++++++--
 src/vnet/fib/fib_test.h                | 111 +++++
 src/vnet/fib/fib_types.c               |  15 +-
 src/vnet/fib/fib_types.h               |  60 ++-
 src/vnet/fib/mpls_fib.c                |  15 +-
 src/vnet/handoff.h                     |  10 +-
 src/vnet/interface.c                   |   2 +-
 src/vnet/ip/ip.api                     |   3 +
 src/vnet/ip/ip4_forward.c              |  20 +
 src/vnet/ip/ip6_forward.c              |  23 +
 src/vnet/ip/ip6_neighbor.c             |   1 +
 src/vnet/ip/ip_api.c                   |  98 ++--
 src/vnet/ip/lookup.c                   |   3 +-
 src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c |   1 +
 src/vnet/mfib/ip4_mfib.c               |   1 +
 src/vnet/mfib/ip6_mfib.c               |   1 +
 src/vnet/mfib/mfib_entry.c             | 395 +++++++++------
 src/vnet/mfib/mfib_entry.h             |  20 +-
 src/vnet/mfib/mfib_forward.c           |  29 +-
 src/vnet/mfib/mfib_table.c             |   8 +-
 src/vnet/mfib/mfib_table.h             |   1 +
 src/vnet/mfib/mfib_test.c              | 127 ++++-
 src/vnet/mpls/mpls.api                 |  87 ++--
 src/vnet/mpls/mpls.c                   |  17 +-
 src/vnet/mpls/mpls_api.c               |  97 ++--
 src/vnet/mpls/mpls_input.c             |   2 +-
 src/vnet/mpls/mpls_lookup.c            | 236 ++++++---
 src/vnet/mpls/mpls_tunnel.c            | 883 ++++++++++++++++++++++-----------
 src/vnet/mpls/mpls_tunnel.h            |  57 ++-
 src/vnet/mpls/mpls_types.h             |  20 +
 src/vnet/srp/interface.c               |   2 +-
 test/test_ip_mcast.py                  |   1 +
 test/test_mpls.py                      | 277 ++++++++++-
 test/vpp_ip_route.py                   |  38 +-
 test/vpp_mpls_tunnel_interface.py      |  46 ++
 test/vpp_papi_provider.py              |  16 +-
 76 files changed, 4393 insertions(+), 1128 deletions(-)
 create mode 100644 src/vnet/dpo/interface_dpo.c
 create mode 100644 src/vnet/dpo/interface_dpo.h
 create mode 100644 src/vnet/dpo/mpls_disposition.c
 create mode 100644 src/vnet/dpo/mpls_disposition.h
 create mode 100644 src/vnet/fib/fib_test.h
 create mode 100644 test/vpp_mpls_tunnel_interface.py

(limited to 'src/vnet/dpo')

diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
index b10e0fad..0549ba5d 100644
--- a/src/plugins/dpdk/device/node.c
+++ b/src/plugins/dpdk/device/node.c
@@ -52,7 +52,7 @@ always_inline int
 vlib_buffer_is_mpls (vlib_buffer_t * b)
 {
   ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b);
-  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST));
+  return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS));
 }
 
 always_inline u32
diff --git a/src/vat/api_format.c b/src/vat/api_format.c
index 61b8e1d8..107aa012 100644
--- a/src/vat/api_format.c
+++ b/src/vat/api_format.c
@@ -16369,32 +16369,82 @@ api_netmap_delete (vat_main_t * vam)
   return ret;
 }
 
-static void vl_api_mpls_tunnel_details_t_handler
-  (vl_api_mpls_tunnel_details_t * mp)
+static void
+vl_api_mpls_fib_path_print (vat_main_t * vam, vl_api_fib_path2_t * fp)
+{
+  if (fp->afi == IP46_TYPE_IP6)
+    print (vam->ofp,
+	   "  weight %d, sw_if_index %d, is_local %d, is_drop %d, "
+	   "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U",
+	   ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local,
+	   fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi,
+	   format_ip6_address, fp->next_hop);
+  else if (fp->afi == IP46_TYPE_IP4)
+    print (vam->ofp,
+	   "  weight %d, sw_if_index %d, is_local %d, is_drop %d, "
+	   "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U",
+	   ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local,
+	   fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi,
+	   format_ip4_address, fp->next_hop);
+}
+
+static void
+vl_api_mpls_fib_path_json_print (vat_json_node_t * node,
+				 vl_api_fib_path2_t * fp)
+{
+  struct in_addr ip4;
+  struct in6_addr ip6;
+
+  vat_json_object_add_uint (node, "weight", ntohl (fp->weight));
+  vat_json_object_add_uint (node, "sw_if_index", ntohl (fp->sw_if_index));
+  vat_json_object_add_uint (node, "is_local", fp->is_local);
+  vat_json_object_add_uint (node, "is_drop", fp->is_drop);
+  vat_json_object_add_uint (node, "is_unreach", fp->is_unreach);
+  vat_json_object_add_uint (node, "is_prohibit", fp->is_prohibit);
+  vat_json_object_add_uint (node, "next_hop_afi", fp->afi);
+  if (fp->afi == IP46_TYPE_IP4)
+    {
+      clib_memcpy (&ip4, &fp->next_hop, sizeof (ip4));
+      vat_json_object_add_ip4 (node, "next_hop", ip4);
+    }
+  else if (fp->afi == IP46_TYPE_IP6)
+    {
+      clib_memcpy (&ip6, &fp->next_hop, sizeof (ip6));
+      vat_json_object_add_ip6 (node, "next_hop", ip6);
+    }
+}
+
+static void
+vl_api_mpls_tunnel_details_t_handler (vl_api_mpls_tunnel_details_t * mp)
 {
   vat_main_t *vam = &vat_main;
-  i32 len = mp->mt_next_hop_n_labels;
+  int count = ntohl (mp->mt_count);
+  vl_api_fib_path2_t *fp;
   i32 i;
 
-  print (vam->ofp, "[%d]: via %U %d labels ",
-	 mp->tunnel_index,
-	 format_ip4_address, mp->mt_next_hop,
-	 ntohl (mp->mt_next_hop_sw_if_index));
-  for (i = 0; i < len; i++)
+  print (vam->ofp, "[%d]: sw_if_index %d via:",
+	 ntohl (mp->mt_tunnel_index), ntohl (mp->mt_sw_if_index));
+  fp = mp->mt_paths;
+  for (i = 0; i < count; i++)
     {
-      print (vam->ofp, "%u ", ntohl (mp->mt_next_hop_out_labels[i]));
+      vl_api_mpls_fib_path_print (vam, fp);
+      fp++;
     }
+
   print (vam->ofp, "");
 }
 
-static void vl_api_mpls_tunnel_details_t_handler_json
-  (vl_api_mpls_tunnel_details_t * mp)
+#define vl_api_mpls_tunnel_details_t_endian vl_noop_handler
+#define vl_api_mpls_tunnel_details_t_print vl_noop_handler
+
+static void
+vl_api_mpls_tunnel_details_t_handler_json (vl_api_mpls_tunnel_details_t * mp)
 {
   vat_main_t *vam = &vat_main;
   vat_json_node_t *node = NULL;
-  struct in_addr ip4;
+  int count = ntohl (mp->mt_count);
+  vl_api_fib_path2_t *fp;
   i32 i;
-  i32 len = mp->mt_next_hop_n_labels;
 
   if (VAT_JSON_ARRAY != vam->json_tree.type)
     {
@@ -16404,17 +16454,17 @@ static void vl_api_mpls_tunnel_details_t_handler_json
   node = vat_json_array_add (&vam->json_tree);
 
   vat_json_init_object (node);
-  vat_json_object_add_uint (node, "tunnel_index", ntohl (mp->tunnel_index));
-  clib_memcpy (&ip4, &(mp->mt_next_hop), sizeof (ip4));
-  vat_json_object_add_ip4 (node, "next_hop", ip4);
-  vat_json_object_add_uint (node, "next_hop_sw_if_index",
-			    ntohl (mp->mt_next_hop_sw_if_index));
-  vat_json_object_add_uint (node, "l2_only", ntohl (mp->mt_l2_only));
-  vat_json_object_add_uint (node, "label_count", len);
-  for (i = 0; i < len; i++)
+  vat_json_object_add_uint (node, "tunnel_index",
+			    ntohl (mp->mt_tunnel_index));
+  vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->mt_sw_if_index));
+
+  vat_json_object_add_uint (node, "l2_only", mp->mt_l2_only);
+
+  fp = mp->mt_paths;
+  for (i = 0; i < count; i++)
     {
-      vat_json_object_add_uint (node, "label",
-				ntohl (mp->mt_next_hop_out_labels[i]));
+      vl_api_mpls_fib_path_json_print (node, fp);
+      fp++;
     }
 }
 
@@ -16453,6 +16503,7 @@ api_mpls_tunnel_dump (vat_main_t * vam)
 #define vl_api_mpls_fib_details_t_endian vl_noop_handler
 #define vl_api_mpls_fib_details_t_print vl_noop_handler
 
+
 static void
 vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp)
 {
@@ -16467,20 +16518,7 @@ vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp)
   fp = mp->path;
   for (i = 0; i < count; i++)
     {
-      if (fp->afi == IP46_TYPE_IP6)
-	print (vam->ofp,
-	       "  weight %d, sw_if_index %d, is_local %d, is_drop %d, "
-	       "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U",
-	       ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local,
-	       fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi,
-	       format_ip6_address, fp->next_hop);
-      else if (fp->afi == IP46_TYPE_IP4)
-	print (vam->ofp,
-	       "  weight %d, sw_if_index %d, is_local %d, is_drop %d, "
-	       "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U",
-	       ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local,
-	       fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi,
-	       format_ip4_address, fp->next_hop);
+      vl_api_mpls_fib_path_print (vam, fp);
       fp++;
     }
 }
@@ -16491,8 +16529,6 @@ static void vl_api_mpls_fib_details_t_handler_json
   vat_main_t *vam = &vat_main;
   int count = ntohl (mp->count);
   vat_json_node_t *node = NULL;
-  struct in_addr ip4;
-  struct in6_addr ip6;
   vl_api_fib_path2_t *fp;
   int i;
 
@@ -16511,23 +16547,8 @@ static void vl_api_mpls_fib_details_t_handler_json
   fp = mp->path;
   for (i = 0; i < count; i++)
     {
-      vat_json_object_add_uint (node, "weight", ntohl (fp->weight));
-      vat_json_object_add_uint (node, "sw_if_index", ntohl (fp->sw_if_index));
-      vat_json_object_add_uint (node, "is_local", fp->is_local);
-      vat_json_object_add_uint (node, "is_drop", fp->is_drop);
-      vat_json_object_add_uint (node, "is_unreach", fp->is_unreach);
-      vat_json_object_add_uint (node, "is_prohibit", fp->is_prohibit);
-      vat_json_object_add_uint (node, "next_hop_afi", fp->afi);
-      if (fp->afi == IP46_TYPE_IP4)
-	{
-	  clib_memcpy (&ip4, &fp->next_hop, sizeof (ip4));
-	  vat_json_object_add_ip4 (node, "next_hop", ip4);
-	}
-      else if (fp->afi == IP46_TYPE_IP6)
-	{
-	  clib_memcpy (&ip6, &fp->next_hop, sizeof (ip6));
-	  vat_json_object_add_ip6 (node, "next_hop", ip6);
-	}
+      vl_api_mpls_fib_path_json_print (node, fp);
+      fp++;
     }
 }
 
diff --git a/src/vnet.am b/src/vnet.am
index 643ae92e..bed4902b 100644
--- a/src/vnet.am
+++ b/src/vnet.am
@@ -990,6 +990,8 @@ libvnet_la_SOURCES +=				\
   vnet/dpo/lookup_dpo.c   			\
   vnet/dpo/classify_dpo.c   			\
   vnet/dpo/replicate_dpo.c   			\
+  vnet/dpo/interface_dpo.c   			\
+  vnet/dpo/mpls_disposition.c   		\
   vnet/dpo/mpls_label_dpo.c
 
 nobase_include_HEADERS +=			\
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
index 90182006..36dfe500 100644
--- a/src/vnet/adj/adj.c
+++ b/src/vnet/adj/adj.c
@@ -67,6 +67,10 @@ adj_alloc (fib_protocol_t proto)
     adj->lookup_next_index = 0;
     adj->ia_delegates = NULL;
 
+    /* lest it become a midchain in the future */
+    memset(&adj->sub_type.midchain.next_dpo, 0,
+           sizeof(adj->sub_type.midchain.next_dpo));
+
     ip4_main.lookup_main.adjacency_heap = adj_pool;
     ip6_main.lookup_main.adjacency_heap = adj_pool;
 
@@ -118,6 +122,9 @@ format_ip_adjacency (u8 * s, va_list * args)
     case IP_LOOKUP_NEXT_MCAST:
 	s = format (s, "%U", format_adj_mcast, adj_index, 0);
 	break;
+    case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+	s = format (s, "%U", format_adj_mcast_midchain, adj_index, 0);
+	break;
     default:
 	break;
     }
@@ -180,6 +187,7 @@ adj_last_lock_gone (ip_adjacency_t *adj)
 			 adj->rewrite_header.sw_if_index);
 	break;
     case IP_LOOKUP_NEXT_MCAST:
+    case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
 	adj_mcast_remove(adj->ia_nh_proto,
 			 adj->rewrite_header.sw_if_index);
 	break;
@@ -338,6 +346,7 @@ adj_walk (u32 sw_if_index,
     FOR_EACH_FIB_IP_PROTOCOL(proto)
     {
         adj_nbr_walk(sw_if_index, proto, cb, ctx);
+        adj_mcast_walk(sw_if_index, proto, cb, ctx);
     }
 }
 
@@ -544,9 +553,9 @@ adj_show (vlib_main_t * vm,
  * [@0]
  * [@1]  glean: loop0
  * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
- * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@3] mpls via 1.0.0.2 loop0: MPLS: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
  * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
- * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@5] mpls via 1.0.0.3 loop0: MPLS: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
  * @cliexend
  ?*/
 VLIB_CLI_COMMAND (adj_show_command, static) = {
diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h
index 32997c91..ed5eb1f1 100644
--- a/src/vnet/adj/adj.h
+++ b/src/vnet/adj/adj.h
@@ -81,6 +81,10 @@ typedef enum
   /** Multicast Adjacency. */
   IP_LOOKUP_NEXT_MCAST,
 
+  /** Multicast Midchain Adjacency. An Adjacency for sending mcast packets
+   *  on a tunnel/virtual interface */
+  IP_LOOKUP_NEXT_MCAST_MIDCHAIN,
+
   IP_LOOKUP_N_NEXT,
 } __attribute__ ((packed)) ip_lookup_next_t;
 
@@ -107,6 +111,7 @@ typedef enum
     [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite",    		\
     [IP_LOOKUP_NEXT_MCAST] = "ip4-rewrite-mcast",	        \
     [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain",		        \
+    [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip4-mcast-midchain",     \
     [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error",		\
 }
 
@@ -119,6 +124,7 @@ typedef enum
     [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",			\
     [IP_LOOKUP_NEXT_MCAST] = "ip6-rewrite-mcast",		\
     [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain",			\
+    [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip6-mcast-midchain",     \
     [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error",		\
     [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",		\
     [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop",	\
diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h
index 30668625..2c123c54 100644
--- a/src/vnet/adj/adj_internal.h
+++ b/src/vnet/adj/adj_internal.h
@@ -17,6 +17,7 @@
 #define __ADJ_INTERNAL_H__
 
 #include <vnet/adj/adj.h>
+#include <vnet/adj/adj_mcast.h>
 #include <vnet/ip/ip.h>
 #include <vnet/mpls/mpls.h>
 #include <vnet/adj/adj_l2.h>
@@ -87,11 +88,14 @@ adj_get_index (ip_adjacency_t *adj)
     return (adj - adj_pool);
 }
 
-extern void adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
-					     ip_lookup_next_t adj_next_index,
-					     u32 complete_next_index,
-					     u32 next_index,
-					     u8 *rewrite);
+extern void adj_nbr_update_rewrite_internal(ip_adjacency_t *adj,
+                                            ip_lookup_next_t adj_next_index,
+                                            u32 complete_next_index,
+                                            u32 next_index,
+                                            u8 *rewrite);
+extern void adj_midchain_setup(adj_index_t adj_index,
+                               adj_midchain_fixup_t fixup,
+                               adj_flags_t flags);
 
 extern ip_adjacency_t * adj_alloc(fib_protocol_t proto);
 
diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c
index 4f678e43..755abfd4 100644
--- a/src/vnet/adj/adj_mcast.c
+++ b/src/vnet/adj/adj_mcast.c
@@ -13,7 +13,7 @@
  * limitations under the License.
  */
 
-#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_mcast.h>
 #include <vnet/adj/adj_internal.h>
 #include <vnet/fib/fib_walk.h>
 #include <vnet/ip/ip.h>
@@ -129,6 +129,59 @@ adj_mcast_update_rewrite (adj_index_t adj_index,
     adj->rewrite_header.dst_mcast_mask = clib_host_to_net_u32(mask);
 }
 
+/**
+ * adj_mcast_midchain_update_rewrite
+ *
+ * Turn an existing mcast adjacency into a mcast-midchain and install its
+ * rewrite string. The rewrite must not be NULL (asserted below).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_mcast_midchain_update_rewrite (adj_index_t adj_index,
+                                   adj_midchain_fixup_t fixup,
+                                   adj_flags_t flags,
+                                   u8 *rewrite,
+                                   u8 offset,
+                                   u32 mask)
+{
+    ip_adjacency_t *adj;
+
+    ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+    adj = adj_get(adj_index);
+
+    /*
+     * one time only update. since we don't support changing the tunnel
+     * src,dst, this is all we need.
+     */
+    ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_MCAST);
+    /*
+     * tunnels can always provide a rewrite.
+     */
+    ASSERT(NULL != rewrite);
+
+    adj_midchain_setup(adj_index, fixup, flags);
+
+    /*
+     * update the adj's rewrite string and build the arc
+     * from the rewrite node to the interface's TX node
+     */
+    adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MCAST_MIDCHAIN,
+                                    adj_get_mcast_node(adj->ia_nh_proto),
+                                    vnet_tx_node_index_for_sw_interface(
+                                        vnet_get_main(),
+                                        adj->rewrite_header.sw_if_index),
+                                    rewrite);
+
+    /*
+     * set the fields corresponding to the mcast IP address rewrite
+     * The mask must be stored in network byte order, since the packet's
+     * IP address will also be in network order.
+     */
+    adj->rewrite_header.dst_mcast_offset = offset;
+    adj->rewrite_header.dst_mcast_mask = clib_host_to_net_u32(mask);
+}
+
 void
 adj_mcast_remove (fib_protocol_t proto,
 		  u32 sw_if_index)
@@ -260,6 +313,24 @@ adj_mcast_interface_delete (vnet_main_t * vnm,
 
 VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_mcast_interface_delete);
 
+/**
+ * @brief Invoke cb on the mcast adjacency of an interface, if one exists
+ */
+void
+adj_mcast_walk (u32 sw_if_index,
+                fib_protocol_t proto,
+                adj_walk_cb_t cb,
+                void *ctx)
+{
+    if (vec_len(adj_mcasts[proto]) > sw_if_index) /* slot exists in the DB */
+    {
+        if (ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index]) /* in use */
+        {
+            cb(adj_mcasts[proto][sw_if_index], ctx);
+        }
+    }
+}
+
 u8*
 format_adj_mcast (u8* s, va_list *ap)
 {
@@ -269,6 +340,8 @@ format_adj_mcast (u8* s, va_list *ap)
 
     s = format(s, "%U-mcast: ",
                format_fib_protocol, adj->ia_nh_proto);
+    if (adj->rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)
+        s = format(s, "[features] ");
     s = format (s, "%U",
 		format_vnet_rewrite,
                 &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
@@ -276,6 +349,28 @@ format_adj_mcast (u8* s, va_list *ap)
     return (s);
 }
 
+u8*
+format_adj_mcast_midchain (u8* s, va_list *ap)
+{
+    /* va_args: the adj's index, then the indent for the stacked-on DPO */
+    index_t index = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+    ip_adjacency_t * adj = adj_get(index);
+
+    s = format(s, "%U-mcast-midchain: ",
+               format_fib_protocol, adj->ia_nh_proto);
+    /* pass format_vnet_rewrite the same arguments format_adj_mcast does */
+    s = format (s, "%U",
+                format_vnet_rewrite,
+                &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
+    s = format (s, "\n%Ustacked-on:\n%U%U",
+		format_white_space, indent,
+		format_white_space, indent+2,
+		format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2);
+
+    return (s);
+}
+
 
 static void
 adj_dpo_lock (dpo_id_t *dpo)
@@ -293,6 +388,11 @@ const static dpo_vft_t adj_mcast_dpo_vft = {
     .dv_unlock = adj_dpo_unlock,
     .dv_format = format_adj_mcast,
 };
+const static dpo_vft_t adj_mcast_midchain_dpo_vft = {
+    .dv_lock = adj_dpo_lock,
+    .dv_unlock = adj_dpo_unlock,
+    .dv_format = format_adj_mcast_midchain,
+};
 
 /**
  * @brief The per-protocol VLIB graph nodes that are assigned to a mcast
@@ -319,6 +419,31 @@ const static char* const * const adj_mcast_nodes[DPO_PROTO_NUM] =
     [DPO_PROTO_MPLS] = NULL,
 };
 
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a mcast
+ *        midchain object.
+ *
+ * this means that these graph nodes are ones from which a mcast midchain
+ * is the parent object in the DPO-graph.
+ */
+const static char* const adj_mcast_midchain_ip4_nodes[] =
+{
+    "ip4-mcast-midchain",
+    NULL,
+};
+const static char* const adj_mcast_midchain_ip6_nodes[] =
+{
+    "ip6-mcast-midchain",
+    NULL,
+};
+
+const static char* const * const adj_mcast_midchain_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = adj_mcast_midchain_ip4_nodes,
+    [DPO_PROTO_IP6]  = adj_mcast_midchain_ip6_nodes,
+    [DPO_PROTO_MPLS] = NULL,
+};
+
 /**
  * @brief Return the size of the adj DB.
  * This is only for testing purposes so an efficient implementation is not needed
@@ -349,5 +474,10 @@ adj_mcast_db_size (void)
 void
 adj_mcast_module_init (void)
 {
-    dpo_register(DPO_ADJACENCY_MCAST, &adj_mcast_dpo_vft, adj_mcast_nodes);
+    dpo_register(DPO_ADJACENCY_MCAST,
+                 &adj_mcast_dpo_vft,
+                 adj_mcast_nodes);
+    dpo_register(DPO_ADJACENCY_MCAST_MIDCHAIN,
+                 &adj_mcast_midchain_dpo_vft,
+                 adj_mcast_midchain_nodes);
 }
diff --git a/src/vnet/adj/adj_mcast.h b/src/vnet/adj/adj_mcast.h
index 40d44313..bfb0d6f6 100644
--- a/src/vnet/adj/adj_mcast.h
+++ b/src/vnet/adj/adj_mcast.h
@@ -26,6 +26,7 @@
 #define __ADJ_MCAST_H__
 
 #include <vnet/adj/adj_types.h>
+#include <vnet/adj/adj_midchain.h>
 
 /**
  * @brief
@@ -68,10 +69,36 @@ extern void adj_mcast_update_rewrite(adj_index_t adj_index,
                                      u8 offset,
                                      u32 mask);
 
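+/**
+ * @brief
+ *  Update the rewrite string for an existing adjacency and
+ *  convert the adjacency into a midchain
+ *
+ * @param adj_index   The index of the adj to update
+ * @param fixup,flags Passed on to adj_midchain_setup()
+ * @param rewrite     The new rewrite; must not be NULL
+ * @param offset,mask Stored in the adj's rewrite header as the mcast
+ *                    destination offset and mask
+ */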
+extern void adj_mcast_midchain_update_rewrite(adj_index_t adj_index,
+                                              adj_midchain_fixup_t fixup,
+                                              adj_flags_t flags,
+                                              u8 *rewrite,
+                                              u8 offset,
+                                              u32 mask);
+/**
+ * @brief Walk the multicast Adjacencies on a given interface
+ */
+extern void adj_mcast_walk (u32 sw_if_index,
+                            fib_protocol_t adj_nh_proto,
+                            adj_walk_cb_t cb,
+                            void *ctx);
+
 /**
  * @brief Format/display a mcast adjacency.
  */
 extern u8* format_adj_mcast(u8* s, va_list *ap);
+extern u8* format_adj_mcast_midchain(u8* s, va_list *ap);
 
 /**
  * @brief Get the sze of the mcast adj DB. Test purposes only.
diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
index 5756de43..a93a1c3e 100644
--- a/src/vnet/adj/adj_midchain.c
+++ b/src/vnet/adj/adj_midchain.c
@@ -346,7 +346,7 @@ adj_get_midchain_node (vnet_link_t link)
 static u8
 adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj)
 {
-  u8 arc = (u8) ~0;
+    u8 arc = (u8) ~0;
     switch (adj->ia_link)
     {
     case VNET_LINK_IP4:
@@ -393,17 +393,14 @@ adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj)
 }
 
 /**
- * adj_nbr_midchain_update_rewrite
+ * adj_midchain_setup
  *
- * Update the adjacency's rewrite string. A NULL string implies the
- * rewrite is reset (i.e. when ARP/ND etnry is gone).
- * NB: the adj being updated may be handling traffic in the DP.
+ * Setup the adj as a mid-chain
  */
 void
-adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
-				 adj_midchain_fixup_t fixup,
-				 adj_flags_t flags,
-				 u8 *rewrite)
+adj_midchain_setup (adj_index_t adj_index,
+                    adj_midchain_fixup_t fixup,
+                    adj_flags_t flags)
 {
     u32 feature_index, tx_node;
     ip_adjacency_t *adj;
@@ -413,16 +410,6 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
 
     adj = adj_get(adj_index);
 
-    /*
-     * one time only update. since we don't support chainging the tunnel
-     * src,dst, this is all we need.
-     */
-    ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP);
-    /*
-     * tunnels can always provide a rewrite.
-     */
-    ASSERT(NULL != rewrite);
-
     adj->sub_type.midchain.fixup_func = fixup;
     adj->ia_flags |= flags;
 
@@ -447,6 +434,38 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
     dpo_stack_from_node(tx_node,
 			&adj->sub_type.midchain.next_dpo,
 			drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
+}
+
+/**
+ * adj_nbr_midchain_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewrite is reset (i.e. when ARP/ND entry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
+				 adj_midchain_fixup_t fixup,
+				 adj_flags_t flags,
+				 u8 *rewrite)
+{
+    ip_adjacency_t *adj;
+
+    ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+    adj = adj_get(adj_index);
+
+    /*
+     * one time only update. since we don't support chainging the tunnel
+     * src,dst, this is all we need.
+     */
+    ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP);
+    /*
+     * tunnels can always provide a rewrite.
+     */
+    ASSERT(NULL != rewrite);
+
+    adj_midchain_setup(adj_index, fixup, flags);
 
     /*
      * update the rewirte with the workers paused.
@@ -454,7 +473,7 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
     adj_nbr_update_rewrite_internal(adj,
 				    IP_LOOKUP_NEXT_MIDCHAIN,
 				    adj_get_midchain_node(adj->ia_link),
-				    tx_node,
+				    adj_nbr_midchain_get_tx_node(adj),
 				    rewrite);
 }
 
@@ -496,7 +515,8 @@ adj_nbr_midchain_stack (adj_index_t adj_index,
 
     adj = adj_get(adj_index);
 
-    ASSERT(IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index);
+    ASSERT((IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index) ||
+           (IP_LOOKUP_NEXT_MCAST_MIDCHAIN == adj->lookup_next_index));
 
     dpo_stack_from_node(adj_nbr_midchain_get_tx_node(adj),
 			&adj->sub_type.midchain.next_dpo,
diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c
index ddacb030..3d450d1f 100644
--- a/src/vnet/adj/adj_nbr.c
+++ b/src/vnet/adj/adj_nbr.c
@@ -195,8 +195,6 @@ adj_nbr_alloc (fib_protocol_t nh_proto,
     adj->ia_link = link_type;
     adj->ia_nh_proto = nh_proto;
     adj->rewrite_header.sw_if_index = sw_if_index;
-    memset(&adj->sub_type.midchain.next_dpo, 0,
-           sizeof(adj->sub_type.midchain.next_dpo));
 
     adj_nbr_evaluate_feature (adj_get_index(adj));
     return (adj);
diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h
index ea3ce093..ed869d1f 100644
--- a/src/vnet/buffer.h
+++ b/src/vnet/buffer.h
@@ -130,6 +130,9 @@ typedef struct
 
 	  /* Rewrite length */
 	  u32 save_rewrite_length;
+
+	  /* MFIB RPF ID */
+	  u32 rpf_id;
 	};
 
 	/* ICMP */
diff --git a/src/vnet/devices/ssvm/node.c b/src/vnet/devices/ssvm/node.c
index 539b4161..b7a8db05 100644
--- a/src/vnet/devices/ssvm/node.c
+++ b/src/vnet/devices/ssvm/node.c
@@ -210,7 +210,7 @@ ssvm_eth_device_input (ssvm_eth_main_t * em,
 	    next0 = SSVM_ETH_INPUT_NEXT_IP4_INPUT;
 	  else if (type0 == ETHERNET_TYPE_IP6)
 	    next0 = SSVM_ETH_INPUT_NEXT_IP6_INPUT;
-	  else if (type0 == ETHERNET_TYPE_MPLS_UNICAST)
+	  else if (type0 == ETHERNET_TYPE_MPLS)
 	    next0 = SSVM_ETH_INPUT_NEXT_MPLS_INPUT;
 
 	  l3_offset0 = ((next0 == SSVM_ETH_INPUT_NEXT_IP4_INPUT ||
diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c
index 524cb095..de73154d 100644
--- a/src/vnet/dhcp/dhcp6_proxy_node.c
+++ b/src/vnet/dhcp/dhcp6_proxy_node.c
@@ -883,6 +883,7 @@ dhcp6_proxy_set_server (ip46_address_t *addr,
          mfib_table_entry_update(rx_fib_index,
                                  &all_dhcp_servers,
                                  MFIB_SOURCE_DHCP,
+                                 MFIB_RPF_ID_NONE,
                                  MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF);
          mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6);
      }
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
index d8e075a7..dfc2bd92 100644
--- a/src/vnet/dpo/dpo.c
+++ b/src/vnet/dpo/dpo.c
@@ -37,6 +37,8 @@
 #include <vnet/dpo/classify_dpo.h>
 #include <vnet/dpo/ip_null_dpo.h>
 #include <vnet/dpo/replicate_dpo.h>
+#include <vnet/dpo/interface_dpo.h>
+#include <vnet/dpo/mpls_disposition.h>
 
 /**
  * Array of char* names for the DPO types and protos
@@ -182,6 +184,12 @@ dpo_set (dpo_id_t *dpo,
 	case IP_LOOKUP_NEXT_MIDCHAIN:
 	    dpo->dpoi_type = DPO_ADJACENCY_MIDCHAIN;
 	    break;
+	case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+	    dpo->dpoi_type = DPO_ADJACENCY_MCAST_MIDCHAIN;
+	    break;
+	case IP_LOOKUP_NEXT_MCAST:
+	    dpo->dpoi_type = DPO_ADJACENCY_MCAST;
+	    break;
 	default:
 	    break;
 	}
@@ -453,6 +461,8 @@ dpo_module_init (vlib_main_t * vm)
     lookup_dpo_module_init();
     ip_null_dpo_module_init();
     replicate_module_init();
+    interface_dpo_module_init();
+    mpls_disp_dpo_module_init();
 
     return (NULL);
 }
diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h
index 48b92d3d..5aa4e2d2 100644
--- a/src/vnet/dpo/dpo.h
+++ b/src/vnet/dpo/dpo.h
@@ -108,12 +108,15 @@ typedef enum dpo_type_t_ {
     DPO_ADJACENCY_MIDCHAIN,
     DPO_ADJACENCY_GLEAN,
     DPO_ADJACENCY_MCAST,
+    DPO_ADJACENCY_MCAST_MIDCHAIN,
     DPO_RECEIVE,
     DPO_LOOKUP,
     DPO_LISP_CP,
     DPO_CLASSIFY,
     DPO_MPLS_LABEL,
+    DPO_MPLS_DISPOSITION,
     DPO_MFIB_ENTRY,
+    DPO_INTERFACE,
     DPO_LAST,
 } __attribute__((packed)) dpo_type_t;
 
@@ -129,6 +132,7 @@ typedef enum dpo_type_t_ {
     [DPO_ADJACENCY_MIDCHAIN] = "dpo-adjacency-midcahin",	\
     [DPO_ADJACENCY_GLEAN] = "dpo-glean",	\
     [DPO_ADJACENCY_MCAST] = "dpo-adj-mcast",	\
+    [DPO_ADJACENCY_MCAST_MIDCHAIN] = "dpo-adj-mcast-midchain",	\
     [DPO_RECEIVE] = "dpo-receive",	\
     [DPO_LOOKUP] = "dpo-lookup",	\
     [DPO_LOAD_BALANCE] = "dpo-load-balance",	\
@@ -136,7 +140,9 @@ typedef enum dpo_type_t_ {
     [DPO_LISP_CP] = "dpo-lisp-cp",	\
     [DPO_CLASSIFY] = "dpo-classify",	\
     [DPO_MPLS_LABEL] = "dpo-mpls-label", \
-    [DPO_MFIB_ENTRY] = "dpo-mfib_entry"	\
+    [DPO_MPLS_DISPOSITION] = "dpo-mpls-diposition", \
+    [DPO_MFIB_ENTRY] = "dpo-mfib_entry", \
+    [DPO_INTERFACE] = "dpo-interface"	\
 }
 
 /**
diff --git a/src/vnet/dpo/interface_dpo.c b/src/vnet/dpo/interface_dpo.c
new file mode 100644
index 00000000..50ca756f
--- /dev/null
+++ b/src/vnet/dpo/interface_dpo.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/dpo/interface_dpo.h>
+#include <vnet/fib/fib_node.h>
+
+/*
+ * The 'DB' of interface DPOs.
+ * There is only one per-interface per-protocol, so this is a per-protocol
+ * array of per-interface vectors of pool indices (or INDEX_INVALID)
+ */
+static index_t *interface_dpo_db[DPO_PROTO_NUM];
+
+static interface_dpo_t *
+interface_dpo_alloc (void)
+{
+    interface_dpo_t *ido;
+
+    pool_get(interface_dpo_pool, ido);
+
+    return (ido);
+}
+
+static inline interface_dpo_t *
+interface_dpo_get_from_dpo (const dpo_id_t *dpo)
+{
+    ASSERT(DPO_INTERFACE == dpo->dpoi_type);
+
+    return (interface_dpo_get(dpo->dpoi_index));
+}
+
+static inline index_t
+interface_dpo_get_index (interface_dpo_t *ido)
+{
+    return (ido - interface_dpo_pool);
+}
+
+static void
+interface_dpo_lock (dpo_id_t *dpo)
+{
+    interface_dpo_t *ido;
+
+    ido = interface_dpo_get_from_dpo(dpo);
+    ido->ido_locks++;
+}
+
+static void
+interface_dpo_unlock (dpo_id_t *dpo)
+{
+    interface_dpo_t *ido;
+
+    ido = interface_dpo_get_from_dpo(dpo);
+    ido->ido_locks--;
+    /* last lock gone: clear the DB slot and return the object to the pool */
+    if (0 == ido->ido_locks)
+    {
+	interface_dpo_db[ido->ido_proto][ido->ido_sw_if_index] =
+            INDEX_INVALID;
+        pool_put(interface_dpo_pool, ido);
+    }
+}
+
+/*
+ * interface_dpo_add_or_lock
+ *
+ * Find, or create if absent, the interface DPO for the given interface and
+ * protocol, and set 'dpo' to refer to it (dpo_set() presumably locks it)
+ */
+void
+interface_dpo_add_or_lock (dpo_proto_t proto,
+                           u32 sw_if_index,
+                           dpo_id_t *dpo)
+{
+    interface_dpo_t *ido;
+
+    vec_validate_init_empty(interface_dpo_db[proto],
+                            sw_if_index,
+                            INDEX_INVALID);
+
+    if (INDEX_INVALID == interface_dpo_db[proto][sw_if_index])
+    {
+        ido = interface_dpo_alloc();
+        /* a new pool element is not assumed zeroed: start with no locks */
+        ido->ido_sw_if_index = sw_if_index;
+        ido->ido_proto = proto;
+        ido->ido_locks = 0;
+        interface_dpo_db[proto][sw_if_index] =
+            interface_dpo_get_index(ido);
+    }
+    else
+    {
+        ido = interface_dpo_get(interface_dpo_db[proto][sw_if_index]);
+    }
+
+    dpo_set(dpo, DPO_INTERFACE, proto, interface_dpo_get_index(ido));
+}
+
+
+static clib_error_t *
+interface_dpo_interface_state_change (vnet_main_t * vnm,
+                                      u32 sw_if_index,
+                                      u32 flags)
+{
+    /* Registered for SW interface admin up/down changes;
+     * no action is taken. */
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(
+    interface_dpo_interface_state_change);
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+interface_dpo_hw_interface_state_change (vnet_main_t * vnm,
+                                         u32 hw_if_index,
+                                         u32 flags)
+{
+    return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(
+    interface_dpo_hw_interface_state_change);
+
+static clib_error_t *
+interface_dpo_interface_delete (vnet_main_t * vnm,
+                                u32 sw_if_index,
+                                u32 is_add)
+{
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(
+    interface_dpo_interface_delete);
+
+u8*
+format_interface_dpo (u8* s, va_list *ap)
+{
+    index_t index = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+    vnet_main_t * vnm = vnet_get_main();
+    interface_dpo_t *ido = interface_dpo_get(index);
+
+    return (format(s, "%U-dpo: %U",
+                   format_vnet_sw_interface_name,
+                   vnm,
+                   vnet_get_sw_interface(vnm, ido->ido_sw_if_index),
+                   format_dpo_proto, ido->ido_proto));
+}
+
+static void
+interface_dpo_mem_show (void)
+{
+    fib_show_memory_usage("Interface",
+			  pool_elts(interface_dpo_pool),
+			  pool_len(interface_dpo_pool),
+			  sizeof(interface_dpo_t));
+}
+
+
+const static dpo_vft_t interface_dpo_vft = {
+    .dv_lock = interface_dpo_lock,
+    .dv_unlock = interface_dpo_unlock,
+    .dv_format = format_interface_dpo,
+    .dv_mem_show = interface_dpo_mem_show,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to an
+ *        interface DPO object.
+ *
+ * this means that these graph nodes are ones from which an interface DPO
+ * is the parent object in the DPO-graph.
+ */
+const static char* const interface_dpo_ip4_nodes[] =
+{
+    "interface-dpo-ip4",
+    NULL,
+};
+const static char* const interface_dpo_ip6_nodes[] =
+{
+    "interface-dpo-ip6",
+    NULL,
+};
+
+const static char* const * const interface_dpo_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = interface_dpo_ip4_nodes,
+    [DPO_PROTO_IP6]  = interface_dpo_ip6_nodes,
+    [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+interface_dpo_module_init (void)
+{
+    dpo_register(DPO_INTERFACE,
+                 &interface_dpo_vft,
+                 interface_dpo_nodes);
+}
+
+/**
+ * @brief Interface DPO trace data
+ */
+typedef struct interface_dpo_trace_t_
+{
+    u32 sw_if_index;
+} interface_dpo_trace_t;
+
+typedef enum interface_dpo_next_t_
+{
+    INTERFACE_DPO_DROP = 0,
+    INTERFACE_DPO_INPUT = 1,
+} interface_dpo_next_t;
+
+always_inline uword
+interface_dpo_inline (vlib_main_t * vm,
+                      vlib_node_runtime_t * node,
+                      vlib_frame_t * from_frame)
+{
+    u32 n_left_from, next_index, * from, * to_next;
+    u32 cpu_index = os_get_cpu_number();
+    vnet_interface_main_t *im;
+
+    im = &vnet_get_main ()->interface_main;
+    from = vlib_frame_vector_args (from_frame);
+    n_left_from = from_frame->n_vectors;
+
+    next_index = node->cached_next_index;
+
+    while (n_left_from > 0)
+    {
+        u32 n_left_to_next;
+
+        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+	/* dual loop: same per-packet steps as below, two packets at a time */
+	while (n_left_from >= 4 && n_left_to_next > 2)
+	{
+	    const interface_dpo_t *ido0, *ido1;
+	    u32 bi0, idoi0, bi1, idoi1;
+	    vlib_buffer_t *b0, *b1;
+
+	    bi0 = from[0];
+	    to_next[0] = bi0;
+	    bi1 = from[1];
+	    to_next[1] = bi1;
+	    from += 2;
+	    to_next += 2;
+	    n_left_from -= 2;
+	    n_left_to_next -= 2;
+
+	    b0 = vlib_get_buffer (vm, bi0);
+	    b1 = vlib_get_buffer (vm, bi1);
+
+	    idoi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+	    idoi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+	    ido0 = interface_dpo_get(idoi0);
+	    ido1 = interface_dpo_get(idoi1);
+
+	    vnet_buffer(b0)->sw_if_index[VLIB_RX] = ido0->ido_sw_if_index;
+	    vnet_buffer(b1)->sw_if_index[VLIB_RX] = ido1->ido_sw_if_index;
+
+            vlib_increment_combined_counter (im->combined_sw_if_counters
+                                             + VNET_INTERFACE_COUNTER_RX,
+                                             cpu_index,
+                                             ido0->ido_sw_if_index,
+                                             1,
+                                             vlib_buffer_length_in_chain (vm, b0));
+            vlib_increment_combined_counter (im->combined_sw_if_counters
+                                             + VNET_INTERFACE_COUNTER_RX,
+                                             cpu_index,
+                                             ido1->ido_sw_if_index,
+                                             1,
+                                             vlib_buffer_length_in_chain (vm, b1));
+
+	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+		interface_dpo_trace_t *tr0;
+
+                tr0 = vlib_add_trace (vm, node, b0, sizeof (*tr0));
+		tr0->sw_if_index = ido0->ido_sw_if_index;
+	    }
+	    if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+		interface_dpo_trace_t *tr1;
+
+                tr1 = vlib_add_trace (vm, node, b1, sizeof (*tr1));
+		tr1->sw_if_index = ido1->ido_sw_if_index;
+	    }
+
+	    vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+					    n_left_to_next, bi0, bi1,
+                                            INTERFACE_DPO_INPUT,
+                                            INTERFACE_DPO_INPUT);
+	}
+
+	while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	    const interface_dpo_t * ido0;
+	    vlib_buffer_t * b0;
+	    u32 bi0, idoi0;
+
+	    bi0 = from[0];
+	    to_next[0] = bi0;
+	    from += 1;
+	    to_next += 1;
+	    n_left_from -= 1;
+	    n_left_to_next -= 1;
+
+	    b0 = vlib_get_buffer (vm, bi0);
+
+	    idoi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+	    ido0 = interface_dpo_get(idoi0);
+
+            /* Swap the RX interface of the packet to the one the
+             * interface DPO represents */
+	    vnet_buffer(b0)->sw_if_index[VLIB_RX] = ido0->ido_sw_if_index;
+
+            /* Bump the interface's RX counters */
+            vlib_increment_combined_counter (im->combined_sw_if_counters
+                                             + VNET_INTERFACE_COUNTER_RX,
+                                             cpu_index,
+                                             ido0->ido_sw_if_index,
+                                             1,
+                                             vlib_buffer_length_in_chain (vm, b0));
+
+	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+		interface_dpo_trace_t *tr;
+
+                tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+		tr->sw_if_index = ido0->ido_sw_if_index;
+	    }
+
+	    vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+					    n_left_to_next, bi0,
+                                            INTERFACE_DPO_INPUT);
+	}
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+    return from_frame->n_vectors;
+}
+
+static u8 *
+format_interface_dpo_trace (u8 * s, va_list * args)
+{
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    interface_dpo_trace_t * t = va_arg (*args, interface_dpo_trace_t *);
+    uword indent = format_get_indent (s);
+    s = format (s, "%U sw_if_index:%d",
+                format_white_space, indent,
+                t->sw_if_index);
+    return s;
+}
+
+static uword
+interface_dpo_ip4 (vlib_main_t * vm,
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * from_frame)
+{
+    return (interface_dpo_inline(vm, node, from_frame));
+}
+
+static uword
+interface_dpo_ip6 (vlib_main_t * vm,
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * from_frame)
+{
+    return (interface_dpo_inline(vm, node, from_frame));
+}
+
+VLIB_REGISTER_NODE (interface_dpo_ip4_node) = {
+    .function = interface_dpo_ip4,
+    .name = "interface-dpo-ip4",
+    .vector_size = sizeof (u32),
+    .format_trace = format_interface_dpo_trace,
+
+    .n_next_nodes = 2,
+    .next_nodes = {
+        [INTERFACE_DPO_DROP] = "ip4-drop",
+        [INTERFACE_DPO_INPUT] = "ip4-input",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_ip4_node,
+                              interface_dpo_ip4)
+
+VLIB_REGISTER_NODE (interface_dpo_ip6_node) = {
+    .function = interface_dpo_ip6,
+    .name = "interface-dpo-ip6",
+    .vector_size = sizeof (u32),
+    .format_trace = format_interface_dpo_trace,
+
+    .n_next_nodes = 2,
+    .next_nodes = {
+        [INTERFACE_DPO_DROP] = "ip6-drop",
+        [INTERFACE_DPO_INPUT] = "ip6-input",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_ip6_node,
+                              interface_dpo_ip6)
+
diff --git a/src/vnet/dpo/interface_dpo.h b/src/vnet/dpo/interface_dpo.h
new file mode 100644
index 00000000..1538dfbb
--- /dev/null
+++ b/src/vnet/dpo/interface_dpo.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object that gives a packet a new ingress (RX) interface
+ */
+
+#ifndef __INTERFACE_DPO_H__
+#define __INTERFACE_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+typedef struct interface_dpo_t_
+{
+    /**
+     * The Software interface index that the packets will be given
+     * as the ingress/rx interface
+     */
+    u32 ido_sw_if_index;
+
+    /**
+     * next VLIB node. A '<proto>-input' node.
+     * NOTE(review): not referenced in interface_dpo.c - confirm it is needed
+     */
+    u32 ido_next_node;
+    /**
+     * DPO protocol that the packets will have as they 'ingress'
+     * on this interface
+     */
+    dpo_proto_t ido_proto;
+
+    /**
+     * number of locks.
+     */
+    u16 ido_locks;
+} interface_dpo_t;
+
+extern void interface_dpo_add_or_lock (dpo_proto_t proto,
+                                       u32 sw_if_index,
+                                       dpo_id_t *dpo);
+
+extern void interface_dpo_module_init(void);
+
+/**
+ * @brief pool of all interface DPOs. NOTE(review): a definition (no 'extern') in a header
+ */
+interface_dpo_t *interface_dpo_pool;
+
+static inline interface_dpo_t *
+interface_dpo_get (index_t index)
+{
+    return (pool_elt_at_index(interface_dpo_pool, index));
+}
+
+#endif
diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
index 97ad0a44..e5b00a79 100644
--- a/src/vnet/dpo/lookup_dpo.c
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -21,8 +21,12 @@
 #include <vnet/fib/ip4_fib.h>
 #include <vnet/fib/ip6_fib.h>
 #include <vnet/fib/mpls_fib.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/mfib/ip4_mfib.h>
+#include <vnet/mfib/ip6_mfib.h>
 
 static const char *const lookup_input_names[] = LOOKUP_INPUTS;
+static const char *const lookup_cast_names[] = LOOKUP_CASTS;
 
 /**
  * @brief Enumeration of the lookup subtypes
@@ -31,6 +35,7 @@ typedef enum lookup_sub_type_t_
 {
     LOOKUP_SUB_TYPE_SRC,
     LOOKUP_SUB_TYPE_DST,
+    LOOKUP_SUB_TYPE_DST_MCAST,
     LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE,
 } lookup_sub_type_t;
 #define LOOKUP_SUB_TYPE_NUM (LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE+1)
@@ -67,6 +72,7 @@ lookup_dpo_get_index (lookup_dpo_t *lkd)
 static void
 lookup_dpo_add_or_lock_i (fib_node_index_t fib_index,
                           dpo_proto_t proto,
+                          lookup_cast_t cast,
                           lookup_input_t input,
                           lookup_table_t table_config,
                           dpo_id_t *dpo)
@@ -79,6 +85,7 @@ lookup_dpo_add_or_lock_i (fib_node_index_t fib_index,
     lkd->lkd_proto = proto;
     lkd->lkd_input = input;
     lkd->lkd_table = table_config;
+    lkd->lkd_cast  = cast;
 
     /*
      * use the input type to select the lookup sub-type
@@ -100,6 +107,10 @@ lookup_dpo_add_or_lock_i (fib_node_index_t fib_index,
             type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST];
             break;
         }
+        if (LOOKUP_MULTICAST == cast)
+        {
+            type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_MCAST];
+        }
     }
 
     if (0 == type)
@@ -115,20 +126,29 @@ lookup_dpo_add_or_lock_i (fib_node_index_t fib_index,
 void
 lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index,
                                     dpo_proto_t proto,
+                                    lookup_cast_t cast,
                                     lookup_input_t input,
                                     lookup_table_t table_config,
                                     dpo_id_t *dpo)
 {
     if (LOOKUP_TABLE_FROM_CONFIG == table_config)
     {
-	fib_table_lock(fib_index, dpo_proto_to_fib(proto));
+        if (LOOKUP_UNICAST == cast)
+        {
+            fib_table_lock(fib_index, dpo_proto_to_fib(proto));
+        }
+        else
+        {
+            mfib_table_lock(fib_index, dpo_proto_to_fib(proto));
+        }
     }
-    lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo);
+    lookup_dpo_add_or_lock_i(fib_index, proto, cast, input, table_config, dpo);
 }
 
 void
 lookup_dpo_add_or_lock_w_table_id (u32 table_id,
                                    dpo_proto_t proto,
+                                   lookup_cast_t cast,
                                    lookup_input_t input,
                                    lookup_table_t table_config,
                                    dpo_id_t *dpo)
@@ -137,13 +157,22 @@ lookup_dpo_add_or_lock_w_table_id (u32 table_id,
 
     if (LOOKUP_TABLE_FROM_CONFIG == table_config)
     {
-	fib_index =
-	    fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto),
-					      table_id);
+        if (LOOKUP_UNICAST == cast)
+        {
+            fib_index =
+                fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto),
+                                                  table_id);
+        }
+        else
+        {
+            fib_index =
+                mfib_table_find_or_create_and_lock(dpo_proto_to_fib(proto),
+                                                   table_id);
+        }
     }
 
     ASSERT(FIB_NODE_INDEX_INVALID != fib_index);
-    lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo);    
+    lookup_dpo_add_or_lock_i(fib_index, proto, cast, input, table_config, dpo);
 }
 
 u8*
@@ -156,16 +185,29 @@ format_lookup_dpo (u8 *s, va_list *args)
 
     if (LOOKUP_TABLE_FROM_INPUT_INTERFACE == lkd->lkd_table)
     {
-        s = format(s, "%s lookup in interface's %U table",
+        s = format(s, "%s,%s lookup in interface's %U table",
                    lookup_input_names[lkd->lkd_input],
+                   lookup_cast_names[lkd->lkd_cast],
                    format_dpo_proto, lkd->lkd_proto);
     }
     else
     {
-	s = format(s, "%s lookup in %U",
-		   lookup_input_names[lkd->lkd_input],
-		   format_fib_table_name, lkd->lkd_fib_index,
-		   dpo_proto_to_fib(lkd->lkd_proto));
+        if (LOOKUP_UNICAST == lkd->lkd_cast)
+        {
+            s = format(s, "%s,%s lookup in %U",
+                       lookup_input_names[lkd->lkd_input],
+                       lookup_cast_names[lkd->lkd_cast],
+                       format_fib_table_name, lkd->lkd_fib_index,
+                       dpo_proto_to_fib(lkd->lkd_proto));
+        }
+        else
+        {
+            s = format(s, "%s,%s lookup in %U",
+                       lookup_input_names[lkd->lkd_input],
+                       lookup_cast_names[lkd->lkd_cast],
+                       format_mfib_table_name, lkd->lkd_fib_index,
+                       dpo_proto_to_fib(lkd->lkd_proto));
+        }
     }
     return (s);
 }
@@ -193,8 +235,16 @@ lookup_dpo_unlock (dpo_id_t *dpo)
     {
         if (LOOKUP_TABLE_FROM_CONFIG == lkd->lkd_table)
         {
-	    fib_table_unlock(lkd->lkd_fib_index,
-			     dpo_proto_to_fib(lkd->lkd_proto));
+            if (LOOKUP_UNICAST == lkd->lkd_cast)
+            {
+                fib_table_unlock(lkd->lkd_fib_index,
+                                 dpo_proto_to_fib(lkd->lkd_proto));
+            }
+            else
+            {
+                mfib_table_unlock(lkd->lkd_fib_index,
+                                  dpo_proto_to_fib(lkd->lkd_proto));
+            }
         }
         pool_put(lookup_dpo_pool, lkd);
     }
@@ -1069,6 +1119,123 @@ VLIB_REGISTER_NODE (lookup_mpls_dst_itf_node) = {
 };
 VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_itf_node, lookup_mpls_dst_itf)
 
+typedef enum lookup_ip_dst_mcast_next_t_ {
+    LOOKUP_IP_DST_MCAST_NEXT_RPF,
+    LOOKUP_IP_DST_MCAST_N_NEXT,
+} mfib_forward_lookup_next_t;
+
+always_inline uword
+lookup_dpo_ip_dst_mcast_inline (vlib_main_t * vm,
+                                vlib_node_runtime_t * node,
+                                vlib_frame_t * from_frame,
+                                int is_v4)
+{
+    u32 n_left_from, next_index, * from, * to_next;
+
+    from = vlib_frame_vector_args (from_frame);
+    n_left_from = from_frame->n_vectors;
+
+    next_index = LOOKUP_IP_DST_MCAST_NEXT_RPF;
+
+    while (n_left_from > 0)
+    {
+        u32 n_left_to_next;
+
+        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+        /* while (n_left_from >= 4 && n_left_to_next >= 2) */
+        /*   } */
+
+        while (n_left_from > 0 && n_left_to_next > 0)
+        {
+            u32 bi0, lkdi0, fib_index0,  next0;
+            const lookup_dpo_t * lkd0;
+            fib_node_index_t mfei0;
+            vlib_buffer_t * b0;
+
+            bi0 = from[0];
+            to_next[0] = bi0;
+            from += 1;
+            to_next += 1;
+            n_left_from -= 1;
+            n_left_to_next -= 1;
+
+            b0 = vlib_get_buffer (vm, bi0);
+
+            /* dst lookup was done by mpls lookup */
+            lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            lkd0 = lookup_dpo_get(lkdi0);
+            fib_index0 = lkd0->lkd_fib_index;
+            next0 = LOOKUP_IP_DST_MCAST_NEXT_RPF;
+
+            if (is_v4)
+            {
+                ip4_header_t * ip0;
+
+                ip0 = vlib_buffer_get_current (b0);
+                mfei0 = ip4_mfib_table_lookup(ip4_mfib_get(fib_index0),
+                                              &ip0->src_address,
+                                              &ip0->dst_address,
+                                              64);
+                if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+                {
+                    lookup_trace_t *tr = vlib_add_trace (vm, node,
+                                                         b0, sizeof (*tr));
+                    tr->fib_index = fib_index0;
+                    tr->lbi = mfei0;
+                    tr->addr.ip4 = ip0->dst_address;
+                }
+            }
+            else
+            {
+                ip6_header_t * ip0;
+
+                ip0 = vlib_buffer_get_current (b0);
+                mfei0 = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index0),
+                                               &ip0->src_address,
+                                               &ip0->dst_address);
+                if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+                {
+                    lookup_trace_t *tr = vlib_add_trace (vm, node,
+                                                         b0, sizeof (*tr));
+                    tr->fib_index = fib_index0;
+                    tr->lbi = mfei0;
+                    tr->addr.ip6 = ip0->dst_address;
+                }
+            }
+
+            vnet_buffer (b0)->ip.adj_index[VLIB_TX] = mfei0;
+
+           vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+                                            n_left_to_next, bi0, next0);
+        }
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+    return from_frame->n_vectors;
+}
+
+always_inline uword
+lookup_ip4_dst_mcast (vlib_main_t * vm,
+                      vlib_node_runtime_t * node,
+                      vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_ip_dst_mcast_inline(vm, node, from_frame, 1));
+}
+
+VLIB_REGISTER_NODE (lookup_ip4_dst_mcast_node) = {
+    .function = lookup_ip4_dst_mcast,
+    .name = "lookup-ip4-dst-mcast",
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_lookup_trace,
+    .n_next_nodes = LOOKUP_IP_DST_MCAST_N_NEXT,
+    .next_nodes = {
+        [LOOKUP_IP_DST_MCAST_NEXT_RPF] = "ip4-mfib-forward-rpf",
+    },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_mcast_node,
+                              lookup_ip4_dst_mcast)
+
 static void
 lookup_dpo_mem_show (void)
 {
@@ -1129,6 +1296,22 @@ const static char* const * const lookup_dst_nodes[DPO_PROTO_NUM] =
     [DPO_PROTO_MPLS] = lookup_dst_mpls_nodes,
 };
 
+const static char* const lookup_dst_mcast_ip4_nodes[] =
+{
+    "lookup-ip4-dst-mcast",
+    NULL,
+};
+const static char* const lookup_dst_mcast_ip6_nodes[] =
+{
+    "lookup-ip6-dst-mcast",
+    NULL,
+};
+const static char* const * const lookup_dst_mcast_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = lookup_dst_mcast_ip4_nodes,
+    [DPO_PROTO_IP6]  = lookup_dst_mcast_ip6_nodes,
+};
+
 const static char* const lookup_dst_from_interface_ip4_nodes[] =
 {
     "lookup-ip4-dst-itf",
@@ -1168,6 +1351,8 @@ lookup_dpo_module_init (void)
         dpo_register_new_type(&lkd_vft, lookup_src_nodes);
     lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST] =
         dpo_register_new_type(&lkd_vft, lookup_dst_nodes);
+    lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_MCAST] =
+        dpo_register_new_type(&lkd_vft, lookup_dst_mcast_nodes);
     lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE] =
         dpo_register_new_type(&lkd_vft, lookup_dst_from_interface_nodes);
 }
diff --git a/src/vnet/dpo/lookup_dpo.h b/src/vnet/dpo/lookup_dpo.h
index ff283388..7dfd0385 100644
--- a/src/vnet/dpo/lookup_dpo.h
+++ b/src/vnet/dpo/lookup_dpo.h
@@ -46,6 +46,19 @@ typedef enum lookup_table_t_ {
     [LOOKUP_INPUT_DST_ADDR] = "table-configured",         \
 }
 
+/**
+ * Switch to select either a unicast or a multicast FIB lookup
+ */
+typedef enum lookup_cast_t_ {
+    LOOKUP_UNICAST,
+    LOOKUP_MULTICAST,
+} __attribute__ ((packed)) lookup_cast_t;
+
+#define LOOKUP_CASTS {                 \
+    [LOOKUP_UNICAST]   = "unicast",    \
+    [LOOKUP_MULTICAST] = "multicast",  \
+}
+
 /**
  * A representation of an MPLS label for imposition in the data-path
  */
@@ -73,6 +86,11 @@ typedef struct lookup_dpo_t
      */
     lookup_table_t lkd_table;
 
+    /**
+     * Unicast or multicast FIB lookup
+     */
+    lookup_cast_t lkd_cast;
+
     /**
      * Number of locks
      */
@@ -81,11 +99,13 @@ typedef struct lookup_dpo_t
 
 extern void lookup_dpo_add_or_lock_w_fib_index(fib_node_index_t fib_index,
                                                dpo_proto_t proto,
+                                               lookup_cast_t cast,
                                                lookup_input_t input,
                                                lookup_table_t table,
                                                dpo_id_t *dpo);
 extern void lookup_dpo_add_or_lock_w_table_id(u32 table_id,
                                               dpo_proto_t proto,
+                                              lookup_cast_t cast,
                                               lookup_input_t input,
                                               lookup_table_t table,
                                               dpo_id_t *dpo);
diff --git a/src/vnet/dpo/mpls_disposition.c b/src/vnet/dpo/mpls_disposition.c
new file mode 100644
index 00000000..5dc33fcf
--- /dev/null
+++ b/src/vnet/dpo/mpls_disposition.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/mpls_disposition.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * pool of all MPLS disposition DPOs
+ */
+mpls_disp_dpo_t *mpls_disp_dpo_pool;
+
+static mpls_disp_dpo_t *
+mpls_disp_dpo_alloc (void)
+{
+    mpls_disp_dpo_t *mdd;
+
+    pool_get_aligned(mpls_disp_dpo_pool, mdd, CLIB_CACHE_LINE_BYTES);
+    memset(mdd, 0, sizeof(*mdd));
+
+    dpo_reset(&mdd->mdd_dpo);
+
+    return (mdd);
+}
+
+static index_t
+mpls_disp_dpo_get_index (mpls_disp_dpo_t *mdd)
+{
+    return (mdd - mpls_disp_dpo_pool);
+}
+
+index_t
+mpls_disp_dpo_create (dpo_proto_t payload_proto,
+                      fib_rpf_id_t rpf_id,
+                      const dpo_id_t *dpo)
+{
+    mpls_disp_dpo_t *mdd;
+
+    mdd = mpls_disp_dpo_alloc();
+
+    mdd->mdd_payload_proto = payload_proto;
+    mdd->mdd_rpf_id = rpf_id;
+
+    dpo_stack(DPO_MPLS_DISPOSITION,
+              mdd->mdd_payload_proto,
+              &mdd->mdd_dpo,
+              dpo);
+
+    return (mpls_disp_dpo_get_index(mdd));
+}
+
+u8*
+format_mpls_disp_dpo (u8 *s, va_list *args)
+{
+    index_t index = va_arg (*args, index_t);
+    u32 indent = va_arg (*args, u32);
+    mpls_disp_dpo_t *mdd;
+
+    mdd = mpls_disp_dpo_get(index);
+
+    s = format(s, "mpls-disposition:[%d]:[%U]",
+               index,
+               format_dpo_proto, mdd->mdd_payload_proto);
+
+    s = format(s, "\n%U", format_white_space, indent);
+    s = format(s, "%U", format_dpo_id, &mdd->mdd_dpo, indent+2);
+
+    return (s);
+}
+
+static void
+mpls_disp_dpo_lock (dpo_id_t *dpo)
+{
+    mpls_disp_dpo_t *mdd;
+
+    mdd = mpls_disp_dpo_get(dpo->dpoi_index);
+
+    mdd->mdd_locks++;
+}
+
+static void
+mpls_disp_dpo_unlock (dpo_id_t *dpo)
+{
+    mpls_disp_dpo_t *mdd;
+
+    mdd = mpls_disp_dpo_get(dpo->dpoi_index);
+
+    mdd->mdd_locks--;
+
+    if (0 == mdd->mdd_locks)
+    {
+	dpo_reset(&mdd->mdd_dpo);
+	pool_put(mpls_disp_dpo_pool, mdd);
+    }
+}
+
+/**
+ * @brief A struct to hold tracing information for the MPLS label disposition
+ * node.
+ */
+typedef struct mpls_label_disposition_trace_t_
+{
+    index_t mdd;
+} mpls_label_disposition_trace_t;
+
+always_inline uword
+mpls_label_disposition_inline (vlib_main_t * vm,
+                              vlib_node_runtime_t * node,
+                              vlib_frame_t * from_frame,
+                              u8 payload_is_ip4,
+                              u8 payload_is_ip6)
+{
+    u32 n_left_from, next_index, * from, * to_next;
+
+    from = vlib_frame_vector_args (from_frame);
+    n_left_from = from_frame->n_vectors;
+
+    next_index = node->cached_next_index;
+
+    while (n_left_from > 0)
+    {
+        u32 n_left_to_next;
+
+        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+        while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+            mpls_disp_dpo_t *mdd0, *mdd1;
+            u32 bi0, mddi0, bi1, mddi1;
+            vlib_buffer_t * b0, *b1;
+            u32 next0, next1;
+
+            bi0 = to_next[0] = from[0];
+            bi1 = to_next[1] = from[1];
+
+            /* Prefetch next iteration. */
+            {
+                vlib_buffer_t * p2, * p3;
+
+                p2 = vlib_get_buffer (vm, from[2]);
+                p3 = vlib_get_buffer (vm, from[3]);
+
+                vlib_prefetch_buffer_header (p2, STORE);
+                vlib_prefetch_buffer_header (p3, STORE);
+
+                CLIB_PREFETCH (p2->data, sizeof (ip6_header_t), STORE);
+                CLIB_PREFETCH (p3->data, sizeof (ip6_header_t), STORE);
+            }
+
+            from += 2;
+            to_next += 2;
+            n_left_from -= 2;
+            n_left_to_next -= 2;
+
+            b0 = vlib_get_buffer (vm, bi0);
+            b1 = vlib_get_buffer (vm, bi1);
+
+            /* dst lookup was done by ip4 lookup */
+            mddi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            mddi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+            mdd0 = mpls_disp_dpo_get(mddi0);
+            mdd1 = mpls_disp_dpo_get(mddi1);
+
+            if (payload_is_ip4)
+            {
+                /*
+                 * decrement the TTL on ingress to the LSP
+                 */
+            }
+            else if (payload_is_ip6)
+            {
+                /*
+                 * decrement the TTL on ingress to the LSP
+                 */
+            }
+ 
+            next0 = mdd0->mdd_dpo.dpoi_next_node;
+            next1 = mdd1->mdd_dpo.dpoi_next_node;
+            vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mdd0->mdd_dpo.dpoi_index;
+            vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mdd1->mdd_dpo.dpoi_index;
+            vnet_buffer(b0)->ip.rpf_id = mdd0->mdd_rpf_id;
+            vnet_buffer(b1)->ip.rpf_id = mdd1->mdd_rpf_id;
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_label_disposition_trace_t *tr =
+                    vlib_add_trace (vm, node, b0, sizeof (*tr));
+
+                tr->mdd = mddi0;
+            }
+            if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_label_disposition_trace_t *tr =
+                    vlib_add_trace (vm, node, b1, sizeof (*tr));
+                tr->mdd = mddi1;
+            }
+
+            vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+                                            n_left_to_next,
+                                            bi0, bi1, next0, next1);
+        }
+
+        while (n_left_from > 0 && n_left_to_next > 0)
+        {
+            mpls_disp_dpo_t *mdd0;
+            vlib_buffer_t * b0;
+            u32 bi0, mddi0;
+            u32 next0;
+
+            bi0 = from[0];
+            to_next[0] = bi0;
+            from += 1;
+            to_next += 1;
+            n_left_from -= 1;
+            n_left_to_next -= 1;
+
+            b0 = vlib_get_buffer (vm, bi0);
+
+            /* dst lookup was done by ip4 lookup */
+            mddi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            mdd0 = mpls_disp_dpo_get(mddi0);
+
+            if (payload_is_ip4)
+            {
+                /*
+                 * decrement the TTL on ingress to the LSP
+                 */
+            }
+            else if (payload_is_ip6)
+            {
+                /*
+                 * decrement the TTL on ingress to the LSP
+                 */
+            }
+            else
+            {
+            }
+
+            next0 = mdd0->mdd_dpo.dpoi_next_node;
+            vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mdd0->mdd_dpo.dpoi_index;
+            vnet_buffer(b0)->ip.rpf_id = mdd0->mdd_rpf_id;
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_label_disposition_trace_t *tr =
+                    vlib_add_trace (vm, node, b0, sizeof (*tr));
+                tr->mdd = mddi0;
+            }
+
+            vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+                                            n_left_to_next, bi0, next0);
+        }
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+    return from_frame->n_vectors;
+}
+
+static u8 *
+format_mpls_label_disposition_trace (u8 * s, va_list * args)
+{
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    CLIB_UNUSED (mpls_label_disposition_trace_t * t);
+
+    t = va_arg (*args, mpls_label_disposition_trace_t *);
+
+    s = format(s, "disp:%d", t->mdd);
+    return (s);
+}
+
+static uword
+ip4_mpls_label_disposition (vlib_main_t * vm,
+                           vlib_node_runtime_t * node,
+                           vlib_frame_t * frame)
+{
+    return (mpls_label_disposition_inline(vm, node, frame, 1, 0));
+}
+
+VLIB_REGISTER_NODE (ip4_mpls_label_disposition_node) = {
+    .function = ip4_mpls_label_disposition,
+    .name = "ip4-mpls-label-disposition",
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_mpls_label_disposition_trace,
+    .n_next_nodes = 1,
+    .next_nodes = {
+        [0] = "ip4-drop",
+    }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_mpls_label_disposition_node,
+                              ip4_mpls_label_disposition)
+
+static uword
+ip6_mpls_label_disposition (vlib_main_t * vm,
+                           vlib_node_runtime_t * node,
+                           vlib_frame_t * frame)
+{
+    return (mpls_label_disposition_inline(vm, node, frame, 0, 1));
+}
+
+VLIB_REGISTER_NODE (ip6_mpls_label_disposition_node) = {
+    .function = ip6_mpls_label_disposition,
+    .name = "ip6-mpls-label-disposition",
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_mpls_label_disposition_trace,
+    .n_next_nodes = 1,
+    .next_nodes = {
+        [0] = "ip6-drop",
+    }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_disposition_node,
+                              ip6_mpls_label_disposition)
+
+static void
+mpls_disp_dpo_mem_show (void)
+{
+    fib_show_memory_usage("MPLS label",
+			  pool_elts(mpls_disp_dpo_pool),
+			  pool_len(mpls_disp_dpo_pool),
+			  sizeof(mpls_disp_dpo_t));
+}
+
+const static dpo_vft_t mdd_vft = {
+    .dv_lock = mpls_disp_dpo_lock,
+    .dv_unlock = mpls_disp_dpo_unlock,
+    .dv_format = format_mpls_disp_dpo,
+    .dv_mem_show = mpls_disp_dpo_mem_show,
+};
+
+const static char* const mpls_label_disp_ip4_nodes[] =
+{
+    "ip4-mpls-label-disposition",
+    NULL,
+};
+const static char* const mpls_label_disp_ip6_nodes[] =
+{
+    "ip6-mpls-label-disposition",
+    NULL,
+};
+const static char* const * const mpls_label_disp_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = mpls_label_disp_ip4_nodes,
+    [DPO_PROTO_IP6]  = mpls_label_disp_ip6_nodes,
+};
+
+
+void
+mpls_disp_dpo_module_init (void)
+{
+    dpo_register(DPO_MPLS_DISPOSITION, &mdd_vft, mpls_label_disp_nodes);
+}
diff --git a/src/vnet/dpo/mpls_disposition.h b/src/vnet/dpo/mpls_disposition.h
new file mode 100644
index 00000000..9c015083
--- /dev/null
+++ b/src/vnet/dpo/mpls_disposition.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_DISP_DPO_H__
+#define __MPLS_DISP_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/mfib/mfib_types.h>
+
+/**
+ * A representation of an MPLS disposition object in the data-path
+ */
+typedef struct mpls_disp_dpo_t
+{
+    /**
+     * Next DPO in the graph
+     */
+    dpo_id_t mdd_dpo;
+
+    /**
+     * The protocol of the payload/packets that are being encapped
+     */
+    dpo_proto_t mdd_payload_proto;
+
+    /**
+     * RPF-ID (if this is an mcast disposition)
+     */
+    fib_rpf_id_t mdd_rpf_id;
+
+    /**
+     * Number of locks/users of the disposition object
+     */
+    u16 mdd_locks;
+} mpls_disp_dpo_t;
+
+/**
+ * @brief Assert that the MPLS disposition object is no larger than one cache
+ * line in size. Should this get any bigger then the layout of the object
+ * will need to be reconsidered.
+ */
+_Static_assert((sizeof(mpls_disp_dpo_t) <= CLIB_CACHE_LINE_BYTES),
+	       "MPLS Disposition DPO is larger than one cache line.");
+
+/**
+ * @brief Create an MPLS disposition object
+ *
+ * @param payload_proto The protocol of the payload packets.
+ * @param rpf_id The RPF-ID stored in the object (for mcast disposition).
+ * @param dpo The parent of the created MPLS disposition object
+ */
+extern index_t mpls_disp_dpo_create(dpo_proto_t payload_proto,
+                                    fib_rpf_id_t rpf_id,
+                                    const dpo_id_t *dpo);
+
+extern u8* format_mpls_disp_dpo(u8 *s, va_list *args);
+
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern mpls_disp_dpo_t *mpls_disp_dpo_pool;
+
+static inline mpls_disp_dpo_t *
+mpls_disp_dpo_get (index_t index)
+{
+    return (pool_elt_at_index(mpls_disp_dpo_pool, index));
+}
+
+extern void mpls_disp_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
index be9b2850..4d84b900 100644
--- a/src/vnet/dpo/mpls_label_dpo.c
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -562,7 +562,7 @@ VLIB_REGISTER_NODE (mpls_label_imposition_node) = {
     .format_trace = format_mpls_label_imposition_trace,
     .n_next_nodes = 1,
     .next_nodes = {
-        [0] = "error-drop",
+        [0] = "mpls-drop",
     }
 };
 VLIB_NODE_FUNCTION_MULTIARCH (mpls_label_imposition_node,
@@ -584,7 +584,7 @@ VLIB_REGISTER_NODE (ip4_mpls_label_imposition_node) = {
     .format_trace = format_mpls_label_imposition_trace,
     .n_next_nodes = 1,
     .next_nodes = {
-        [0] = "error-drop",
+        [0] = "ip4-drop",
     }
 };
 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mpls_label_imposition_node,
@@ -606,7 +606,7 @@ VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = {
     .format_trace = format_mpls_label_imposition_trace,
     .n_next_nodes = 1,
     .next_nodes = {
-        [0] = "error-drop",
+        [0] = "ip6-drop",
     }
 };
 VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_imposition_node,
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index e25ceae9..9fdb9a05 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -17,6 +17,7 @@
 #include <vnet/dpo/replicate_dpo.h>
 #include <vnet/dpo/drop_dpo.h>
 #include <vnet/adj/adj.h>
+#include <vnet/mpls/mpls_types.h>
 
 #undef REP_DEBUG
 
@@ -106,6 +107,7 @@ replicate_format (index_t repi,
     dpo_id_t *buckets;
     u32 i;
 
+    repi &= ~MPLS_IS_REPLICATE;
     rep = replicate_get(repi);
     vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to);
     buckets = replicate_get_buckets(rep);
@@ -187,6 +189,7 @@ replicate_set_bucket (index_t repi,
     replicate_t *rep;
     dpo_id_t *buckets;
 
+    repi &= ~MPLS_IS_REPLICATE;
     rep = replicate_get(repi);
     buckets = replicate_get_buckets(rep);
 
@@ -199,11 +202,13 @@ int
 replicate_is_drop (const dpo_id_t *dpo)
 {
     replicate_t *rep;
+    index_t repi;
 
     if (DPO_REPLICATE != dpo->dpoi_type)
         return (0);
 
-    rep = replicate_get(dpo->dpoi_index);
+    repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE;
+    rep = replicate_get(repi);
 
     if (1 == rep->rep_n_buckets)
     {
@@ -218,6 +223,7 @@ replicate_get_bucket (index_t repi,
 {
     replicate_t *rep;
 
+    repi &= ~MPLS_IS_REPLICATE;
     rep = replicate_get(repi);
 
     return (replicate_get_bucket_i(rep, bucket));
@@ -288,9 +294,11 @@ replicate_multipath_update (const dpo_id_t *dpo,
     dpo_id_t *tmp_dpo;
     u32 ii, n_buckets;
     replicate_t *rep;
+    index_t repi;
 
     ASSERT(DPO_REPLICATE == dpo->dpoi_type);
-    rep = replicate_get(dpo->dpoi_index);
+    repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE;
+    rep = replicate_get(repi);
     nhs = replicate_multipath_next_hop_fixup(next_hops,
                                              rep->rep_proto);
     n_buckets = vec_len(nhs);
@@ -718,7 +726,7 @@ format_replicate_trace (u8 * s, va_list * args)
 
   s = format (s, "replicate: %d via %U",
               t->rep_index,
-              format_dpo_id, &t->dpo);
+              format_dpo_id, &t->dpo, 0);
   return s;
 }
 
@@ -731,7 +739,7 @@ ip4_replicate (vlib_main_t * vm,
 }
 
 /**
- * @brief
+ * @brief IP4 replication node
  */
 VLIB_REGISTER_NODE (ip4_replicate_node) = {
   .function = ip4_replicate,
@@ -744,7 +752,7 @@ VLIB_REGISTER_NODE (ip4_replicate_node) = {
   .format_trace = format_replicate_trace,
   .n_next_nodes = 1,
   .next_nodes = {
-      [0] = "error-drop",
+      [0] = "ip4-drop",
   },
 };
 
@@ -757,7 +765,7 @@ ip6_replicate (vlib_main_t * vm,
 }
 
 /**
- * @brief
+ * @brief IPv6 replication node
  */
 VLIB_REGISTER_NODE (ip6_replicate_node) = {
   .function = ip6_replicate,
@@ -770,7 +778,33 @@ VLIB_REGISTER_NODE (ip6_replicate_node) = {
   .format_trace = format_replicate_trace,
   .n_next_nodes = 1,
   .next_nodes = {
-      [0] = "error-drop",
+      [0] = "ip6-drop",
+  },
+};
+
+static uword
+mpls_replicate (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * frame)
+{
+    return (replicate_inline (vm, node, frame));
+}
+
+/**
+ * @brief MPLS replication node
+ */
+VLIB_REGISTER_NODE (mpls_replicate_node) = {
+  .function = mpls_replicate,
+  .name = "mpls-replicate",
+  .vector_size = sizeof (u32),
+
+  .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
+  .error_strings = replicate_dpo_error_strings,
+
+  .format_trace = format_replicate_trace,
+  .n_next_nodes = 1,
+  .next_nodes = {
+      [0] = "mpls-drop",
   },
 };
 
diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h
index 77273015..7383184a 100644
--- a/src/vnet/dpo/replicate_dpo.h
+++ b/src/vnet/dpo/replicate_dpo.h
@@ -25,6 +25,7 @@
 #include <vnet/dpo/dpo.h>
 #include <vnet/dpo/load_balance.h>
 #include <vnet/fib/fib_types.h>
+#include <vnet/mpls/mpls_types.h>
 
 /**
  * replicate main
@@ -119,6 +120,7 @@ extern replicate_t *replicate_pool;
 static inline replicate_t*
 replicate_get (index_t repi)
 {
+    repi &= ~MPLS_IS_REPLICATE;
     return (pool_elt_at_index(replicate_pool, repi));
 }
 
diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c
index c74a097e..dd509193 100644
--- a/src/vnet/ethernet/arp.c
+++ b/src/vnet/ethernet/arp.c
@@ -507,6 +507,7 @@ arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
     case IP_LOOKUP_NEXT_PUNT:
     case IP_LOOKUP_NEXT_LOCAL:
     case IP_LOOKUP_NEXT_REWRITE:
+    case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
     case IP_LOOKUP_NEXT_MIDCHAIN:
     case IP_LOOKUP_NEXT_ICMP_ERROR:
     case IP_LOOKUP_N_NEXT:
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index 335e3f9f..9ac30bc6 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -115,7 +115,7 @@ ethernet_build_rewrite (vnet_main_t * vnm,
 #define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break
       _(IP4, IP4);
       _(IP6, IP6);
-      _(MPLS, MPLS_UNICAST);
+      _(MPLS, MPLS);
       _(ARP, ARP);
 #undef _
     default:
diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c
index f7787ed2..5305012f 100755
--- a/src/vnet/ethernet/node.c
+++ b/src/vnet/ethernet/node.c
@@ -249,7 +249,7 @@ determine_next_node (ethernet_main_t * em,
     {
       *next0 = em->l3_next.input_next_ip6;
     }
-  else if (type0 == ETHERNET_TYPE_MPLS_UNICAST)
+  else if (type0 == ETHERNET_TYPE_MPLS)
     {
       *next0 = em->l3_next.input_next_mpls;
 
@@ -1252,7 +1252,7 @@ next_by_ethertype_register (next_by_ethertype_t * l3_next,
 	{
 	  l3_next->input_next_ip6 = next_index;
 	}
-      else if (ethertype == ETHERNET_TYPE_MPLS_UNICAST)
+      else if (ethertype == ETHERNET_TYPE_MPLS)
 	{
 	  l3_next->input_next_mpls = next_index;
 	}
diff --git a/src/vnet/ethernet/types.def b/src/vnet/ethernet/types.def
index 643f3152..7dab8ee1 100644
--- a/src/vnet/ethernet/types.def
+++ b/src/vnet/ethernet/types.def
@@ -85,8 +85,8 @@ ethernet_type (0x876D, SECURE_DATA)
 ethernet_type (0x8808, MAC_CONTROL)
 ethernet_type (0x8809, SLOW_PROTOCOLS)
 ethernet_type (0x880B, PPP)
-ethernet_type (0x8847, MPLS_UNICAST)
-ethernet_type (0x8848, MPLS_MULTICAST)
+ethernet_type (0x8847, MPLS)
+ethernet_type (0x8848, MPLS_UPSTREAM_ASSIGNED)
 ethernet_type (0x8863, PPPOE_DISCOVERY)
 ethernet_type (0x8864, PPPOE_SESSION)
 ethernet_type (0x886D, INTEL_ANS)
diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h
index f8275317..10d0cb58 100644
--- a/src/vnet/fib/fib_api.h
+++ b/src/vnet/fib/fib_api.h
@@ -24,6 +24,7 @@ add_del_route_check (fib_protocol_t table_proto,
 		     fib_protocol_t next_hop_table_proto,
 		     u32 next_hop_table_id,
 		     u8 create_missing_tables,
+                     u8 is_rpf_id,
 		     u32 * fib_index, u32 * next_hop_fib_index);
 
 int
@@ -33,10 +34,13 @@ add_del_route_t_handler (u8 is_multipath,
 			 u8 is_unreach,
 			 u8 is_prohibit,
 			 u8 is_local,
+			 u8 is_multicast,
 			 u8 is_classify,
 			 u32 classify_table_index,
 			 u8 is_resolve_host,
 			 u8 is_resolve_attached,
+			 u8 is_interface_rx,
+                         u8 is_rpf_id,
 			 u32 fib_index,
 			 const fib_prefix_t * prefix,
 			 u8 next_hop_proto_is_ip4,
diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c
index dac1fce9..6f811aa1 100644
--- a/src/vnet/fib/fib_entry.c
+++ b/src/vnet/fib/fib_entry.c
@@ -75,13 +75,7 @@ fib_entry_get_default_chain_type (const fib_entry_t *fib_entry)
 	return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
     case FIB_PROTOCOL_MPLS:
 	if (MPLS_EOS == fib_entry->fe_prefix.fp_eos)
-	    /*
-	     * If the entry being asked is a eos-MPLS label entry,
-	     * then use the payload-protocol field, that we stashed there
-	     * for just this purpose
-	     */
-	    return (fib_forw_chain_type_from_dpo_proto(
-			fib_entry->fe_prefix.fp_payload_proto));
+	    return (FIB_FORW_CHAIN_TYPE_MPLS_EOS);
 	else
 	    return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
     }
@@ -370,6 +364,35 @@ fib_entry_contribute_urpf (fib_node_index_t entry_index,
     return (fib_path_list_contribute_urpf(fib_entry->fe_parent, urpf));
 }
 
+/*
+ * If the client is requesting a chain for multicast forwarding then swap
+ * the chain type to one that can provide such transport (MPLS EOS).
+ */
+static fib_forward_chain_type_t
+fib_entry_chain_type_mcast_to_ucast (fib_forward_chain_type_t fct)
+{
+    switch (fct)
+    {
+    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+        /*
+         * we can only transport IP multicast packets if there is an
+         * LSP.
+         */
+        fct = FIB_FORW_CHAIN_TYPE_MPLS_EOS;
+        break;
+    case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+    case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+    case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+    case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+    case FIB_FORW_CHAIN_TYPE_ETHERNET:
+    case FIB_FORW_CHAIN_TYPE_NSH:
+        break; /* every other chain type is returned unchanged */
+    }
+
+    return (fct);
+}
+
 /*
  * fib_entry_contribute_forwarding
  *
@@ -385,6 +408,11 @@ fib_entry_contribute_forwarding (fib_node_index_t fib_entry_index,
 
     fib_entry = fib_entry_get(fib_entry_index);
 
+    /*
+     * mfib children ask for mcast chains. fix these to the appropriate ucast types.
+     */
+    fct = fib_entry_chain_type_mcast_to_ucast(fct);
+
     if (fct == fib_entry_get_default_chain_type(fib_entry))
     {
         dpo_copy(dpo, &fib_entry->fe_lb);
@@ -414,6 +442,11 @@ fib_entry_contribute_forwarding (fib_node_index_t fib_entry_index,
 
         dpo_copy(dpo, &fed->fd_dpo);
     }
+    /*
+     * don't allow the special index indicating replicate.vs.load-balance
+     * to escape to the clients
+     */
+    dpo->dpoi_index &= ~MPLS_IS_REPLICATE;
 }
 
 const dpo_id_t *
diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h
index a3f75e60..b17a0b64 100644
--- a/src/vnet/fib/fib_entry.h
+++ b/src/vnet/fib/fib_entry.h
@@ -192,6 +192,11 @@ typedef enum fib_entry_attribute_t_ {
      * The prefix/address is local to this device
      */
     FIB_ENTRY_ATTRIBUTE_LOCAL,
+    /**
+     * The prefix/address is a multicast prefix.
+     *  this applies only to MPLS. IP multicast is handled by mfib
+     */
+    FIB_ENTRY_ATTRIBUTE_MULTICAST,
     /**
      * The prefix/address exempted from loose uRPF check
      * To be used with caution
@@ -200,7 +205,7 @@ typedef enum fib_entry_attribute_t_ {
     /**
      * Marker. add new entries before this one.
      */
-    FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT,
+    FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT,
 } fib_entry_attribute_t;
 
 /**
@@ -215,7 +220,8 @@ typedef enum fib_entry_attribute_t_ {
     [FIB_ENTRY_ATTRIBUTE_DROP]      = "drop",		\
     [FIB_ENTRY_ATTRIBUTE_EXCLUSIVE] = "exclusive",      \
     [FIB_ENTRY_ATTRIBUTE_LOCAL]     = "local",		\
-    [FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT] = "uRPF-exempt"   \
+    [FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT] = "uRPF-exempt",  \
+    [FIB_ENTRY_ATTRIBUTE_MULTICAST] = "multicast",	\
 }
 
 #define FOR_EACH_FIB_ATTRIBUTE(_item)			\
@@ -232,6 +238,7 @@ typedef enum fib_entry_flag_t_ {
     FIB_ENTRY_FLAG_LOCAL     = (1 << FIB_ENTRY_ATTRIBUTE_LOCAL),
     FIB_ENTRY_FLAG_IMPORT    = (1 << FIB_ENTRY_ATTRIBUTE_IMPORT),
     FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT = (1 << FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT),
+    FIB_ENTRY_FLAG_MULTICAST = (1 << FIB_ENTRY_ATTRIBUTE_MULTICAST),
 } __attribute__((packed)) fib_entry_flag_t;
 
 /**
@@ -396,7 +403,7 @@ typedef struct fib_entry_t_ {
      *     paint the header straight on without the need to check the packet
      *     type to derive the EOS bit value.
      */
-    dpo_id_t fe_lb; // [FIB_FORW_CHAIN_MPLS_NUM];
+    dpo_id_t fe_lb;
     /**
      * Vector of source infos.
      * Most entries will only have 1 source. So we optimise for memory usage,
diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c
index aa1d5a24..a700282e 100644
--- a/src/vnet/fib/fib_entry_src.c
+++ b/src/vnet/fib/fib_entry_src.c
@@ -17,6 +17,7 @@
 #include <vnet/dpo/load_balance.h>
 #include <vnet/dpo/mpls_label_dpo.h>
 #include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
 
 #include <vnet/fib/fib_entry_src.h>
 #include <vnet/fib/fib_table.h>
@@ -229,8 +230,6 @@ fib_forward_chain_type_t
 fib_entry_chain_type_fixup (const fib_entry_t *entry,
 			    fib_forward_chain_type_t fct)
 {
-    ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS == fct);
-
     /*
      * The EOS chain is a tricky since one cannot know the adjacency
      * to link to without knowing what the packets payload protocol
@@ -238,6 +237,11 @@ fib_entry_chain_type_fixup (const fib_entry_t *entry,
      */
     fib_forward_chain_type_t dfct;
 
+    if (FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct)
+    {
+        return (fct);
+    }
+
     dfct = fib_entry_get_default_chain_type(entry);
 
     if (FIB_FORW_CHAIN_TYPE_MPLS_EOS == dfct)
@@ -303,7 +307,12 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index,
          * found a matching extension. stack it to obtain the forwarding
          * info for this path.
          */
-        ctx->next_hops = fib_path_ext_stack(path_ext, ctx->fib_entry, ctx->fct, ctx->next_hops);
+        ctx->next_hops =
+            fib_path_ext_stack(path_ext,
+                               ctx->fct,
+                               fib_entry_chain_type_fixup(ctx->fib_entry,
+                                                          ctx->fct),
+                               ctx->next_hops);
     }
     else
     {
@@ -355,6 +364,9 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index,
                                            fib_entry_chain_type_fixup(ctx->fib_entry,
                                                                       ctx->fct),
                                            &nh->path_dpo);
+            fib_path_stack_mpls_disp(path_index,
+                                     ctx->fib_entry->fe_prefix.fp_payload_proto,
+                                     &nh->path_dpo);
 
             break;
         }
@@ -424,50 +436,70 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
         /*
          * first time create
          */
-        flow_hash_config_t fhc;
-
-        fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index,
-                                             dpo_proto_to_fib(lb_proto));
-        dpo_set(dpo_lb,
-                DPO_LOAD_BALANCE,
-                lb_proto,
-                load_balance_create(0, lb_proto, fhc));
+        if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_MULTICAST)
+        {
+            dpo_set(dpo_lb,
+                    DPO_REPLICATE,
+                    lb_proto,
+                    MPLS_IS_REPLICATE | replicate_create(0, lb_proto));
+        }
+        else
+        {
+            flow_hash_config_t fhc;
+
+            fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index,
+                                                 dpo_proto_to_fib(lb_proto));
+            dpo_set(dpo_lb,
+                    DPO_LOAD_BALANCE,
+                    lb_proto,
+                    load_balance_create(0, lb_proto, fhc));
+        }
     }
 
-    load_balance_multipath_update(dpo_lb,
-                                  ctx.next_hops,
-                                  fib_entry_calc_lb_flags(&ctx));
-    vec_free(ctx.next_hops);
-
-    /*
-     * if this entry is sourced by the uRPF-exempt source then we
-     * append the always present local0 interface (index 0) to the
-     * uRPF list so it is not empty. that way packets pass the loose check.
-     */
-    index_t ui = fib_path_list_get_urpf(esrc->fes_pl);
-
-    if ((fib_entry_is_sourced(fib_entry_get_index(fib_entry),
-			      FIB_SOURCE_URPF_EXEMPT) ||
-	 (esrc->fes_entry_flags & FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT))&&
-	(0 == fib_urpf_check_size(ui)))
+    if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_MULTICAST)
     {
-	/*
-	 * The uRPF list we get from the path-list is shared by all
-	 * other users of the list, but the uRPF exemption applies
-	 * only to this prefix. So we need our own list.
-	 */
-	ui = fib_urpf_list_alloc_and_lock();
-	fib_urpf_list_append(ui, 0);
-	fib_urpf_list_bake(ui);
-	load_balance_set_urpf(dpo_lb->dpoi_index, ui);
-	fib_urpf_list_unlock(ui);
+        /*
+         * MPLS multicast
+         */
+        replicate_multipath_update(dpo_lb, ctx.next_hops);
     }
     else
     {
-	load_balance_set_urpf(dpo_lb->dpoi_index, ui);
+        load_balance_multipath_update(dpo_lb,
+                                      ctx.next_hops,
+                                      fib_entry_calc_lb_flags(&ctx));
+        vec_free(ctx.next_hops);
+
+        /*
+         * if this entry is sourced by the uRPF-exempt source then we
+         * append the always present local0 interface (index 0) to the
+         * uRPF list so it is not empty. that way packets pass the loose check.
+         */
+        index_t ui = fib_path_list_get_urpf(esrc->fes_pl);
+
+        if ((fib_entry_is_sourced(fib_entry_get_index(fib_entry),
+                                  FIB_SOURCE_URPF_EXEMPT) ||
+             (esrc->fes_entry_flags & FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT))&&
+            (0 == fib_urpf_check_size(ui)))
+        {
+            /*
+             * The uRPF list we get from the path-list is shared by all
+             * other users of the list, but the uRPF exemption applies
+             * only to this prefix. So we need our own list.
+             */
+            ui = fib_urpf_list_alloc_and_lock();
+            fib_urpf_list_append(ui, 0);
+            fib_urpf_list_bake(ui);
+            load_balance_set_urpf(dpo_lb->dpoi_index, ui);
+            fib_urpf_list_unlock(ui);
+        }
+        else
+        {
+            load_balance_set_urpf(dpo_lb->dpoi_index, ui);
+        }
+        load_balance_set_fib_entry_flags(dpo_lb->dpoi_index,
+                                         fib_entry_get_flags_i(fib_entry));
     }
-    load_balance_set_fib_entry_flags(dpo_lb->dpoi_index,
-                                     fib_entry_get_flags_i(fib_entry));
 }
 
 void
@@ -887,21 +919,6 @@ fib_entry_src_action_remove (fib_entry_t *fib_entry,
     return (sflags);
 }
 
-static inline int
-fib_route_recurses_via_self (const fib_prefix_t *prefix,
-			     const fib_route_path_t *rpath)
-{
-    /*
-     * not all zeros next hop &&
-     * is recursive path &&
-     * nexthop is same as the route's address
-     */
-    return ((!ip46_address_is_zero(&rpath->frp_addr)) &&
-	    (~0 == rpath->frp_sw_if_index) &&
-	    (0 == ip46_address_cmp(&rpath->frp_addr, &prefix->fp_addr)));
-
-}
-
 /*
  * fib_route_attached_cross_table
  *
@@ -962,14 +979,14 @@ fib_entry_src_flags_2_path_list_flags (fib_entry_flag_t eflags)
     {
 	plf |= FIB_PATH_LIST_FLAG_DROP;
     }
-    if (eflags & FIB_ENTRY_FLAG_LOCAL)
-    {
-	plf |= FIB_PATH_LIST_FLAG_LOCAL;
-    }
     if (eflags & FIB_ENTRY_FLAG_EXCLUSIVE)
     {
 	plf |= FIB_PATH_LIST_FLAG_EXCLUSIVE;
     }
+    if (eflags & FIB_ENTRY_FLAG_LOCAL)
+    {
+	plf |= FIB_PATH_LIST_FLAG_LOCAL;
+    }
 
     return (plf);
 }
@@ -980,25 +997,6 @@ fib_entry_flags_update (const fib_entry_t *fib_entry,
 			fib_path_list_flags_t *pl_flags,
 			fib_entry_src_t *esrc)
 {
-    /*
-     * don't allow the addition of a recursive looped path for prefix
-     * via itself.
-     */
-    if (fib_route_recurses_via_self(&fib_entry->fe_prefix, rpath))	
-    {
-	/*
-	 * force the install of a drop path-list.
-	 * we want the entry to have some path-list, mainly so
-	 * the dodgy path can be rmeoved when the source stops playing
-	 * silly buggers.
-	 */
-	*pl_flags |= FIB_PATH_LIST_FLAG_DROP;
-    }
-    else
-    {
-	*pl_flags &= ~FIB_PATH_LIST_FLAG_DROP;
-    }
-
     if ((esrc->fes_src == FIB_SOURCE_API) ||
 	(esrc->fes_src == FIB_SOURCE_CLI))
     {
diff --git a/src/vnet/fib/fib_internal.h b/src/vnet/fib/fib_internal.h
index 2d980bcc..8abc0e07 100644
--- a/src/vnet/fib/fib_internal.h
+++ b/src/vnet/fib/fib_internal.h
@@ -25,6 +25,7 @@
 #undef FIB_DEBUG
 
 extern void fib_prefix_from_mpls_label(mpls_label_t label,
+                                       mpls_eos_bit_t eos,
 				       fib_prefix_t *prf);
 
 extern int fib_route_path_cmp(const fib_route_path_t *rpath1,
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 6b202a97..f81f4170 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -21,6 +21,8 @@
 #include <vnet/dpo/receive_dpo.h>
 #include <vnet/dpo/load_balance_map.h>
 #include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/interface_dpo.h>
+#include <vnet/dpo/mpls_disposition.h>
 
 #include <vnet/adj/adj.h>
 #include <vnet/adj/adj_mcast.h>
@@ -66,6 +68,10 @@ typedef enum fib_path_type_t_ {
      * deag. Link to a lookup adj in the next table
      */
     FIB_PATH_TYPE_DEAG,
+    /**
+     * interface receive.
+     */
+    FIB_PATH_TYPE_INTF_RX,
     /**
      * receive. it's for-us.
      */
@@ -88,6 +94,7 @@ typedef enum fib_path_type_t_ {
     [FIB_PATH_TYPE_SPECIAL]           = "special",	        \
     [FIB_PATH_TYPE_EXCLUSIVE]         = "exclusive",	        \
     [FIB_PATH_TYPE_DEAG]              = "deag",	                \
+    [FIB_PATH_TYPE_INTF_RX]           = "intf-rx",	        \
     [FIB_PATH_TYPE_RECEIVE]           = "receive",	        \
 }
 
@@ -220,10 +227,16 @@ typedef struct fib_path_t_ {
 		 * The next-hop
 		 */
 		ip46_address_t fp_ip;
-		/**
-		 * The local label to resolve through.
-		 */
-		mpls_label_t fp_local_label;
+		struct {
+                    /**
+                     * The local label to resolve through.
+                     */
+                    mpls_label_t fp_local_label;
+                    /**
+                     * The EOS bit of the resolving label
+                     */
+                    mpls_eos_bit_t fp_eos;
+                };
 	    } fp_nh;
 	    /**
 	     * The FIB table index in which to find the next-hop.
@@ -254,6 +267,10 @@ typedef struct fib_path_t_ {
 	     * The FIB index in which to perfom the next lookup
 	     */
 	    fib_node_index_t fp_tbl_id;
+            /**
+             * The RPF-ID to tag the packets with
+             */
+            fib_rpf_id_t fp_rpf_id;
 	} deag;
 	struct {
 	} special;
@@ -273,6 +290,12 @@ typedef struct fib_path_t_ {
 	     */
 	    ip46_address_t fp_addr;
 	} receive;
+	struct {
+	    /**
+	     * The interface on which the packets will be input.
+	     */
+	    u32 fp_interface;
+	} intf_rx;
     };
     STRUCT_MARK(path_hash_end);
 
@@ -444,9 +467,11 @@ format_fib_path (u8 * s, va_list * args)
     case FIB_PATH_TYPE_RECURSIVE:
 	if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
 	{
-	    s = format (s, "via %U",
+	    s = format (s, "via %U %U",
 			format_mpls_unicast_label,
-			path->recursive.fp_nh.fp_local_label);
+			path->recursive.fp_nh.fp_local_label,
+			format_mpls_eos_bit,
+			path->recursive.fp_nh.fp_eos);
 	}
 	else
 	{
@@ -465,6 +490,7 @@ format_fib_path (u8 * s, va_list * args)
 
 	break;
     case FIB_PATH_TYPE_RECEIVE:
+    case FIB_PATH_TYPE_INTF_RX:
     case FIB_PATH_TYPE_SPECIAL:
     case FIB_PATH_TYPE_DEAG:
     case FIB_PATH_TYPE_EXCLUSIVE:
@@ -736,6 +762,7 @@ fib_path_unresolve (fib_path_t *path)
         break;
     case FIB_PATH_TYPE_SPECIAL:
     case FIB_PATH_TYPE_RECEIVE:
+    case FIB_PATH_TYPE_INTF_RX:
     case FIB_PATH_TYPE_DEAG:
         /*
          * these hold only the path's DPO, which is reset below.
@@ -754,16 +781,24 @@ fib_path_unresolve (fib_path_t *path)
 }
 
 static fib_forward_chain_type_t
-fib_path_proto_to_chain_type (fib_protocol_t proto)
+fib_path_to_chain_type (const fib_path_t *path)
 {
-    switch (proto)
+    switch (path->fp_nh_proto)
     {
     case FIB_PROTOCOL_IP4:
 	return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
     case FIB_PROTOCOL_IP6:
 	return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
     case FIB_PROTOCOL_MPLS:
-	return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
+        if (FIB_PATH_TYPE_RECURSIVE == path->fp_type &&
+            MPLS_EOS == path->recursive.fp_nh.fp_eos)
+        {
+            return (FIB_FORW_CHAIN_TYPE_MPLS_EOS);
+        }
+        else /* non-recursive path, or recursive via a non-EOS label */
+        {
+            return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
+        }
     }
     return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
 }
@@ -793,7 +828,7 @@ fib_path_back_walk_notify (fib_node_t *node,
 	     */
 	    fib_path_recursive_adj_update(
 		path,
-		fib_path_proto_to_chain_type(path->fp_nh_proto),
+		fib_path_to_chain_type(path),
 		&path->fp_dpo);
 	}
 	if ((FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) ||
@@ -931,6 +966,8 @@ FIXME comment
 	    path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
 	}
 	break;
+    case FIB_PATH_TYPE_INTF_RX:
+        ASSERT(0);
     case FIB_PATH_TYPE_DEAG:
 	/*
 	 * FIXME When VRF delete is allowed this will need a poke.
@@ -986,6 +1023,14 @@ fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath)
 	cfg_flags |= FIB_PATH_CFG_FLAG_LOCAL;
     if (rpath->frp_flags & FIB_ROUTE_PATH_ATTACHED)
 	cfg_flags |= FIB_PATH_CFG_FLAG_ATTACHED;
+    if (rpath->frp_flags & FIB_ROUTE_PATH_INTF_RX)
+	cfg_flags |= FIB_PATH_CFG_FLAG_INTF_RX;
+    if (rpath->frp_flags & FIB_ROUTE_PATH_RPF_ID)
+	cfg_flags |= FIB_PATH_CFG_FLAG_RPF_ID;
+    if (rpath->frp_flags & FIB_ROUTE_PATH_EXCLUSIVE)
+	cfg_flags |= FIB_PATH_CFG_FLAG_EXCLUSIVE;
+    if (rpath->frp_flags & FIB_ROUTE_PATH_DROP)
+	cfg_flags |= FIB_PATH_CFG_FLAG_DROP;
 
     return (cfg_flags);
 }
@@ -998,8 +1043,6 @@ fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath)
  */
 fib_node_index_t
 fib_path_create (fib_node_index_t pl_index,
-		 fib_protocol_t nh_proto,
-		 fib_path_cfg_flags_t flags,
 		 const fib_route_path_t *rpath)
 {
     fib_path_t *path;
@@ -1012,7 +1055,7 @@ fib_path_create (fib_node_index_t pl_index,
 
     dpo_reset(&path->fp_dpo);
     path->fp_pl_index = pl_index;
-    path->fp_nh_proto = nh_proto;
+    path->fp_nh_proto = rpath->frp_proto;
     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
     path->fp_weight = rpath->frp_weight;
     if (0 == path->fp_weight)
@@ -1023,8 +1066,7 @@ fib_path_create (fib_node_index_t pl_index,
          */
         path->fp_weight = 1;
     }
-    path->fp_cfg_flags = flags;
-    path->fp_cfg_flags |= fib_path_route_flags_to_cfg_flags(rpath);
+    path->fp_cfg_flags = fib_path_route_flags_to_cfg_flags(rpath);
 
     /*
      * deduce the path's tpye from the parementers and save what is needed.
@@ -1035,6 +1077,17 @@ fib_path_create (fib_node_index_t pl_index,
         path->receive.fp_interface = rpath->frp_sw_if_index;
         path->receive.fp_addr = rpath->frp_addr;
     }
+    else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_INTF_RX)
+    {
+        path->fp_type = FIB_PATH_TYPE_INTF_RX;
+        path->intf_rx.fp_interface = rpath->frp_sw_if_index;
+    }
+    else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RPF_ID)
+    {
+        path->fp_type = FIB_PATH_TYPE_DEAG;
+        path->deag.fp_tbl_id = rpath->frp_fib_index;
+        path->deag.fp_rpf_id = rpath->frp_rpf_id;
+    }
     else if (~0 != rpath->frp_sw_if_index)
     {
         if (ip46_address_is_zero(&rpath->frp_addr))
@@ -1069,6 +1122,7 @@ fib_path_create (fib_node_index_t pl_index,
 	    if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
 	    {
 		path->recursive.fp_nh.fp_local_label = rpath->frp_local_label;
+                path->recursive.fp_nh.fp_eos = rpath->frp_eos;
 	    }
 	    else
 	    {
@@ -1238,17 +1292,13 @@ fib_path_cmp_i (const fib_path_t *path1,
 	    res = ip46_address_cmp(&path1->attached_next_hop.fp_nh,
 				   &path2->attached_next_hop.fp_nh);
 	    if (0 == res) {
-		res = vnet_sw_interface_compare(
-			  vnet_get_main(),
-			  path1->attached_next_hop.fp_interface,
-			  path2->attached_next_hop.fp_interface);
+		res = (path1->attached_next_hop.fp_interface -
+                       path2->attached_next_hop.fp_interface);
 	    }
 	    break;
 	case FIB_PATH_TYPE_ATTACHED:
-	    res = vnet_sw_interface_compare(
-		      vnet_get_main(),
-		      path1->attached.fp_interface,
-		      path2->attached.fp_interface);
+	    res = (path1->attached.fp_interface -
+                   path2->attached.fp_interface);
 	    break;
 	case FIB_PATH_TYPE_RECURSIVE:
 	    res = ip46_address_cmp(&path1->recursive.fp_nh,
@@ -1261,6 +1311,13 @@ fib_path_cmp_i (const fib_path_t *path1,
 	    break;
 	case FIB_PATH_TYPE_DEAG:
 	    res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id);
+	    if (0 == res)
+	    {
+                res = (path1->deag.fp_rpf_id - path2->deag.fp_rpf_id);
+            }
+	    break;
+	case FIB_PATH_TYPE_INTF_RX:
+	    res = (path1->intf_rx.fp_interface - path2->intf_rx.fp_interface);
 	    break;
 	case FIB_PATH_TYPE_SPECIAL:
 	case FIB_PATH_TYPE_RECEIVE:
@@ -1336,22 +1393,22 @@ fib_path_cmp_w_route_path (fib_node_index_t path_index,
 				   &rpath->frp_addr);
 	    if (0 == res)
 	    {
-		res = vnet_sw_interface_compare(
-			  vnet_get_main(),
-			  path->attached_next_hop.fp_interface,
-			  rpath->frp_sw_if_index);
+		res = (path->attached_next_hop.fp_interface -
+                       rpath->frp_sw_if_index);
 	    }
 	    break;
 	case FIB_PATH_TYPE_ATTACHED:
-	    res = vnet_sw_interface_compare(
-		      vnet_get_main(),
-		      path->attached.fp_interface,
-		      rpath->frp_sw_if_index);
+	    res = (path->attached.fp_interface - rpath->frp_sw_if_index);
 	    break;
 	case FIB_PATH_TYPE_RECURSIVE:
             if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
             {
                 res = path->recursive.fp_nh.fp_local_label - rpath->frp_local_label;
+
+                if (res == 0)
+                {
+                    res = path->recursive.fp_nh.fp_eos - rpath->frp_eos;
+                }
             }
             else
             {
@@ -1364,9 +1421,16 @@ fib_path_cmp_w_route_path (fib_node_index_t path_index,
                 res = (path->recursive.fp_tbl_id - rpath->frp_fib_index);
             }
 	    break;
+	case FIB_PATH_TYPE_INTF_RX:
+	    res = (path->intf_rx.fp_interface - rpath->frp_sw_if_index);
+            break;
 	case FIB_PATH_TYPE_DEAG:
 	    res = (path->deag.fp_tbl_id - rpath->frp_fib_index);
-	    break;
+	    if (0 == res)
+            {
+                res = (path->deag.fp_rpf_id - rpath->frp_rpf_id);
+            }
+            break;
 	case FIB_PATH_TYPE_SPECIAL:
 	case FIB_PATH_TYPE_RECEIVE:
 	case FIB_PATH_TYPE_EXCLUSIVE:
@@ -1465,6 +1529,7 @@ fib_path_recursive_loop_detect (fib_node_index_t path_index,
     case FIB_PATH_TYPE_SPECIAL:
     case FIB_PATH_TYPE_DEAG:
     case FIB_PATH_TYPE_RECEIVE:
+    case FIB_PATH_TYPE_INTF_RX:
     case FIB_PATH_TYPE_EXCLUSIVE:
 	/*
 	 * these path types cannot be part of a loop, since they are the leaves
@@ -1563,7 +1628,9 @@ fib_path_resolve (fib_node_index_t path_index)
 
 	if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
 	{
-	    fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, &pfx);
+	    fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label,
+                                       path->recursive.fp_nh.fp_eos,
+                                       &pfx);
 	}
 	else
 	{
@@ -1592,7 +1659,7 @@ fib_path_resolve (fib_node_index_t path_index)
 	 */
 	fib_path_recursive_adj_update(
 	    path,
-	    fib_path_proto_to_chain_type(path->fp_nh_proto),
+	    fib_path_to_chain_type(path),
 	    &path->fp_dpo);
 
 	break;
@@ -1605,16 +1672,25 @@ fib_path_resolve (fib_node_index_t path_index)
                  drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
 	break;
     case FIB_PATH_TYPE_DEAG:
+    {
 	/*
 	 * Resolve via a lookup DPO.
          * FIXME. control plane should add routes with a table ID
 	 */
-	lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id,
-                                          fib_proto_to_dpo(path->fp_nh_proto),
-                                          LOOKUP_INPUT_DST_ADDR,
-                                          LOOKUP_TABLE_FROM_CONFIG,
-                                          &path->fp_dpo);
+        lookup_cast_t cast;
+        
+        cast = (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RPF_ID ?
+                LOOKUP_MULTICAST :
+                LOOKUP_UNICAST);
+
+        lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id,
+                                           fib_proto_to_dpo(path->fp_nh_proto),
+                                           cast,
+                                           LOOKUP_INPUT_DST_ADDR,
+                                           LOOKUP_TABLE_FROM_CONFIG,
+                                           &path->fp_dpo);
 	break;
+    }
     case FIB_PATH_TYPE_RECEIVE:
 	/*
 	 * Resolve via a receive DPO.
@@ -1624,6 +1700,15 @@ fib_path_resolve (fib_node_index_t path_index)
                                 &path->receive.fp_addr,
                                 &path->fp_dpo);
 	break;
+    case FIB_PATH_TYPE_INTF_RX: {
+	/*
+	 * Resolve via an interface DPO.
+	 */
+	interface_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto),
+                                  path->intf_rx.fp_interface,
+                                  &path->fp_dpo);
+	break;
+    }
     case FIB_PATH_TYPE_EXCLUSIVE:
 	/*
 	 * Resolve via the user provided DPO
@@ -1652,6 +1737,7 @@ fib_path_get_resolving_interface (fib_node_index_t path_index)
 	return (path->receive.fp_interface);
     case FIB_PATH_TYPE_RECURSIVE:
 	return (fib_entry_get_resolving_interface(path->fp_via_fib));    
+    case FIB_PATH_TYPE_INTF_RX:
     case FIB_PATH_TYPE_SPECIAL:
     case FIB_PATH_TYPE_DEAG:
     case FIB_PATH_TYPE_EXCLUSIVE:
@@ -1743,6 +1829,7 @@ fib_path_contribute_urpf (fib_node_index_t path_index,
 
     case FIB_PATH_TYPE_DEAG:
     case FIB_PATH_TYPE_RECEIVE:
+    case FIB_PATH_TYPE_INTF_RX:
 	/*
 	 * these path types don't link to an adj
 	 */
@@ -1750,6 +1837,44 @@ fib_path_contribute_urpf (fib_node_index_t path_index,
     }
 }
 
+void
+fib_path_stack_mpls_disp (fib_node_index_t path_index,
+                          dpo_proto_t payload_proto,
+                          dpo_id_t *dpo)
+{
+    fib_path_t *path;
+    /* look up the path object for the index the caller passed in */
+    path = fib_path_get(path_index);
+
+    ASSERT(path);
+    /* only deag paths are modified; every other path type leaves dpo untouched */
+    switch (path->fp_type)
+    {
+    case FIB_PATH_TYPE_DEAG:
+    {
+        dpo_id_t tmp = DPO_INVALID;
+        /* copy the caller's DPO to tmp, then overwrite dpo with a disposition DPO built from it */
+        dpo_copy(&tmp, dpo);
+        dpo_set(dpo,
+                DPO_MPLS_DISPOSITION,
+                payload_proto,
+                mpls_disp_dpo_create(payload_proto,
+                                     path->deag.fp_rpf_id,
+                                     &tmp));
+        dpo_reset(&tmp); /* presumably drops the temporary copy's reference - confirm */
+        break;
+    }                
+    case FIB_PATH_TYPE_RECEIVE:
+    case FIB_PATH_TYPE_ATTACHED:
+    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
+    case FIB_PATH_TYPE_RECURSIVE:
+    case FIB_PATH_TYPE_INTF_RX:
+    case FIB_PATH_TYPE_EXCLUSIVE:
+    case FIB_PATH_TYPE_SPECIAL:
+        break;
+    }
+}
+
 void
 fib_path_contribute_forwarding (fib_node_index_t path_index,
 				fib_forward_chain_type_t fct,
@@ -1769,7 +1894,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
      * This then represents the path's 'native' protocol; IP.
      * For all others will need to go find something else.
      */
-    if (fib_path_proto_to_chain_type(path->fp_nh_proto) == fct)
+    if (fib_path_to_chain_type(path) == fct)
     {
 	dpo_copy(dpo, &path->fp_dpo);
     }
@@ -1813,10 +1938,10 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
 	    case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
 	    case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
 	    case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
-		fib_path_recursive_adj_update(path, fct, dpo);
-		break;
 	    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
 	    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+		fib_path_recursive_adj_update(path, fct, dpo);
+		break;
 	    case FIB_FORW_CHAIN_TYPE_ETHERNET:
 	    case FIB_FORW_CHAIN_TYPE_NSH:
 		ASSERT(0);
@@ -1829,13 +1954,14 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
 	    case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
                 lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID,
                                                   DPO_PROTO_MPLS,
+                                                  LOOKUP_UNICAST,
                                                   LOOKUP_INPUT_DST_ADDR,
                                                   LOOKUP_TABLE_FROM_CONFIG,
                                                   dpo);
                 break;
+	    case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
 	    case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
 	    case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
-	    case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
 		dpo_copy(dpo, &path->fp_dpo);
 		break;
 	    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
@@ -1870,7 +1996,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
                     ai = adj_mcast_add_or_lock(path->fp_nh_proto,
                                                fib_forw_chain_type_to_link_type(fct),
                                                path->attached.fp_interface);
-                    dpo_set(dpo, DPO_ADJACENCY_MCAST,
+                    dpo_set(dpo, DPO_ADJACENCY,
                             fib_forw_chain_type_to_dpo_proto(fct),
                             ai);
                     adj_unlock(ai);
@@ -1878,6 +2004,14 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
                 break;
             }
             break;
+        case FIB_PATH_TYPE_INTF_RX:
+            /*
+             * Create the interface DPO, for the requested chain type, on the path's rx interface
+             */
+            interface_dpo_add_or_lock(fib_forw_chain_type_to_dpo_proto(fct),
+                                      path->intf_rx.fp_interface,
+                                      dpo);
+            break;
         case FIB_PATH_TYPE_RECEIVE:
         case FIB_PATH_TYPE_SPECIAL:
             dpo_copy(dpo, &path->fp_dpo);
diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h
index 14efc1ab..334be6f5 100644
--- a/src/vnet/fib/fib_path.h
+++ b/src/vnet/fib/fib_path.h
@@ -69,6 +69,14 @@ typedef enum fib_path_cfg_attribute_t_ {
     /**
+     * The path is an interface receive
+     */
+    FIB_PATH_CFG_ATTRIBUTE_INTF_RX,
+    /**
+     * The path is a deag with rpf-id
+     */
+    FIB_PATH_CFG_ATTRIBUTE_RPF_ID,
+    /**
      * The path is a for-us path
      */
     FIB_PATH_CFG_ATTRIBUTE_LOCAL,
     /**
      * Marker. Add new types before this one, then update it.
@@ -88,6 +96,8 @@ typedef enum fib_path_cfg_attribute_t_ {
     [FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED] = "resolve-attached", \
     [FIB_PATH_CFG_ATTRIBUTE_LOCAL] = "local",	        \
     [FIB_PATH_CFG_ATTRIBUTE_ATTACHED] = "attached",	\
+    [FIB_PATH_CFG_ATTRIBUTE_INTF_RX] = "interface-rx",	\
+    [FIB_PATH_CFG_ATTRIBUTE_RPF_ID] = "rpf-id",         \
 }
 
 #define FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(_item) \
@@ -106,6 +116,8 @@ typedef enum fib_path_cfg_flags_t_ {
     FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED),
     FIB_PATH_CFG_FLAG_LOCAL = (1 << FIB_PATH_CFG_ATTRIBUTE_LOCAL),
     FIB_PATH_CFG_FLAG_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_ATTACHED),
+    FIB_PATH_CFG_FLAG_INTF_RX = (1 << FIB_PATH_CFG_ATTRIBUTE_INTF_RX),
+    FIB_PATH_CFG_FLAG_RPF_ID = (1 << FIB_PATH_CFG_ATTRIBUTE_RPF_ID),
 } __attribute__ ((packed)) fib_path_cfg_flags_t;
 
 
@@ -117,8 +129,6 @@ extern u8 *fib_path_adj_format(fib_node_index_t pi,
 extern u8 * format_fib_path(u8 * s, va_list * args);
 
 extern fib_node_index_t fib_path_create(fib_node_index_t pl_index,
-					fib_protocol_t nh_proto,
-					fib_path_cfg_flags_t flags,
 					const fib_route_path_t *path);
 extern fib_node_index_t fib_path_create_special(fib_node_index_t pl_index,
 						fib_protocol_t nh_proto,
@@ -145,6 +155,9 @@ extern load_balance_path_t * fib_path_append_nh_for_multipath_hash(
     fib_node_index_t path_index,
     fib_forward_chain_type_t fct,
     load_balance_path_t *hash_key);
+extern void fib_path_stack_mpls_disp(fib_node_index_t path_index,
+                                     dpo_proto_t payload_proto,
+                                     dpo_id_t *dpo);
 extern void fib_path_contribute_forwarding(fib_node_index_t path_index,
 					   fib_forward_chain_type_t type,
 					   dpo_id_t *dpo);
diff --git a/src/vnet/fib/fib_path_ext.c b/src/vnet/fib/fib_path_ext.c
index f75b5626..08293bcf 100644
--- a/src/vnet/fib/fib_path_ext.c
+++ b/src/vnet/fib/fib_path_ext.c
@@ -103,8 +103,8 @@ fib_path_ext_is_imp_null (fib_path_ext_t *path_ext)
 
 load_balance_path_t *
 fib_path_ext_stack (fib_path_ext_t *path_ext,
-		    const fib_entry_t *entry,
                     fib_forward_chain_type_t child_fct,
+                    fib_forward_chain_type_t imp_null_fct,
 		    load_balance_path_t *nhs)
 {
     fib_forward_chain_type_t parent_fct;
@@ -129,7 +129,7 @@ fib_path_ext_stack (fib_path_ext_t *path_ext,
 	 */
 	if (fib_path_ext_is_imp_null(path_ext))
 	{
-            parent_fct = fib_entry_chain_type_fixup(entry, child_fct);
+            parent_fct = imp_null_fct;
         }
         else
         {
diff --git a/src/vnet/fib/fib_path_ext.h b/src/vnet/fib/fib_path_ext.h
index cf8f8df0..d617700d 100644
--- a/src/vnet/fib/fib_path_ext.h
+++ b/src/vnet/fib/fib_path_ext.h
@@ -18,6 +18,7 @@
 
 #include <vnet/mpls/mpls.h>
 #include <vnet/fib/fib_types.h>
+#include <vnet/dpo/load_balance.h>
 
 /**
  * A path extension is a per-entry addition to the forwarding information
@@ -61,8 +62,8 @@ extern void fib_path_ext_resolve(fib_path_ext_t *path_ext,
 				 fib_node_index_t path_list_index);
 
 extern load_balance_path_t *fib_path_ext_stack(fib_path_ext_t *path_ext,
-                                               const struct fib_entry_t_ *entry,
                                                fib_forward_chain_type_t fct,
+                                               fib_forward_chain_type_t imp_null_fct,
                                                load_balance_path_t *nhs);
 
 #endif
diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c
index b9a391b3..ea6565dd 100644
--- a/src/vnet/fib/fib_path_list.c
+++ b/src/vnet/fib/fib_path_list.c
@@ -40,13 +40,6 @@ typedef struct fib_path_list_t_ {
      */
     fib_path_list_flags_t fpl_flags;
 
-    /**
-     * The next-hop protocol for the paths in this path list.
-     * Note that fixing the proto here means we don't support a mix of
-     * v4 and v6 paths. ho hum.
-     */
-    fib_protocol_t fpl_nh_proto;
-
     /**
      * Vector of paths indicies for all configured paths.
      * For shareable path-lists this list MUST not change.
@@ -57,6 +50,11 @@ typedef struct fib_path_list_t_ {
      * the RPF list calculated for this path list
      */
     fib_node_index_t fpl_urpf;
+
+    /**
+     * Hash table of paths. valid only with INDEXED flag
+     */
+    uword *fpl_db;
 } fib_path_list_t;
 
 /*
@@ -131,7 +129,6 @@ format_fib_path_list (u8 * s, va_list * args)
     
     s = format (s, "    index:%u", fib_path_list_get_index(path_list));
     s = format (s, " locks:%u", path_list->fpl_node.fn_locks);
-    s = format (s, " proto:%U", format_fib_protocol, path_list->fpl_nh_proto);
 
     if (FIB_PATH_LIST_FLAG_NONE != path_list->fpl_flags)
     {
@@ -155,26 +152,6 @@ format_fib_path_list (u8 * s, va_list * args)
     return (s);
 }
 
-u8 *
-fib_path_list_adjs_format (fib_node_index_t path_list_index,
-			   u32 indent,
-			   u8 * s)
-{
-    fib_path_list_t *path_list;
-    u32 i;
-
-    path_list = fib_path_list_get(path_list_index);
-
-    vec_foreach_index (i, path_list->fpl_paths)
-    {
-	s = fib_path_adj_format(path_list->fpl_paths[i],
-				indent, s);
-    }
-
-    return (s);
-}
-
-
 u8 *
 fib_path_list_format (fib_node_index_t path_list_index,
 		      u8 * s)
@@ -648,27 +625,6 @@ fib_path_list_is_looped (fib_node_index_t path_list_index)
     return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_LOOPED);
 }
 
-static fib_path_cfg_flags_t 
-fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf)
-{
-    fib_path_cfg_flags_t pf = FIB_PATH_CFG_FLAG_NONE;
-
-    if (plf & FIB_PATH_LIST_FLAG_LOCAL)
-    {
-	pf |= FIB_PATH_CFG_FLAG_LOCAL;
-    }
-    if (plf & FIB_PATH_LIST_FLAG_DROP)
-    {
-	pf |= FIB_PATH_CFG_FLAG_DROP;
-    }
-    if (plf & FIB_PATH_LIST_FLAG_EXCLUSIVE)
-    {
-	pf |= FIB_PATH_CFG_FLAG_EXCLUSIVE;
-    }
-
-    return (pf);
-}
-
 static fib_path_list_flags_t
 fib_path_list_flags_fixup (fib_path_list_flags_t flags)
 {
@@ -695,18 +651,15 @@ fib_path_list_create (fib_path_list_flags_t flags,
     flags = fib_path_list_flags_fixup(flags);
     path_list = fib_path_list_alloc(&path_list_index);
     path_list->fpl_flags = flags;
-    /*
-     * we'll assume for now all paths are the same next-hop protocol
-     */
-    path_list->fpl_nh_proto = rpaths[0].frp_proto;
 
-    vec_foreach_index(i, rpaths)
+    if (NULL != rpaths)
     {
-	vec_add1(path_list->fpl_paths,
-		 fib_path_create(path_list_index,
-				 path_list->fpl_nh_proto,
-				 fib_path_list_flags_2_path_flags(flags),
-				 &rpaths[i]));
+        vec_foreach_index(i, rpaths)
+        {
+            vec_add1(path_list->fpl_paths,
+                     fib_path_create(path_list_index,
+                                     &rpaths[i]));
+        }
     }
 
     /*
@@ -748,6 +701,27 @@ fib_path_list_create (fib_path_list_flags_t flags,
     return (path_list_index);
 }
 
+/*
+ * Map the path-list flags with a per-path equivalent onto path cfg flags.
+ */
+static fib_path_cfg_flags_t
+fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf)
+{
+    fib_path_cfg_flags_t path_flags = FIB_PATH_CFG_FLAG_NONE;
+
+    path_flags |= ((plf & FIB_PATH_LIST_FLAG_DROP) ?
+                   FIB_PATH_CFG_FLAG_DROP :
+                   FIB_PATH_CFG_FLAG_NONE);
+    path_flags |= ((plf & FIB_PATH_LIST_FLAG_EXCLUSIVE) ?
+                   FIB_PATH_CFG_FLAG_EXCLUSIVE :
+                   FIB_PATH_CFG_FLAG_NONE);
+    path_flags |= ((plf & FIB_PATH_LIST_FLAG_LOCAL) ?
+                   FIB_PATH_CFG_FLAG_LOCAL :
+                   FIB_PATH_CFG_FLAG_NONE);
+
+    return (path_flags);
+}
+
 fib_node_index_t
 fib_path_list_create_special (fib_protocol_t nh_proto,
 			      fib_path_list_flags_t flags,
@@ -758,11 +732,10 @@ fib_path_list_create_special (fib_protocol_t nh_proto,
 
     path_list = fib_path_list_alloc(&path_list_index);
     path_list->fpl_flags = flags;
-    path_list->fpl_nh_proto = nh_proto;
 
     path_index =
 	fib_path_create_special(path_list_index,
-				path_list->fpl_nh_proto,
+                                nh_proto,
 				fib_path_list_flags_2_path_flags(flags),
 				dpo);
     vec_add1(path_list->fpl_paths, path_index);
@@ -775,6 +748,30 @@ fib_path_list_create_special (fib_protocol_t nh_proto,
     return (path_list_index);
 }
 
+/*
+ * return the index, into the path-list's vector of paths, of the first path
+ * that fib_path_cmp_w_route_path() matches against rpath. ~0 if not found.
+ */
+u32
+fib_path_list_find_rpath (fib_node_index_t path_list_index,
+                          const fib_route_path_t *rpath)
+{
+    fib_path_list_t *path_list;
+    u32 ii;
+
+    path_list = fib_path_list_get(path_list_index);
+
+    vec_foreach_index (ii, path_list->fpl_paths)
+    {
+        if (!fib_path_cmp_w_route_path(path_list->fpl_paths[ii], rpath))
+        {
+            return (ii);
+        }
+    }
+    return (~0);
+}
+
+
 /*
  * fib_path_list_copy_and_path_add
  *
@@ -782,13 +779,62 @@ fib_path_list_create_special (fib_protocol_t nh_proto,
  * The path-list returned could either have been newly created, or
  * can be a shared path-list from the data-base.
  */
+fib_node_index_t
+fib_path_list_path_add (fib_node_index_t path_list_index,
+                        const fib_route_path_t *rpaths)
+{
+    fib_node_index_t new_path_index, *orig_path_index;
+    fib_path_list_t *path_list;
+
+    /*
+     * Add the single path described by rpaths to a non-shared path-list,
+     * in place. Returns the index of the new, or the matching existing, path.
+     */
+    path_list = fib_path_list_get(path_list_index);
+
+    ASSERT(1 == vec_len(rpaths));
+    ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED));
+
+    FIB_PATH_LIST_DBG(path_list, "path-add");
+
+    new_path_index = fib_path_create(path_list_index, rpaths);
+
+    vec_foreach (orig_path_index, path_list->fpl_paths)
+    {
+        /*
+         * don't add duplicate paths; free the candidate so it is not leaked
+         */
+        if (0 == fib_path_cmp(new_path_index, *orig_path_index))
+        {
+            fib_path_destroy(new_path_index);
+            return (*orig_path_index);
+        }
+    }
+
+    /*
+     * Add the new path - no sort, no sharing, no key..
+     */
+    vec_add1(path_list->fpl_paths, new_path_index);
+
+    FIB_PATH_LIST_DBG(path_list, "path-added");
+
+    /*
+     * the path is now a member of the list; resolve it
+     * before handing its index back to the caller.
+     */
+    fib_path_resolve(new_path_index);
+
+    return (new_path_index);
+}
+
 fib_node_index_t
 fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index,
-				 fib_path_list_flags_t flags,
-				 const fib_route_path_t *rpaths)
+                                 fib_path_list_flags_t flags,
+                                 const fib_route_path_t *rpaths)
 {
     fib_node_index_t path_index, new_path_index, *orig_path_index;
     fib_path_list_t *path_list, *orig_path_list;
+    fib_node_index_t exist_path_list_index;
     fib_node_index_t path_list_index;
     fib_node_index_t pi;
 
@@ -806,13 +852,11 @@ fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index,
 
     flags = fib_path_list_flags_fixup(flags);
     path_list->fpl_flags = flags;
-    path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto;
+
     vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths));
     pi = 0;
 
     new_path_index = fib_path_create(path_list_index,
-                                     path_list->fpl_nh_proto,
-                                     fib_path_list_flags_2_path_flags(flags),
                                      rpaths);
 
     vec_foreach (orig_path_index, orig_path_list->fpl_paths)
@@ -845,46 +889,79 @@ fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index,
     FIB_PATH_LIST_DBG(path_list, "path-added");
 
     /*
-     * If a shared path list is requested, consult the DB for a match
+     * check for a matching path-list in the DB.
+     * If we find one then we can return the existing one and destroy the
+     * new one just created.
      */
-    if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED)
+    exist_path_list_index = fib_path_list_db_find(path_list);
+    if (FIB_NODE_INDEX_INVALID != exist_path_list_index)
     {
-	fib_node_index_t exist_path_list_index;
-	/*
-	 * check for a matching path-list in the DB.
-	 * If we find one then we can return the existing one and destroy the
-	 * new one just created.
-	 */
-	exist_path_list_index = fib_path_list_db_find(path_list);
-	if (FIB_NODE_INDEX_INVALID != exist_path_list_index)
-	{
-	    fib_path_list_destroy(path_list);
+        fib_path_list_destroy(path_list);
 	
-	    path_list_index = exist_path_list_index;
-	}
-	else
-	{
-	    /*
-	     * if there was not a matching path-list, then this
-	     * new one will need inserting into the DB and resolving.
-	     */
-	    fib_path_list_db_insert(path_list_index);
-
-	    path_list = fib_path_list_resolve(path_list);
-	}
+        path_list_index = exist_path_list_index;
     }
     else
     {
-	/*
-	 * no shared path list requested. resolve and use the one
-	 * just created.
-	 */
-	path_list = fib_path_list_resolve(path_list);
+        /*
+         * if there was not a matching path-list, then this
+         * new one will need inserting into the DB and resolving.
+         */
+        fib_path_list_db_insert(path_list_index);
+
+        path_list = fib_path_list_resolve(path_list);
     }
 
     return (path_list_index);
 }
 
+/*
+ * fib_path_list_path_remove - remove, in place, the one path matching rpaths.
+ * Returns the removed path's index, or FIB_NODE_INDEX_INVALID if none matched.
+ */
+fib_node_index_t
+fib_path_list_path_remove (fib_node_index_t path_list_index,
+                           const fib_route_path_t *rpaths)
+{
+    fib_node_index_t match_path_index, tmp_path_index;
+    fib_path_list_t *path_list;
+    fib_node_index_t pi;
+
+    path_list = fib_path_list_get(path_list_index);
+
+    ASSERT(1 == vec_len(rpaths));
+    ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED));
+
+    FIB_PATH_LIST_DBG(path_list, "path-remove");
+
+    /*
+     * create a representation of the path to be removed, so it
+     * can be used as a comparison object during the search.
+     */
+    tmp_path_index = fib_path_create(path_list_index, rpaths);
+    match_path_index = FIB_NODE_INDEX_INVALID;
+
+    vec_foreach_index (pi, path_list->fpl_paths)
+    {
+        if (0 == fib_path_cmp(tmp_path_index, path_list->fpl_paths[pi]))
+        {
+            /*
+             * match - remove it; vec_del1 reorders the vector, so stop here
+             */
+            match_path_index = path_list->fpl_paths[pi];
+            fib_path_destroy(match_path_index);
+            vec_del1(path_list->fpl_paths, pi);
+            break;
+        }
+    }
+
+    /*
+     * done with the temporary now
+     */
+    fib_path_destroy(tmp_path_index);
+
+    return (match_path_index);
+}
+
 /*
  * fib_path_list_copy_and_path_remove
  *
@@ -911,7 +988,6 @@ fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index,
     FIB_PATH_LIST_DBG(orig_path_list, "copy-remove");
 
     path_list->fpl_flags = flags;
-    path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto;
     /*
      * allocate as many paths as we might need in one go, rather than
      * using vec_add to do a few at a time.
@@ -927,8 +1003,6 @@ fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index,
      * can be used as a comparison object during the copy.
      */
     tmp_path_index = fib_path_create(path_list_index,
-				     path_list->fpl_nh_proto,
-				     fib_path_list_flags_2_path_flags(flags),
 				     rpaths);
 
     vec_foreach (orig_path_index, orig_path_list->fpl_paths)
diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h
index b4971add..9d246211 100644
--- a/src/vnet/fib/fib_path_list.h
+++ b/src/vnet/fib/fib_path_list.h
@@ -38,6 +38,11 @@ typedef enum fib_path_list_attribute_t_ {
      * be searched for each route update.
      */
     FIB_PATH_LIST_ATTRIBUTE_SHARED = FIB_PATH_LIST_ATTRIBUTE_FIRST,
+    /**
+     * Indexed means the path-list keeps a hash table of all paths for
+     * fast lookup. The lookup result is the fib_node_index of the path.
+     */
+    FIB_PATH_LIST_ATTRIBUTE_INDEXED,
     /**
      * explicit drop path-list. Used when the entry source needs to 
      * force a drop, despite the fact the path info is present.
@@ -73,6 +78,7 @@ typedef enum fib_path_list_attribute_t_ {
 typedef enum fib_path_list_flags_t_ {
     FIB_PATH_LIST_FLAG_NONE      = 0,
     FIB_PATH_LIST_FLAG_SHARED    = (1 << FIB_PATH_LIST_ATTRIBUTE_SHARED),
+    FIB_PATH_LIST_FLAG_INDEXED    = (1 << FIB_PATH_LIST_ATTRIBUTE_INDEXED),
     FIB_PATH_LIST_FLAG_DROP      = (1 << FIB_PATH_LIST_ATTRIBUTE_DROP),
     FIB_PATH_LIST_FLAG_LOCAL     = (1 << FIB_PATH_LIST_ATTRIBUTE_LOCAL),
     FIB_PATH_LIST_FLAG_EXCLUSIVE = (1 << FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE),
@@ -83,10 +89,11 @@ typedef enum fib_path_list_flags_t_ {
 
 #define FIB_PATH_LIST_ATTRIBUTES {       		 \
     [FIB_PATH_LIST_ATTRIBUTE_SHARED]    = "shared",	 \
+    [FIB_PATH_LIST_ATTRIBUTE_INDEXED]    = "indexed",	 \
     [FIB_PATH_LIST_ATTRIBUTE_RESOLVED]  = "resolved",	 \
     [FIB_PATH_LIST_ATTRIBUTE_DROP]      = "drop",	 \
     [FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE] = "exclusive",   \
-    [FIB_PATH_LIST_ATTRIBUTE_LOCAL]     = "local",	 \
+    [FIB_PATH_LIST_ATTRIBUTE_LOCAL]     = "local",      \
     [FIB_PATH_LIST_ATTRIBUTE_LOOPED]     = "looped",	 \
     [FIB_PATH_LIST_ATTRIBUTE_NO_URPF]     = "no-uRPF",	 \
 }
@@ -110,6 +117,13 @@ extern fib_node_index_t fib_path_list_copy_and_path_remove(
     fib_node_index_t pl_index,
     fib_path_list_flags_t flags,
     const fib_route_path_t *path);
+extern fib_node_index_t fib_path_list_path_add (
+    fib_node_index_t path_list_index,
+    const fib_route_path_t *rpaths);
+extern fib_node_index_t fib_path_list_path_remove (
+    fib_node_index_t path_list_index,
+    const fib_route_path_t *rpaths);
+
 extern u32 fib_path_list_get_n_paths(fib_node_index_t pl_index);
 
 extern void fib_path_list_contribute_forwarding(fib_node_index_t path_list_index,
@@ -137,11 +151,11 @@ extern int fib_path_list_is_looped(fib_node_index_t path_list_index);
 extern fib_protocol_t fib_path_list_get_proto(fib_node_index_t path_list_index);
 extern u8 * fib_path_list_format(fib_node_index_t pl_index,
 				 u8 * s);
-extern u8 * fib_path_list_adjs_format(fib_node_index_t pl_index,
-				      u32 indent,
-				      u8 * s);
 extern index_t fib_path_list_lb_map_add_or_lock(fib_node_index_t pl_index,
                                                 const fib_node_index_t *pis);
+extern u32 fib_path_list_find_rpath (fib_node_index_t path_list_index,
+                                     const fib_route_path_t *rpath);
+
 /**
  * A callback function type for walking a path-list's paths
  */
diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c
index 6c3162e7..b31f35e3 100644
--- a/src/vnet/fib/fib_table.c
+++ b/src/vnet/fib/fib_table.c
@@ -475,8 +475,21 @@ fib_table_entry_special_remove (u32 fib_index,
  */
 static void
 fib_table_route_path_fixup (const fib_prefix_t *prefix,
+                            fib_entry_flag_t eflags,
 			    fib_route_path_t *path)
 {
+    /*
+     * not all zeros next hop &&
+     * is recursive path &&
+     * nexthop is same as the route's address
+     */
+    if ((!ip46_address_is_zero(&path->frp_addr)) &&
+        (~0 == path->frp_sw_if_index) &&
+        (0 == ip46_address_cmp(&path->frp_addr, &prefix->fp_addr)))
+    {
+        /* Prefix recurses via itself */
+	path->frp_flags |= FIB_ROUTE_PATH_DROP;
+    }
     if (fib_prefix_is_host(prefix) &&
 	ip46_address_is_zero(&path->frp_addr) &&
 	path->frp_sw_if_index != ~0)
@@ -484,7 +497,19 @@ fib_table_route_path_fixup (const fib_prefix_t *prefix,
 	path->frp_addr = prefix->fp_addr;
         path->frp_flags |= FIB_ROUTE_PATH_ATTACHED;
     }
-}		  
+    if (eflags & FIB_ENTRY_FLAG_DROP)
+    {
+	path->frp_flags |= FIB_ROUTE_PATH_DROP;
+    }
+    if (eflags & FIB_ENTRY_FLAG_LOCAL)
+    {
+	path->frp_flags |= FIB_ROUTE_PATH_LOCAL;
+    }
+    if (eflags & FIB_ENTRY_FLAG_EXCLUSIVE)
+    {
+	path->frp_flags |= FIB_ROUTE_PATH_EXCLUSIVE;
+    }
+}
 
 fib_node_index_t
 fib_table_entry_path_add (u32 fib_index,
@@ -536,7 +561,7 @@ fib_table_entry_path_add2 (u32 fib_index,
 
     for (ii = 0; ii < vec_len(rpath); ii++)
     {
-	fib_table_route_path_fixup(prefix, &rpath[ii]);
+	fib_table_route_path_fixup(prefix, flags, &rpath[ii]);
     }
 
     if (FIB_NODE_INDEX_INVALID == fib_entry_index)
@@ -583,11 +608,6 @@ fib_table_entry_path_remove2 (u32 fib_index,
     fib_table = fib_table_get(fib_index, prefix->fp_proto);
     fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix);
 
-    for (ii = 0; ii < vec_len(rpath); ii++)
-    {
-	fib_table_route_path_fixup(prefix, &rpath[ii]);
-    }
-
     if (FIB_NODE_INDEX_INVALID == fib_entry_index)
     {
 	/*
@@ -605,6 +625,15 @@ fib_table_entry_path_remove2 (u32 fib_index,
 	fib_entry_lock(fib_entry_index);
         was_sourced = fib_entry_is_sourced(fib_entry_index, source);
 
+        for (ii = 0; ii < vec_len(rpath); ii++)
+        {
+            fib_table_route_path_fixup(
+                prefix,
+                fib_entry_get_flags_for_source(fib_entry_index,
+                                               source),
+                &rpath[ii]);
+        }
+
 	src_flag = fib_entry_path_remove(fib_entry_index, source, rpath);
 
 	if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag))
@@ -661,7 +690,6 @@ fib_table_entry_path_remove (u32 fib_index,
     };
     fib_route_path_t *paths = NULL;
 
-    fib_table_route_path_fixup(prefix, &path);
     vec_add1(paths, path);
 
     fib_table_entry_path_remove2(fib_index, prefix, source, paths);
@@ -692,7 +720,7 @@ fib_table_entry_update (u32 fib_index,
 
     for (ii = 0; ii < vec_len(paths); ii++)
     {
-	fib_table_route_path_fixup(prefix, &paths[ii]);
+	fib_table_route_path_fixup(prefix, flags, &paths[ii]);
     }
     /*
      * sort the paths provided by the control plane. this means
@@ -750,7 +778,6 @@ fib_table_entry_update_one_path (u32 fib_index,
     };
     fib_route_path_t *paths = NULL;
 
-    fib_table_route_path_fixup(prefix, &path);
     vec_add1(paths, path);
 
     fib_entry_index = 
diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c
index 3c9b8a38..e4a8a70e 100644
--- a/src/vnet/fib/fib_test.c
+++ b/src/vnet/fib/fib_test.c
@@ -25,6 +25,8 @@
 #include <vnet/dpo/receive_dpo.h>
 #include <vnet/dpo/ip_null_dpo.h>
 #include <vnet/bfd/bfd_main.h>
+#include <vnet/dpo/interface_dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
 
 #include <vnet/mpls/mpls.h>
 
@@ -271,6 +273,7 @@ typedef enum fib_test_lb_bucket_type_t_ {
     FT_LB_O_LB,
     FT_LB_SPECIAL,
     FT_LB_ADJ,
+    FT_LB_INTF,
 } fib_test_lb_bucket_type_t;
 
 typedef struct fib_test_lb_bucket_t_ {
@@ -315,6 +318,31 @@ typedef struct fib_test_lb_bucket_t_ {
     };
 } fib_test_lb_bucket_t;
 
+typedef enum fib_test_rep_bucket_type_t_ {
+    FT_REP_LABEL_O_ADJ,
+    FT_REP_DISP_MFIB_LOOKUP,
+    FT_REP_INTF,
+} fib_test_rep_bucket_type_t;
+
+typedef struct fib_test_rep_bucket_t_ {
+    fib_test_rep_bucket_type_t type;
+
+    union
+    {
+	struct
+	{
+	    mpls_eos_bit_t eos;
+	    mpls_label_t label;
+	    u8 ttl;
+	    adj_index_t adj;
+	} label_o_adj;
+ 	struct
+	{
+	    adj_index_t adj;
+	} adj;
+   };
+} fib_test_rep_bucket_t;
+
 #define FIB_TEST_LB(_cond, _comment, _args...)			\
 {								\
     if (!FIB_TEST_I(_cond, _comment, ##_args)) {		\
@@ -322,7 +350,83 @@ typedef struct fib_test_lb_bucket_t_ {
     }								\
 }
 
-static int
+int
+fib_test_validate_rep_v (const replicate_t *rep,
+                         u16 n_buckets,
+                         va_list ap)
+{
+    const fib_test_rep_bucket_t *exp;
+    const dpo_id_t *dpo;
+    int bucket;
+
+    FIB_TEST_LB((n_buckets == rep->rep_n_buckets),
+                "n_buckets = %d", rep->rep_n_buckets);
+
+    for (bucket = 0; bucket < n_buckets; bucket++)
+    {
+	exp = va_arg(ap, fib_test_rep_bucket_t*);
+
+        dpo = replicate_get_bucket_i(rep, bucket);
+
+	switch (exp->type)
+	{
+	case FT_REP_LABEL_O_ADJ:
+	    {
+		const mpls_label_dpo_t *mld;
+                mpls_label_t hdr;
+		FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type),
+                            "bucket %d stacks on %U",
+                            bucket,
+                            format_dpo_type, dpo->dpoi_type);
+	    
+		mld = mpls_label_dpo_get(dpo->dpoi_index);
+                hdr = clib_net_to_host_u32(mld->mld_hdr[0].label_exp_s_ttl);
+
+		FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) ==
+			     exp->label_o_adj.label),
+			    "bucket %d stacks on label %d",
+			    bucket,
+			    exp->label_o_adj.label);
+
+		FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) ==
+			     exp->label_o_adj.eos),
+			    "bucket %d stacks on label %d %U",
+			    bucket,
+			    exp->label_o_adj.label,
+			    format_mpls_eos_bit, exp->label_o_adj.eos);
+
+		FIB_TEST_LB((DPO_ADJACENCY_INCOMPLETE == mld->mld_dpo.dpoi_type),
+			    "bucket %d label stacks on %U",
+			    bucket,
+			    format_dpo_type, mld->mld_dpo.dpoi_type);
+
+		FIB_TEST_LB((exp->label_o_adj.adj == mld->mld_dpo.dpoi_index),
+			    "bucket %d label stacks on adj %d",
+			    bucket,
+			    exp->label_o_adj.adj);
+	    }
+	    break;
+	case FT_REP_INTF:
+            FIB_TEST_LB((DPO_INTERFACE == dpo->dpoi_type),
+                        "bucket %d stacks on %U",
+                        bucket,
+                        format_dpo_type, dpo->dpoi_type);
+
+            FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index),
+                        "bucket %d stacks on adj %d",
+                        bucket,
+                        exp->adj.adj);
+	    break;
+        case FT_REP_DISP_MFIB_LOOKUP:
+//            ASSERT(0);
+            break;
+        }
+    }
+
+    return (!0);
+}
+
+int
 fib_test_validate_lb_v (const load_balance_t *lb,
 			u16 n_buckets,
 			va_list ap)
@@ -484,6 +588,16 @@ fib_test_validate_lb_v (const load_balance_t *lb,
 			bucket,
 			exp->adj.adj);
 	    break;
+	case FT_LB_INTF:
+	    FIB_TEST_I((DPO_INTERFACE == dpo->dpoi_type),
+		       "bucket %d stacks on %U",
+		       bucket,
+		       format_dpo_type, dpo->dpoi_type);
+	    FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index),
+			"bucket %d stacks on adj %d",
+			bucket,
+			exp->adj.adj);
+	    break;
 	case FT_LB_O_LB:
 	    FIB_TEST_I((DPO_LOAD_BALANCE == dpo->dpoi_type),
                        "bucket %d stacks on %U",
@@ -509,14 +623,13 @@ fib_test_validate_lb_v (const load_balance_t *lb,
     return (!0);
 }
 
-static int
+int
 fib_test_validate_entry (fib_node_index_t fei,
 			 fib_forward_chain_type_t fct,
 			 u16 n_buckets,
 			 ...)
 {
     dpo_id_t dpo = DPO_INVALID;
-    const load_balance_t *lb;
     fib_prefix_t pfx;
     index_t fw_lbi;
     u32 fib_index;
@@ -529,47 +642,59 @@ fib_test_validate_entry (fib_node_index_t fei,
     fib_index = fib_entry_get_fib_index(fei);
     fib_entry_contribute_forwarding(fei, fct, &dpo);
 
-    FIB_TEST_LB((DPO_LOAD_BALANCE == dpo.dpoi_type),
-		"Entry links to %U",
-		format_dpo_type, dpo.dpoi_type);
-    lb = load_balance_get(dpo.dpoi_index);
-
-    res = fib_test_validate_lb_v(lb, n_buckets, ap);
+    if (DPO_REPLICATE == dpo.dpoi_type)
+    {
+        const replicate_t *rep;
 
-    /*
-     * ensure that the LB contributed by the entry is the
-     * same as the LB in the forwarding tables
-     */
-    if (fct == fib_entry_get_default_chain_type(fib_entry_get(fei)))
+        rep = replicate_get(dpo.dpoi_index);
+        res = fib_test_validate_rep_v(rep, n_buckets, ap);
+    }
+    else
     {
-        switch (pfx.fp_proto)
-        {
-        case FIB_PROTOCOL_IP4:
-            fw_lbi = ip4_fib_forwarding_lookup(fib_index, &pfx.fp_addr.ip4);
-            break;
-        case FIB_PROTOCOL_IP6:
-            fw_lbi = ip6_fib_table_fwding_lookup(&ip6_main, fib_index, &pfx.fp_addr.ip6);
-            break;
-        case FIB_PROTOCOL_MPLS:
-            {
-                mpls_unicast_header_t hdr = {
-                    .label_exp_s_ttl = 0,
-                };
+        const load_balance_t *lb;
+
+        FIB_TEST_LB((DPO_LOAD_BALANCE == dpo.dpoi_type),
+                    "Entry links to %U",
+                    format_dpo_type, dpo.dpoi_type);
 
-                vnet_mpls_uc_set_label(&hdr.label_exp_s_ttl, pfx.fp_label);
-                vnet_mpls_uc_set_s(&hdr.label_exp_s_ttl, pfx.fp_eos);
-                hdr.label_exp_s_ttl = clib_host_to_net_u32(hdr.label_exp_s_ttl);
+        lb = load_balance_get(dpo.dpoi_index);
+        res = fib_test_validate_lb_v(lb, n_buckets, ap);
 
-                fw_lbi = mpls_fib_table_forwarding_lookup(fib_index, &hdr);
+        /*
+         * ensure that the LB contributed by the entry is the
+         * same as the LB in the forwarding tables
+         */
+        if (fct == fib_entry_get_default_chain_type(fib_entry_get(fei)))
+        {
+            switch (pfx.fp_proto)
+            {
+            case FIB_PROTOCOL_IP4:
+                fw_lbi = ip4_fib_forwarding_lookup(fib_index, &pfx.fp_addr.ip4);
+                break;
+            case FIB_PROTOCOL_IP6:
+                fw_lbi = ip6_fib_table_fwding_lookup(&ip6_main, fib_index, &pfx.fp_addr.ip6);
                 break;
+            case FIB_PROTOCOL_MPLS:
+                {
+                    mpls_unicast_header_t hdr = {
+                        .label_exp_s_ttl = 0,
+                    };
+
+                    vnet_mpls_uc_set_label(&hdr.label_exp_s_ttl, pfx.fp_label);
+                    vnet_mpls_uc_set_s(&hdr.label_exp_s_ttl, pfx.fp_eos);
+                    hdr.label_exp_s_ttl = clib_host_to_net_u32(hdr.label_exp_s_ttl);
+
+                    fw_lbi = mpls_fib_table_forwarding_lookup(fib_index, &hdr);
+                    break;
+                }
+            default:
+                fw_lbi = 0;
             }
-        default:
-            fw_lbi = 0;
+            FIB_TEST_LB((fw_lbi == dpo.dpoi_index),
+                        "Contributed LB = FW LB: %U\n %U",
+                        format_load_balance, fw_lbi, 0,
+                        format_load_balance, dpo.dpoi_index, 0);
         }
-        FIB_TEST_LB((fw_lbi == dpo.dpoi_index),
-                    "Contributed LB = FW LB: %U\n %U",
-                    format_load_balance, fw_lbi, 0,
-                    format_load_balance, dpo.dpoi_index, 0);
     }
 
     dpo_reset(&dpo);
@@ -1289,6 +1414,7 @@ fib_test_v4 (void)
 
     lookup_dpo_add_or_lock_w_fib_index(fib_index,
                                        DPO_PROTO_IP4,
+                                       LOOKUP_UNICAST,
                                        LOOKUP_INPUT_DST_ADDR,
                                        LOOKUP_TABLE_FROM_CONFIG,
                                        &ex_dpo);
@@ -2605,7 +2731,6 @@ fib_test_v4 (void)
 			     NULL,
 			     FIB_ROUTE_PATH_FLAG_NONE);
 
-
     fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32);
     dpo1 = fib_entry_contribute_ip_forwarding(fei);
 
@@ -7493,6 +7618,7 @@ lfib_test (void)
     fib_route_path_t *rpaths = NULL, rpath = {
     	.frp_proto = FIB_PROTOCOL_MPLS,
     	.frp_local_label = 1200,
+        .frp_eos = MPLS_NON_EOS,
     	.frp_sw_if_index = ~0, // recurive
     	.frp_fib_index = 0, // Default MPLS fib
     	.frp_weight = 1,
@@ -7607,6 +7733,146 @@ lfib_test (void)
 
     dpo_reset(&ip_1200);
 
+    /*
+     * An rx-interface route.
+     *  like the tail of an mcast LSP
+     */
+    dpo_id_t idpo = DPO_INVALID;
+
+    interface_dpo_add_or_lock(DPO_PROTO_IP4,
+                              tm->hw[0]->sw_if_index,
+                              &idpo);
+
+    fib_prefix_t pfx_2500 = {
+	.fp_len = 21,
+	.fp_proto = FIB_PROTOCOL_MPLS,
+	.fp_label = 2500,
+	.fp_eos = MPLS_EOS,
+	.fp_payload_proto = DPO_PROTO_IP4,
+    };
+    fib_test_lb_bucket_t rx_intf_0 = {
+        .type = FT_LB_INTF,
+        .adj = {
+            .adj = idpo.dpoi_index,
+        },
+    };
+
+    lfe = fib_table_entry_update_one_path(fib_index,
+					  &pfx_2500,
+					  FIB_SOURCE_API,
+					  FIB_ENTRY_FLAG_NONE,
+					  FIB_PROTOCOL_IP4,
+					  NULL,
+					  tm->hw[0]->sw_if_index,
+					  ~0, // invalid fib index
+					  0,
+					  NULL,
+					  FIB_ROUTE_PATH_INTF_RX);
+    FIB_TEST(fib_test_validate_entry(lfe,
+    				     FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+    				     1,
+    				     &rx_intf_0),
+    	     "2500 rx-interface 0");
+    fib_table_entry_delete(fib_index, &pfx_2500, FIB_SOURCE_API);
+
+    /*
+     * An MPLS multicast entry
+     */
+    fib_prefix_t pfx_3500 = {
+	.fp_len = 21,
+	.fp_proto = FIB_PROTOCOL_MPLS,
+	.fp_label = 3500,
+	.fp_eos = MPLS_EOS,
+	.fp_payload_proto = DPO_PROTO_IP4,
+    };
+    fib_test_rep_bucket_t mc_0 = {
+        .type = FT_REP_LABEL_O_ADJ,
+	.label_o_adj = {
+	    .adj = ai_mpls_10_10_10_1,
+	    .label = 3300,
+	    .eos = MPLS_EOS,
+	},
+    };
+    fib_test_rep_bucket_t mc_intf_0 = {
+        .type = FT_REP_INTF,
+        .adj = {
+            .adj = idpo.dpoi_index,
+        },
+    };
+    mpls_label_t *l3300 = NULL;
+    vec_add1(l3300, 3300);
+
+    lfe = fib_table_entry_update_one_path(lfib_index,
+					  &pfx_3500,
+					  FIB_SOURCE_API,
+					  FIB_ENTRY_FLAG_MULTICAST,
+					  FIB_PROTOCOL_IP4,
+					  &nh_10_10_10_1,
+					  tm->hw[0]->sw_if_index,
+					  ~0, // invalid fib index
+					  1,
+					  l3300,
+					  FIB_ROUTE_PATH_FLAG_NONE);
+    FIB_TEST(fib_test_validate_entry(lfe,
+    				     FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+    				     1,
+    				     &mc_0),
+    	     "3500 via replicate over 10.10.10.1");
+
+    /*
+     * MPLS Bud-node. Add a replication via an interface-receive path
+     */
+    lfe = fib_table_entry_path_add(lfib_index,
+				   &pfx_3500,
+				   FIB_SOURCE_API,
+				   FIB_ENTRY_FLAG_MULTICAST,
+				   FIB_PROTOCOL_IP4,
+                                   NULL,
+                                   tm->hw[0]->sw_if_index,
+                                   ~0, // invalid fib index
+                                   0,
+                                   NULL,
+                                   FIB_ROUTE_PATH_INTF_RX);
+    FIB_TEST(fib_test_validate_entry(lfe,
+                                     FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+                                     2,
+                                     &mc_0,
+                                     &mc_intf_0),
+    	     "3500 via replicate over 10.10.10.1 and interface-rx");
+
+    /*
+     * Add a replication via an interface-free for-us path
+     */
+    fib_test_rep_bucket_t mc_disp = {
+        .type = FT_REP_DISP_MFIB_LOOKUP,
+        .adj = {
+            .adj = idpo.dpoi_index,
+        },
+    };
+    lfe = fib_table_entry_path_add(lfib_index,
+				   &pfx_3500,
+				   FIB_SOURCE_API,
+				   FIB_ENTRY_FLAG_MULTICAST,
+				   FIB_PROTOCOL_IP4,
+                                   NULL,
+                                   5, // rpf-id
+                                   0, // default table
+                                   0,
+                                   NULL,
+                                   FIB_ROUTE_PATH_RPF_ID);
+    FIB_TEST(fib_test_validate_entry(lfe,
+                                     FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+                                     3,
+                                     &mc_0,
+                                     &mc_disp,
+                                     &mc_intf_0),
+    	     "3500 via replicate over 10.10.10.1 and interface-rx");
+
+
+    
+    fib_table_entry_delete(fib_index, &pfx_3500, FIB_SOURCE_API);
+    dpo_reset(&idpo);
+
     /*
      * cleanup
      */
@@ -7617,6 +7883,9 @@ lfib_test (void)
     FIB_TEST(lb_count == pool_elts(load_balance_pool),
 	     "Load-balance resources freed %d of %d",
              lb_count, pool_elts(load_balance_pool));
+    FIB_TEST(0 == pool_elts(interface_dpo_pool),
+	     "interface_dpo resources freed %d of %d",
+             0, pool_elts(interface_dpo_pool));
 
     return (0);
 }
diff --git a/src/vnet/fib/fib_test.h b/src/vnet/fib/fib_test.h
new file mode 100644
index 00000000..b98680bf
--- /dev/null
+++ b/src/vnet/fib/fib_test.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FIB_TEST_H__
+#define __FIB_TEST_H__
+
+#include <vnet/fib/fib_types.h>
+
+typedef enum fib_test_lb_bucket_type_t_ {
+    FT_LB_LABEL_O_ADJ,
+    FT_LB_LABEL_STACK_O_ADJ,
+    FT_LB_LABEL_O_LB,
+    FT_LB_O_LB,
+    FT_LB_SPECIAL,
+    FT_LB_ADJ,
+    FT_LB_INTF,
+} fib_test_lb_bucket_type_t;
+
+typedef struct fib_test_lb_bucket_t_ {
+    fib_test_lb_bucket_type_t type;
+
+    union
+    {
+	struct
+	{
+	    mpls_eos_bit_t eos;
+	    mpls_label_t label;
+	    u8 ttl;
+	    adj_index_t adj;
+	} label_o_adj;
+	struct
+	{
+	    mpls_eos_bit_t eos;
+	    mpls_label_t label_stack[8];
+	    u8 label_stack_size;
+	    u8 ttl;
+	    adj_index_t adj;
+	} label_stack_o_adj;
+	struct
+	{
+	    mpls_eos_bit_t eos;
+	    mpls_label_t label;
+	    u8 ttl;
+	    index_t lb;
+	} label_o_lb;
+	struct
+	{
+	    index_t adj;
+	} adj;
+	struct
+	{
+	    index_t lb;
+	} lb;
+	struct
+	{
+	    index_t adj;
+	} special;
+    };
+} fib_test_lb_bucket_t;
+
+typedef enum fib_test_rep_bucket_type_t_ {
+    FT_REP_LABEL_O_ADJ,
+    FT_REP_INTF,
+} fib_test_rep_bucket_type_t;
+
+typedef struct fib_test_rep_bucket_t_ {
+    fib_test_rep_bucket_type_t type;
+
+    union
+    {
+	struct
+	{
+	    mpls_eos_bit_t eos;
+	    mpls_label_t label;
+	    u8 ttl;
+	    adj_index_t adj;
+	} label_o_adj;
+ 	struct
+	{
+	    adj_index_t adj;
+	} adj;
+   };
+} fib_test_rep_bucket_t;
+
+
+extern int fib_test_validate_rep_v(const replicate_t *rep,
+                                   u16 n_buckets,
+                                   va_list ap);
+
+extern int fib_test_validate_lb_v(const load_balance_t *lb,
+                                  u16 n_buckets,
+                                  va_list ap);
+
+extern int fib_test_validate_entry(fib_node_index_t fei,
+                                   fib_forward_chain_type_t fct,
+                                   u16 n_buckets,
+                                   ...);
+
+#endif
diff --git a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c
index 2837a59d..8165f3eb 100644
--- a/src/vnet/fib/fib_types.c
+++ b/src/vnet/fib/fib_types.c
@@ -66,12 +66,13 @@ fib_prefix_from_ip46_addr (const ip46_address_t *addr,
 
 void
 fib_prefix_from_mpls_label (mpls_label_t label,
+                            mpls_eos_bit_t eos,
 			    fib_prefix_t *pfx)
 {
     pfx->fp_proto = FIB_PROTOCOL_MPLS;
     pfx->fp_len = 21;
     pfx->fp_label = label;
-    pfx->fp_eos = MPLS_NON_EOS;
+    pfx->fp_eos = eos;
 }
 
 int
@@ -194,17 +195,7 @@ fib_route_path_cmp (const fib_route_path_t *rpath1,
 
     if (0 != res) return (res);
 
-    if (~0 != rpath1->frp_sw_if_index &&
-        ~0 != rpath2->frp_sw_if_index)
-    {
-        res = vnet_sw_interface_compare(vnet_get_main(),
-                                        rpath1->frp_sw_if_index,
-                                        rpath2->frp_sw_if_index);
-    }
-    else
-    {
-        res = rpath1->frp_sw_if_index - rpath2->frp_sw_if_index;
-    }
+    res = (rpath1->frp_sw_if_index - rpath2->frp_sw_if_index);
 
     if (0 != res) return (res);
 
diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h
index 1c5299a9..4cb73e8a 100644
--- a/src/vnet/fib/fib_types.h
+++ b/src/vnet/fib/fib_types.h
@@ -286,8 +286,36 @@ typedef enum fib_route_path_flags_t_
      * Attached path
      */
     FIB_ROUTE_PATH_ATTACHED = (1 << 3),
+    /**
+     * A Drop path - resolve the path on the drop DPO
+     */
+    FIB_ROUTE_PATH_DROP = (1 << 4),
+    /**
+     * Don't resolve the path, use the DPO the client provides
+     */
+    FIB_ROUTE_PATH_EXCLUSIVE = (1 << 5),
+    /**
+     * A path that results in received traffic being received/recirculated
+     * so that it appears to have arrived on the new interface
+     */
+    FIB_ROUTE_PATH_INTF_RX = (1 << 6),
+    /**
+     * A local path with a RPF-ID => multicast traffic
+     */
+    FIB_ROUTE_PATH_RPF_ID = (1 << 7),
 } fib_route_path_flags_t;
 
+/**
+ * An RPF-ID is a numerical value that is used for RPF validation. An entry
+ * has an RPF-ID; when a packet egresses from (e.g.) an LSP it gains an
+ * RPF-ID; these two are compared for the RPF check.
+ * This replaces the interface-based check (since the LSP has no associated
+ * interface).
+ */
+typedef u32 fib_rpf_id_t;
+
+#define MFIB_RPF_ID_NONE (0)
+
 /**
  * @brief 
  * A representation of a path as described by a route producer.
@@ -321,17 +349,29 @@ typedef struct fib_route_path_t_ {
 	 */
 	ip46_address_t frp_addr;
 
-	/**
-	 * The MPLS local Label to reursively resolve through.
-	 * This is valid when the path type is MPLS.
-	 */
-	mpls_label_t frp_local_label;
+        struct {
+            /**
+             * The MPLS local Label to recursively resolve through.
+             * This is valid when the path type is MPLS.
+             */
+            mpls_label_t frp_local_label;
+            /**
+             * EOS bit for the resolving label
+             */
+            mpls_eos_bit_t frp_eos;
+        };
+    };
+    union {
+        /**
+         * The interface.
+         * Will be invalid for recursive paths.
+         */
+        u32 frp_sw_if_index;
+        /**
+         * The RPF-ID
+         */
+        fib_rpf_id_t frp_rpf_id;
     };
-    /**
-     * The interface.
-     * Will be invalid for recursive paths.
-     */
-    u32 frp_sw_if_index;
     /**
      * The FIB index to lookup the nexthop
      * Only valid for recursive paths.
diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c
index 4b2b76ea..19f9f3c1 100644
--- a/src/vnet/fib/mpls_fib.c
+++ b/src/vnet/fib/mpls_fib.c
@@ -165,6 +165,7 @@ mpls_fib_create_with_table_id (u32 table_id)
 
     lookup_dpo_add_or_lock_w_fib_index(0, // unused
                                        DPO_PROTO_IP4,
+                                       LOOKUP_UNICAST,
                                        LOOKUP_INPUT_DST_ADDR,
                                        LOOKUP_TABLE_FROM_INPUT_INTERFACE,
                                        &dpo);
@@ -179,6 +180,7 @@ mpls_fib_create_with_table_id (u32 table_id)
 
     lookup_dpo_add_or_lock_w_fib_index(0, //unsued
                                        DPO_PROTO_MPLS,
+                                       LOOKUP_UNICAST,
                                        LOOKUP_INPUT_DST_ADDR,
                                        LOOKUP_TABLE_FROM_INPUT_INTERFACE,
                                        &dpo);
@@ -197,6 +199,7 @@ mpls_fib_create_with_table_id (u32 table_id)
 
     lookup_dpo_add_or_lock_w_fib_index(0, //unused
                                        DPO_PROTO_IP6,
+                                       LOOKUP_UNICAST,
                                        LOOKUP_INPUT_DST_ADDR,
                                        LOOKUP_TABLE_FROM_INPUT_INTERFACE,
                                        &dpo);
@@ -210,6 +213,7 @@ mpls_fib_create_with_table_id (u32 table_id)
     prefix.fp_eos = MPLS_NON_EOS;
     lookup_dpo_add_or_lock_w_fib_index(0, // unsued
                                        DPO_PROTO_MPLS,
+                                       LOOKUP_UNICAST,
                                        LOOKUP_INPUT_DST_ADDR,
                                        LOOKUP_TABLE_FROM_INPUT_INTERFACE,
                                        &dpo);
@@ -320,8 +324,15 @@ mpls_fib_forwarding_table_update (mpls_fib_t *mf,
 {
     mpls_label_t key;
 
-    ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
-
+    ASSERT((DPO_LOAD_BALANCE == dpo->dpoi_type) ||
+           (DPO_REPLICATE == dpo->dpoi_type));
+    if (CLIB_DEBUG > 0)
+    {
+        if (DPO_REPLICATE == dpo->dpoi_type)
+            ASSERT(dpo->dpoi_index & MPLS_IS_REPLICATE);
+        if (DPO_LOAD_BALANCE == dpo->dpoi_type)
+            ASSERT(!(dpo->dpoi_index & MPLS_IS_REPLICATE));
+    }
     key = mpls_fib_entry_mk_key(label, eos);
 
     mf->mf_lbs[key] = dpo->dpoi_index;
diff --git a/src/vnet/handoff.h b/src/vnet/handoff.h
index 815206a9..04ba8bfb 100644
--- a/src/vnet/handoff.h
+++ b/src/vnet/handoff.h
@@ -150,7 +150,7 @@ eth_get_sym_key (ethernet_header_t * h0)
 			ip->dst_address.as_u64[0] ^
 			ip->dst_address.as_u64[1] ^ ip->protocol);
     }
-  else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST))
+  else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS))
     {
       hash_key = mpls_get_key ((mpls_unicast_header_t *) (h0 + 1));
     }
@@ -179,8 +179,7 @@ eth_get_sym_key (ethernet_header_t * h0)
 		   ip->dst_address.as_u64[0] ^
 		   ip->dst_address.as_u64[1] ^ ip->protocol);
 	}
-      else if (outer->type ==
-	       clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST))
+      else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS))
 	{
 	  hash_key = mpls_get_key ((mpls_unicast_header_t *) (outer + 1));
 	}
@@ -210,7 +209,7 @@ eth_get_key (ethernet_header_t * h0)
     {
       hash_key = ipv6_get_key ((ip6_header_t *) (h0 + 1));
     }
-  else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST))
+  else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS))
     {
       hash_key = mpls_get_key ((mpls_unicast_header_t *) (h0 + 1));
     }
@@ -230,8 +229,7 @@ eth_get_key (ethernet_header_t * h0)
 	{
 	  hash_key = ipv6_get_key ((ip6_header_t *) (outer + 1));
 	}
-      else if (outer->type ==
-	       clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST))
+      else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS))
 	{
 	  hash_key = mpls_get_key ((mpls_unicast_header_t *) (outer + 1));
 	}
diff --git a/src/vnet/interface.c b/src/vnet/interface.c
index 2a1e70e8..45417b2f 100644
--- a/src/vnet/interface.c
+++ b/src/vnet/interface.c
@@ -1360,7 +1360,7 @@ vnet_link_to_l3_proto (vnet_link_t link)
     case VNET_LINK_IP6:
       return (VNET_L3_PACKET_TYPE_IP6);
     case VNET_LINK_MPLS:
-      return (VNET_L3_PACKET_TYPE_MPLS_UNICAST);
+      return (VNET_L3_PACKET_TYPE_MPLS);
     case VNET_LINK_ARP:
       return (VNET_L3_PACKET_TYPE_ARP);
     case VNET_LINK_ETHERNET:
diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api
index 5c2df32c..6af1714f 100644
--- a/src/vnet/ip/ip.api
+++ b/src/vnet/ip/ip.api
@@ -478,6 +478,7 @@ define ip_mroute_add_del
   u32 table_id;
   u32 entry_flags;
   u32 itf_flags;
+  u32 rpf_id;
   u16 grp_address_length;
   u8 create_vrf_if_needed;
   u8 is_add;
@@ -518,6 +519,8 @@ manual_endian manual_print define ip_mfib_details
 {
   u32 context;
   u32 table_id;
+  u32 entry_flags;
+  u32 rpf_id;
   u8  address_length;
   u8  grp_address[4];
   u8  src_address[4];
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index fdfe7f63..9fdf9b3c 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -2752,6 +2752,16 @@ ip4_rewrite_mcast (vlib_main_t * vm,
     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
 }
 
+static uword
+ip4_mcast_midchain (vlib_main_t * vm,
+		    vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  if (adj_are_counters_enabled ())
+    return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
+  else
+    return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
+}
+
 /* *INDENT-OFF* */
 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
   .function = ip4_rewrite,
@@ -2778,6 +2788,16 @@ VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
 };
 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
 
+VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
+  .function = ip4_mcast_midchain,
+  .name = "ip4-mcast-midchain",
+  .vector_size = sizeof (u32),
+
+  .format_trace = format_ip4_rewrite_trace,
+  .sibling_of = "ip4-rewrite",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
+
 VLIB_REGISTER_NODE (ip4_midchain_node) = {
   .function = ip4_midchain,
   .name = "ip4-midchain",
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index c2fc4f87..a369f79f 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -2246,6 +2246,16 @@ ip6_midchain (vlib_main_t * vm,
     return ip6_rewrite_inline (vm, node, frame, 0, 1, 0);
 }
 
+static uword
+ip6_mcast_midchain (vlib_main_t * vm,
+		    vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  if (adj_are_counters_enabled ())
+    return ip6_rewrite_inline (vm, node, frame, 1, 1, 1);
+  else				/* counters off: clear the first flag, as ip4_mcast_midchain does */
+    return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
+}
+
 /* *INDENT-OFF* */
 VLIB_REGISTER_NODE (ip6_midchain_node) =
 {
@@ -2290,6 +2300,19 @@ VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
 
 VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_mcast_node, ip6_rewrite_mcast);
 
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_mcast_midchain_node, static) =
+{
+  .function = ip6_mcast_midchain,
+  .name = "ip6-mcast-midchain",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip6_rewrite_trace,
+  .sibling_of = "ip6-rewrite",
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_mcast_midchain_node, ip6_mcast_midchain);
+
 /*
  * Hop-by-Hop handling
  */
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
index 2af546df..58b997aa 100644
--- a/src/vnet/ip/ip6_neighbor.c
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -557,6 +557,7 @@ ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
     case IP_LOOKUP_NEXT_PUNT:
     case IP_LOOKUP_NEXT_LOCAL:
     case IP_LOOKUP_NEXT_REWRITE:
+    case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
     case IP_LOOKUP_NEXT_MIDCHAIN:
     case IP_LOOKUP_NEXT_ICMP_ERROR:
     case IP_LOOKUP_N_NEXT:
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index b9f1782b..9c9cb4a4 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -438,17 +438,20 @@ vl_api_ip6_fib_dump_t_handler (vl_api_ip6_fib_dump_t * mp)
 }
 
 static void
-send_ip_mfib_details (vpe_api_main_t * am,
-		      unix_shared_memory_queue_t * q,
-		      u32 table_id,
-		      mfib_prefix_t * pfx,
-		      fib_route_path_encode_t * api_rpaths, u32 context)
+send_ip_mfib_details (unix_shared_memory_queue_t * q,
+		      u32 context, u32 table_id, fib_node_index_t mfei)
 {
+  fib_route_path_encode_t *api_rpath, *api_rpaths = NULL;
   vl_api_ip_mfib_details_t *mp;
-  fib_route_path_encode_t *api_rpath;
+  mfib_entry_t *mfib_entry;
   vl_api_fib_path_t *fp;
+  mfib_prefix_t pfx;
   int path_count;
 
+  mfib_entry = mfib_entry_get (mfei);
+  mfib_entry_get_prefix (mfei, &pfx);
+  mfib_entry_encode (mfei, &api_rpaths);
+
   path_count = vec_len (api_rpaths);
   mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
   if (!mp)
@@ -457,12 +460,14 @@ send_ip_mfib_details (vpe_api_main_t * am,
   mp->_vl_msg_id = ntohs (VL_API_IP_FIB_DETAILS);
   mp->context = context;
 
+  mp->rpf_id = mfib_entry->mfe_rpf_id;
+  mp->entry_flags = mfib_entry->mfe_flags;
   mp->table_id = htonl (table_id);
-  mp->address_length = pfx->fp_len;
-  memcpy (mp->grp_address, &pfx->fp_grp_addr.ip4,
-	  sizeof (pfx->fp_grp_addr.ip4));
-  memcpy (mp->src_address, &pfx->fp_src_addr.ip4,
-	  sizeof (pfx->fp_src_addr.ip4));
+  mp->address_length = pfx.fp_len;
+  memcpy (mp->grp_address, &pfx.fp_grp_addr.ip4,
+	  sizeof (pfx.fp_grp_addr.ip4));
+  memcpy (mp->src_address, &pfx.fp_src_addr.ip4,
+	  sizeof (pfx.fp_src_addr.ip4));
 
   mp->count = htonl (path_count);
   fp = mp->path;
@@ -475,6 +480,7 @@ send_ip_mfib_details (vpe_api_main_t * am,
     copy_fib_next_hop (api_rpath, fp);
     fp++;
   }
+  vec_free (api_rpaths);
 
   vl_msg_api_send_shmem (q, (u8 *) & mp);
 }
@@ -497,13 +503,10 @@ vl_api_ip_mfib_table_dump_walk (fib_node_index_t fei, void *arg)
 static void
 vl_api_ip_mfib_dump_t_handler (vl_api_ip_mfib_dump_t * mp)
 {
-  vpe_api_main_t *am = &vpe_api_main;
   unix_shared_memory_queue_t *q;
   ip4_main_t *im = &ip4_main;
   mfib_table_t *mfib_table;
   fib_node_index_t *mfeip;
-  mfib_prefix_t pfx;
-  fib_route_path_encode_t *api_rpaths = NULL;
   vl_api_ip_mfib_dump_ctc_t ctx = {
     .entries = NULL,
   };
@@ -524,21 +527,16 @@ vl_api_ip_mfib_dump_t_handler (vl_api_ip_mfib_dump_t * mp)
 
     vec_foreach (mfeip, ctx.entries)
     {
-      mfib_entry_get_prefix (*mfeip, &pfx);
-      mfib_entry_encode (*mfeip, &api_rpaths);
-      send_ip_mfib_details (am, q,
+      send_ip_mfib_details (q, mp->context,
                             mfib_table->mft_table_id,
-                            &pfx, api_rpaths,
-                            mp->context);
+                            *mfeip);
     }
-    vec_reset_length (api_rpaths);
     vec_reset_length (ctx.entries);
 
   }));
   /* *INDENT-ON* */
 
   vec_free (ctx.entries);
-  vec_free (api_rpaths);
 }
 
 static void
@@ -705,10 +703,13 @@ add_del_route_t_handler (u8 is_multipath,
 			 u8 is_unreach,
 			 u8 is_prohibit,
 			 u8 is_local,
+			 u8 is_multicast,
 			 u8 is_classify,
 			 u32 classify_table_index,
 			 u8 is_resolve_host,
 			 u8 is_resolve_attached,
+			 u8 is_interface_rx,
+			 u8 is_rpf_id,
 			 u32 fib_index,
 			 const fib_prefix_t * prefix,
 			 u8 next_hop_proto_is_ip4,
@@ -731,16 +732,24 @@ add_del_route_t_handler (u8 is_multipath,
     .frp_label_stack = next_hop_out_label_stack,
   };
   fib_route_path_t *paths = NULL;
+  fib_entry_flag_t entry_flags = FIB_ENTRY_FLAG_NONE;
 
   if (MPLS_LABEL_INVALID != next_hop_via_label)
     {
       path.frp_proto = FIB_PROTOCOL_MPLS;
       path.frp_local_label = next_hop_via_label;
+      path.frp_eos = MPLS_NON_EOS;
     }
   if (is_resolve_host)
     path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST;
   if (is_resolve_attached)
     path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED;
+  if (is_interface_rx)
+    path_flags |= FIB_ROUTE_PATH_INTF_RX;
+  if (is_rpf_id)
+    path_flags |= FIB_ROUTE_PATH_RPF_ID;
+  if (is_multicast)
+    entry_flags |= FIB_ENTRY_FLAG_MULTICAST;
 
   path.frp_flags = path_flags;
 
@@ -754,8 +763,7 @@ add_del_route_t_handler (u8 is_multipath,
       if (is_add)
 	fib_table_entry_path_add2 (fib_index,
 				   prefix,
-				   FIB_SOURCE_API,
-				   FIB_ENTRY_FLAG_NONE, paths);
+				   FIB_SOURCE_API, entry_flags, paths);
       else
 	fib_table_entry_path_remove2 (fib_index,
 				      prefix, FIB_SOURCE_API, paths);
@@ -826,8 +834,7 @@ add_del_route_t_handler (u8 is_multipath,
 	{
 	  vec_add1 (paths, path);
 	  fib_table_entry_update (fib_index,
-				  prefix,
-				  FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, paths);
+				  prefix, FIB_SOURCE_API, entry_flags, paths);
 	  vec_free (paths);
 	}
       else
@@ -847,7 +854,7 @@ add_del_route_check (fib_protocol_t table_proto,
 		     fib_protocol_t next_hop_table_proto,
 		     u32 next_hop_table_id,
 		     u8 create_missing_tables,
-		     u32 * fib_index, u32 * next_hop_fib_index)
+		     u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index)
 {
   vnet_main_t *vnm = vnet_get_main ();
 
@@ -866,7 +873,7 @@ add_del_route_check (fib_protocol_t table_proto,
 	}
     }
 
-  if (~0 != ntohl (next_hop_sw_if_index))
+  if (!is_rpf_id && ~0 != ntohl (next_hop_sw_if_index))
     {
       if (pool_is_free_index (vnm->interface_main.sw_interfaces,
 			      ntohl (next_hop_sw_if_index)))
@@ -876,16 +883,27 @@ add_del_route_check (fib_protocol_t table_proto,
     }
   else
     {
-      *next_hop_fib_index = fib_table_find (next_hop_table_proto,
-					    ntohl (next_hop_table_id));
+      if (is_rpf_id)
+	*next_hop_fib_index = mfib_table_find (next_hop_table_proto,
+					       ntohl (next_hop_table_id));
+      else
+	*next_hop_fib_index = fib_table_find (next_hop_table_proto,
+					      ntohl (next_hop_table_id));
 
       if (~0 == *next_hop_fib_index)
 	{
 	  if (create_missing_tables)
 	    {
-	      *next_hop_fib_index =
-		fib_table_find_or_create_and_lock (next_hop_table_proto,
-						   ntohl (next_hop_table_id));
+	      if (is_rpf_id)
+		*next_hop_fib_index =
+		  mfib_table_find_or_create_and_lock (next_hop_table_proto,
+						      ntohl
+						      (next_hop_table_id));
+	      else
+		*next_hop_fib_index =
+		  fib_table_find_or_create_and_lock (next_hop_table_proto,
+						     ntohl
+						     (next_hop_table_id));
 	    }
 	  else
 	    {
@@ -910,7 +928,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
 			    mp->next_hop_sw_if_index,
 			    FIB_PROTOCOL_IP4,
 			    mp->next_hop_table_id,
-			    mp->create_vrf_if_needed,
+			    mp->create_vrf_if_needed, 0,
 			    &fib_index, &next_hop_fib_index);
 
   if (0 != rv)
@@ -943,11 +961,11 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
 				   mp->is_drop,
 				   mp->is_unreach,
 				   mp->is_prohibit,
-				   mp->is_local,
+				   mp->is_local, 0,
 				   mp->is_classify,
 				   mp->classify_table_index,
 				   mp->is_resolve_host,
-				   mp->is_resolve_attached,
+				   mp->is_resolve_attached, 0, 0,
 				   fib_index, &pfx, 1,
 				   &nh,
 				   ntohl (mp->next_hop_sw_if_index),
@@ -969,7 +987,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
 			    mp->next_hop_sw_if_index,
 			    FIB_PROTOCOL_IP6,
 			    mp->next_hop_table_id,
-			    mp->create_vrf_if_needed,
+			    mp->create_vrf_if_needed, 0,
 			    &fib_index, &next_hop_fib_index);
 
   if (0 != rv)
@@ -1002,11 +1020,11 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
 				   mp->is_drop,
 				   mp->is_unreach,
 				   mp->is_prohibit,
-				   mp->is_local,
+				   mp->is_local, 0,
 				   mp->is_classify,
 				   mp->classify_table_index,
 				   mp->is_resolve_host,
-				   mp->is_resolve_attached,
+				   mp->is_resolve_attached, 0, 0,
 				   fib_index, &pfx, 0,
 				   &nh, ntohl (mp->next_hop_sw_if_index),
 				   next_hop_fib_index,
@@ -1075,6 +1093,7 @@ mroute_add_del_handler (u8 is_add,
 			u32 fib_index,
 			const mfib_prefix_t * prefix,
 			u32 entry_flags,
+			fib_rpf_id_t rpf_id,
 			u32 next_hop_sw_if_index, u32 itf_flags)
 {
   stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ );
@@ -1091,7 +1110,7 @@ mroute_add_del_handler (u8 is_add,
   if (!is_local && ~0 == next_hop_sw_if_index)
     {
       mfib_table_entry_update (fib_index, prefix,
-			       MFIB_SOURCE_API, entry_flags);
+			       MFIB_SOURCE_API, rpf_id, entry_flags);
     }
   else
     {
@@ -1152,6 +1171,7 @@ api_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp)
 				  mp->is_local,
 				  fib_index, &pfx,
 				  ntohl (mp->entry_flags),
+				  ntohl (mp->rpf_id),
 				  ntohl (mp->next_hop_sw_if_index),
 				  ntohl (mp->itf_flags)));
 }
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
index ec9a1f97..597de06b 100755
--- a/src/vnet/ip/lookup.c
+++ b/src/vnet/ip/lookup.c
@@ -450,6 +450,7 @@ vnet_ip_route_cmd (vlib_main_t * vm,
 			 unformat_mpls_unicast_label, &rpath.frp_local_label))
 	{
 	  rpath.frp_weight = 1;
+	  rpath.frp_eos = MPLS_NON_EOS;
 	  rpath.frp_proto = FIB_PROTOCOL_MPLS;
 	  rpath.frp_sw_if_index = ~0;
 	  vec_add1 (rpaths, rpath);
@@ -923,7 +924,7 @@ vnet_ip_mroute_cmd (vlib_main_t * vm,
 	  else if (eflags)
 	    {
 	      mfib_table_entry_update (fib_index, &pfx, MFIB_SOURCE_CLI,
-				       eflags);
+				       MFIB_RPF_ID_NONE, eflags);
 	    }
 	  else
 	    {
diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
index efa724e0..d2954e96 100644
--- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
+++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
@@ -88,6 +88,7 @@ ip_dst_fib_add_route (u32 dst_fib_index, const ip_prefix_t * dst_prefix)
 					  (ip_prefix_version (dst_prefix) ==
 					   IP6 ? DPO_PROTO_IP6 :
 					   DPO_PROTO_IP4),
+					  LOOKUP_UNICAST,
 					  LOOKUP_INPUT_SRC_ADDR,
 					  LOOKUP_TABLE_FROM_CONFIG,
 					  &src_lkup_dpo);
diff --git a/src/vnet/mfib/ip4_mfib.c b/src/vnet/mfib/ip4_mfib.c
index 164cafa1..3ed7cba7 100644
--- a/src/vnet/mfib/ip4_mfib.c
+++ b/src/vnet/mfib/ip4_mfib.c
@@ -72,6 +72,7 @@ ip4_create_mfib_with_table_id (u32 table_id)
         mfib_table_entry_update(mfib_table->mft_index,
                                 &prefix,
                                 MFIB_SOURCE_DEFAULT_ROUTE,
+                                MFIB_RPF_ID_NONE,
                                 MFIB_ENTRY_FLAG_DROP);
     }
 
diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c
index 991b91c6..116fee22 100644
--- a/src/vnet/mfib/ip6_mfib.c
+++ b/src/vnet/mfib/ip6_mfib.c
@@ -195,6 +195,7 @@ ip6_create_mfib_with_table_id (u32 table_id)
     mfib_table_entry_update(mfib_table->mft_index,
                             &all_zeros,
                             MFIB_SOURCE_DEFAULT_ROUTE,
+                            MFIB_RPF_ID_NONE,
                             MFIB_ENTRY_FLAG_DROP);
 
     /*
diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c
index 1aa8e086..847f25e7 100644
--- a/src/vnet/mfib/mfib_entry.c
+++ b/src/vnet/mfib/mfib_entry.c
@@ -48,6 +48,15 @@
 #define MFIB_ENTRY_DBG(_e, _fmt, _args...)
 #endif
 
+/**
+ * MFIB extensions to each path
+ */
+typedef struct mfib_path_ext_t_
+{
+    mfib_itf_flags_t mfpe_flags;
+    fib_node_index_t mfpe_path;
+} mfib_path_ext_t;
+
 /**
  * The source of an MFIB entry
  */
@@ -58,22 +67,39 @@ typedef struct mfib_entry_src_t_
      */
     mfib_source_t mfes_src;
 
+    /**
+     * Route flags
+     */
+    mfib_entry_flags_t mfes_flags;
+
     /**
      * The path-list of forwarding interfaces
      */
     fib_node_index_t mfes_pl;
 
     /**
-     * Route flags
+     * RPF-ID
      */
-    mfib_entry_flags_t mfes_flags;
+    fib_rpf_id_t mfes_rpf_id;
+
+    /**
+     * Hash table of path extensions
+     */
+    mfib_path_ext_t *mfes_exts;
 
     /**
-     * The hash table of all interfaces
+     * The hash table of all interfaces.
+     *  This is forwarding time information derived from the paths
+     *  and their extensions.
      */
     mfib_itf_t *mfes_itfs;
 } mfib_entry_src_t;
 
+/**
+ * Pool of path extensions
+ */
+static mfib_path_ext_t *mfib_path_ext_pool;
+
 /**
  * String names for each source
  */
@@ -123,6 +149,24 @@ format_mfib_entry_dpo (u8 * s, va_list * args)
                    MFIB_ENTRY_FORMAT_BRIEF));
 }
 
+static inline mfib_path_ext_t *
+mfib_entry_path_ext_get (index_t mi)
+{
+    return (pool_elt_at_index(mfib_path_ext_pool, mi));
+}
+
+static u8 *
+format_mfib_entry_path_ext (u8 * s, va_list * args)
+{
+    mfib_path_ext_t *path_ext;
+    index_t mpi = va_arg(*args, index_t);
+
+    path_ext = mfib_entry_path_ext_get(mpi);
+    return (format(s, "path:%d flags:%U",
+                   path_ext->mfpe_path,
+                   format_mfib_itf_flags, path_ext->mfpe_flags));
+}
+
 u8 *
 format_mfib_entry (u8 * s, va_list * args)
 {
@@ -141,6 +185,8 @@ format_mfib_entry (u8 * s, va_list * args)
 
     if (level >= MFIB_ENTRY_FORMAT_DETAIL)
     {
+        fib_node_index_t path_index, mpi;
+
         s = format (s, "\n");
         s = format (s, " fib:%d", mfib_entry->mfe_fib_index);
         s = format (s, " index:%d", mfib_entry_get_index(mfib_entry));
@@ -153,6 +199,14 @@ format_mfib_entry (u8 * s, va_list * args)
             {
                 s = fib_path_list_format(msrc->mfes_pl, s);
             }
+            s = format (s, "    Extensions:\n",
+                        mfib_source_names[msrc->mfes_src]);
+            hash_foreach(path_index, mpi, msrc->mfes_exts,
+            ({
+                s = format(s, "     %U\n", format_mfib_entry_path_ext, mpi);
+            }));
+            s = format (s, "    Interface-Forwarding:\n",
+                        mfib_source_names[msrc->mfes_src]);
             hash_foreach(sw_if_index, mfi, msrc->mfes_itfs,
             ({
                 s = format(s, "    %U\n", format_mfib_itf, mfi);
@@ -165,7 +219,7 @@ format_mfib_entry (u8 * s, va_list * args)
     ({
         s = format(s, "\n  %U", format_mfib_itf, mfi);
     }));
-
+    s = format(s, "\n  RPF-ID:%d", mfib_entry->mfe_rpf_id);
     s = format(s, "\n  %U-chain\n  %U",
                format_fib_forw_chain_type,
                mfib_entry_get_default_chain_type(mfib_entry),
@@ -314,13 +368,6 @@ mfib_entry_src_remove (mfib_entry_t *mfib_entry,
     }
 }
 
-static int
-mfib_entry_src_n_itfs (const mfib_entry_src_t *msrc)
-{
-    return (hash_elts(msrc->mfes_itfs));
-}
-
-
 static void
 mfib_entry_last_lock_gone (fib_node_t *node)
 {
@@ -338,7 +385,6 @@ mfib_entry_last_lock_gone (fib_node_t *node)
         mfib_entry_src_flush(msrc);
     }
 
-    fib_path_list_unlock(mfib_entry->mfe_parent);
     vec_free(mfib_entry->mfe_srcs);
 
     fib_node_deinit(&mfib_entry->mfe_node);
@@ -417,10 +463,9 @@ mfib_entry_alloc (u32 fib_index,
     mfib_entry->mfe_flags = 0;
     mfib_entry->mfe_fib_index = fib_index;
     mfib_entry->mfe_prefix = *prefix;
-    mfib_entry->mfe_parent = FIB_NODE_INDEX_INVALID;
-    mfib_entry->mfe_sibling = FIB_NODE_INDEX_INVALID;
     mfib_entry->mfe_srcs = NULL;
     mfib_entry->mfe_itfs = NULL;
+    mfib_entry->mfe_rpf_id = MFIB_RPF_ID_NONE;
 
     dpo_reset(&mfib_entry->mfe_rep);
 
@@ -431,10 +476,57 @@ mfib_entry_alloc (u32 fib_index,
     return (mfib_entry);
 }
 
+static inline mfib_path_ext_t *
+mfib_entry_path_ext_find (mfib_path_ext_t *exts,
+                          fib_node_index_t path_index)
+{
+    uword *p;
+
+    p = hash_get(exts, path_index);
+
+    if (NULL != p)
+    {
+        return (mfib_entry_path_ext_get(p[0]));
+    }
+
+    return (NULL);
+}
+
+static mfib_path_ext_t*
+mfib_path_ext_add (mfib_entry_src_t *msrc,
+                   fib_node_index_t path_index,
+                   mfib_itf_flags_t mfi_flags)
+{
+    mfib_path_ext_t *path_ext;
+
+    pool_get(mfib_path_ext_pool, path_ext);
+
+    path_ext->mfpe_flags = mfi_flags;
+    path_ext->mfpe_path = path_index;
+
+    hash_set(msrc->mfes_exts, path_index,
+             path_ext - mfib_path_ext_pool);
+
+    return (path_ext);
+}
+
+static void
+mfib_path_ext_remove (mfib_entry_src_t *msrc,
+                      fib_node_index_t path_index)
+{
+    mfib_path_ext_t *path_ext;
+
+    path_ext = mfib_entry_path_ext_find(msrc->mfes_exts, path_index);
+
+    hash_unset(msrc->mfes_exts, path_index);
+    pool_put(mfib_path_ext_pool, path_ext);
+}
+
 typedef struct mfib_entry_collect_forwarding_ctx_t_
 {
     load_balance_path_t * next_hops;
     fib_forward_chain_type_t fct;
+    mfib_entry_src_t *msrc;
 } mfib_entry_collect_forwarding_ctx_t;
 
 static int
@@ -455,6 +547,20 @@ mfib_entry_src_collect_forwarding (fib_node_index_t pl_index,
         return (!0);
     }
 
+    /*
+     * If the path is not a forwarding path, do not use it
+     */
+    mfib_path_ext_t *path_ext;
+    
+    path_ext = mfib_entry_path_ext_find(ctx->msrc->mfes_exts,
+                                        path_index);
+
+    if (NULL != path_ext &&
+        !(path_ext->mfpe_flags & MFIB_ITF_FLAG_FORWARD))
+    {
+        return (!0);
+    }
+    
     switch (ctx->fct)
     {
     case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
@@ -483,46 +589,61 @@ mfib_entry_src_collect_forwarding (fib_node_index_t pl_index,
 }
 
 static void
-mfib_entry_stack (mfib_entry_t *mfib_entry)
+mfib_entry_stack (mfib_entry_t *mfib_entry,
+                  mfib_entry_src_t *msrc)
 {
     dpo_proto_t dp;
 
     dp = fib_proto_to_dpo(mfib_entry_get_proto(mfib_entry));
 
-    if (FIB_NODE_INDEX_INVALID != mfib_entry->mfe_parent)
+    if (NULL != msrc &&
+        FIB_NODE_INDEX_INVALID != msrc->mfes_pl)
     {
         mfib_entry_collect_forwarding_ctx_t ctx = {
             .next_hops = NULL,
             .fct = mfib_entry_get_default_chain_type(mfib_entry),
+            .msrc = msrc,
         };
 
-        fib_path_list_walk(mfib_entry->mfe_parent,
+        fib_path_list_walk(msrc->mfes_pl,
                            mfib_entry_src_collect_forwarding,
                            &ctx);
 
         if (!(MFIB_ENTRY_FLAG_EXCLUSIVE & mfib_entry->mfe_flags))
         {
-            /*
-             * each path contirbutes a next-hop. form a replicate
-             * from those choices.
-             */
-            if (!dpo_id_is_valid(&mfib_entry->mfe_rep) ||
-                dpo_is_drop(&mfib_entry->mfe_rep))
+            if (NULL == ctx.next_hops)
             {
-                dpo_id_t tmp_dpo = DPO_INVALID;
-
-                dpo_set(&tmp_dpo,
-                        DPO_REPLICATE, dp,
-                        replicate_create(0, dp));
-
+                /*
+                 * no next-hops, stack directly on the drop
+                 */
                 dpo_stack(DPO_MFIB_ENTRY, dp,
                           &mfib_entry->mfe_rep,
-                          &tmp_dpo);
-
-                dpo_reset(&tmp_dpo);
+                          drop_dpo_get(dp));
+            }
+            else
+            {
+                /*
+                 * each path contributes a next-hop. form a replicate
+                 * from those choices.
+                 */
+                if (!dpo_id_is_valid(&mfib_entry->mfe_rep) ||
+                    dpo_is_drop(&mfib_entry->mfe_rep))
+                {
+                    dpo_id_t tmp_dpo = DPO_INVALID;
+
+                    dpo_set(&tmp_dpo,
+                            DPO_REPLICATE, dp,
+                            replicate_create(0, dp));
+
+                    dpo_stack(DPO_MFIB_ENTRY, dp,
+                              &mfib_entry->mfe_rep,
+                              &tmp_dpo);
+
+                    dpo_reset(&tmp_dpo);
+                }
+                replicate_multipath_update(&mfib_entry->mfe_rep,
+                                           ctx.next_hops);
             }
-            replicate_multipath_update(&mfib_entry->mfe_rep,
-                                       ctx.next_hops);
         }
         else
         {
@@ -548,11 +669,11 @@ mfib_entry_stack (mfib_entry_t *mfib_entry)
     }
 }
 
-static void
-mfib_entry_forwarding_path_add (mfib_entry_src_t *msrc,
-                                const fib_route_path_t *rpath)
+static fib_node_index_t
+mfib_entry_src_path_add (mfib_entry_src_t *msrc,
+                         const fib_route_path_t *rpath)
 {
-    fib_node_index_t old_pl_index;
+    fib_node_index_t path_index;
     fib_route_path_t *rpaths;
 
     ASSERT(!(MFIB_ENTRY_FLAG_EXCLUSIVE & msrc->mfes_flags));
@@ -563,32 +684,26 @@ mfib_entry_forwarding_path_add (mfib_entry_src_t *msrc,
     rpaths = NULL;
     vec_add1(rpaths, rpath[0]);
 
-    old_pl_index = msrc->mfes_pl;
-
     if (FIB_NODE_INDEX_INVALID == msrc->mfes_pl)
     {
-        msrc->mfes_pl =
-            fib_path_list_create(FIB_PATH_LIST_FLAG_NO_URPF,
-                                 rpaths);
-    }
-    else
-    {
-        msrc->mfes_pl =
-            fib_path_list_copy_and_path_add(msrc->mfes_pl,
-                                            FIB_PATH_LIST_FLAG_NO_URPF,
-                                            rpaths);
+        /* A non-shared path-list */
+        msrc->mfes_pl = fib_path_list_create(FIB_PATH_LIST_FLAG_NO_URPF,
+                                             NULL);
+        fib_path_list_lock(msrc->mfes_pl);
     }
-    fib_path_list_lock(msrc->mfes_pl);
-    fib_path_list_unlock(old_pl_index);
+
+    path_index = fib_path_list_path_add(msrc->mfes_pl, rpaths);
 
     vec_free(rpaths);
+
+    return (path_index);
 }
 
-static int
-mfib_entry_forwarding_path_remove (mfib_entry_src_t *msrc,
-                                   const fib_route_path_t *rpath)
+static fib_node_index_t
+mfib_entry_src_path_remove (mfib_entry_src_t *msrc,
+                            const fib_route_path_t *rpath)
 {
-    fib_node_index_t old_pl_index;
+    fib_node_index_t path_index;
     fib_route_path_t *rpaths;
 
     ASSERT(!(MFIB_ENTRY_FLAG_EXCLUSIVE & msrc->mfes_flags));
@@ -599,56 +714,31 @@ mfib_entry_forwarding_path_remove (mfib_entry_src_t *msrc,
     rpaths = NULL;
     vec_add1(rpaths, rpath[0]);
 
-    old_pl_index = msrc->mfes_pl;
-
-    msrc->mfes_pl =
-        fib_path_list_copy_and_path_remove(msrc->mfes_pl,
-                                           FIB_PATH_LIST_FLAG_NONE,
-                                           rpaths);
-
-    fib_path_list_lock(msrc->mfes_pl);
-    fib_path_list_unlock(old_pl_index);
+    path_index = fib_path_list_path_remove(msrc->mfes_pl, rpaths);
 
     vec_free(rpaths);
 
-    return (FIB_NODE_INDEX_INVALID != msrc->mfes_pl);
+    return (path_index);
 }
 
 static void
 mfib_entry_recalculate_forwarding (mfib_entry_t *mfib_entry)
 {
-    fib_node_index_t old_pl_index;
     mfib_entry_src_t *bsrc;
 
-    old_pl_index = mfib_entry->mfe_parent;
-
     /*
      * copy the forwarding data from the bast source
      */
     bsrc = mfib_entry_get_best_src(mfib_entry);
 
-    if (NULL == bsrc)
-    {
-        mfib_entry->mfe_parent = FIB_NODE_INDEX_INVALID;
-    }
-    else
+    if (NULL != bsrc)
     {
-        mfib_entry->mfe_parent = bsrc->mfes_pl;
         mfib_entry->mfe_flags = bsrc->mfes_flags;
         mfib_entry->mfe_itfs = bsrc->mfes_itfs;
+        mfib_entry->mfe_rpf_id = bsrc->mfes_rpf_id;
     }
 
-    /*
-     * re-stack the entry on the best forwarding info.
-     */
-    if (old_pl_index != mfib_entry->mfe_parent ||
-        FIB_NODE_INDEX_INVALID == old_pl_index)
-    {
-        mfib_entry_stack(mfib_entry);
-
-        fib_path_list_lock(mfib_entry->mfe_parent);
-        fib_path_list_unlock(old_pl_index);
-    }
+    mfib_entry_stack(mfib_entry, bsrc);
 }
 
 
@@ -656,6 +746,7 @@ fib_node_index_t
 mfib_entry_create (u32 fib_index,
                    mfib_source_t source,
                    const mfib_prefix_t *prefix,
+                   fib_rpf_id_t rpf_id,
                    mfib_entry_flags_t entry_flags)
 {
     fib_node_index_t mfib_entry_index;
@@ -666,6 +757,7 @@ mfib_entry_create (u32 fib_index,
                                   &mfib_entry_index);
     msrc = mfib_entry_src_find_or_create(mfib_entry, source);
     msrc->mfes_flags = entry_flags;
+    msrc->mfes_rpf_id = rpf_id;
 
     mfib_entry_recalculate_forwarding(mfib_entry);
 
@@ -682,13 +774,14 @@ static int
 mfib_entry_src_ok_for_delete (const mfib_entry_src_t *msrc)
 {
     return ((MFIB_ENTRY_FLAG_NONE == msrc->mfes_flags &&
-             0 == mfib_entry_src_n_itfs(msrc)));
+             0 == fib_path_list_get_n_paths(msrc->mfes_pl)));
 }
 
 int
 mfib_entry_update (fib_node_index_t mfib_entry_index,
                    mfib_source_t source,
                    mfib_entry_flags_t entry_flags,
+                   fib_rpf_id_t rpf_id,
                    index_t repi)
 {
     mfib_entry_t *mfib_entry;
@@ -697,6 +790,7 @@ mfib_entry_update (fib_node_index_t mfib_entry_index,
     mfib_entry = mfib_entry_get(mfib_entry_index);
     msrc = mfib_entry_src_find_or_create(mfib_entry, source);
     msrc->mfes_flags = entry_flags;
+    msrc->mfes_rpf_id = rpf_id;
 
     if (INDEX_INVALID != repi)
     {
@@ -768,55 +862,79 @@ mfib_entry_path_update (fib_node_index_t mfib_entry_index,
                         const fib_route_path_t *rpath,
                         mfib_itf_flags_t itf_flags)
 {
+    fib_node_index_t path_index;
+    mfib_path_ext_t *path_ext;
+    mfib_itf_flags_t old, new;
     mfib_entry_t *mfib_entry;
     mfib_entry_src_t *msrc;
-    mfib_itf_t *mfib_itf;
 
     mfib_entry = mfib_entry_get(mfib_entry_index);
     ASSERT(NULL != mfib_entry);
     msrc = mfib_entry_src_find_or_create(mfib_entry, source);
 
     /*
-     * search for the interface in the current set
+     * add the path to the path-list. If it's a duplicate we'll get
+     * back the original path.
+     */
+    path_index = mfib_entry_src_path_add(msrc, rpath);
+
+    /*
+     * find the path extension for that path
      */
-    mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs,
-                                   rpath[0].frp_sw_if_index);
+    path_ext = mfib_entry_path_ext_find(msrc->mfes_exts, path_index);
 
-    if (NULL == mfib_itf)
+    if (NULL == path_ext)
     {
-        /*
-         * this is a path we do not yet have. If it is forwarding then we
-         * add it to the replication set
-         */
-        if (itf_flags & MFIB_ITF_FLAG_FORWARD)
-        {
-            mfib_entry_forwarding_path_add(msrc, rpath);
-        }
-        /*
-         * construct a new ITF for this entry's list
-         */
-        mfib_entry_itf_add(msrc,
-                           rpath[0].frp_sw_if_index,
-                           mfib_itf_create(rpath[0].frp_sw_if_index,
-                                           itf_flags));
+        old = MFIB_ITF_FLAG_NONE;
+        path_ext = mfib_path_ext_add(msrc, path_index, itf_flags);
     }
     else
     {
-        int was_forwarding = !!(mfib_itf->mfi_flags & MFIB_ITF_FLAG_FORWARD);
-        int is_forwarding  = !!(itf_flags & MFIB_ITF_FLAG_FORWARD);
+        old = path_ext->mfpe_flags;
+        path_ext->mfpe_flags = itf_flags;
+    }
 
-        if (!was_forwarding && is_forwarding)
-        {
-            mfib_entry_forwarding_path_add(msrc, rpath);
-        }
-        else if (was_forwarding && !is_forwarding)
+    /*
+     * Has the path changed its contribution to the input interface set?
+     * Only paths with interfaces can do that.
+     */
+    if (~0 != rpath[0].frp_sw_if_index)
+    {
+        mfib_itf_t *mfib_itf;
+
+        new = itf_flags;
+
+        if (old != new)
         {
-            mfib_entry_forwarding_path_remove(msrc, rpath);
+            if (MFIB_ITF_FLAG_NONE == new)
+            {
+                /*
+                 * no more interface flags on this path, remove
+                 * from the data-plane set
+                 */
+                mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index);
+            }
+            else if (MFIB_ITF_FLAG_NONE == old)
+            {
+                /*
+                 * This interface is now contributing
+                 */
+                mfib_entry_itf_add(msrc,
+                                   rpath[0].frp_sw_if_index,
+                                   mfib_itf_create(rpath[0].frp_sw_if_index,
+                                                   itf_flags));
+            }
+            else
+            {
+                /*
+                 * change of flag contributions
+                 */
+                mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs,
+                                               rpath[0].frp_sw_if_index);
+                /* Seen by packets inflight */
+                mfib_itf->mfi_flags = new;
+            }
         }
-        /*
-         * packets in flight see these updates.
-         */
-        mfib_itf->mfi_flags = itf_flags;
     }
 
     mfib_entry_recalculate_forwarding(mfib_entry);
@@ -833,9 +951,9 @@ mfib_entry_path_remove (fib_node_index_t mfib_entry_index,
                         mfib_source_t source,
                         const fib_route_path_t *rpath)
 {
+    fib_node_index_t path_index;
     mfib_entry_t *mfib_entry;
     mfib_entry_src_t *msrc;
-    mfib_itf_t *mfib_itf;
 
     mfib_entry = mfib_entry_get(mfib_entry_index);
     ASSERT(NULL != mfib_entry);
@@ -850,33 +968,23 @@ mfib_entry_path_remove (fib_node_index_t mfib_entry_index,
     }
 
     /*
-     * search for the interface in the current set
+     * remove the path from the path-list. If it's not there we'll get
+     * back invalid
      */
-    mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs,
-                                   rpath[0].frp_sw_if_index);
+    path_index = mfib_entry_src_path_remove(msrc, rpath);
 
-    if (NULL == mfib_itf)
+    if (FIB_NODE_INDEX_INVALID != path_index)
     {
         /*
-         * removing a path that does not exist
+         * don't need the extension, nor the interface anymore
          */
-        return (mfib_entry_ok_for_delete(mfib_entry));
-    }
-
-    /*
-     * we have this path. If it is forwarding then we
-     * remove it to the replication set
-     */
-    if (mfib_itf->mfi_flags & MFIB_ITF_FLAG_FORWARD)
-    {
-        mfib_entry_forwarding_path_remove(msrc, rpath);
+        mfib_path_ext_remove(msrc, path_index);
+        if (~0 != rpath[0].frp_sw_if_index)
+        {
+            mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index);
+        }
     }
 
-    /*
-     * remove the interface/path from this entry's list
-     */
-    mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index);
-
     if (mfib_entry_src_ok_for_delete(msrc))
     {
         /*
@@ -1057,11 +1165,14 @@ mfib_entry_encode (fib_node_index_t mfib_entry_index,
                   fib_route_path_encode_t **api_rpaths)
 {
     mfib_entry_t *mfib_entry;
+    mfib_entry_src_t *bsrc;
 
     mfib_entry = mfib_entry_get(mfib_entry_index);
-    if (FIB_NODE_INDEX_INVALID != mfib_entry->mfe_parent)
+    bsrc = mfib_entry_get_best_src(mfib_entry);
+
+    if (FIB_NODE_INDEX_INVALID != bsrc->mfes_pl)
     {
-        fib_path_list_walk(mfib_entry->mfe_parent,
+        fib_path_list_walk(bsrc->mfes_pl,
                            fib_path_encode,
                            api_rpaths);
     }
diff --git a/src/vnet/mfib/mfib_entry.h b/src/vnet/mfib/mfib_entry.h
index dc8f49aa..4f62b18e 100644
--- a/src/vnet/mfib/mfib_entry.h
+++ b/src/vnet/mfib/mfib_entry.h
@@ -42,17 +42,6 @@ typedef struct mfib_entry_t_ {
      * The index of the FIB table this entry is in
      */
     u32 mfe_fib_index;
-    /**
-     * the path-list for which this entry is a child. This is also the path-list
-     * that is contributing forwarding for this entry.
-     */
-    fib_node_index_t mfe_parent;
-    /**
-     * index of this entry in the parent's child list.
-     * This is set when this entry is added as a child, but can also
-     * be changed by the parent as it manages its list.
-     */
-    u32 mfe_sibling;
 
     /**
      * A vector of sources contributing forwarding
@@ -65,7 +54,7 @@ typedef struct mfib_entry_t_ {
     CLIB_CACHE_LINE_ALIGN_MARK(cacheline1);
 
     /**
-     * The Replicate DPO used for forwarding.
+     * The DPO used for forwarding; replicate, drop, etc..
      */
     dpo_id_t mfe_rep;
 
@@ -74,6 +63,11 @@ typedef struct mfib_entry_t_ {
      */
     mfib_entry_flags_t mfe_flags;
 
+    /**
+     * RPF-ID used when the packets ingress not from an interface
+     */
+    fib_rpf_id_t mfe_rpf_id;
+
     /**
      * A hash table of interfaces
      */
@@ -90,11 +84,13 @@ extern u8 *format_mfib_entry(u8 * s, va_list * args);
 extern fib_node_index_t mfib_entry_create(u32 fib_index,
                                           mfib_source_t source,
                                           const mfib_prefix_t *prefix,
+                                          fib_rpf_id_t rpf_id,
                                           mfib_entry_flags_t entry_flags);
 
 extern int mfib_entry_update(fib_node_index_t fib_entry_index,
                              mfib_source_t source,
                              mfib_entry_flags_t entry_flags,
+                             fib_rpf_id_t rpf_id,
                              index_t rep_dpo);
 
 extern void mfib_entry_path_update(fib_node_index_t fib_entry_index,
diff --git a/src/vnet/mfib/mfib_forward.c b/src/vnet/mfib/mfib_forward.c
index 5fe0a57c..3d8f4f98 100644
--- a/src/vnet/mfib/mfib_forward.c
+++ b/src/vnet/mfib/mfib_forward.c
@@ -380,13 +380,27 @@ mfib_forward_rpf (vlib_main_t * vm,
              * for the case of throughput traffic that is not replicated
              * to the host stack nor sets local flags
              */
-            if (PREDICT_TRUE(NULL != mfi0))
+
+            /*
+             * If the mfib entry has a configured RPF-ID check that
+             * in preference to an interface based RPF
+             */
+            if (MFIB_RPF_ID_NONE != mfe0->mfe_rpf_id)
             {
-                iflags0 = mfi0->mfi_flags;
+                iflags0 = (mfe0->mfe_rpf_id == vnet_buffer(b0)->ip.rpf_id ?
+                           MFIB_ITF_FLAG_ACCEPT :
+                           MFIB_ITF_FLAG_NONE);
             }
             else
             {
-                iflags0 = MFIB_ITF_FLAG_NONE;
+                if (PREDICT_TRUE(NULL != mfi0))
+                {
+                    iflags0 = mfi0->mfi_flags;
+                }
+                else
+                {
+                    iflags0 = MFIB_ITF_FLAG_NONE;
+                }
             }
             eflags0 = mfe0->mfe_flags;
 
@@ -436,17 +450,16 @@ mfib_forward_rpf (vlib_main_t * vm,
             {
                 mfib_forward_rpf_trace_t *t0;
 
-                t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+                t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
                 t0->entry_index = mfei0;
+                t0->itf_flags = iflags0;
                 if (NULL == mfi0)
                 {
                     t0->sw_if_index = ~0;
-                    t0->itf_flags = MFIB_ITF_FLAG_NONE;
                 }
                 else
                 {
                     t0->sw_if_index = mfi0->mfi_sw_if_index;
-                    t0->itf_flags = mfi0->mfi_flags;
                 }
             }
             vlib_validate_buffer_enqueue_x1 (vm, node, next,
@@ -478,7 +491,7 @@ VLIB_REGISTER_NODE (ip4_mfib_forward_rpf_node, static) = {
 
     .n_next_nodes = MFIB_FORWARD_RPF_N_NEXT,
     .next_nodes = {
-        [MFIB_FORWARD_RPF_NEXT_DROP] = "error-drop",
+        [MFIB_FORWARD_RPF_NEXT_DROP] = "ip4-drop",
     },
 };
 
@@ -503,7 +516,7 @@ VLIB_REGISTER_NODE (ip6_mfib_forward_rpf_node, static) = {
 
     .n_next_nodes = MFIB_FORWARD_RPF_N_NEXT,
     .next_nodes = {
-        [MFIB_FORWARD_RPF_NEXT_DROP] = "error-drop",
+        [MFIB_FORWARD_RPF_NEXT_DROP] = "ip6-drop",
     },
 };
 
diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c
index 3b4bd985..7ffe8941 100644
--- a/src/vnet/mfib/mfib_table.c
+++ b/src/vnet/mfib/mfib_table.c
@@ -165,6 +165,7 @@ fib_node_index_t
 mfib_table_entry_update (u32 fib_index,
                          const mfib_prefix_t *prefix,
                          mfib_source_t source,
+                         fib_rpf_id_t rpf_id,
                          mfib_entry_flags_t entry_flags)
 {
     fib_node_index_t mfib_entry_index;
@@ -181,7 +182,8 @@ mfib_table_entry_update (u32 fib_index,
              * update to a non-existing entry with non-zero flags
              */
             mfib_entry_index = mfib_entry_create(fib_index, source,
-                                                 prefix, entry_flags);
+                                                 prefix, rpf_id,
+                                                 entry_flags);
 
             mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index);
         }
@@ -198,6 +200,7 @@ mfib_table_entry_update (u32 fib_index,
         if (mfib_entry_update(mfib_entry_index,
                               source,
                               entry_flags,
+                              rpf_id,
                               INDEX_INVALID))
         {
             /*
@@ -230,6 +233,7 @@ mfib_table_entry_path_update (u32 fib_index,
         mfib_entry_index = mfib_entry_create(fib_index,
                                              source,
                                              prefix,
+                                             MFIB_RPF_ID_NONE,
                                              MFIB_ENTRY_FLAG_NONE);
 
         mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index);
@@ -304,6 +308,7 @@ mfib_table_entry_special_add (u32 fib_index,
         mfib_entry_index = mfib_entry_create(fib_index,
                                              source,
                                              prefix,
+                                             MFIB_RPF_ID_NONE,
                                              MFIB_ENTRY_FLAG_NONE);
 
         mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index);
@@ -311,6 +316,7 @@ mfib_table_entry_special_add (u32 fib_index,
 
     mfib_entry_update(mfib_entry_index, source,
                       (MFIB_ENTRY_FLAG_EXCLUSIVE | entry_flags),
+                      MFIB_RPF_ID_NONE,
                       rep_dpo);
 
     return (mfib_entry_index);
diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h
index 95239f7c..83aa04ef 100644
--- a/src/vnet/mfib/mfib_table.h
+++ b/src/vnet/mfib/mfib_table.h
@@ -122,6 +122,7 @@ extern fib_node_index_t mfib_table_lookup_exact_match(u32 fib_index,
 extern fib_node_index_t mfib_table_entry_update(u32 fib_index,
                                                 const mfib_prefix_t *prefix,
                                                 mfib_source_t source,
+                                                fib_rpf_id_t rpf_id,
                                                 mfib_entry_flags_t flags);
 
 /**
diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c
index 36a303e8..7c92ae99 100644
--- a/src/vnet/mfib/mfib_test.c
+++ b/src/vnet/mfib/mfib_test.c
@@ -20,6 +20,8 @@
 #include <vnet/mfib/mfib_signal.h>
 #include <vnet/mfib/ip6_mfib.h>
 #include <vnet/fib/fib_path_list.h>
+#include <vnet/fib/fib_test.h>
+#include <vnet/fib/fib_table.h>
 
 #include <vnet/dpo/replicate_dpo.h>
 #include <vnet/adj/adj_mcast.h>
@@ -201,8 +203,8 @@ mfib_test_validate_rep_v (const replicate_t *rep,
         if (DPO_RECEIVE != dt)
         {
             MFIB_TEST_REP((ai == dpo->dpoi_index),
-                          "bucket %d stacks on %U",
-                          bucket,
+                          "bucket %d [exp:%d] stacks on %U",
+                          bucket, ai,
                           format_dpo_id, dpo, 0);
         }
     }
@@ -734,6 +736,7 @@ mfib_test_i (fib_protocol_t PROTO,
     mfib_table_entry_update(fib_index,
                             pfx_s_g,
                             MFIB_SOURCE_API,
+                            MFIB_RPF_ID_NONE,
                             MFIB_ENTRY_FLAG_SIGNAL);
     MFIB_TEST(mfib_test_entry(mfei,
                               MFIB_ENTRY_FLAG_SIGNAL,
@@ -824,6 +827,7 @@ mfib_test_i (fib_protocol_t PROTO,
     mfib_table_entry_update(fib_index,
                             pfx_s_g,
                             MFIB_SOURCE_API,
+                            MFIB_RPF_ID_NONE,
                             (MFIB_ENTRY_FLAG_SIGNAL |
                              MFIB_ENTRY_FLAG_CONNECTED));
     MFIB_TEST(mfib_test_entry(mfei,
@@ -965,6 +969,7 @@ mfib_test_i (fib_protocol_t PROTO,
     mfib_table_entry_update(fib_index,
                             pfx_s_g,
                             MFIB_SOURCE_API,
+                            MFIB_RPF_ID_NONE,
                             MFIB_ENTRY_FLAG_NONE);
     mfei = mfib_table_lookup_exact_match(fib_index,
                                          pfx_s_g);
@@ -1073,6 +1078,117 @@ mfib_test_i (fib_protocol_t PROTO,
                             MFIB_SOURCE_SRv6);
     dpo_reset(&td);
 
+    /*
+     * A Multicast LSP. This is an mLDP head-end
+     */
+    fib_node_index_t ai_mpls_10_10_10_1, lfei;
+    ip46_address_t nh_10_10_10_1 = {
+	.ip4 = {
+	    .as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+	},
+    };
+    ai_mpls_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+                                             VNET_LINK_MPLS,
+                                             &nh_10_10_10_1,
+                                             tm->hw[0]->sw_if_index);
+
+    fib_prefix_t pfx_3500 = {
+	.fp_len = 21,
+	.fp_proto = FIB_PROTOCOL_MPLS,
+	.fp_label = 3500,
+	.fp_eos = MPLS_EOS,
+	.fp_payload_proto = DPO_PROTO_IP4,
+    };
+    fib_test_rep_bucket_t mc_0 = {
+        .type = FT_REP_LABEL_O_ADJ,
+	.label_o_adj = {
+	    .adj = ai_mpls_10_10_10_1,
+	    .label = 3300,
+	    .eos = MPLS_EOS,
+	},
+    };
+    mpls_label_t *l3300 = NULL;
+    vec_add1(l3300, 3300);
+
+    /*
+     * MPLS enable an interface so we get the MPLS table created
+     */
+    mpls_sw_interface_enable_disable(&mpls_main,
+                                     tm->hw[0]->sw_if_index,
+                                     1);
+
+    lfei = fib_table_entry_update_one_path(0, // default MPLS Table
+                                           &pfx_3500,
+                                           FIB_SOURCE_API,
+                                           FIB_ENTRY_FLAG_MULTICAST,
+                                           FIB_PROTOCOL_IP4,
+                                           &nh_10_10_10_1,
+                                           tm->hw[0]->sw_if_index,
+                                           ~0, // invalid fib index
+                                           1,
+                                           l3300,
+                                           FIB_ROUTE_PATH_FLAG_NONE);
+    MFIB_TEST(fib_test_validate_entry(lfei,
+                                      FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+                                      1,
+                                      &mc_0),
+              "3500 via replicate over 10.10.10.1");
+
+    /*
+     * An (S,G) that resolves via the mLDP head-end
+     */
+    fib_route_path_t path_via_mldp = {
+        .frp_proto = FIB_PROTOCOL_MPLS,
+        .frp_local_label = pfx_3500.fp_label,
+        .frp_eos = MPLS_EOS,
+        .frp_sw_if_index = 0xffffffff,
+        .frp_fib_index = 0,
+        .frp_weight = 1,
+        .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+    };
+    dpo_id_t mldp_dpo = DPO_INVALID;
+
+    fib_entry_contribute_forwarding(lfei,
+                                    FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+                                    &mldp_dpo);
+
+    mfei = mfib_table_entry_path_update(fib_index,
+                                        pfx_s_g,
+                                        MFIB_SOURCE_API,
+                                        &path_via_mldp,
+                                        MFIB_ITF_FLAG_FORWARD);
+
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              1,
+                              DPO_REPLICATE, mldp_dpo.dpoi_index),
+              "%U over-mLDP replicate OK",
+              format_mfib_prefix, pfx_s_g);
+
+    /*
+     * add a for-us path. this tests two types of non-attached paths on one entry
+     */
+    mfei = mfib_table_entry_path_update(fib_index,
+                                        pfx_s_g,
+                                        MFIB_SOURCE_API,
+                                        &path_for_us,
+                                        MFIB_ITF_FLAG_FORWARD);
+    MFIB_TEST(mfib_test_entry(mfei,
+                              MFIB_ENTRY_FLAG_NONE,
+                              2,
+                              DPO_REPLICATE, mldp_dpo.dpoi_index,
+                              DPO_RECEIVE, 0),
+              "%U mLDP+for-us replicate OK",
+              format_mfib_prefix, pfx_s_g);
+
+    mfib_table_entry_delete(fib_index,
+                            pfx_s_g,
+                            MFIB_SOURCE_API);
+    fib_table_entry_delete(0,
+                           &pfx_3500,
+                           FIB_SOURCE_API);
+    dpo_reset(&mldp_dpo);
+
     /*
      * Unlock the table - it's the last lock so should be gone thereafter
      */
@@ -1086,6 +1202,13 @@ mfib_test_i (fib_protocol_t PROTO,
     adj_unlock(ai_2);
     adj_unlock(ai_3);
 
+    /*
+     * MPLS disable the interface
+     */
+    mpls_sw_interface_enable_disable(&mpls_main,
+                                     tm->hw[0]->sw_if_index,
+                                     0);
+
     /*
      * test we've leaked no resources
      */
diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api
index 2e3bfaf5..a1e1270a 100644
--- a/src/vnet/mpls/mpls.api
+++ b/src/vnet/mpls/mpls.api
@@ -55,6 +55,7 @@ define mpls_ip_bind_unbind_reply
     @param context - sender context, to match reply w/ request
     @param mt_is_add - Is this a route add or delete
     @param mt_sw_if_index - The SW interface index of the tunnel to delete
+    @param mt_is_multicast - Is the tunnel's underlying LSP multicast
     @param mt_next_hop_proto_is_ip4 - The next-hop is IPV4
     @param mt_next_hop_weight - The weight, for UCMP
     @param mt_next_hop[16] - the nextop address
@@ -70,6 +71,7 @@ define mpls_tunnel_add_del
   u32 mt_sw_if_index;
   u8 mt_is_add;
   u8 mt_l2_only;
+  u8 mt_is_multicast;
   u8 mt_next_hop_proto_is_ip4;
   u8 mt_next_hop_weight;
   u8 mt_next_hop[16];
@@ -102,30 +104,43 @@ define mpls_tunnel_dump
   i32 tunnel_index;
 };
 
-/** \brief mpls eth tunnel operational state response
-    @param tunnel_index - eth tunnel identifier
-    @param intfc_address - interface ipv4 addr
-    @param mask_width - interface ipv4 addr mask
-    @param hw_if_index - interface id
-    @param l2_only -
-    @param tunnel_dst_mac -
-    @param tx_sw_if_index -
-    @param encap_index - reference to mpls label table
-    @param nlabels - number of resolved labels
-    @param labels - resolved labels
+/** \brief FIB path
+    @param sw_if_index - index of the interface
+    @param weight - The weight, for UCMP
+    @param is_local - local if non-zero, else remote
+    @param is_drop - Drop the packet
+    @param is_unreach - Drop the packet and rate limit send ICMP unreachable
+    @param is_prohibit - Drop the packet and rate limit send ICMP prohibited
+    @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2
+    @param next_hop[16] - the next hop address
+
+    WARNING: this type is replicated, pending cleanup completion
+
+*/
+typeonly manual_print manual_endian define fib_path2
+{
+  u32 sw_if_index;
+  u32 weight;
+  u8 is_local;
+  u8 is_drop;
+  u8 is_unreach;
+  u8 is_prohibit;
+  u8 afi;
+  u8 next_hop[16];
+  u32 labels[16];
+};
+
+/** \brief mpls tunnel details
 */
-define mpls_tunnel_details
+manual_endian manual_print define mpls_tunnel_details
 {
   u32 context;
-  u32 tunnel_index;
-  u8 mt_l2_only;
   u8 mt_sw_if_index;
-  u8 mt_next_hop_proto_is_ip4;
-  u8 mt_next_hop[16];
-  u32 mt_next_hop_sw_if_index;
-  u32 mt_next_hop_table_id;
-  u32 mt_next_hop_n_labels;
-  u32 mt_next_hop_out_labels[mt_next_hop_n_labels];
+  u8 mt_tunnel_index;
+  u8 mt_l2_only;
+  u8 mt_is_multicast;
+  u32 mt_count;
+  vl_api_fib_path2_t mt_paths[mt_count];
 };
 
 /** \brief MPLS Route Add / del route
@@ -140,10 +155,14 @@ define mpls_tunnel_details
                                         create them
     @param mr_is_add - Is this a route add or delete
     @param mr_is_classify - Is this route result a classify
+    @param mr_is_multicast - Is this a multicast route
     @param mr_is_multipath - Is this route update a multipath - i.e. is this
                              a path addition to an existing route
     @param mr_is_resolve_host - Recurse resolution constraint via a host prefix
     @param mr_is_resolve_attached - Recurse resolution constraint via attached prefix
+    @param mr_is_interface_rx - Interface Receive path
+    @param mr_is_rpf_id - RPF-ID Receive path. The next-hop interface
+                          is used as the RPF-ID
     @param mr_next_hop_proto_is_ip4 - The next-hop is IPV4
     @param mr_next_hop_weight - The weight, for UCMP
     @param mr_next_hop[16] - the nextop address
@@ -164,9 +183,12 @@ define mpls_route_add_del
   u8 mr_create_table_if_needed;
   u8 mr_is_add;
   u8 mr_is_classify;
+  u8 mr_is_multicast;
   u8 mr_is_multipath;
   u8 mr_is_resolve_host;
   u8 mr_is_resolve_attached;
+  u8 mr_is_interface_rx;
+  u8 mr_is_rpf_id;
   u8 mr_next_hop_proto_is_ip4;
   u8 mr_next_hop_weight;
   u8 mr_next_hop[16];
@@ -187,31 +209,6 @@ define mpls_route_add_del_reply
   i32 retval;
 };
 
-/** \brief FIB path
-    @param sw_if_index - index of the interface
-    @param weight - The weight, for UCMP
-    @param is_local - local if non-zero, else remote
-    @param is_drop - Drop the packet
-    @param is_unreach - Drop the packet and rate limit send ICMP unreachable
-    @param is_prohibit - Drop the packet and rate limit send ICMP prohibited
-    @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2
-    @param next_hop[16] - the next hop address
-
-    WARNING: this type is replicated, pending cleanup completion
-
-*/
-typeonly manual_print manual_endian define fib_path2
-{
-  u32 sw_if_index;
-  u32 weight;
-  u8 is_local;
-  u8 is_drop;
-  u8 is_unreach;
-  u8 is_prohibit;
-  u8 afi;
-  u8 next_hop[16];
-};
-
 /** \brief Dump MPLS fib table
     @param client_index - opaque cookie to identify the sender
 */
diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c
index 482577b1..451b15cf 100644
--- a/src/vnet/mpls/mpls.c
+++ b/src/vnet/mpls/mpls.c
@@ -286,7 +286,15 @@ vnet_mpls_local_label (vlib_main_t * vm,
 	  rpath.frp_proto = FIB_PROTOCOL_IP4;
 	  vec_add1(rpaths, rpath);
       }
-			 
+      else if (unformat (line_input, "rx-ip4 %U",
+			 unformat_vnet_sw_interface, vnm,
+			 &rpath.frp_sw_if_index))
+      {
+	  rpath.frp_weight = 1;
+	  rpath.frp_proto = FIB_PROTOCOL_IP4;
+          rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX;
+	  vec_add1(rpaths, rpath);
+      }
       else if (unformat (line_input, "via %U %U",
 			 unformat_ip6_address,
  			 &rpath.frp_addr.ip6,
@@ -512,10 +520,3 @@ mpls_init (vlib_main_t * vm)
 }
 
 VLIB_INIT_FUNCTION (mpls_init);
-
-mpls_main_t * mpls_get_main (vlib_main_t * vm)
-{
-  vlib_call_init_function (vm, mpls_init);
-  return &mpls_main;
-}
-
diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c
index f1aef6c9..6bfc491d 100644
--- a/src/vnet/mpls/mpls_api.c
+++ b/src/vnet/mpls/mpls_api.c
@@ -27,6 +27,7 @@
 #include <vnet/fib/fib_table.h>
 #include <vnet/fib/fib_api.h>
 #include <vnet/fib/mpls_fib.h>
+#include <vnet/fib/fib_path_list.h>
 
 #include <vnet/vnet_msg_enum.h>
 
@@ -163,6 +164,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm,
 			    dpo_proto_to_fib (pfx.fp_payload_proto),
 			    mp->mr_next_hop_table_id,
 			    mp->mr_create_table_if_needed,
+			    mp->mr_is_rpf_id,
 			    &fib_index, &next_hop_fib_index);
 
   if (0 != rv)
@@ -192,10 +194,13 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm,
 				   0,	// mp->is_unreach,
 				   0,	// mp->is_prohibit,
 				   0,	// mp->is_local,
+				   mp->mr_is_multicast,
 				   mp->mr_is_classify,
 				   mp->mr_classify_table_index,
 				   mp->mr_is_resolve_host,
 				   mp->mr_is_resolve_attached,
+				   mp->mr_is_interface_rx,
+				   mp->mr_is_rpf_id,
 				   fib_index, &pfx,
 				   mp->mr_next_hop_proto_is_ip4,
 				   &nh, ntohl (mp->mr_next_hop_sw_if_index),
@@ -229,46 +234,54 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp)
   int rv = 0;
   u32 tunnel_sw_if_index;
   int ii;
+  fib_route_path_t rpath, *rpaths = NULL;
+
+  memset (&rpath, 0, sizeof (rpath));
 
   stats_dslock_with_hint (1 /* release hint */ , 5 /* tag */ );
 
-  if (mp->mt_is_add)
+  if (mp->mt_next_hop_proto_is_ip4)
     {
-      fib_route_path_t rpath, *rpaths = NULL;
-      mpls_label_t *label_stack = NULL;
-
-      memset (&rpath, 0, sizeof (rpath));
-
-      if (mp->mt_next_hop_proto_is_ip4)
-	{
-	  rpath.frp_proto = FIB_PROTOCOL_IP4;
-	  clib_memcpy (&rpath.frp_addr.ip4,
-		       mp->mt_next_hop, sizeof (rpath.frp_addr.ip4));
-	}
-      else
-	{
-	  rpath.frp_proto = FIB_PROTOCOL_IP6;
-	  clib_memcpy (&rpath.frp_addr.ip6,
-		       mp->mt_next_hop, sizeof (rpath.frp_addr.ip6));
-	}
-      rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index);
+      rpath.frp_proto = FIB_PROTOCOL_IP4;
+      clib_memcpy (&rpath.frp_addr.ip4,
+		   mp->mt_next_hop, sizeof (rpath.frp_addr.ip4));
+    }
+  else
+    {
+      rpath.frp_proto = FIB_PROTOCOL_IP6;
+      clib_memcpy (&rpath.frp_addr.ip6,
+		   mp->mt_next_hop, sizeof (rpath.frp_addr.ip6));
+    }
+  rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index);
+  rpath.frp_weight = 1;
 
+  if (mp->mt_is_add)
+    {
       for (ii = 0; ii < mp->mt_next_hop_n_out_labels; ii++)
-	vec_add1 (label_stack, ntohl (mp->mt_next_hop_out_label_stack[ii]));
+	vec_add1 (rpath.frp_label_stack,
+		  ntohl (mp->mt_next_hop_out_label_stack[ii]));
+    }
 
-      vec_add1 (rpaths, rpath);
+  vec_add1 (rpaths, rpath);
 
-      vnet_mpls_tunnel_add (rpaths, label_stack,
-			    mp->mt_l2_only, &tunnel_sw_if_index);
-      vec_free (rpaths);
-      vec_free (label_stack);
+  tunnel_sw_if_index = ntohl (mp->mt_sw_if_index);
+
+  if (mp->mt_is_add)
+    {
+      if (~0 == tunnel_sw_if_index)
+	tunnel_sw_if_index = vnet_mpls_tunnel_create (mp->mt_l2_only,
+						      mp->mt_is_multicast);
+      vnet_mpls_tunnel_path_add (tunnel_sw_if_index, rpaths);
     }
   else
     {
       tunnel_sw_if_index = ntohl (mp->mt_sw_if_index);
-      vnet_mpls_tunnel_del (tunnel_sw_if_index);
+      if (!vnet_mpls_tunnel_path_remove (tunnel_sw_if_index, rpaths))
+	vnet_mpls_tunnel_del (tunnel_sw_if_index);
     }
 
+  vec_free (rpaths);
+
   stats_dsunlock ();
 
   /* *INDENT-OFF* */
@@ -289,10 +302,12 @@ typedef struct mpls_tunnel_send_walk_ctx_t_
 static void
 send_mpls_tunnel_entry (u32 mti, void *arg)
 {
+  fib_route_path_encode_t *api_rpaths, *api_rpath;
   mpls_tunnel_send_walk_ctx_t *ctx;
   vl_api_mpls_tunnel_details_t *mp;
   const mpls_tunnel_t *mt;
-  u32 nlabels;
+  vl_api_fib_path2_t *fp;
+  u32 n;
 
   ctx = arg;
 
@@ -300,18 +315,34 @@ send_mpls_tunnel_entry (u32 mti, void *arg)
     return;
 
   mt = mpls_tunnel_get (mti);
-  nlabels = vec_len (mt->mt_label_stack);
+  n = fib_path_list_get_n_paths (mt->mt_path_list);
+
+  mp = vl_msg_api_alloc (sizeof (*mp) + n * sizeof (vl_api_fib_path2_t));
+  memset (mp, 0, sizeof (*mp) + n * sizeof (vl_api_fib_path2_t));
 
-  mp = vl_msg_api_alloc (sizeof (*mp) + nlabels * sizeof (u32));
-  memset (mp, 0, sizeof (*mp));
   mp->_vl_msg_id = ntohs (VL_API_MPLS_TUNNEL_DETAILS);
   mp->context = ctx->context;
 
-  mp->tunnel_index = ntohl (mti);
-  memcpy (mp->mt_next_hop_out_labels,
-	  mt->mt_label_stack, nlabels * sizeof (u32));
+  mp->mt_tunnel_index = ntohl (mti);
+  mp->mt_count = ntohl (n);
+
+  fib_path_list_walk (mt->mt_path_list, fib_path_encode, &api_rpaths);
+
+  fp = mp->mt_paths;
+  vec_foreach (api_rpath, api_rpaths)
+  {
+    memset (fp, 0, sizeof (*fp));
+
+    fp->weight = htonl (api_rpath->rpath.frp_weight);
+    fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+    copy_fib_next_hop (api_rpath, fp);
+    fp++;
+  }
 
   // FIXME
+  // memcpy (mp->mt_next_hop_out_labels,
+  //   mt->mt_label_stack, nlabels * sizeof (u32));
+
 
   vl_msg_api_send_shmem (ctx->q, (u8 *) & mp);
 }
diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c
index 1b9bdd05..86ad8bba 100644
--- a/src/vnet/mpls/mpls_input.c
+++ b/src/vnet/mpls/mpls_input.c
@@ -291,7 +291,7 @@ mpls_setup_nodes (vlib_main_t * vm)
   rt->last_outer_fib_index = 0;
   rt->mpls_main = &mpls_main;
 
-  ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST,
+  ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS,
                                 mpls_input_node.index);
 }
 
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index ace6a70f..3c6be7e8 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -20,8 +20,17 @@
 #include <vnet/mpls/mpls.h>
 #include <vnet/fib/mpls_fib.h>
 #include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/replicate_dpo.h>
 
-vlib_node_registration_t mpls_lookup_node;
+/**
+ * Static MPLS VLIB forwarding node
+ */
+static vlib_node_registration_t mpls_lookup_node;
+
+/**
+ * The arc/edge from the MPLS lookup node to the MPLS replicate node
+ */
+static u32 mpls_lookup_to_replicate_edge;
 
 typedef struct {
   u32 next_index;
@@ -156,81 +165,123 @@ mpls_lookup (vlib_main_t * vm,
           lbi2 = mpls_fib_table_forwarding_lookup (lfib_index2, h2);
           lbi3 = mpls_fib_table_forwarding_lookup (lfib_index3, h3);
 
-          lb0 = load_balance_get(lbi0);
-          lb1 = load_balance_get(lbi1);
-          lb2 = load_balance_get(lbi2);
-          lb3 = load_balance_get(lbi3);
-
           hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
           hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0;
           hash_c2 = vnet_buffer(b2)->ip.flow_hash = 0;
           hash_c3 = vnet_buffer(b3)->ip.flow_hash = 0;
 
-          if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
+          if (MPLS_IS_REPLICATE & lbi0)
           {
-              hash_c0 = vnet_buffer (b0)->ip.flow_hash =
-                  mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+              next0 = mpls_lookup_to_replicate_edge;
+              vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+                  (lbi0 & ~MPLS_IS_REPLICATE);
           }
-          if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
+          else
           {
-              hash_c1 = vnet_buffer (b1)->ip.flow_hash =
-                  mpls_compute_flow_hash(h1, lb1->lb_hash_config);
+              lb0 = load_balance_get(lbi0);
+
+              if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
+              {
+                  hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+                      mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+              }
+              ASSERT (lb0->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb0->lb_n_buckets));
+              dpo0 = load_balance_get_bucket_i(lb0,
+                                               (hash_c0 &
+                                                (lb0->lb_n_buckets_minus_1)));
+              next0 = dpo0->dpoi_next_node;
+
+              vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+              vlib_increment_combined_counter
+                  (cm, thread_index, lbi0, 1,
+                   vlib_buffer_length_in_chain (vm, b0));
           }
-          if (PREDICT_FALSE(lb2->lb_n_buckets > 1))
+          if (MPLS_IS_REPLICATE & lbi1)
           {
-              hash_c2 = vnet_buffer (b2)->ip.flow_hash =
-                  mpls_compute_flow_hash(h2, lb2->lb_hash_config);
+              next1 = mpls_lookup_to_replicate_edge;
+              vnet_buffer (b1)->ip.adj_index[VLIB_TX] =
+                  (lbi1 & ~MPLS_IS_REPLICATE);
           }
-          if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
+          else
           {
-              hash_c3 = vnet_buffer (b3)->ip.flow_hash =
-                  mpls_compute_flow_hash(h3, lb3->lb_hash_config);
-          }
-
-          ASSERT (lb0->lb_n_buckets > 0);
-          ASSERT (is_pow2 (lb0->lb_n_buckets));
-          ASSERT (lb1->lb_n_buckets > 0);
-          ASSERT (is_pow2 (lb1->lb_n_buckets));
-          ASSERT (lb2->lb_n_buckets > 0);
-          ASSERT (is_pow2 (lb2->lb_n_buckets));
-          ASSERT (lb3->lb_n_buckets > 0);
-          ASSERT (is_pow2 (lb3->lb_n_buckets));
-
-          dpo0 = load_balance_get_bucket_i(lb0,
-                                           (hash_c0 &
-                                            (lb0->lb_n_buckets_minus_1)));
-          dpo1 = load_balance_get_bucket_i(lb1,
-                                           (hash_c1 &
-                                            (lb1->lb_n_buckets_minus_1)));
-          dpo2 = load_balance_get_bucket_i(lb2,
-                                           (hash_c2 &
-                                            (lb2->lb_n_buckets_minus_1)));
-          dpo3 = load_balance_get_bucket_i(lb3,
-                                           (hash_c3 &
-                                            (lb3->lb_n_buckets_minus_1)));
+              lb1 = load_balance_get(lbi1);
 
-          next0 = dpo0->dpoi_next_node;
-          next1 = dpo1->dpoi_next_node;
-          next2 = dpo2->dpoi_next_node;
-          next3 = dpo3->dpoi_next_node;
+              if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
+              {
+                  hash_c1 = vnet_buffer (b1)->ip.flow_hash =
+                      mpls_compute_flow_hash(h1, lb1->lb_hash_config);
+              }
+              ASSERT (lb1->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb1->lb_n_buckets));
+              dpo1 = load_balance_get_bucket_i(lb1,
+                                               (hash_c1 &
+                                                (lb1->lb_n_buckets_minus_1)));
+              next1 = dpo1->dpoi_next_node;
+
+              vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+              vlib_increment_combined_counter
+                  (cm, thread_index, lbi1, 1,
+                   vlib_buffer_length_in_chain (vm, b1));
+          }
+          if (MPLS_IS_REPLICATE & lbi2)
+          {
+              next2 = mpls_lookup_to_replicate_edge;
+              vnet_buffer (b2)->ip.adj_index[VLIB_TX] =
+                  (lbi2 & ~MPLS_IS_REPLICATE);
+          }
+          else
+          {
+              lb2 = load_balance_get(lbi2);
 
-          vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-          vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
-          vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
-          vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
+              if (PREDICT_FALSE(lb2->lb_n_buckets > 1))
+              {
+                  hash_c2 = vnet_buffer (b2)->ip.flow_hash =
+                      mpls_compute_flow_hash(h2, lb2->lb_hash_config);
+              }
+              ASSERT (lb2->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb2->lb_n_buckets));
+              dpo2 = load_balance_get_bucket_i(lb2,
+                                               (hash_c2 &
+                                                (lb2->lb_n_buckets_minus_1)));
+              next2 = dpo2->dpoi_next_node;
+
+              vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+
+              vlib_increment_combined_counter
+                  (cm, thread_index, lbi2, 1,
+                   vlib_buffer_length_in_chain (vm, b2));
+          }
+          if (MPLS_IS_REPLICATE & lbi3)
+          {
+              next3 = mpls_lookup_to_replicate_edge;
+              vnet_buffer (b3)->ip.adj_index[VLIB_TX] =
+                  (lbi3 & ~MPLS_IS_REPLICATE);
+          }
+          else
+          {
+              lb3 = load_balance_get(lbi3);
 
-          vlib_increment_combined_counter
-              (cm, thread_index, lbi0, 1,
-               vlib_buffer_length_in_chain (vm, b0));
-          vlib_increment_combined_counter
-              (cm, thread_index, lbi1, 1,
-               vlib_buffer_length_in_chain (vm, b1));
-          vlib_increment_combined_counter
-              (cm, thread_index, lbi2, 1,
-               vlib_buffer_length_in_chain (vm, b2));
-          vlib_increment_combined_counter
-              (cm, thread_index, lbi3, 1,
-               vlib_buffer_length_in_chain (vm, b3));
+              if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
+              {
+                  hash_c3 = vnet_buffer (b3)->ip.flow_hash =
+                      mpls_compute_flow_hash(h3, lb3->lb_hash_config);
+              }
+              ASSERT (lb3->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb3->lb_n_buckets));
+              dpo3 = load_balance_get_bucket_i(lb3,
+                                               (hash_c3 &
+                                                (lb3->lb_n_buckets_minus_1)));
+              next3 = dpo3->dpoi_next_node;
+
+              vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
+
+              vlib_increment_combined_counter
+                  (cm, thread_index, lbi3, 1,
+                   vlib_buffer_length_in_chain (vm, b3));
+          }
 
           /*
            * before we pop the label copy th values we need to maintain.
@@ -331,31 +382,41 @@ mpls_lookup (vlib_main_t * vm,
                                 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
 
           lbi0 = mpls_fib_table_forwarding_lookup(lfib_index0, h0);
-	  lb0 = load_balance_get(lbi0);
-
           hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
-          if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
+
+          if (MPLS_IS_REPLICATE & lbi0)
           {
-              hash_c0 = vnet_buffer (b0)->ip.flow_hash =
-                  mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+              next0 = mpls_lookup_to_replicate_edge;
+              vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+                  (lbi0 & ~MPLS_IS_REPLICATE);
           }
+          else
+          {
+              lb0 = load_balance_get(lbi0);
 
-          ASSERT (lb0->lb_n_buckets > 0);
-          ASSERT (is_pow2 (lb0->lb_n_buckets));
+              if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
+              {
+                  hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+                      mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+              }
 
-          dpo0 = load_balance_get_bucket_i(lb0,
-                                           (hash_c0 &
-                                            (lb0->lb_n_buckets_minus_1)));
+              ASSERT (lb0->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb0->lb_n_buckets));
 
-          next0 = dpo0->dpoi_next_node;
-          vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+              dpo0 = load_balance_get_bucket_i(lb0,
+                                               (hash_c0 &
+                                                (lb0->lb_n_buckets_minus_1)));
 
-          vlib_increment_combined_counter
-              (cm, thread_index, lbi0, 1,
-               vlib_buffer_length_in_chain (vm, b0));
+              next0 = dpo0->dpoi_next_node;
+              vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+              vlib_increment_combined_counter
+                  (cm, thread_index, lbi0, 1,
+                   vlib_buffer_length_in_chain (vm, b0));
+          }
 
           /*
-           * before we pop the label copy th values we need to maintain.
+           * before we pop the label, copy the values we need to maintain.
            * The label header is in network byte order.
            *  last byte is the TTL.
            *  bits 2 to 4 inclusive are the EXP bits
@@ -398,7 +459,7 @@ static char * mpls_error_strings[] = {
 #undef mpls_error
 };
 
-VLIB_REGISTER_NODE (mpls_lookup_node) = {
+VLIB_REGISTER_NODE (mpls_lookup_node, static) = {
   .function = mpls_lookup,
   .name = "mpls-lookup",
   /* Takes a vector of packets. */
@@ -621,3 +682,22 @@ VLIB_REGISTER_NODE (mpls_load_balance_node) = {
 };
 
 VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance)
+
+
+static clib_error_t *
+mpls_lookup_init (vlib_main_t * vm)
+{
+  clib_error_t * error;
+
+  if ((error = vlib_call_init_function (vm, mpls_init)))
+    return error;
+
+  mpls_lookup_to_replicate_edge =
+      vlib_node_add_named_next(vm,
+                               mpls_lookup_node.index,
+                               "mpls-replicate");
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (mpls_lookup_init);
diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c
index ac6fdcdf..1254dd9d 100644
--- a/src/vnet/mpls/mpls_tunnel.c
+++ b/src/vnet/mpls/mpls_tunnel.c
@@ -18,9 +18,12 @@
 #include <vnet/vnet.h>
 #include <vnet/pg/pg.h>
 #include <vnet/mpls/mpls_tunnel.h>
+#include <vnet/mpls/mpls_types.h>
 #include <vnet/ip/ip.h>
 #include <vnet/fib/fib_path_list.h>
 #include <vnet/adj/adj_midchain.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/dpo/replicate_dpo.h>
 
 /**
  * @brief pool of tunnel instances
@@ -37,6 +40,11 @@ static u32 * mpls_tunnel_free_hw_if_indices;
  */
 static u32 *mpls_tunnel_db;
 
+/**
+ * @brief MPLS tunnel flags strings
+ */
+static const char *mpls_tunnel_attribute_names[] = MPLS_TUNNEL_ATTRIBUTES;
+
 /**
  * @brief Get a tunnel object from a SW interface index
  */
@@ -44,103 +52,178 @@ static mpls_tunnel_t*
 mpls_tunnel_get_from_sw_if_index (u32 sw_if_index)
 {
     if ((vec_len(mpls_tunnel_db) < sw_if_index) ||
-	(~0 == mpls_tunnel_db[sw_if_index]))
-	return (NULL);
+        (~0 == mpls_tunnel_db[sw_if_index]))
+        return (NULL);
 
     return (pool_elt_at_index(mpls_tunnel_pool,
-			      mpls_tunnel_db[sw_if_index]));
+                              mpls_tunnel_db[sw_if_index]));
 }
 
 /**
- * @brief Return true if the label stack is imp-null only
+ * @brief Build a rewrite string for the MPLS tunnel.
  */
-static fib_forward_chain_type_t
-mpls_tunnel_get_fwd_chain_type (const mpls_tunnel_t *mt)
+static u8*
+mpls_tunnel_build_rewrite_i (void)
 {
-    if ((1 == vec_len(mt->mt_label_stack)) &&
-	(mt->mt_label_stack[0] == MPLS_IETF_IMPLICIT_NULL_LABEL))
-    {
-	/*
-	 * the only label in the label stack is implicit null
-	 * we need to build an IP chain.
-	 */
-	if (FIB_PROTOCOL_IP4 == fib_path_list_get_proto(mt->mt_path_list))
-	{
-	    return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
-	}
-	else
-	{
-	    return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
-	}
-    }
-    else
-    {
-	return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
-    }
+    /*
+     * passing the adj code a NULL rewrite means 'i don't have one cos
+     * t'other end is unresolved'. That's not the case here. For the mpls
+     * tunnel there are just no bytes of encap to apply in the adj. We'll impose
+     * the label stack once we choose a path. So return a zero length rewrite.
+     */
+    u8 *rewrite = NULL;
+
+    vec_validate(rewrite, 0);
+    vec_reset_length(rewrite);
+
+    return (rewrite);
 }
 
 /**
  * @brief Build a rewrite string for the MPLS tunnel.
- *
- * We have choices here;
- *  1 - have an Adjacency with a zero length string and stack it on
- *       MPLS label objects
- *  2 - put the label header rewrites in the adjacency string.
- *
- * We choose 2 since it results in fewer graph nodes in the egress path
  */
 static u8*
 mpls_tunnel_build_rewrite (vnet_main_t * vnm,
-			   u32 sw_if_index,
-			   vnet_link_t link_type,
-			   const void *dst_address)
+                           u32 sw_if_index,
+                           vnet_link_t link_type,
+                           const void *dst_address)
 {
-    mpls_unicast_header_t *muh;
-    mpls_tunnel_t *mt;
-    u8 *rewrite;
-    u32 mti, ii;
+    return (mpls_tunnel_build_rewrite_i());
+}
 
-    rewrite = NULL;
-    mti = mpls_tunnel_db[sw_if_index];
-    mt = pool_elt_at_index(mpls_tunnel_pool, mti);
+typedef struct mpls_tunnel_collect_forwarding_ctx_t_
+{
+    load_balance_path_t * next_hops;
+    const mpls_tunnel_t *mt;
+    fib_forward_chain_type_t fct;
+} mpls_tunnel_collect_forwarding_ctx_t;
+
+static int
+mpls_tunnel_collect_forwarding (fib_node_index_t pl_index,
+                                fib_node_index_t path_index,
+                                void *arg)
+{
+    mpls_tunnel_collect_forwarding_ctx_t *ctx;
+    fib_path_ext_t *path_ext;
+    int have_path_ext;
+
+    ctx = arg;
 
     /*
-     * The vector must be allocated as u8 so the length is correct
+     * if the path is not resolved, don't include it.
      */
-    ASSERT(0 < vec_len(mt->mt_label_stack));
-    vec_validate(rewrite, (sizeof(*muh) * vec_len(mt->mt_label_stack)) - 1);
-    ASSERT(rewrite);
-    muh = (mpls_unicast_header_t *)rewrite;
+    if (!fib_path_is_resolved(path_index))
+    {
+        return (!0);
+    }
 
     /*
-     * The last (inner most) label in the stack may be EOS, all the rest Non-EOS
+     * get the matching path-extension for the path being visited.
      */
-    for (ii = 0; ii < vec_len(mt->mt_label_stack)-1; ii++)
+    have_path_ext = 0;
+    vec_foreach(path_ext, ctx->mt->mt_path_exts)
     {
-	vnet_mpls_uc_set_label(&muh[ii].label_exp_s_ttl, mt->mt_label_stack[ii]);
-	vnet_mpls_uc_set_ttl(&muh[ii].label_exp_s_ttl, 255);
-	vnet_mpls_uc_set_exp(&muh[ii].label_exp_s_ttl, 0);
-	vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_NON_EOS);
-	muh[ii].label_exp_s_ttl = clib_host_to_net_u32(muh[ii].label_exp_s_ttl);
+        if (path_ext->fpe_path_index == path_index)
+        {
+            have_path_ext = 1;
+            break;
+        }
     }
 
-    vnet_mpls_uc_set_label(&muh[ii].label_exp_s_ttl, mt->mt_label_stack[ii]);
-    vnet_mpls_uc_set_ttl(&muh[ii].label_exp_s_ttl, 255);
-    vnet_mpls_uc_set_exp(&muh[ii].label_exp_s_ttl, 0);
-
-    if ((VNET_LINK_MPLS == link_type) &&
-	(mt->mt_label_stack[ii] != MPLS_IETF_IMPLICIT_NULL_LABEL))
+    if (have_path_ext)
     {
-	vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_NON_EOS);
+        /*
+         * found a matching extension. stack it to obtain the forwarding
+         * info for this path.
+         */
+        ctx->next_hops = fib_path_ext_stack(path_ext,
+                                            ctx->fct,
+                                            ctx->fct,
+                                            ctx->next_hops);
     }
     else
+        ASSERT(0);
+    /*
+     * else
+     *   There should be a path-extension associated with each path
+     */
+
+    return (!0);
+}
+
+static void
+mpls_tunnel_mk_lb (mpls_tunnel_t *mt,
+                   vnet_link_t linkt,
+                   fib_forward_chain_type_t fct,
+                   dpo_id_t *dpo_lb)
+{
+    dpo_proto_t lb_proto;
+
+    /*
+     * Every path of the tunnel is expected to have a path extension, so
+     * we construct the load-balance (or replicate) by stacking those
+     * extensions on the forwarding chains of the resolved paths.
+     */
+    mpls_tunnel_collect_forwarding_ctx_t ctx = {
+        .mt = mt,
+        .next_hops = NULL,
+        .fct = fct,
+    };
+
+    /*
+     * As an optimisation we allocate the vector of next-hops to be sized
+     * equal to the maximum number of paths we will need, which is also the
+     * most likely number we will need, since in most cases the paths are 'up'.
+     */
+    vec_validate(ctx.next_hops, fib_path_list_get_n_paths(mt->mt_path_list));
+    vec_reset_length(ctx.next_hops);
+
+    lb_proto = vnet_link_to_dpo_proto(linkt);
+
+    fib_path_list_walk(mt->mt_path_list,
+                       mpls_tunnel_collect_forwarding,
+                       &ctx);
+
+    if (!dpo_id_is_valid(dpo_lb))
     {
-	vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_EOS);
+        /*
+         * first time create
+         */
+        if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST)
+        {
+            dpo_set(dpo_lb,
+                    DPO_REPLICATE,
+                    lb_proto,
+                    replicate_create(0, lb_proto));
+        }
+        else
+        {
+            flow_hash_config_t fhc;
+
+            fhc = 0; // FIXME
+            /* fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, */
+            /*                                      dpo_proto_to_fib(lb_proto)); */
+            dpo_set(dpo_lb,
+                    DPO_LOAD_BALANCE,
+                    lb_proto,
+                    load_balance_create(0, lb_proto, fhc));
+        }
     }
 
-    muh[ii].label_exp_s_ttl = clib_host_to_net_u32(muh[ii].label_exp_s_ttl);
-
-    return (rewrite);
+    if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST)
+    {
+        /*
+         * MPLS multicast
+         */
+        replicate_multipath_update(dpo_lb, ctx.next_hops);
+    }
+    else
+    {
+        load_balance_multipath_update(dpo_lb,
+                                      ctx.next_hops,
+                                      LOAD_BALANCE_FLAG_NONE);
+        vec_free(ctx.next_hops);
+    }
 }
 
 /**
@@ -161,45 +244,47 @@ mpls_tunnel_stack (adj_index_t ai)
     mt = mpls_tunnel_get_from_sw_if_index(sw_if_index);
 
     if (NULL == mt)
-	return;
+        return;
 
     /*
-     * find the adjacency that is contributed by the FIB path-list
-     * that this tunnel resovles via, and use it as the next adj
-     * in the midchain
+     * while we're stacking the adj, remove the tunnel from the child list
+     * of the path list. this breaks a circular dependency of walk updates
+     * where the create of adjacencies in the children can lead to walks
+     * that get back here.
      */
-    if (vnet_hw_interface_get_flags(vnet_get_main(),
-				    mt->mt_hw_if_index) &
-	VNET_HW_INTERFACE_FLAG_LINK_UP)
-    {
-	dpo_id_t dpo = DPO_INVALID;
+    fib_path_list_lock(mt->mt_path_list);
 
-	fib_path_list_contribute_forwarding(mt->mt_path_list,
-					    mpls_tunnel_get_fwd_chain_type(mt),
-					    &dpo);
-
-	if (DPO_LOAD_BALANCE == dpo.dpoi_type)
-	{
-	    /*
-	     * we don't support multiple paths, so no need to load-balance.
-	     * pull the first and only choice and stack directly on that.
-	     */
-	    load_balance_t *lb;
-
-	    lb = load_balance_get (dpo.dpoi_index);
+    fib_path_list_child_remove(mt->mt_path_list,
+                               mt->mt_sibling_index);
 
-	    ASSERT(1 == lb->lb_n_buckets);
+    /*
+     * Construct the DPO (load-balance or replicate) that we can stack
+     * the tunnel's midchain on
+     */
+    if (vnet_hw_interface_get_flags(vnet_get_main(),
+                                    mt->mt_hw_if_index) &
+        VNET_HW_INTERFACE_FLAG_LINK_UP)
+    {
+        dpo_id_t dpo = DPO_INVALID;
 
-	    dpo_copy(&dpo, load_balance_get_bucket_i (lb, 0));
-	}
+        mpls_tunnel_mk_lb(mt,
+                          adj->ia_link,
+                          FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+                          &dpo);
 
-	adj_nbr_midchain_stack(ai, &dpo);
-	dpo_reset(&dpo);
+        adj_nbr_midchain_stack(ai, &dpo);
+        dpo_reset(&dpo);
     }
     else
     {
-	adj_nbr_midchain_unstack(ai);
+        adj_nbr_midchain_unstack(ai);
     }
+
+    mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list,
+                                                   FIB_NODE_TYPE_MPLS_TUNNEL,
+                                                   mt - mpls_tunnel_pool);
+
+    fib_path_list_lock(mt->mt_path_list);
 }
 
 /**
@@ -207,7 +292,7 @@ mpls_tunnel_stack (adj_index_t ai)
  */
 static adj_walk_rc_t
 mpls_adj_walk_cb (adj_index_t ai,
-		 void *ctx)
+                 void *ctx)
 {
     mpls_tunnel_stack(ai);
 
@@ -224,17 +309,17 @@ mpls_tunnel_restack (mpls_tunnel_t *mt)
      */
     FOR_EACH_FIB_PROTOCOL(proto)
     {
-	adj_nbr_walk(mt->mt_sw_if_index,
-		     proto,
-		     mpls_adj_walk_cb,
-		     NULL);
+        adj_nbr_walk(mt->mt_sw_if_index,
+                     proto,
+                     mpls_adj_walk_cb,
+                     NULL);
     }
 }
 
 static clib_error_t *
 mpls_tunnel_admin_up_down (vnet_main_t * vnm,
-			   u32 hw_if_index,
-			   u32 flags)
+                           u32 hw_if_index,
+                           u32 flags)
 {
     vnet_hw_interface_t * hi;
     mpls_tunnel_t *mt;
@@ -244,13 +329,13 @@ mpls_tunnel_admin_up_down (vnet_main_t * vnm,
     mt = mpls_tunnel_get_from_sw_if_index(hi->sw_if_index);
 
     if (NULL == mt)
-	return (NULL);
+        return (NULL);
 
     if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
-	vnet_hw_interface_set_flags (vnm, hw_if_index,
-				     VNET_HW_INTERFACE_FLAG_LINK_UP);
+        vnet_hw_interface_set_flags (vnm, hw_if_index,
+                                     VNET_HW_INTERFACE_FLAG_LINK_UP);
     else
-	vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */);
+        vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */);
 
     mpls_tunnel_restack(mt);
 
@@ -263,22 +348,58 @@ mpls_tunnel_admin_up_down (vnet_main_t * vnm,
  */
 static void
 mpls_tunnel_fixup (vlib_main_t *vm,
-		   ip_adjacency_t *adj,
-		   vlib_buffer_t *b0)
+                   ip_adjacency_t *adj,
+                   vlib_buffer_t *b0)
 {
+    /*
+     * A no-op w.r.t. the header, but reset the 'have we pushed any
+     * MPLS labels onto the packet' flag. That way when we enter the
+     * tunnel we'll get a TTL set to 255
+     */
+    vnet_buffer(b0)->mpls.first = 0;
 }
 
 static void
 mpls_tunnel_update_adj (vnet_main_t * vnm,
-			u32 sw_if_index,
-			adj_index_t ai)
+                        u32 sw_if_index,
+                        adj_index_t ai)
 {
-    adj_nbr_midchain_update_rewrite(
-	ai, mpls_tunnel_fixup, 
-	ADJ_FLAG_NONE,
-	mpls_tunnel_build_rewrite(vnm, sw_if_index,
-				  adj_get_link_type(ai),
-				  NULL));
+    ip_adjacency_t *adj;
+
+    ASSERT(ADJ_INDEX_INVALID != ai);
+
+    adj = adj_get(ai);
+
+    switch (adj->lookup_next_index)
+    {
+    case IP_LOOKUP_NEXT_ARP:
+    case IP_LOOKUP_NEXT_GLEAN:
+        adj_nbr_midchain_update_rewrite(ai, mpls_tunnel_fixup,
+                                        ADJ_FLAG_NONE,
+                                        mpls_tunnel_build_rewrite_i());
+        break;
+    case IP_LOOKUP_NEXT_MCAST:
+        /*
+         * Construct a partial rewrite from the known ethernet mcast dest MAC
+         * There's no MAC fixup, so the last 2 parameters are 0
+         */
+        adj_mcast_midchain_update_rewrite(ai, mpls_tunnel_fixup,
+                                          ADJ_FLAG_NONE,
+                                          mpls_tunnel_build_rewrite_i(),
+                                          0, 0);
+        break;
+
+    case IP_LOOKUP_NEXT_DROP:
+    case IP_LOOKUP_NEXT_PUNT:
+    case IP_LOOKUP_NEXT_LOCAL:
+    case IP_LOOKUP_NEXT_REWRITE:
+    case IP_LOOKUP_NEXT_MIDCHAIN:
+    case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+    case IP_LOOKUP_NEXT_ICMP_ERROR:
+    case IP_LOOKUP_N_NEXT:
+      ASSERT (0);
+      break;
+    }
 
     mpls_tunnel_stack(ai);
 }
@@ -312,7 +433,7 @@ typedef struct mpls_tunnel_trace_t_
 
 static u8 *
 format_mpls_tunnel_tx_trace (u8 * s,
-			     va_list * args)
+                             va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
@@ -327,8 +448,8 @@ format_mpls_tunnel_tx_trace (u8 * s,
  */
 static uword
 mpls_tunnel_tx (vlib_main_t * vm,
-		vlib_node_runtime_t * node,
-		vlib_frame_t * frame)
+                vlib_node_runtime_t * node,
+                vlib_frame_t * frame)
 {
   u32 next_index;
   u32 * from, * to_next, n_left_from, n_left_to_next;
@@ -355,32 +476,32 @@ mpls_tunnel_tx (vlib_main_t * vm,
        * FIXME DUAL LOOP
        */
       while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  vlib_buffer_t * b0;
-	  u32 bi0;
+        {
+          vlib_buffer_t * b0;
+          u32 bi0;
 
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
 
-	  b0 = vlib_get_buffer(vm, bi0);
+          b0 = vlib_get_buffer(vm, bi0);
 
-	  vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_adj;
+          vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_adj;
 
-	  if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      mpls_tunnel_trace_t *tr = vlib_add_trace (vm, node,
-						   b0, sizeof (*tr));
-	      tr->tunnel_id = rd->dev_instance;
-	    }
+          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              mpls_tunnel_trace_t *tr = vlib_add_trace (vm, node,
+                                                   b0, sizeof (*tr));
+              tr->tunnel_id = rd->dev_instance;
+            }
 
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-					   to_next, n_left_to_next,
-					   bi0, mt->mt_l2_tx_arc);
-	}
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, mt->mt_l2_tx_arc);
+        }
 
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
@@ -417,13 +538,13 @@ mpls_tunnel_get (u32 mti)
  */
 void
 mpls_tunnel_walk (mpls_tunnel_walk_cb_t cb,
-		  void *ctx)
+                  void *ctx)
 {
     u32 mti;
 
     pool_foreach_index(mti, mpls_tunnel_pool,
     ({
-	cb(mti, ctx);
+        cb(mti, ctx);
     }));
 }
 
@@ -435,25 +556,22 @@ vnet_mpls_tunnel_del (u32 sw_if_index)
     mt = mpls_tunnel_get_from_sw_if_index(sw_if_index);
 
     if (NULL == mt)
-	return;
-    
-    fib_path_list_child_remove(mt->mt_path_list,
-			       mt->mt_sibling_index);
-    if (ADJ_INDEX_INVALID != mt->mt_l2_adj)
-	adj_unlock(mt->mt_l2_adj);
+        return;
 
-    vec_free(mt->mt_label_stack);
+    if (FIB_NODE_INDEX_INVALID != mt->mt_path_list)
+        fib_path_list_child_remove(mt->mt_path_list,
+                                   mt->mt_sibling_index);
+    if (ADJ_INDEX_INVALID != mt->mt_l2_adj)
+        adj_unlock(mt->mt_l2_adj);
 
     vec_add1 (mpls_tunnel_free_hw_if_indices, mt->mt_hw_if_index);
     pool_put(mpls_tunnel_pool, mt);
     mpls_tunnel_db[sw_if_index] = ~0;
 }
 
-void
-vnet_mpls_tunnel_add (fib_route_path_t *rpaths,
-		      mpls_label_t *label_stack,
-		      u8 l2_only,
-		      u32 *sw_if_index)
+u32
+vnet_mpls_tunnel_create (u8 l2_only,
+                         u8 is_multicast)
 {
     vnet_hw_interface_t * hi;
     mpls_tunnel_t *mt;
@@ -466,28 +584,33 @@ vnet_mpls_tunnel_add (fib_route_path_t *rpaths,
     mti = mt - mpls_tunnel_pool;
     fib_node_init(&mt->mt_node, FIB_NODE_TYPE_MPLS_TUNNEL);
     mt->mt_l2_adj = ADJ_INDEX_INVALID;
+    mt->mt_path_list = FIB_NODE_INDEX_INVALID;
+    mt->mt_sibling_index = FIB_NODE_INDEX_INVALID;
+
+    if (is_multicast)
+        mt->mt_flags |= MPLS_TUNNEL_FLAG_MCAST;
 
     /*
      * Create a new, or re=use and old, tunnel HW interface
      */
     if (vec_len (mpls_tunnel_free_hw_if_indices) > 0)
     {
-	mt->mt_hw_if_index = 
-	    mpls_tunnel_free_hw_if_indices[vec_len(mpls_tunnel_free_hw_if_indices)-1];
-	_vec_len (mpls_tunnel_free_hw_if_indices) -= 1;
-	hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index);
-	hi->hw_instance = mti;
-	hi->dev_instance = mti;
+        mt->mt_hw_if_index =
+            mpls_tunnel_free_hw_if_indices[vec_len(mpls_tunnel_free_hw_if_indices)-1];
+        _vec_len (mpls_tunnel_free_hw_if_indices) -= 1;
+        hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index);
+        hi->hw_instance = mti;
+        hi->dev_instance = mti;
     }
-    else 
+    else
     {
-	mt->mt_hw_if_index = vnet_register_interface(
-	                         vnm,
-				 mpls_tunnel_class.index,
-				 mti,
-				 mpls_tunnel_hw_interface_class.index,
-				 mti);
-	hi = vnet_get_hw_interface(vnm, mt->mt_hw_if_index);
+        mt->mt_hw_if_index = vnet_register_interface(
+                                 vnm,
+                                 mpls_tunnel_class.index,
+                                 mti,
+                                 mpls_tunnel_hw_interface_class.index,
+                                 mti);
+        hi = vnet_get_hw_interface(vnm, mt->mt_hw_if_index);
     }
 
     /*
@@ -497,43 +620,218 @@ vnet_mpls_tunnel_add (fib_route_path_t *rpaths,
     vec_validate_init_empty(mpls_tunnel_db, mt->mt_sw_if_index, ~0);
     mpls_tunnel_db[mt->mt_sw_if_index] = mti;
 
+    if (l2_only)
+    {
+        mt->mt_l2_adj =
+            adj_nbr_add_or_lock(fib_path_list_get_proto(mt->mt_path_list),
+                                VNET_LINK_ETHERNET,
+                                &zero_addr,
+                                mt->mt_sw_if_index);
+
+        mt->mt_l2_tx_arc = vlib_node_add_named_next(vlib_get_main(),
+                                                    hi->tx_node_index,
+                                                    "adj-l2-midchain");
+    }
+
+    return (mt->mt_sw_if_index);
+}
+
+/*
+ * mpls_tunnel_path_ext_append
+ *
+ * append a path extension to the tunnel's list
+ */
+static void
+mpls_tunnel_path_ext_append (mpls_tunnel_t *mt,
+                             const fib_route_path_t *rpath)
+{
+    if (NULL != rpath->frp_label_stack)
+    {
+        fib_path_ext_t *path_ext;
+
+        vec_add2(mt->mt_path_exts, path_ext, 1);
+
+        fib_path_ext_init(path_ext, mt->mt_path_list, rpath);
+    }
+}
+
+/*
+ * mpls_tunnel_path_ext_insert
+ *
+ * insert, sorted, a path extension to the tunnel's list.
+ * It's not strictly necessary to sort the path extensions, since each
+ * extension has the path index to which it resolves. However, by being
+ * sorted the load-balance produced has a deterministic order, not an order
+ * based on the sequence of extension additions. This is a considerable benefit.
+ */
+static void
+mpls_tunnel_path_ext_insert (mpls_tunnel_t *mt,
+                             const fib_route_path_t *rpath)
+{
+    if (0 == vec_len(mt->mt_path_exts))
+        return (mpls_tunnel_path_ext_append(mt, rpath));
+
+    if (NULL != rpath->frp_label_stack)
+    {
+        fib_path_ext_t path_ext;
+        int i = 0;
+
+        fib_path_ext_init(&path_ext, mt->mt_path_list, rpath);
+
+        while (i < vec_len(mt->mt_path_exts) &&
+               (fib_path_ext_cmp(&mt->mt_path_exts[i], rpath) < 0))
+        {
+            i++;
+        }
+
+        vec_insert_elts(mt->mt_path_exts, &path_ext, 1, i);
+    }
+}
+
+void
+vnet_mpls_tunnel_path_add (u32 sw_if_index,
+                           fib_route_path_t *rpaths)
+{
+    mpls_tunnel_t *mt;
+    u32 mti;
+
+    mt = mpls_tunnel_get_from_sw_if_index(sw_if_index);
+
+    if (NULL == mt)
+        return;
+
+    mti = mt - mpls_tunnel_pool;
+
     /*
      * construct a path-list from the path provided
      */
-    mt->mt_path_list = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, rpaths);
-    mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list,
-						   FIB_NODE_TYPE_MPLS_TUNNEL,
-						   mti);
+    if (FIB_NODE_INDEX_INVALID == mt->mt_path_list)
+    {
+        mt->mt_path_list = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, rpaths);
+        mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list,
+                                                       FIB_NODE_TYPE_MPLS_TUNNEL,
+                                                       mti);
+    }
+    else
+    {
+        fib_node_index_t old_pl_index;
+        fib_path_ext_t *path_ext;
+
+        old_pl_index = mt->mt_path_list;
+
+        mt->mt_path_list =
+            fib_path_list_copy_and_path_add(old_pl_index,
+                                            FIB_PATH_LIST_FLAG_SHARED,
+                                            rpaths);
+
+        fib_path_list_child_remove(old_pl_index,
+                                   mt->mt_sibling_index);
+        mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list,
+                                                       FIB_NODE_TYPE_MPLS_TUNNEL,
+                                                       mti);
+        /*
+         * re-resolve all the path-extensions with the new path-list
+         */
+        vec_foreach(path_ext, mt->mt_path_exts)
+        {
+            fib_path_ext_resolve(path_ext, mt->mt_path_list);
+        }
+    }
+    mpls_tunnel_path_ext_insert(mt, rpaths);
+    mpls_tunnel_restack(mt);
+}
+
+int
+vnet_mpls_tunnel_path_remove (u32 sw_if_index,
+                              fib_route_path_t *rpaths)
+{
+    mpls_tunnel_t *mt;
+    u32 mti;
 
-    mt->mt_label_stack = vec_dup(label_stack);
+    mt = mpls_tunnel_get_from_sw_if_index(sw_if_index);
 
-    if (l2_only)
+    if (NULL == mt)
+        return (0);
+
+    mti = mt - mpls_tunnel_pool;
+
+    /*
+     * a path can only be removed from a tunnel that already has a path-list
+     */
+    if (FIB_NODE_INDEX_INVALID == mt->mt_path_list)
     {
-	mt->mt_l2_adj =
-	    adj_nbr_add_or_lock(fib_path_list_get_proto(mt->mt_path_list),
-				VNET_LINK_ETHERNET,
-				&zero_addr,
-				mt->mt_sw_if_index);
-
-	mt->mt_l2_tx_arc = vlib_node_add_named_next(vlib_get_main(),
-						    hi->tx_node_index,
-						    "adj-l2-midchain");
+        /* can't remove a path if we have none */
+        return (0);
     }
-
-    *sw_if_index = mt->mt_sw_if_index;
+    else
+    {
+        fib_node_index_t old_pl_index;
+        fib_path_ext_t *path_ext;
+
+        old_pl_index = mt->mt_path_list;
+
+        mt->mt_path_list =
+            fib_path_list_copy_and_path_remove(old_pl_index,
+                                               FIB_PATH_LIST_FLAG_SHARED,
+                                               rpaths);
+
+        fib_path_list_child_remove(old_pl_index,
+                                   mt->mt_sibling_index);
+
+        if (FIB_NODE_INDEX_INVALID == mt->mt_path_list)
+        {
+            /* no paths left */
+            return (0);
+        }
+        else
+        {
+            mt->mt_sibling_index =
+                fib_path_list_child_add(mt->mt_path_list,
+                                        FIB_NODE_TYPE_MPLS_TUNNEL,
+                                        mti);
+        }
+        /*
+         * find the matching path extension and remove it
+         */
+        vec_foreach(path_ext, mt->mt_path_exts)
+        {
+            if (!fib_path_ext_cmp(path_ext, rpaths))
+            {
+                /*
+                 * delete the element moving the remaining elements down 1 position.
+                 * this preserves the sorted order.
+                 */
+                vec_free(path_ext->fpe_label_stack);
+                vec_delete(mt->mt_path_exts, 1,
+                           (path_ext - mt->mt_path_exts));
+                break;
+            }
+        }
+       /*
+         * re-resolve all the path-extensions with the new path-list
+         */
+        vec_foreach(path_ext, mt->mt_path_exts)
+        {
+            fib_path_ext_resolve(path_ext, mt->mt_path_list);
+        }
+
+        mpls_tunnel_restack(mt);
+   }
+
+    return (fib_path_list_get_n_paths(mt->mt_path_list));
 }
 
+
 static clib_error_t *
 vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm,
-				    unformat_input_t * input,
-				    vlib_cli_command_t * cmd)
+                                    unformat_input_t * input,
+                                    vlib_cli_command_t * cmd)
 {
     unformat_input_t _line_input, * line_input = &_line_input;
     vnet_main_t * vnm = vnet_get_main();
-    u8 is_del = 0;
-    u8 l2_only = 0;
+    u8 is_del = 0, l2_only = 0, is_multicast =0;
     fib_route_path_t rpath, *rpaths = NULL;
-    mpls_label_t out_label = MPLS_LABEL_INVALID, *labels = NULL;
+    mpls_label_t out_label = MPLS_LABEL_INVALID;
     u32 sw_if_index;
     clib_error_t *error = NULL;
 
@@ -541,87 +839,89 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm,
 
     /* Get a line of input. */
     if (! unformat_user (input, unformat_line_input, line_input))
-	return 0;
+        return 0;
 
     while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
     {
-	if (unformat (line_input, "del %U",
-		      unformat_vnet_sw_interface, vnm,
-		      &sw_if_index))
-	    is_del = 1;
-	else if (unformat (line_input, "add"))
-	    is_del = 0;
-	else if (unformat (line_input, "out-label %U",
-			   unformat_mpls_unicast_label, &out_label))
-	{
-	    vec_add1(labels, out_label);
-	}
-	else if (unformat (line_input, "via %U %U",
-			   unformat_ip4_address,
-			   &rpath.frp_addr.ip4,
-			   unformat_vnet_sw_interface, vnm,
-			   &rpath.frp_sw_if_index))
-	{
-	    rpath.frp_weight = 1;
-	    rpath.frp_proto = FIB_PROTOCOL_IP4;
-	}
-			 
-	else if (unformat (line_input, "via %U %U",
-			   unformat_ip6_address,
-			   &rpath.frp_addr.ip6,
-			   unformat_vnet_sw_interface, vnm,
-			   &rpath.frp_sw_if_index))
-	{
-	    rpath.frp_weight = 1;
-	    rpath.frp_proto = FIB_PROTOCOL_IP6;
-	}
-	else if (unformat (line_input, "via %U",
-			   unformat_ip6_address,
-			   &rpath.frp_addr.ip6))
-	{
-	    rpath.frp_fib_index = 0;
-	    rpath.frp_weight = 1;
-	    rpath.frp_sw_if_index = ~0;
-	    rpath.frp_proto = FIB_PROTOCOL_IP6;
-	}
-	else if (unformat (line_input, "via %U",
-			   unformat_ip4_address,
-			   &rpath.frp_addr.ip4))
-	{
-	    rpath.frp_fib_index = 0;
-	    rpath.frp_weight = 1;
-	    rpath.frp_sw_if_index = ~0;
-	    rpath.frp_proto = FIB_PROTOCOL_IP4;
-	}
-	else if (unformat (line_input, "l2-only"))
-	    l2_only = 1;
-	else
-	{
-	    error = clib_error_return (0, "unknown input '%U'",
-				       format_unformat_error, line_input);
-	    goto done;
-	}
+        if (unformat (line_input, "del %U",
+                      unformat_vnet_sw_interface, vnm,
+                      &sw_if_index))
+            is_del = 1;
+        else if (unformat (line_input, "add"))
+            is_del = 0;
+        else if (unformat (line_input, "out-label %U",
+                           unformat_mpls_unicast_label, &out_label))
+        {
+            vec_add1(rpath.frp_label_stack, out_label);
+        }
+        else if (unformat (line_input, "via %U %U",
+                           unformat_ip4_address,
+                           &rpath.frp_addr.ip4,
+                           unformat_vnet_sw_interface, vnm,
+                           &rpath.frp_sw_if_index))
+        {
+            rpath.frp_weight = 1;
+            rpath.frp_proto = FIB_PROTOCOL_IP4;
+        }
+
+        else if (unformat (line_input, "via %U %U",
+                           unformat_ip6_address,
+                           &rpath.frp_addr.ip6,
+                           unformat_vnet_sw_interface, vnm,
+                           &rpath.frp_sw_if_index))
+        {
+            rpath.frp_weight = 1;
+            rpath.frp_proto = FIB_PROTOCOL_IP6;
+        }
+        else if (unformat (line_input, "via %U",
+                           unformat_ip6_address,
+                           &rpath.frp_addr.ip6))
+        {
+            rpath.frp_fib_index = 0;
+            rpath.frp_weight = 1;
+            rpath.frp_sw_if_index = ~0;
+            rpath.frp_proto = FIB_PROTOCOL_IP6;
+        }
+        else if (unformat (line_input, "via %U",
+                           unformat_ip4_address,
+                           &rpath.frp_addr.ip4))
+        {
+            rpath.frp_fib_index = 0;
+            rpath.frp_weight = 1;
+            rpath.frp_sw_if_index = ~0;
+            rpath.frp_proto = FIB_PROTOCOL_IP4;
+        }
+        else if (unformat (line_input, "l2-only"))
+            l2_only = 1;
+        else if (unformat (line_input, "multicast"))
+            is_multicast = 1;
+        else
+        {
+            error = clib_error_return (0, "unknown input '%U'",
+                                       format_unformat_error, line_input);
+            goto done;
+        }
     }
 
     if (is_del)
     {
-	vnet_mpls_tunnel_del(sw_if_index);
+        vnet_mpls_tunnel_del(sw_if_index);
     }
     else
     {
-	if (0 == vec_len(labels))
-	{
-	    error = clib_error_return (0, "No Output Labels '%U'",
-				       format_unformat_error, line_input);
-	    goto done;
-	}
-
-	vec_add1(rpaths, rpath);
-	vnet_mpls_tunnel_add(rpaths, labels, l2_only, &sw_if_index);
+        if (0 == vec_len(rpath.frp_label_stack))
+        {
+            error = clib_error_return (0, "No Output Labels '%U'",
+                                       format_unformat_error, line_input);
+            goto done;
+        }
+
+        vec_add1(rpaths, rpath);
+        sw_if_index = vnet_mpls_tunnel_create(l2_only, is_multicast);
+        vnet_mpls_tunnel_path_add(sw_if_index, rpaths);
     }
 
 done:
-    vec_free(labels);
     vec_free(rpaths);
     unformat_free (line_input);
 
@@ -638,7 +938,7 @@ done:
  ?*/
 VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = {
   .path = "mpls tunnel",
-  .short_help = 
+  .short_help =
   "mpls tunnel via [addr] [interface] [out-labels]",
   .function = vnet_create_mpls_tunnel_command_fn,
 };
@@ -647,19 +947,28 @@ static u8 *
 format_mpls_tunnel (u8 * s, va_list * args)
 {
     mpls_tunnel_t *mt = va_arg (*args, mpls_tunnel_t *);
-    int ii;
+    mpls_tunnel_attribute_t attr;
+    fib_path_ext_t *path_ext;
 
     s = format(s, "mpls_tunnel%d: sw_if_index:%d hw_if_index:%d",
-	       mt - mpls_tunnel_pool,
-	       mt->mt_sw_if_index,
-	       mt->mt_hw_if_index);
-    s = format(s, "\n label-stack:\n  ");
-    for (ii = 0; ii < vec_len(mt->mt_label_stack); ii++)
-    {
-	s = format(s, "%d, ", mt->mt_label_stack[ii]);
+               mt - mpls_tunnel_pool,
+               mt->mt_sw_if_index,
+               mt->mt_hw_if_index);
+    if (MPLS_TUNNEL_FLAG_NONE != mt->mt_flags) {
+        s = format(s, " \n flags:");
+        FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(attr) {
+            if ((1<<attr) & mt->mt_flags) {
+                s = format (s, "%s,", mpls_tunnel_attribute_names[attr]);
+            }
+        }
     }
     s = format(s, "\n via:\n");
     s = fib_path_list_format(mt->mt_path_list, s);
+    s = format(s, "    Extensions:");
+    vec_foreach(path_ext, mt->mt_path_exts)
+    {
+        s = format(s, "\n     %U", format_fib_path_ext, path_ext);
+    }
     s = format(s, "\n");
 
     return (s);
@@ -667,42 +976,42 @@ format_mpls_tunnel (u8 * s, va_list * args)
 
 static clib_error_t *
 show_mpls_tunnel_command_fn (vlib_main_t * vm,
-			     unformat_input_t * input,
-			     vlib_cli_command_t * cmd)
+                             unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
 {
     mpls_tunnel_t * mt;
     u32 mti = ~0;
 
     if (pool_elts (mpls_tunnel_pool) == 0)
-	vlib_cli_output (vm, "No MPLS tunnels configured...");
+        vlib_cli_output (vm, "No MPLS tunnels configured...");
 
     while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
-	if (unformat (input, "%d", &mti))
-	    ;
-	else
-	    break;
+        if (unformat (input, "%d", &mti))
+            ;
+        else
+            break;
     }
 
     if (~0 == mti)
     {
-	pool_foreach (mt, mpls_tunnel_pool,
-	({
-	    vlib_cli_output (vm, "[@%d] %U",
-			     mt - mpls_tunnel_pool,
-			     format_mpls_tunnel, mt);
-	}));
+        pool_foreach (mt, mpls_tunnel_pool,
+        ({
+            vlib_cli_output (vm, "[@%d] %U",
+                             mt - mpls_tunnel_pool,
+                             format_mpls_tunnel, mt);
+        }));
     }
     else
     {
-	if (pool_is_free_index(mpls_tunnel_pool, mti))
-	    return clib_error_return (0, "Not atunnel index %d", mti);
+        if (pool_is_free_index(mpls_tunnel_pool, mti))
+            return clib_error_return (0, "Not atunnel index %d", mti);
 
-	mt = pool_elt_at_index(mpls_tunnel_pool, mti);
+        mt = pool_elt_at_index(mpls_tunnel_pool, mti);
 
-	vlib_cli_output (vm, "[@%d] %U",
-			 mt - mpls_tunnel_pool,
-			 format_mpls_tunnel, mt);
+        vlib_cli_output (vm, "[@%d] %U",
+                         mt - mpls_tunnel_pool,
+                         format_mpls_tunnel, mt);
     }
 
     return 0;
@@ -715,7 +1024,7 @@ show_mpls_tunnel_command_fn (vlib_main_t * vm,
  * @cliexstart{sh mpls tunnel 2}
  * [@2] mpls_tunnel2: sw_if_index:5 hw_if_index:5
  *  label-stack:
- *    3, 
+ *    3,
  *  via:
  *   index:26 locks:1 proto:ipv4 uPRF-list:26 len:1 itfs:[2, ]
  *     index:26 pl-index:26 ipv4 weight=1 attached-nexthop:  oper-flags:resolved,
@@ -743,7 +1052,7 @@ mpls_tunnel_from_fib_node (fib_node_t *node)
  */
 static fib_node_back_walk_rc_t
 mpls_tunnel_back_walk (fib_node_t *node,
-		      fib_node_back_walk_ctx_t *ctx)
+                      fib_node_back_walk_ctx_t *ctx)
 {
     mpls_tunnel_restack(mpls_tunnel_from_fib_node(node));
 
diff --git a/src/vnet/mpls/mpls_tunnel.h b/src/vnet/mpls/mpls_tunnel.h
index ee56c0fc..0b55d0db 100644
--- a/src/vnet/mpls/mpls_tunnel.h
+++ b/src/vnet/mpls/mpls_tunnel.h
@@ -17,6 +17,31 @@
 #define __MPLS_TUNNEL_H__
 
 #include <vnet/mpls/mpls.h>
+#include <vnet/fib/fib_path_ext.h>
+
+typedef enum mpls_tunnel_attribute_t_
+{
+    MPLS_TUNNEL_ATTRIBUTE_FIRST = 0,
+    /**
+     * @brief The tunnel has an underlying multicast LSP
+     */
+    MPLS_TUNNEL_ATTRIBUTE_MCAST = MPLS_TUNNEL_ATTRIBUTE_FIRST,
+    MPLS_TUNNEL_ATTRIBUTE_LAST = MPLS_TUNNEL_ATTRIBUTE_MCAST,
+} mpls_tunnel_attribute_t;
+
+#define MPLS_TUNNEL_ATTRIBUTES {		  \
+    [MPLS_TUNNEL_ATTRIBUTE_MCAST]  = "multicast", \
+}
+#define FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(_item)		\
+    for (_item = MPLS_TUNNEL_ATTRIBUTE_FIRST;		\
+	 _item < MPLS_TUNNEL_ATTRIBUTE_LAST;		\
+	 _item++)
+
+typedef enum mpls_tunnel_flag_t_ {
+    MPLS_TUNNEL_FLAG_NONE   = 0,
+    MPLS_TUNNEL_FLAG_MCAST  = (1 << MPLS_TUNNEL_ATTRIBUTE_MCAST),
+} __attribute__ ((packed)) mpls_tunnel_flags_t;
+
 
 /**
  * @brief A uni-directional MPLS tunnel
@@ -28,6 +53,11 @@ typedef struct mpls_tunnel_t_
      */
     fib_node_t mt_node;
 
+    /**
+     * @brief Tunnel flags
+     */
+    mpls_tunnel_flags_t mt_flags;
+
     /**
      * @brief If the tunnel is an L2 tunnel, this is the link type ETHERNET
      * adjacency
@@ -50,9 +80,9 @@ typedef struct mpls_tunnel_t_
     u32 mt_sibling_index;
 
     /**
-     * @brief The Label stack to apply to egress packets
+     * A vector of path extensions to hold the label stack for each path
      */
-    mpls_label_t *mt_label_stack;
+    fib_path_ext_t *mt_path_exts;
 
     /**
      * @brief Flag to indicate the tunnel is only for L2 traffic, that is
@@ -74,12 +104,27 @@ typedef struct mpls_tunnel_t_
 
 /**
  * @brief Create a new MPLS tunnel
+ * @return the SW Interface index of the newly created tunnel
  */
-extern void vnet_mpls_tunnel_add (fib_route_path_t *rpath,
-				  mpls_label_t *label_stack,
-				  u8 l2_only,
-				  u32 *sw_if_index);
+extern u32 vnet_mpls_tunnel_create (u8 l2_only,
+                                    u8 is_multicast);
 
+/**
+ * @brief Add a path to an MPLS tunnel
+ */
+extern void vnet_mpls_tunnel_path_add (u32 sw_if_index,
+                                       fib_route_path_t *rpath);
+
+/**
+ * @brief remove a path from a tunnel.
+ * @return the number of remaining paths. 0 implies the tunnel can be deleted
+ */
+extern int vnet_mpls_tunnel_path_remove (u32 sw_if_index,
+                                         fib_route_path_t *rpath);
+
+/**
+ * @brief Delete an MPLS tunnel
+ */
 extern void vnet_mpls_tunnel_del (u32 sw_if_index);
 
 extern const mpls_tunnel_t *mpls_tunnel_get(u32 index);
diff --git a/src/vnet/mpls/mpls_types.h b/src/vnet/mpls/mpls_types.h
index d7c629df..b1075cdd 100644
--- a/src/vnet/mpls/mpls_types.h
+++ b/src/vnet/mpls/mpls_types.h
@@ -1,3 +1,17 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 #ifndef __MPLS_TYPES_H__
 #define __MPLS_TYPES_H__
 
@@ -36,4 +50,10 @@
     (((_lbl) > MPLS_IETF_MIN_UNRES_LABEL) &&	\
      ((_lbl) <= MPLS_IETF_MAX_UNRES_LABEL))
 
+/**
+ * The top bit of the index, which is the result of the MPLS lookup
+ * is used to determine if the DPO is a load-balance or a replicate
+ */
+#define MPLS_IS_REPLICATE 0x80000000
+
 #endif
diff --git a/src/vnet/srp/interface.c b/src/vnet/srp/interface.c
index d427cc3c..44e2b0d6 100644
--- a/src/vnet/srp/interface.c
+++ b/src/vnet/srp/interface.c
@@ -58,7 +58,7 @@ srp_build_rewrite (vnet_main_t * vnm,
 #define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break
     _ (IP4, IP4);
     _ (IP6, IP6);
-    _ (MPLS, MPLS_UNICAST);
+    _ (MPLS, MPLS);
     _ (ARP, ARP);
 #undef _
   default:
diff --git a/test/test_ip_mcast.py b/test/test_ip_mcast.py
index 36d597a7..c1397d70 100644
--- a/test/test_ip_mcast.py
+++ b/test/test_ip_mcast.py
@@ -622,6 +622,7 @@ class TestIPMcast(VppTestCase):
             (MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT |
              MRouteItfFlags.MFIB_ITF_FLAG_NEGATE_SIGNAL))
 
+        self.vapi.cli("clear trace")
         tx = self._mcast_connected_send_stream("232.1.1.1")
 
         signals = self.vapi.mfib_signal_dump()
diff --git a/test/test_mpls.py b/test/test_mpls.py
index fc832644..700b7091 100644
--- a/test/test_mpls.py
+++ b/test/test_mpls.py
@@ -5,7 +5,9 @@ import socket
 
 from framework import VppTestCase, VppTestRunner
 from vpp_ip_route import VppIpRoute, VppRoutePath, VppMplsRoute, \
-    VppMplsIpBind
+    VppMplsIpBind, VppIpMRoute, VppMRoutePath, \
+    MRouteItfFlags, MRouteEntryFlags
+from vpp_mpls_tunnel_interface import VppMPLSTunnelInterface
 
 from scapy.packet import Raw
 from scapy.layers.l2 import Ether
@@ -21,7 +23,7 @@ class TestMPLS(VppTestCase):
         super(TestMPLS, self).setUp()
 
         # create 2 pg interfaces
-        self.create_pg_interfaces(range(2))
+        self.create_pg_interfaces(range(4))
 
         # setup both interfaces
         # assign them different tables.
@@ -53,10 +55,12 @@ class TestMPLS(VppTestCase):
             mpls_labels,
             mpls_ttl=255,
             ping=0,
-            ip_itf=None):
+            ip_itf=None,
+            dst_ip=None,
+            n=257):
         self.reset_packet_infos()
         pkts = []
-        for i in range(0, 257):
+        for i in range(0, n):
             info = self.create_packet_info(src_if, src_if)
             payload = self.info_to_payload(info)
             p = Ether(dst=src_if.local_mac, src=src_if.remote_mac)
@@ -67,9 +71,14 @@ class TestMPLS(VppTestCase):
                 else:
                     p = p / MPLS(label=mpls_labels[ii], ttl=mpls_ttl, s=0)
             if not ping:
-                p = (p / IP(src=src_if.local_ip4, dst=src_if.remote_ip4) /
-                     UDP(sport=1234, dport=1234) /
-                     Raw(payload))
+                if not dst_ip:
+                    p = (p / IP(src=src_if.local_ip4, dst=src_if.remote_ip4) /
+                         UDP(sport=1234, dport=1234) /
+                         Raw(payload))
+                else:
+                    p = (p / IP(src=src_if.local_ip4, dst=dst_ip) /
+                         UDP(sport=1234, dport=1234) /
+                         Raw(payload))
             else:
                 p = (p / IP(src=ip_itf.remote_ip4,
                             dst=ip_itf.local_ip4) /
@@ -254,6 +263,13 @@ class TestMPLS(VppTestCase):
         except:
             raise
 
+    def send_and_assert_no_replies(self, intf, pkts, remark):
+        intf.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        for i in self.pg_interfaces:
+            i.assert_nothing_captured(remark=remark)
+
     def test_swap(self):
         """ MPLS label swap tests """
 
@@ -278,7 +294,7 @@ class TestMPLS(VppTestCase):
         self.pg_start()
 
         rx = self.pg0.get_capture()
-        self.verify_capture_labelled_ip4(self.pg0, rx, tx, [33])
+        self.verify_capture_labelled(self.pg0, rx, tx, [33])
 
         #
         # A simple MPLS xconnect - non-eos label in label out
@@ -358,7 +374,7 @@ class TestMPLS(VppTestCase):
         self.pg_start()
 
         rx = self.pg0.get_capture()
-        self.verify_capture_labelled_ip4(self.pg0, rx, tx, [33, 44, 45])
+        self.verify_capture_labelled(self.pg0, rx, tx, [33, 44, 45], num=2)
 
         #
         # A recursive non-EOS x-connect, which resolves through another
@@ -576,25 +592,19 @@ class TestMPLS(VppTestCase):
         #
         # Create a tunnel with a single out label
         #
-        nh_addr = socket.inet_pton(socket.AF_INET, self.pg0.remote_ip4)
-
-        reply = self.vapi.mpls_tunnel_add_del(
-            0xffffffff,  # don't know the if index yet
-            1,  # IPv4 next-hop
-            nh_addr,
-            self.pg0.sw_if_index,
-            0,  # next-hop-table-id
-            1,  # next-hop-weight
-            2,  # num-out-labels,
-            [44, 46])
-        self.vapi.sw_interface_set_flags(reply.sw_if_index, admin_up_down=1)
+        mpls_tun = VppMPLSTunnelInterface(self,
+                                          [VppRoutePath(self.pg0.remote_ip4,
+                                                        self.pg0.sw_if_index,
+                                                        labels=[44, 46])])
+        mpls_tun.add_vpp_config()
+        mpls_tun.admin_up()
 
         #
         # add an unlabelled route through the new tunnel
         #
         route_10_0_0_3 = VppIpRoute(self, "10.0.0.3", 32,
                                     [VppRoutePath("0.0.0.0",
-                                                  reply.sw_if_index)])
+                                                  mpls_tun._sw_if_index)])
         route_10_0_0_3.add_vpp_config()
 
         self.vapi.cli("clear trace")
@@ -738,6 +748,229 @@ class TestMPLS(VppTestCase):
         route_35_eos.remove_vpp_config()
         route_34_eos.remove_vpp_config()
 
+    def test_interface_rx(self):
+        """ MPLS Interface Receive """
+
+        #
+        # Add a non-recursive route that will forward the traffic
+        # post-interface-rx
+        #
+        route_10_0_0_1 = VppIpRoute(self, "10.0.0.1", 32,
+                                    table_id=1,
+                                    paths=[VppRoutePath(self.pg1.remote_ip4,
+                                                        self.pg1.sw_if_index)])
+        route_10_0_0_1.add_vpp_config()
+
+        #
+        # An interface receive label that maps traffic to RX on interface
+        # pg1
+        # by injecting the packet in on pg0, which is in table 0
+        # doing an interface-rx on pg1 and matching a route in table 1
+        # if the packet egresses, then we must have swapped to pg1
+        # so as to have matched the route in table 1
+        #
+        route_34_eos = VppMplsRoute(self, 34, 1,
+                                    [VppRoutePath("0.0.0.0",
+                                                  self.pg1.sw_if_index,
+                                                  is_interface_rx=1)])
+        route_34_eos.add_vpp_config()
+
+        #
+        # ping an interface in the default table
+        # PG0 is in the default table
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_labelled_ip4(self.pg0, [34], n=257,
+                                             dst_ip="10.0.0.1")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg1.get_capture(257)
+        self.verify_capture_ip4(self.pg1, rx, tx)
+
+    def test_mcast_mid_point(self):
+        """ MPLS Multicast Mid Point """
+
+        #
+        # Add a non-recursive route that will forward the traffic
+        # post-interface-rx
+        #
+        route_10_0_0_1 = VppIpRoute(self, "10.0.0.1", 32,
+                                    table_id=1,
+                                    paths=[VppRoutePath(self.pg1.remote_ip4,
+                                                        self.pg1.sw_if_index)])
+        route_10_0_0_1.add_vpp_config()
+
+        #
+        # Add a mcast entry that replicates to pg2 and pg3
+        # and replicates to an interface-rx (like a bud node would)
+        #
+        route_3400_eos = VppMplsRoute(self, 3400, 1,
+                                      [VppRoutePath(self.pg2.remote_ip4,
+                                                    self.pg2.sw_if_index,
+                                                    labels=[3401]),
+                                       VppRoutePath(self.pg3.remote_ip4,
+                                                    self.pg3.sw_if_index,
+                                                    labels=[3402]),
+                                       VppRoutePath("0.0.0.0",
+                                                    self.pg1.sw_if_index,
+                                                    is_interface_rx=1)],
+                                      is_multicast=1)
+        route_3400_eos.add_vpp_config()
+
+        #
+        # ping an interface in the default table
+        # PG0 is in the default table
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_labelled_ip4(self.pg0, [3400], n=257,
+                                             dst_ip="10.0.0.1")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg1.get_capture(257)
+        self.verify_capture_ip4(self.pg1, rx, tx)
+
+        rx = self.pg2.get_capture(257)
+        self.verify_capture_labelled(self.pg2, rx, tx, [3401])
+        rx = self.pg3.get_capture(257)
+        self.verify_capture_labelled(self.pg3, rx, tx, [3402])
+
+    def test_mcast_head(self):
+        """ MPLS Multicast Head-end """
+
+        #
+        # Create a multicast tunnel with two replications
+        #
+        mpls_tun = VppMPLSTunnelInterface(self,
+                                          [VppRoutePath(self.pg2.remote_ip4,
+                                                        self.pg2.sw_if_index,
+                                                        labels=[42]),
+                                           VppRoutePath(self.pg3.remote_ip4,
+                                                        self.pg3.sw_if_index,
+                                                        labels=[43])],
+                                          is_multicast=1)
+        mpls_tun.add_vpp_config()
+        mpls_tun.admin_up()
+
+        #
+        # add an unlabelled route through the new tunnel
+        #
+        route_10_0_0_3 = VppIpRoute(self, "10.0.0.3", 32,
+                                    [VppRoutePath("0.0.0.0",
+                                                  mpls_tun._sw_if_index)])
+        route_10_0_0_3.add_vpp_config()
+
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip4(self.pg0, "10.0.0.3")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg2.get_capture(257)
+        self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [42])
+        rx = self.pg3.get_capture(257)
+        self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [43])
+
+        #
+        # Add an IP multicast route via the tunnel
+        # A (*,G).
+        # one accepting interface, pg0, 1 forwarding interface via the tunnel
+        #
+        route_232_1_1_1 = VppIpMRoute(
+            self,
+            "0.0.0.0",
+            "232.1.1.1", 32,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            [VppMRoutePath(self.pg0.sw_if_index,
+                           MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT),
+             VppMRoutePath(mpls_tun._sw_if_index,
+                           MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)])
+        route_232_1_1_1.add_vpp_config()
+
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip4(self.pg0, "232.1.1.1")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg2.get_capture(257)
+        self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [42])
+        rx = self.pg3.get_capture(257)
+        self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [43])
+
+    def test_mcast_tail(self):
+        """ MPLS Multicast Tail """
+
+        #
+        # Add a multicast route that will forward the traffic
+        # post-disposition
+        #
+        route_232_1_1_1 = VppIpMRoute(
+            self,
+            "0.0.0.0",
+            "232.1.1.1", 32,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            table_id=1,
+            paths=[VppMRoutePath(self.pg1.sw_if_index,
+                                 MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)])
+        route_232_1_1_1.add_vpp_config()
+
+        #
+        # An interface receive label that maps traffic to RX on interface
+        # pg1
+        # by injecting the packet in on pg0, which is in table 0
+        # doing an rpf-id  and matching a route in table 1
+        # if the packet egresses, then we must have matched the route in
+        # table 1
+        #
+        route_34_eos = VppMplsRoute(self, 34, 1,
+                                    [VppRoutePath("0.0.0.0",
+                                                  self.pg1.sw_if_index,
+                                                  nh_table_id=1,
+                                                  rpf_id=55)],
+                                    is_multicast=1)
+
+        route_34_eos.add_vpp_config()
+
+        #
+        # Drop due to interface lookup miss
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_labelled_ip4(self.pg0, [34],
+                                             dst_ip="232.1.1.1", n=1)
+        self.send_and_assert_no_replies(self.pg0, tx, "RPF-ID drop none")
+
+        #
+        # set the RPF-ID of the entry to match the input packet's
+        #
+        route_232_1_1_1.update_rpf_id(55)
+
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_labelled_ip4(self.pg0, [34],
+                                             dst_ip="232.1.1.1", n=257)
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg1.get_capture(257)
+        self.verify_capture_ip4(self.pg1, rx, tx)
+
+        #
+        # set the RPF-ID of the entry to not match the input packet's
+        #
+        route_232_1_1_1.update_rpf_id(56)
+        tx = self.create_stream_labelled_ip4(self.pg0, [34],
+                                             dst_ip="232.1.1.1")
+        self.send_and_assert_no_replies(self.pg0, tx, "RPF-ID drop 56")
+
 
 class TestMPLSDisabled(VppTestCase):
     """ MPLS disabled """
diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py
index faf5f801..d6146f28 100644
--- a/test/vpp_ip_route.py
+++ b/test/vpp_ip_route.py
@@ -55,15 +55,24 @@ class VppRoutePath(object):
             nh_table_id=0,
             labels=[],
             nh_via_label=MPLS_LABEL_INVALID,
-            is_ip6=0):
+            is_ip6=0,
+            rpf_id=0,
+            is_interface_rx=0):
         self.nh_itf = nh_sw_if_index
         self.nh_table_id = nh_table_id
         self.nh_via_label = nh_via_label
         self.nh_labels = labels
+        self.weight = 1
+        self.rpf_id = rpf_id
         if is_ip6:
             self.nh_addr = inet_pton(AF_INET6, nh_addr)
         else:
             self.nh_addr = inet_pton(AF_INET, nh_addr)
+        self.is_interface_rx = is_interface_rx
+        self.is_rpf_id = 0
+        if rpf_id != 0:
+            self.is_rpf_id = 1
+            self.nh_itf = rpf_id
 
 
 class VppMRoutePath(VppRoutePath):
@@ -176,13 +185,15 @@ class VppIpMRoute(VppObject):
     """
 
     def __init__(self, test, src_addr, grp_addr,
-                 grp_addr_len, e_flags, paths, table_id=0, is_ip6=0):
+                 grp_addr_len, e_flags, paths, table_id=0,
+                 rpf_id=0, is_ip6=0):
         self._test = test
         self.paths = paths
         self.grp_addr_len = grp_addr_len
         self.table_id = table_id
         self.e_flags = e_flags
         self.is_ip6 = is_ip6
+        self.rpf_id = rpf_id
 
         if is_ip6:
             self.grp_addr = inet_pton(AF_INET6, grp_addr)
@@ -199,6 +210,7 @@ class VppIpMRoute(VppObject):
                                               self.e_flags,
                                               path.nh_itf,
                                               path.nh_i_flags,
+                                              rpf_id=self.rpf_id,
                                               table_id=self.table_id,
                                               is_ipv6=self.is_ip6)
         self._test.registry.register(self, self._test.logger)
@@ -226,6 +238,18 @@ class VppIpMRoute(VppObject):
                                           table_id=self.table_id,
                                           is_ipv6=self.is_ip6)
 
+    def update_rpf_id(self, rpf_id):
+        self.rpf_id = rpf_id
+        self._test.vapi.ip_mroute_add_del(self.src_addr,
+                                          self.grp_addr,
+                                          self.grp_addr_len,
+                                          self.e_flags,
+                                          0xffffffff,
+                                          0,
+                                          rpf_id=self.rpf_id,
+                                          table_id=self.table_id,
+                                          is_ipv6=self.is_ip6)
+
     def update_path_flags(self, itf, flags):
         for path in self.paths:
             if path.nh_itf == itf:
@@ -342,14 +366,17 @@ class VppMplsRoute(VppObject):
     MPLS Route/LSP
     """
 
-    def __init__(self, test, local_label, eos_bit, paths, table_id=0):
+    def __init__(self, test, local_label, eos_bit, paths, table_id=0,
+                 is_multicast=0):
         self._test = test
         self.paths = paths
         self.local_label = local_label
         self.eos_bit = eos_bit
         self.table_id = table_id
+        self.is_multicast = is_multicast
 
     def add_vpp_config(self):
+        is_multipath = len(self.paths) > 1
         for path in self.paths:
             self._test.vapi.mpls_route_add_del(
                 self.local_label,
@@ -357,7 +384,11 @@ class VppMplsRoute(VppObject):
                 1,
                 path.nh_addr,
                 path.nh_itf,
+                is_multicast=self.is_multicast,
+                is_multipath=is_multipath,
                 table_id=self.table_id,
+                is_interface_rx=path.is_interface_rx,
+                is_rpf_id=path.is_rpf_id,
                 next_hop_out_label_stack=path.nh_labels,
                 next_hop_n_out_labels=len(
                     path.nh_labels),
@@ -372,6 +403,7 @@ class VppMplsRoute(VppObject):
                                                1,
                                                path.nh_addr,
                                                path.nh_itf,
+                                               is_rpf_id=path.is_rpf_id,
                                                table_id=self.table_id,
                                                is_add=0)
 
diff --git a/test/vpp_mpls_tunnel_interface.py b/test/vpp_mpls_tunnel_interface.py
new file mode 100644
index 00000000..f2001574
--- /dev/null
+++ b/test/vpp_mpls_tunnel_interface.py
@@ -0,0 +1,46 @@
+
+from vpp_interface import VppInterface
+from vpp_ip_route import VppRoutePath
+import socket
+
+
+class VppMPLSTunnelInterface(VppInterface):
+    """
+    VPP MPLS Tunnel interface
+    """
+
+    def __init__(self, test, paths, is_multicast=0):
+        """ Create MPLS Tunnel interface """
+        self._sw_if_index = 0
+        super(VppMPLSTunnelInterface, self).__init__(test)
+        self._test = test
+        self.t_paths = paths
+        self.is_multicast = is_multicast
+
+    def add_vpp_config(self):
+        self._sw_if_index = 0xffffffff
+        for path in self.t_paths:
+            reply = self.test.vapi.mpls_tunnel_add_del(
+                self._sw_if_index,
+                1,  # IPv4 next-hop
+                path.nh_addr,
+                path.nh_itf,
+                path.nh_table_id,
+                path.weight,
+                next_hop_out_label_stack=path.nh_labels,
+                next_hop_n_out_labels=len(path.nh_labels),
+                is_multicast=self.is_multicast)
+            self._sw_if_index = reply.sw_if_index
+
+    def remove_vpp_config(self):
+        for path in self.t_paths:
+            reply = self.test.vapi.mpls_tunnel_add_del(
+                self.sw_if_index,
+                1,  # IPv4 next-hop
+                path.nh_addr,
+                path.nh_itf,
+                path.nh_table_id,
+                path.weight,
+                next_hop_out_label_stack=path.nh_labels,
+                next_hop_n_out_labels=len(path.nh_labels),
+                is_add=0)
diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py
index e8025dff..ceb684b7 100644
--- a/test/vpp_papi_provider.py
+++ b/test/vpp_papi_provider.py
@@ -849,6 +849,9 @@ class VppPapiProvider(object):
             create_vrf_if_needed=0,
             is_resolve_host=0,
             is_resolve_attached=0,
+            is_interface_rx=0,
+            is_rpf_id=0,
+            is_multicast=0,
             is_add=1,
             is_drop=0,
             is_multipath=0,
@@ -872,6 +875,7 @@ class VppPapiProvider(object):
         :param is_local:  (Default value = 0)
         :param is_classify:  (Default value = 0)
         :param is_multipath:  (Default value = 0)
+        :param is_multicast:  (Default value = 0)
         :param is_resolve_host:  (Default value = 0)
         :param is_resolve_attached:  (Default value = 0)
         :param not_last:  (Default value = 0)
@@ -889,8 +893,11 @@ class VppPapiProvider(object):
              'mr_is_add': is_add,
              'mr_is_classify': is_classify,
              'mr_is_multipath': is_multipath,
+             'mr_is_multicast': is_multicast,
              'mr_is_resolve_host': is_resolve_host,
              'mr_is_resolve_attached': is_resolve_attached,
+             'mr_is_interface_rx': is_interface_rx,
+             'mr_is_rpf_id': is_rpf_id,
              'mr_next_hop_proto_is_ip4': next_hop_proto_is_ip4,
              'mr_next_hop_weight': next_hop_weight,
              'mr_next_hop': next_hop_address,
@@ -936,7 +943,8 @@ class VppPapiProvider(object):
             next_hop_via_label=MPLS_LABEL_INVALID,
             create_vrf_if_needed=0,
             is_add=1,
-            l2_only=0):
+            l2_only=0,
+            is_multicast=0):
         """
 
         :param dst_address_length:
@@ -956,8 +964,8 @@ class VppPapiProvider(object):
         :param is_multipath:  (Default value = 0)
         :param is_resolve_host:  (Default value = 0)
         :param is_resolve_attached:  (Default value = 0)
-        :param not_last:  (Default value = 0)
         :param next_hop_weight:  (Default value = 1)
+        :param is_multicast:  (Default value = 0)
 
         """
         return self.api(
@@ -965,6 +973,7 @@ class VppPapiProvider(object):
             {'mt_sw_if_index': tun_sw_if_index,
              'mt_is_add': is_add,
              'mt_l2_only': l2_only,
+             'mt_is_multicast': is_multicast,
              'mt_next_hop_proto_is_ip4': next_hop_proto_is_ip4,
              'mt_next_hop_weight': next_hop_weight,
              'mt_next_hop': next_hop_address,
@@ -1469,6 +1478,7 @@ class VppPapiProvider(object):
                           e_flags,
                           next_hop_sw_if_index,
                           i_flags,
+                          rpf_id=0,
                           table_id=0,
                           create_vrf_if_needed=0,
                           is_add=1,
@@ -1481,6 +1491,8 @@ class VppPapiProvider(object):
             {'next_hop_sw_if_index': next_hop_sw_if_index,
              'entry_flags': e_flags,
              'itf_flags': i_flags,
+             'table_id': table_id,
+             'rpf_id': rpf_id,
              'create_vrf_if_needed': create_vrf_if_needed,
              'is_add': is_add,
              'is_ipv6': is_ipv6,
-- 
cgit 1.2.3-korg


From 994dab40555427792aff0f75bd970dfaae8163ee Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Tue, 18 Apr 2017 12:56:45 -0700
Subject: Memory leak in load-balance maps

Change-Id: Iec67ae1232e346d5e0000e0b4c997fdc31865bc6
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/vnet/dpo/load_balance_map.c | 13 ++++++++++---
 src/vnet/fib/fib_test.c         | 12 ++++++++++--
 2 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/vnet/dpo/load_balance_map.c b/src/vnet/dpo/load_balance_map.c
index 70ce1bf7..4e27e5db 100644
--- a/src/vnet/dpo/load_balance_map.c
+++ b/src/vnet/dpo/load_balance_map.c
@@ -415,6 +415,14 @@ load_balance_map_init (load_balance_map_t *lbm,
     return (lbm);
 }
 
+static void
+load_balance_map_destroy (load_balance_map_t *lbm)
+{
+    vec_free(lbm->lbm_paths);
+    vec_free(lbm->lbm_buckets);
+    pool_put(load_balance_map_pool, lbm);
+}
+
 index_t
 load_balance_map_add_or_lock (u32 n_buckets,
                               u32 sum_of_weights,
@@ -434,6 +442,7 @@ load_balance_map_add_or_lock (u32 n_buckets,
     else
     {
         lbm = load_balance_map_get(lbmi);
+        load_balance_map_destroy(tmp);
     }
 
     lbm->lbm_locks++;
@@ -468,9 +477,7 @@ load_balance_map_unlock (index_t lbmi)
     if (0 == lbm->lbm_locks)
     {
         load_balance_map_db_remove(lbm);
-        vec_free(lbm->lbm_paths);
-        vec_free(lbm->lbm_buckets);
-        pool_put(load_balance_map_pool, lbm);
+        load_balance_map_destroy(lbm);
     }
 }
 
diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c
index c58dc5a1..cbb5640a 100644
--- a/src/vnet/fib/fib_test.c
+++ b/src/vnet/fib/fib_test.c
@@ -717,6 +717,7 @@ fib_test_v4 (void)
     const load_balance_t *lb;
     test_main_t *tm;
     u32 fib_index;
+    int lb_count;
     int ii;
 
     /* via 10.10.10.1 */
@@ -730,6 +731,9 @@ fib_test_v4 (void)
 
     tm = &test_main;
 
+    /* record the number of load-balances in use before we start */
+    lb_count = pool_elts(load_balance_pool);
+
     /* Find or create FIB table 11 */
     fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11);
 
@@ -3154,7 +3158,7 @@ fib_test_v4 (void)
              "LB maps's bucket 1 is %d",
              lbm->lbm_buckets[1]);
 
-    load_balance_map_unlock(lb->lb_map);
+    load_balance_map_unlock(lbmi);
 
     /*
      * add it back. again 
@@ -3249,7 +3253,7 @@ fib_test_v4 (void)
                  "LB Map for 200.200.200.200/32 at %d is %d",
                  ii, lbm->lbm_buckets[ii]);
     }
-
+    load_balance_map_unlock(lbmi);
 
     /*
      * tidy up
@@ -3850,6 +3854,10 @@ fib_test_v4 (void)
     	     fib_entry_pool_size());
     FIB_TEST((ENBR-5 == pool_elts(fib_urpf_list_pool)), "uRPF pool size is %d",
     	     pool_elts(fib_urpf_list_pool));
+    FIB_TEST((0 == pool_elts(load_balance_map_pool)), "LB-map pool size is %d",
+             pool_elts(load_balance_map_pool));
+    FIB_TEST((lb_count == pool_elts(load_balance_pool)), "LB pool size is %d",
+             pool_elts(load_balance_pool));
 
     return 0;
 }
-- 
cgit 1.2.3-korg


From f12a83f54ff2239d70494d577af3e1bb253692e1 Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Tue, 18 Apr 2017 09:09:40 -0700
Subject: Improve Load-Balance MAPs

- only build them for popular path-lists (where popular means more than 64 children)
   the reason to have a map is to improve convergence speed for recursive prefixes - if there are only a few this technique is not needed
- only build them when there is at least one path that has recursive constraints, i.e. a path that can 'fail' in a PIC scenario.
- Use the MAPS in the switch path.
- PIC test cases for functionality (not convergence performance)

Change-Id: I70705444c8469d22b07ae34be82cfb6a01358e10
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/vnet/dpo/load_balance.c     |   3 +-
 src/vnet/dpo/load_balance_map.h |  31 ++++
 src/vnet/fib/fib_entry_src.c    |  15 +-
 src/vnet/fib/fib_entry_src_rr.c |   2 +-
 src/vnet/fib/fib_path.c         |   6 +-
 src/vnet/fib/fib_path.h         |   2 +-
 src/vnet/fib/fib_path_list.c    |  73 ++++++--
 src/vnet/fib/fib_path_list.h    |  24 +--
 src/vnet/fib/fib_table.c        |  12 +-
 src/vnet/fib/fib_test.c         | 116 +++++++++++--
 src/vnet/fib/fib_walk.c         | 122 +++++++++++---
 src/vnet/fib/fib_walk.h         |   3 +
 src/vnet/ip/ip4_forward.c       | 137 ++++++++-------
 src/vnet/ip/ip6_forward.c       | 104 ++++++++----
 src/vnet/ip/ip6_neighbor.c      |   2 +-
 src/vnet/mpls/mpls_lookup.c     | 118 ++++++++-----
 test/test_mpls.py               | 359 ++++++++++++++++++++++++++++++++++++++++
 test/vpp_ip_route.py            |  53 ++++--
 18 files changed, 966 insertions(+), 216 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index 6b0eda0e..af054f1c 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -118,7 +118,8 @@ load_balance_format (index_t lbi,
     buckets = load_balance_get_buckets(lb);
 
     s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE);
-    s = format(s, "[index:%d buckets:%d ", lbi, lb->lb_n_buckets);
+    s = format(s, "[proto:%U ", format_dpo_proto, lb->lb_proto);
+    s = format(s, "index:%d buckets:%d ", lbi, lb->lb_n_buckets);
     s = format(s, "uRPF:%d ", lb->lb_urpf);
     s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
     if (0 != via.packets)
diff --git a/src/vnet/dpo/load_balance_map.h b/src/vnet/dpo/load_balance_map.h
index 454bf4b3..237f24b0 100644
--- a/src/vnet/dpo/load_balance_map.h
+++ b/src/vnet/dpo/load_balance_map.h
@@ -73,6 +73,37 @@ load_balance_map_get (index_t lbmi)
     return (pool_elt_at_index(load_balance_map_pool, lbmi));
 }
 
+static inline u16
+load_balance_map_translate (index_t lbmi,
+                            u16 bucket)
+{
+    load_balance_map_t*lbm;
+
+    lbm = load_balance_map_get(lbmi);
+
+    return (lbm->lbm_buckets[bucket]);
+}
+
+static inline const dpo_id_t *
+load_balance_get_fwd_bucket (const load_balance_t *lb,
+                             u16 bucket)
+{
+    ASSERT(bucket < lb->lb_n_buckets);
+
+    if (INDEX_INVALID != lb->lb_map)
+    {
+        bucket = load_balance_map_translate(lb->lb_map, bucket);
+    }
+
+    if (PREDICT_TRUE(LB_HAS_INLINE_BUCKETS(lb)))
+    {
+	return (&lb->lb_buckets_inline[bucket]);
+    }
+    else
+    {
+	return (&lb->lb_buckets[bucket]);
+    }
+}
 
 extern void load_balance_map_module_init(void);
 
diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c
index a700282e..fd80497c 100644
--- a/src/vnet/fib/fib_entry_src.c
+++ b/src/vnet/fib/fib_entry_src.c
@@ -192,7 +192,7 @@ typedef struct fib_entry_src_collect_forwarding_ctx_t_
     const fib_entry_t *fib_entry;
     const fib_entry_src_t *esrc;
     fib_forward_chain_type_t fct;
-    int is_recursive;
+    int n_recursive_constrained;
 } fib_entry_src_collect_forwarding_ctx_t;
 
 /**
@@ -203,10 +203,11 @@ load_balance_flags_t
 fib_entry_calc_lb_flags (fib_entry_src_collect_forwarding_ctx_t *ctx)
 {
     /**
-     * We'll use a LB map is the path-list has recursive paths.
+     * We'll use a LB map if the path-list has multiple recursive paths.
      * recursive paths implies BGP, and hence scale.
      */
-    if (ctx->is_recursive)
+    if (ctx->n_recursive_constrained > 1 &&
+        fib_path_list_is_popular(ctx->esrc->fes_pl))
     {
         return (LOAD_BALANCE_FLAG_USES_MAP);
     }
@@ -282,9 +283,9 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index,
         return (!0);
     }
 
-    if (fib_path_is_recursive(path_index))
+    if (fib_path_is_recursive_constrained(path_index))
     {
-        ctx->is_recursive = 1;
+        ctx->n_recursive_constrained += 1;
     }
 
     /*
@@ -397,7 +398,7 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
         .esrc = esrc,
         .fib_entry = fib_entry,
         .next_hops = NULL,
-        .is_recursive = 0,
+        .n_recursive_constrained = 0,
         .fct = fct,
     };
 
@@ -409,7 +410,7 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
     vec_validate(ctx.next_hops, fib_path_list_get_n_paths(esrc->fes_pl));
     vec_reset_length(ctx.next_hops);
 
-    lb_proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto);
+    lb_proto = fib_forw_chain_type_to_dpo_proto(fct);
 
     fib_path_list_walk(esrc->fes_pl,
                        fib_entry_src_collect_forwarding,
diff --git a/src/vnet/fib/fib_entry_src_rr.c b/src/vnet/fib/fib_entry_src_rr.c
index ff15c54e..c145aaa2 100644
--- a/src/vnet/fib/fib_entry_src_rr.c
+++ b/src/vnet/fib/fib_entry_src_rr.c
@@ -103,7 +103,7 @@ fib_entry_src_rr_activate (fib_entry_src_t *src,
 	fib_entry_cover_track(cover, fib_entry_get_index(fib_entry));
 
     /*
-     * if the ocver is attached then install an attached-host path
+     * if the cover is attached then install an attached-host path
      * (like an adj-fib). Otherwise inherit the forwarding from the cover
      */
     if (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover))
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 70c87905..889317fd 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -2025,13 +2025,15 @@ fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
 }
 
 int
-fib_path_is_recursive (fib_node_index_t path_index)
+fib_path_is_recursive_constrained (fib_node_index_t path_index)
 {
     fib_path_t *path;
 
     path = fib_path_get(path_index);
 
-    return (FIB_PATH_TYPE_RECURSIVE == path->fp_type);
+    return ((FIB_PATH_TYPE_RECURSIVE == path->fp_type) &&
+            ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED) ||
+             (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST)));
 }
 
 int
diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h
index 334be6f5..b6bf1e4f 100644
--- a/src/vnet/fib/fib_path.h
+++ b/src/vnet/fib/fib_path.h
@@ -144,7 +144,7 @@ extern fib_node_index_t fib_path_copy(fib_node_index_t path_index,
 				      fib_node_index_t path_list_index);
 extern int fib_path_resolve(fib_node_index_t path_index);
 extern int fib_path_is_resolved(fib_node_index_t path_index);
-extern int fib_path_is_recursive(fib_node_index_t path_index);
+extern int fib_path_is_recursive_constrained(fib_node_index_t path_index);
 extern int fib_path_is_exclusive(fib_node_index_t path_index);
 extern int fib_path_is_deag(fib_node_index_t path_index);
 extern int fib_path_is_looped(fib_node_index_t path_index);
diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c
index ea6565dd..64917f95 100644
--- a/src/vnet/fib/fib_path_list.c
+++ b/src/vnet/fib/fib_path_list.c
@@ -25,6 +25,16 @@
 #include <vnet/fib/fib_walk.h>
 #include <vnet/fib/fib_urpf_list.h>
 
+/**
+ * The magic number of child entries that make a path-list popular.
+ * There's a trade-off here between convergence and forwarding speed.
+ * Popular path-lists generate load-balance maps for the entries that
+ * use them. If the map is present there is a switch path cost to indirect
+ * through the map - this indirection provides the fast convergence - so
+ * without the map convergence is slower.
+ */
+#define FIB_PATH_LIST_POPULAR 64
+
 /**
  * FIB path-list
  * A representation of the list/set of path trough which a prefix is reachable
@@ -454,14 +464,7 @@ fib_path_list_back_walk (fib_node_index_t path_list_index,
     /*
      * propagate the backwalk further
      */
-    if (32 >= fib_node_list_get_size(path_list->fpl_node.fn_children))
-    {
-        /*
-         * only a few children. continue the walk synchronously
-         */
-	fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, ctx);
-    }
-    else
+    if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_POPULAR)
     {
         /*
          * many children. schedule a async walk
@@ -471,6 +474,13 @@ fib_path_list_back_walk (fib_node_index_t path_list_index,
                        FIB_WALK_PRIORITY_LOW,
                        ctx);
     }
+    else
+    {
+        /*
+         * only a few children. continue the walk synchronously
+         */
+	fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, ctx);
+    }
 }
 
 /*
@@ -625,6 +635,16 @@ fib_path_list_is_looped (fib_node_index_t path_list_index)
     return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_LOOPED);
 }
 
+int
+fib_path_list_is_popular (fib_node_index_t path_list_index)
+{
+    fib_path_list_t *path_list;
+
+    path_list = fib_path_list_get(path_list_index);
+
+    return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_POPULAR);
+}
+
 static fib_path_list_flags_t
 fib_path_list_flags_fixup (fib_path_list_flags_t flags)
 {
@@ -807,6 +827,7 @@ fib_path_list_path_add (fib_node_index_t path_list_index,
          */
 	if (0 == fib_path_cmp(new_path_index, *orig_path_index))
         {
+            fib_path_destroy(new_path_index);
             return (*orig_path_index);
         }
     }
@@ -1173,10 +1194,38 @@ fib_path_list_child_add (fib_node_index_t path_list_index,
 			 fib_node_type_t child_type,
 			 fib_node_index_t child_index)
 {
-    return (fib_node_child_add(FIB_NODE_TYPE_PATH_LIST,
-                               path_list_index,
-                               child_type,
-                               child_index));
+    u32 sibling;
+
+    sibling = fib_node_child_add(FIB_NODE_TYPE_PATH_LIST,
+                                 path_list_index,
+                                 child_type,
+                                 child_index);
+
+    if (FIB_PATH_LIST_POPULAR == fib_node_get_n_children(FIB_NODE_TYPE_PATH_LIST,
+                                                         path_list_index))
+    {
+        /*
+         * Set the popular flag on the path-list once we pass the magic
+         * threshold. then walk children to update.
+         * We don't undo this action. The rationale being that the number
+         * of entries using this path-list is large enough such that it is a
+         * non-trivial amount of effort to converge them. If we get into the
+         * situation where we are adding and removing entries such that we
+         * flip-flop over the threshold, then this non-trivial work is added
+         * to each of those route adds/deletes - not a situation we want.
+         */
+        fib_node_back_walk_ctx_t ctx = {
+            .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
+        };
+        fib_path_list_t *path_list;
+
+        path_list = fib_path_list_get(path_list_index);
+        path_list->fpl_flags |= FIB_PATH_LIST_FLAG_POPULAR;
+
+	fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, &ctx);
+    }
+
+    return (sibling);
 }
 
 void
diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h
index 9d246211..376cb726 100644
--- a/src/vnet/fib/fib_path_list.h
+++ b/src/vnet/fib/fib_path_list.h
@@ -38,11 +38,6 @@ typedef enum fib_path_list_attribute_t_ {
      * be searched for each route update.
      */
     FIB_PATH_LIST_ATTRIBUTE_SHARED = FIB_PATH_LIST_ATTRIBUTE_FIRST,
-    /**
-     * Indexed means the path-list keeps a hash table of all paths for
-     * fast lookup. The lookup result is the fib_node_index of the path.
-     */
-    FIB_PATH_LIST_ATTRIBUTE_INDEXED,
     /**
      * explicit drop path-list. Used when the entry source needs to 
      * force a drop, despite the fact the path info is present.
@@ -65,6 +60,12 @@ typedef enum fib_path_list_attribute_t_ {
      * looped path-list. one path looped implies the whole list is
      */
     FIB_PATH_LIST_ATTRIBUTE_LOOPED,
+    /**
+     * a popular path-list is one that is shared amongst many entries.
+     * Path-lists become popular as they gain more children, but they
+     * don't become unpopular as they lose them.
+     */
+    FIB_PATH_LIST_ATTRIBUTE_POPULAR,
     /**
      * no uRPF - do not generate unicast RPF list for this path-list
      */
@@ -72,30 +73,30 @@ typedef enum fib_path_list_attribute_t_ {
     /**
      * Marher. Add new flags before this one, and then update it.
      */
-    FIB_PATH_LIST_ATTRIBUTE_LAST = FIB_PATH_LIST_ATTRIBUTE_LOOPED,
+    FIB_PATH_LIST_ATTRIBUTE_LAST = FIB_PATH_LIST_ATTRIBUTE_NO_URPF,
 } fib_path_list_attribute_t;
 
 typedef enum fib_path_list_flags_t_ {
     FIB_PATH_LIST_FLAG_NONE      = 0,
     FIB_PATH_LIST_FLAG_SHARED    = (1 << FIB_PATH_LIST_ATTRIBUTE_SHARED),
-    FIB_PATH_LIST_FLAG_INDEXED    = (1 << FIB_PATH_LIST_ATTRIBUTE_INDEXED),
     FIB_PATH_LIST_FLAG_DROP      = (1 << FIB_PATH_LIST_ATTRIBUTE_DROP),
     FIB_PATH_LIST_FLAG_LOCAL     = (1 << FIB_PATH_LIST_ATTRIBUTE_LOCAL),
     FIB_PATH_LIST_FLAG_EXCLUSIVE = (1 << FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE),
     FIB_PATH_LIST_FLAG_RESOLVED  = (1 << FIB_PATH_LIST_ATTRIBUTE_RESOLVED),
     FIB_PATH_LIST_FLAG_LOOPED    = (1 << FIB_PATH_LIST_ATTRIBUTE_LOOPED),
+    FIB_PATH_LIST_FLAG_POPULAR   = (1 << FIB_PATH_LIST_ATTRIBUTE_POPULAR),
     FIB_PATH_LIST_FLAG_NO_URPF   = (1 << FIB_PATH_LIST_ATTRIBUTE_NO_URPF),
 } fib_path_list_flags_t;
 
 #define FIB_PATH_LIST_ATTRIBUTES {       		 \
     [FIB_PATH_LIST_ATTRIBUTE_SHARED]    = "shared",	 \
-    [FIB_PATH_LIST_ATTRIBUTE_INDEXED]    = "indexed",	 \
     [FIB_PATH_LIST_ATTRIBUTE_RESOLVED]  = "resolved",	 \
     [FIB_PATH_LIST_ATTRIBUTE_DROP]      = "drop",	 \
     [FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE] = "exclusive",   \
-    [FIB_PATH_LIST_ATTRIBUTE_LOCAL]     = "local",      \
-    [FIB_PATH_LIST_ATTRIBUTE_LOOPED]     = "looped",	 \
-    [FIB_PATH_LIST_ATTRIBUTE_NO_URPF]     = "no-uRPF",	 \
+    [FIB_PATH_LIST_ATTRIBUTE_LOCAL]     = "local",       \
+    [FIB_PATH_LIST_ATTRIBUTE_LOOPED]    = "looped",	 \
+    [FIB_PATH_LIST_ATTRIBUTE_POPULAR]   = "popular",	 \
+    [FIB_PATH_LIST_ATTRIBUTE_NO_URPF]   = "no-uRPF",	 \
 }
 
 #define FOR_EACH_PATH_LIST_ATTRIBUTE(_item)		\
@@ -148,6 +149,7 @@ extern int fib_path_list_recursive_loop_detect(fib_node_index_t path_list_index,
 					       fib_node_index_t **entry_indicies);
 extern u32 fib_path_list_get_resolving_interface(fib_node_index_t path_list_index);
 extern int fib_path_list_is_looped(fib_node_index_t path_list_index);
+extern int fib_path_list_is_popular(fib_node_index_t path_list_index);
 extern fib_protocol_t fib_path_list_get_proto(fib_node_index_t path_list_index);
 extern u8 * fib_path_list_format(fib_node_index_t pl_index,
 				 u8 * s);
diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c
index 0938ce9b..ff428049 100644
--- a/src/vnet/fib/fib_table.c
+++ b/src/vnet/fib/fib_table.c
@@ -608,11 +608,19 @@ fib_table_entry_path_remove2 (u32 fib_index,
 	fib_entry_src_flag_t src_flag;
         int was_sourced;
 
-	/*
+        /*
+         * if it's not sourced, then there's nowt to remove
+         */
+        was_sourced = fib_entry_is_sourced(fib_entry_index, source);
+        if (!was_sourced)
+        {
+            return;
+        }
+
+        /*
 	 * don't nobody go nowhere
 	 */
 	fib_entry_lock(fib_entry_index);
-        was_sourced = fib_entry_is_sourced(fib_entry_index, source);
 
         for (ii = 0; ii < vec_len(rpath); ii++)
         {
diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c
index cbb5640a..d3bdfa35 100644
--- a/src/vnet/fib/fib_test.c
+++ b/src/vnet/fib/fib_test.c
@@ -729,6 +729,9 @@ fib_test_v4 (void)
 	.ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02),
     };
 
+    FIB_TEST((0 == pool_elts(load_balance_map_pool)), "LB-map pool size is %d",
+    	     pool_elts(load_balance_map_pool));
+
     tm = &test_main;
 
     /* record the nubmer of load-balances in use before we start */
@@ -3090,6 +3093,43 @@ fib_test_v4 (void)
 			     NULL,
 			     FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
 
+    /*
+     * add a bunch more entries using this path combo so that we get
+     * an LB-map created.
+     */
+#define N_P 128
+    fib_prefix_t bgp_78s[N_P];
+    for (ii = 0; ii < N_P; ii++)
+    {
+        bgp_78s[ii].fp_len = 32;
+        bgp_78s[ii].fp_proto = FIB_PROTOCOL_IP4;
+        bgp_78s[ii].fp_addr.ip4.as_u32 = clib_host_to_net_u32(0x4e000000+ii);
+
+        
+        fib_table_entry_path_add(fib_index,
+                                 &bgp_78s[ii],
+                                 FIB_SOURCE_API,
+                                 FIB_ENTRY_FLAG_NONE,
+                                 FIB_PROTOCOL_IP4,
+                                 &pfx_1_1_1_3_s_32.fp_addr,
+                                 ~0,
+                                 fib_index,
+                                 1,
+                                 NULL,
+                                 FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+        fib_table_entry_path_add(fib_index,
+                                 &bgp_78s[ii],
+                                 FIB_SOURCE_API,
+                                 FIB_ENTRY_FLAG_NONE,
+                                 FIB_PROTOCOL_IP4,
+                                 &nh_1_1_1_1,
+                                 ~0,
+                                 fib_index,
+                                 1,
+                                 NULL,
+                                 FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+    }
+
     fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
     dpo = fib_entry_contribute_ip_forwarding(fei);
 
@@ -3138,6 +3178,9 @@ fib_test_v4 (void)
 				1,
 				FIB_ROUTE_PATH_FLAG_NONE);
 
+    /* suspend so the update walk kicks in */
+    vlib_process_suspend(vlib_get_main(), 1e-5);
+
     fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
     FIB_TEST(!dpo_cmp(dpo, fib_entry_contribute_ip_forwarding(fei)),
 	     "post PIC 200.200.200.200/32 was inplace modified");
@@ -3175,6 +3218,9 @@ fib_test_v4 (void)
 			     NULL,
 			     FIB_ROUTE_PATH_FLAG_NONE);
 
+    /* suspend so the update walk kicks in */
+    vlib_process_suspend(vlib_get_main(), 1e-5);
+
     FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket_i(lb, 0)),
 	     "post PIC recovery adj for 200.200.200.200/32 is recursive "
 	     "via adj for 1.1.1.1");
@@ -3201,6 +3247,20 @@ fib_test_v4 (void)
 			     1,
 			     NULL,
 			     FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+    for (ii = 0; ii < N_P; ii++)
+    {
+        fib_table_entry_path_add(fib_index,
+                                 &bgp_78s[ii],
+			     FIB_SOURCE_API,
+			     FIB_ENTRY_FLAG_NONE,
+			     FIB_PROTOCOL_IP4,
+			     &pfx_1_1_1_2_s_32.fp_addr,
+			     ~0,
+			     fib_index,
+			     1,
+			     NULL,
+			     FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+    }
 
     fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
     dpo = fib_entry_contribute_ip_forwarding(fei);
@@ -3233,6 +3293,8 @@ fib_test_v4 (void)
 				~0,
 				1,
 				FIB_ROUTE_PATH_FLAG_NONE);
+    /* suspend so the update walk kicks in */
+    vlib_process_suspend(vlib_get_main(), 1e-5);
 
     fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
     dpo = fib_entry_contribute_ip_forwarding(fei);
@@ -3270,6 +3332,16 @@ fib_test_v4 (void)
                              NULL,
                              FIB_ROUTE_PATH_FLAG_NONE);
 
+    for (ii = 0; ii < N_P; ii++)
+    {
+        fib_table_entry_delete(fib_index,
+                               &bgp_78s[ii],
+                               FIB_SOURCE_API);
+        FIB_TEST((FIB_NODE_INDEX_INVALID ==
+                  fib_table_lookup_exact_match(fib_index, &bgp_78s[ii])),
+                 "%U removed",
+                 format_fib_prefix, &bgp_78s[ii]);
+    }
     fib_table_entry_path_remove(fib_index,
                                 &bgp_200_pfx,
                                 FIB_SOURCE_API,
@@ -3303,6 +3375,8 @@ fib_test_v4 (void)
     fib_table_entry_delete(fib_index,
 			   &pfx_1_1_1_0_s_28,
 			   FIB_SOURCE_API);
+    /* suspend so the update walk kicks in */
+    vlib_process_suspend(vlib_get_main(), 1e-5);
     FIB_TEST((FIB_NODE_INDEX_INVALID ==
 	      fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28)),
 	     "1.1.1.1/28 removed");
@@ -3821,7 +3895,7 @@ fib_test_v4 (void)
     /*
      * -2 entries and -2 non-shared path-list
      */
-    FIB_TEST((0  == fib_path_list_db_size()),   "path list DB population:%d",
+    FIB_TEST((0 == fib_path_list_db_size()),   "path list DB population:%d",
     	     fib_path_list_db_size());
     FIB_TEST((PNBR == fib_path_list_pool_size()), "path list pool size is %d",
     	     fib_path_list_pool_size());
@@ -3855,7 +3929,7 @@ fib_test_v4 (void)
     FIB_TEST((ENBR-5 == pool_elts(fib_urpf_list_pool)), "uRPF pool size is %d",
     	     pool_elts(fib_urpf_list_pool));
     FIB_TEST((0 == pool_elts(load_balance_map_pool)), "LB-map pool size is %d",
-             pool_elts(load_balance_map_pool));
+    	     pool_elts(load_balance_map_pool));
     FIB_TEST((lb_count == pool_elts(load_balance_pool)), "LB pool size is %d",
              pool_elts(load_balance_pool));
 
@@ -5900,6 +5974,12 @@ fib_test_label (void)
 	    .adj = DPO_PROTO_IP4,
 	},
     };
+    fib_test_lb_bucket_t mpls_bucket_drop = {
+	.type = FT_LB_SPECIAL,
+	.special = {
+	    .adj = DPO_PROTO_MPLS,
+	},
+    };
 
     fib_table_entry_path_remove(fib_index,
 				&pfx_1_1_1_1_s_32,
@@ -5932,9 +6012,9 @@ fib_test_label (void)
 			   &pfx_24001_neos);
     FIB_TEST(fib_test_validate_entry(fei, 
 				     FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
-				      1,
-				      &bucket_drop),
-	     "24001/eos LB 1 buckets via: DROP");
+                                     1,
+                                     &mpls_bucket_drop),
+	     "24001/neos LB 1 buckets via: DROP");
 
     /*
      * add back the path with the valid label
@@ -7707,6 +7787,12 @@ lfib_test (void)
      * A recursive via a label that does not exist
      */
     fib_test_lb_bucket_t bucket_drop = {
+	.type = FT_LB_SPECIAL,
+	.special = {
+	    .adj = DPO_PROTO_IP4,
+	},
+    };
+    fib_test_lb_bucket_t mpls_bucket_drop = {
 	.type = FT_LB_SPECIAL,
 	.special = {
 	    .adj = DPO_PROTO_MPLS,
@@ -7735,7 +7821,12 @@ lfib_test (void)
 				     FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
 				     1,
 				     &bucket_drop),
-	     "2.2.2.4/32 LB 1 buckets via: ip4-DROP");
+	     "1200/neos LB 1 buckets via: ip4-DROP");
+    FIB_TEST(fib_test_validate_entry(lfe,
+				     FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+				     1,
+				     &mpls_bucket_drop),
+	     "1200/neos LB 1 buckets via: mpls-DROP");
 
     fib_table_entry_delete(fib_index, &pfx_2_2_2_4_s_32, FIB_SOURCE_API);
 
@@ -7940,18 +8031,19 @@ fib_test (vlib_main_t * vm,
     }
     else
     {
-        /*
-         * These walk UT aren't run as part of the full suite, since the
-         * fib-walk process must be disabled in order for the tests to work
-         *
-         * fib_test_walk();
-         */
 	res += fib_test_v4();
 	res += fib_test_v6();
 	res += fib_test_ae();
 	res += fib_test_bfd();
 	res += fib_test_label();
 	res += lfib_test();
+
+        /*
+         * fib-walk process must be disabled in order for the walk tests to work
+         */
+        fib_walk_process_disable();
+        res += fib_test_walk();
+        fib_walk_process_enable();
     }
 
     if (res)
diff --git a/src/vnet/fib/fib_walk.c b/src/vnet/fib/fib_walk.c
index 938f7b8c..c570476d 100644
--- a/src/vnet/fib/fib_walk.c
+++ b/src/vnet/fib/fib_walk.c
@@ -95,11 +95,6 @@ typedef struct fib_walk_t_
  */
 static fib_walk_t *fib_walk_pool;
 
-/**
- * @brief There's only one event type sent to the walk process
- */
-#define FIB_WALK_EVENT 0
-
 /**
  * Statistics maintained per-walk queue
  */
@@ -240,10 +235,13 @@ fib_walk_queue_get_front (fib_walk_priority_t prio)
 }
 
 static void
-fib_walk_destroy (fib_walk_t *fwalk)
+fib_walk_destroy (index_t fwi)
 {
+    fib_walk_t *fwalk;
     u32 bucket, ii;
 
+    fwalk = fib_walk_get(fwi);
+
     if (FIB_NODE_INDEX_INVALID != fwalk->fw_prio_sibling)
     {
 	fib_node_list_elt_remove(fwalk->fw_prio_sibling);
@@ -252,6 +250,12 @@ fib_walk_destroy (fib_walk_t *fwalk)
 			  fwalk->fw_parent.fnp_index,
 			  fwalk->fw_dep_sibling);
 
+    /*
+     * refetch the walk object. More walks could have been spawned as a result
+     * of releasing the lock on the parent.
+     */
+    fwalk = fib_walk_get(fwi);
+
     /*
      * add the stats to the continuous histogram collection.
      */
@@ -466,8 +470,7 @@ fib_walk_process_queues (vlib_main_t * vm,
 	     */
 	    if (FIB_WALK_ADVANCE_MORE != rc)
 	    {
-		fwalk = fib_walk_get(fwi);
-		fib_walk_destroy(fwalk);
+                fib_walk_destroy(fwi);
 		fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_COMPLETED]++;
 	    }
 	    else
@@ -510,6 +513,16 @@ that_will_do_for_now:
     return (fib_walk_sleep_duration[sleep]);
 }
 
+/**
+ * Events sent to the FIB walk process
+ */
+typedef enum fib_walk_process_event_t_
+{
+    FIB_WALK_PROCESS_EVENT_DATA,
+    FIB_WALK_PROCESS_EVENT_ENABLE,
+    FIB_WALK_PROCESS_EVENT_DISABLE,
+} fib_walk_process_event;
+
 /**
  * @brief The 'fib-walk' process's main loop.
  */
@@ -518,22 +531,47 @@ fib_walk_process (vlib_main_t * vm,
 		  vlib_node_runtime_t * node,
 		  vlib_frame_t * f)
 {
+    uword event_type, *event_data = 0;
     f64 sleep_time;
+    int enabled;
 
+    enabled = 1;
     sleep_time = fib_walk_sleep_duration[FIB_WALK_SHORT_SLEEP];
 
     while (1)
     {
-	vlib_process_wait_for_event_or_clock(vm, sleep_time);
+        /*
+         * the feature to disable/enable this walk process is only
+         * for testing purposes
+         */
+        if (enabled)
+        {
+            vlib_process_wait_for_event_or_clock(vm, sleep_time);
+        }
+        else
+        {
+            vlib_process_wait_for_event(vm);
+        }
 
-	/*
-	 * there may be lots of event queued between the processes,
-	 * but the walks we want to schedule are in the priority queues,
-	 * so we ignore the process events.
-	 */
-	vlib_process_get_events(vm, NULL);
+        event_type = vlib_process_get_events(vm, &event_data);
+        vec_reset_length(event_data);
+
+        switch (event_type)
+	{
+	case FIB_WALK_PROCESS_EVENT_ENABLE:
+            enabled = 1;
+            break;
+	case FIB_WALK_PROCESS_EVENT_DISABLE:
+            enabled = 0;
+            break;
+	default:
+            break;
+	}
 
-	sleep_time = fib_walk_process_queues(vm, quota);
+        if (enabled)
+        {
+            sleep_time = fib_walk_process_queues(vm, quota);
+        }
     }
 
     /*
@@ -610,8 +648,8 @@ fib_walk_prio_queue_enquue (fib_walk_priority_t prio,
      */
     vlib_process_signal_event(vlib_get_main(),
 			      fib_walk_process_node.index,
-			      FIB_WALK_EVENT,
-			      FIB_WALK_EVENT);
+			      FIB_WALK_PROCESS_EVENT_DATA,
+			      0);
 
     return (sibling);
 }
@@ -742,7 +780,7 @@ fib_walk_sync (fib_node_type_t parent_type,
 	    ASSERT(FIB_NODE_INDEX_INVALID != merged_walk.fnp_index);
 	    ASSERT(FIB_NODE_TYPE_WALK == merged_walk.fnp_type);
 
-	    fib_walk_destroy(fwalk);
+	    fib_walk_destroy(fwi);
 
 	    fwi = merged_walk.fnp_index;
 	    fwalk = fib_walk_get(fwi);
@@ -774,7 +812,7 @@ fib_walk_sync (fib_node_type_t parent_type,
 
     if (NULL != fwalk)
     {
-	fib_walk_destroy(fwalk);
+	fib_walk_destroy(fwi);
     }
 }
 
@@ -1106,3 +1144,47 @@ VLIB_CLI_COMMAND (fib_walk_clear_command, static) = {
     .short_help = "clear fib walk",
     .function = fib_walk_clear,
 };
+
+void
+fib_walk_process_enable (void)
+{
+    vlib_process_signal_event(vlib_get_main(),
+                              fib_walk_process_node.index,
+                              FIB_WALK_PROCESS_EVENT_ENABLE,
+                              0);
+}
+
+void
+fib_walk_process_disable (void)
+{
+    vlib_process_signal_event(vlib_get_main(),
+                              fib_walk_process_node.index,
+                              FIB_WALK_PROCESS_EVENT_DISABLE,
+                              0);
+}
+
+static clib_error_t *
+fib_walk_process_enable_disable (vlib_main_t * vm,
+                                 unformat_input_t * input,
+                                 vlib_cli_command_t * cmd)
+{
+    if (unformat (input, "enable"))
+    {
+        fib_walk_process_enable();
+    }
+    else if (unformat (input, "disable"))
+    {
+        fib_walk_process_disable();
+    }
+    else
+    {
+        return clib_error_return(0, "choose enable or disable");
+    }
+    return (NULL);
+}
+
+VLIB_CLI_COMMAND (fib_walk_process_command, static) = {
+    .path = "test fib-walk-process",
+    .short_help = "test fib-walk-process [enable|disable]",
+    .function = fib_walk_process_enable_disable,
+};
diff --git a/src/vnet/fib/fib_walk.h b/src/vnet/fib/fib_walk.h
index 7413d8a2..fdf2f10c 100644
--- a/src/vnet/fib/fib_walk.h
+++ b/src/vnet/fib/fib_walk.h
@@ -54,5 +54,8 @@ extern void fib_walk_sync(fib_node_type_t parent_type,
 
 extern u8* format_fib_walk_priority(u8 *s, va_list ap);
 
+extern void fib_walk_process_enable(void);
+extern void fib_walk_process_disable(void);
+
 #endif
 
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 0f562037..697d2169 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -49,6 +49,7 @@
 #include <vnet/fib/fib_urpf_list.h>	/* for FIB uRPF check */
 #include <vnet/fib/ip4_fib.h>
 #include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
 #include <vnet/dpo/classify_dpo.h>
 #include <vnet/mfib/mfib_table.h>	/* for mFIB table and entry creation */
 
@@ -89,7 +90,6 @@ ip4_lookup_inline (vlib_main_t * vm,
 	{
 	  vlib_buffer_t *p0, *p1, *p2, *p3;
 	  ip4_header_t *ip0, *ip1, *ip2, *ip3;
-	  __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
 	  ip_lookup_next_t next0, next1, next2, next3;
 	  const load_balance_t *lb0, *lb1, *lb2, *lb3;
 	  ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
@@ -188,11 +188,6 @@ ip4_lookup_inline (vlib_main_t * vm,
 	      leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
 	    }
 
-	  tcp0 = (void *) (ip0 + 1);
-	  tcp1 = (void *) (ip1 + 1);
-	  tcp2 = (void *) (ip2 + 1);
-	  tcp3 = (void *) (ip3 + 1);
-
 	  if (!lookup_for_responses_to_locally_received_packets)
 	    {
 	      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
@@ -230,6 +225,15 @@ ip4_lookup_inline (vlib_main_t * vm,
 	  lb2 = load_balance_get (lb_index2);
 	  lb3 = load_balance_get (lb_index3);
 
+	  ASSERT (lb0->lb_n_buckets > 0);
+	  ASSERT (is_pow2 (lb0->lb_n_buckets));
+	  ASSERT (lb1->lb_n_buckets > 0);
+	  ASSERT (is_pow2 (lb1->lb_n_buckets));
+	  ASSERT (lb2->lb_n_buckets > 0);
+	  ASSERT (is_pow2 (lb2->lb_n_buckets));
+	  ASSERT (lb3->lb_n_buckets > 0);
+	  ASSERT (is_pow2 (lb3->lb_n_buckets));
+
 	  /* Use flow hash to compute multipath adjacency. */
 	  hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
 	  hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
@@ -240,47 +244,57 @@ ip4_lookup_inline (vlib_main_t * vm,
 	      flow_hash_config0 = lb0->lb_hash_config;
 	      hash_c0 = vnet_buffer (p0)->ip.flow_hash =
 		ip4_compute_flow_hash (ip0, flow_hash_config0);
+	      dpo0 =
+		load_balance_get_fwd_bucket (lb0,
+					     (hash_c0 &
+					      (lb0->lb_n_buckets_minus_1)));
+	    }
+	  else
+	    {
+	      dpo0 = load_balance_get_bucket_i (lb0, 0);
 	    }
 	  if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
 	    {
 	      flow_hash_config1 = lb1->lb_hash_config;
 	      hash_c1 = vnet_buffer (p1)->ip.flow_hash =
 		ip4_compute_flow_hash (ip1, flow_hash_config1);
+	      dpo1 =
+		load_balance_get_fwd_bucket (lb1,
+					     (hash_c1 &
+					      (lb1->lb_n_buckets_minus_1)));
+	    }
+	  else
+	    {
+	      dpo1 = load_balance_get_bucket_i (lb1, 0);
 	    }
 	  if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
 	    {
 	      flow_hash_config2 = lb2->lb_hash_config;
 	      hash_c2 = vnet_buffer (p2)->ip.flow_hash =
 		ip4_compute_flow_hash (ip2, flow_hash_config2);
+	      dpo2 =
+		load_balance_get_fwd_bucket (lb2,
+					     (hash_c2 &
+					      (lb2->lb_n_buckets_minus_1)));
+	    }
+	  else
+	    {
+	      dpo2 = load_balance_get_bucket_i (lb2, 0);
 	    }
 	  if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
 	    {
 	      flow_hash_config3 = lb3->lb_hash_config;
 	      hash_c3 = vnet_buffer (p3)->ip.flow_hash =
 		ip4_compute_flow_hash (ip3, flow_hash_config3);
+	      dpo3 =
+		load_balance_get_fwd_bucket (lb3,
+					     (hash_c3 &
+					      (lb3->lb_n_buckets_minus_1)));
+	    }
+	  else
+	    {
+	      dpo3 = load_balance_get_bucket_i (lb3, 0);
 	    }
-
-	  ASSERT (lb0->lb_n_buckets > 0);
-	  ASSERT (is_pow2 (lb0->lb_n_buckets));
-	  ASSERT (lb1->lb_n_buckets > 0);
-	  ASSERT (is_pow2 (lb1->lb_n_buckets));
-	  ASSERT (lb2->lb_n_buckets > 0);
-	  ASSERT (is_pow2 (lb2->lb_n_buckets));
-	  ASSERT (lb3->lb_n_buckets > 0);
-	  ASSERT (is_pow2 (lb3->lb_n_buckets));
-
-	  dpo0 = load_balance_get_bucket_i (lb0,
-					    (hash_c0 &
-					     (lb0->lb_n_buckets_minus_1)));
-	  dpo1 = load_balance_get_bucket_i (lb1,
-					    (hash_c1 &
-					     (lb1->lb_n_buckets_minus_1)));
-	  dpo2 = load_balance_get_bucket_i (lb2,
-					    (hash_c2 &
-					     (lb2->lb_n_buckets_minus_1)));
-	  dpo3 = load_balance_get_bucket_i (lb3,
-					    (hash_c3 &
-					     (lb3->lb_n_buckets_minus_1)));
 
 	  next0 = dpo0->dpoi_next_node;
 	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
@@ -293,20 +307,16 @@ ip4_lookup_inline (vlib_main_t * vm,
 
 	  vlib_increment_combined_counter
 	    (cm, thread_index, lb_index0, 1,
-	     vlib_buffer_length_in_chain (vm, p0)
-	     + sizeof (ethernet_header_t));
+	     vlib_buffer_length_in_chain (vm, p0));
 	  vlib_increment_combined_counter
 	    (cm, thread_index, lb_index1, 1,
-	     vlib_buffer_length_in_chain (vm, p1)
-	     + sizeof (ethernet_header_t));
+	     vlib_buffer_length_in_chain (vm, p1));
 	  vlib_increment_combined_counter
 	    (cm, thread_index, lb_index2, 1,
-	     vlib_buffer_length_in_chain (vm, p2)
-	     + sizeof (ethernet_header_t));
+	     vlib_buffer_length_in_chain (vm, p2));
 	  vlib_increment_combined_counter
 	    (cm, thread_index, lb_index3, 1,
-	     vlib_buffer_length_in_chain (vm, p3)
-	     + sizeof (ethernet_header_t));
+	     vlib_buffer_length_in_chain (vm, p3));
 
 	  vlib_validate_buffer_enqueue_x4 (vm, node, next,
 					   to_next, n_left_to_next,
@@ -318,7 +328,6 @@ ip4_lookup_inline (vlib_main_t * vm,
 	{
 	  vlib_buffer_t *p0;
 	  ip4_header_t *ip0;
-	  __attribute__ ((unused)) tcp_header_t *tcp0;
 	  ip_lookup_next_t next0;
 	  const load_balance_t *lb0;
 	  ip4_fib_mtrie_t *mtrie0;
@@ -352,8 +361,6 @@ ip4_lookup_inline (vlib_main_t * vm,
 	      leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
 	    }
 
-	  tcp0 = (void *) (ip0 + 1);
-
 	  if (!lookup_for_responses_to_locally_received_packets)
 	    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
 
@@ -371,6 +378,9 @@ ip4_lookup_inline (vlib_main_t * vm,
 	  ASSERT (lbi0);
 	  lb0 = load_balance_get (lbi0);
 
+	  ASSERT (lb0->lb_n_buckets > 0);
+	  ASSERT (is_pow2 (lb0->lb_n_buckets));
+
 	  /* Use flow hash to compute multipath adjacency. */
 	  hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
 	  if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
@@ -379,20 +389,22 @@ ip4_lookup_inline (vlib_main_t * vm,
 
 	      hash_c0 = vnet_buffer (p0)->ip.flow_hash =
 		ip4_compute_flow_hash (ip0, flow_hash_config0);
+	      dpo0 =
+		load_balance_get_fwd_bucket (lb0,
+					     (hash_c0 &
+					      (lb0->lb_n_buckets_minus_1)));
+	    }
+	  else
+	    {
+	      dpo0 = load_balance_get_bucket_i (lb0, 0);
 	    }
-
-	  ASSERT (lb0->lb_n_buckets > 0);
-	  ASSERT (is_pow2 (lb0->lb_n_buckets));
-
-	  dpo0 = load_balance_get_bucket_i (lb0,
-					    (hash_c0 &
-					     (lb0->lb_n_buckets_minus_1)));
 
 	  next0 = dpo0->dpoi_next_node;
 	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
-	  vlib_increment_combined_counter
-	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	  vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
+					   vlib_buffer_length_in_chain (vm,
+									p0));
 
 	  from += 1;
 	  to_next += 1;
@@ -555,6 +567,12 @@ ip4_load_balance (vlib_main_t * vm,
 		  hc0 = vnet_buffer (p0)->ip.flow_hash =
 		    ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
 		}
+	      dpo0 = load_balance_get_fwd_bucket
+		(lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
+	    }
+	  else
+	    {
+	      dpo0 = load_balance_get_bucket_i (lb0, 0);
 	    }
 	  if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
 	    {
@@ -568,14 +586,13 @@ ip4_load_balance (vlib_main_t * vm,
 		  hc1 = vnet_buffer (p1)->ip.flow_hash =
 		    ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
 		}
+	      dpo1 = load_balance_get_fwd_bucket
+		(lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
+	    }
+	  else
+	    {
+	      dpo1 = load_balance_get_bucket_i (lb1, 0);
 	    }
-
-	  dpo0 =
-	    load_balance_get_bucket_i (lb0,
-				       hc0 & (lb0->lb_n_buckets_minus_1));
-	  dpo1 =
-	    load_balance_get_bucket_i (lb1,
-				       hc1 & (lb1->lb_n_buckets_minus_1));
 
 	  next0 = dpo0->dpoi_next_node;
 	  next1 = dpo1->dpoi_next_node;
@@ -629,11 +646,13 @@ ip4_load_balance (vlib_main_t * vm,
 		  hc0 = vnet_buffer (p0)->ip.flow_hash =
 		    ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
 		}
+	      dpo0 = load_balance_get_fwd_bucket
+		(lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
+	    }
+	  else
+	    {
+	      dpo0 = load_balance_get_bucket_i (lb0, 0);
 	    }
-
-	  dpo0 =
-	    load_balance_get_bucket_i (lb0,
-				       hc0 & (lb0->lb_n_buckets_minus_1));
 
 	  next0 = dpo0->dpoi_next_node;
 	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 98bfd4d1..3bc07d0e 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -45,7 +45,7 @@
 #include <vnet/fib/fib_urpf_list.h>	/* for FIB uRPF check */
 #include <vnet/fib/ip6_fib.h>
 #include <vnet/mfib/ip6_mfib.h>
-#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
 #include <vnet/dpo/classify_dpo.h>
 
 #include <vppinfra/bihash_template.c>
@@ -138,6 +138,10 @@ ip6_lookup_inline (vlib_main_t * vm,
 
 	  lb0 = load_balance_get (lbi0);
 	  lb1 = load_balance_get (lbi1);
+	  ASSERT (lb0->lb_n_buckets > 0);
+	  ASSERT (lb1->lb_n_buckets > 0);
+	  ASSERT (is_pow2 (lb0->lb_n_buckets));
+	  ASSERT (is_pow2 (lb1->lb_n_buckets));
 
 	  vnet_buffer (p0)->ip.flow_hash = vnet_buffer (p1)->ip.flow_hash = 0;
 
@@ -146,25 +150,29 @@ ip6_lookup_inline (vlib_main_t * vm,
 	      flow_hash_config0 = lb0->lb_hash_config;
 	      vnet_buffer (p0)->ip.flow_hash =
 		ip6_compute_flow_hash (ip0, flow_hash_config0);
+	      dpo0 =
+		load_balance_get_fwd_bucket (lb0,
+					     (vnet_buffer (p0)->ip.flow_hash &
+					      (lb0->lb_n_buckets_minus_1)));
+	    }
+	  else
+	    {
+	      dpo0 = load_balance_get_bucket_i (lb0, 0);
 	    }
 	  if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
 	    {
 	      flow_hash_config1 = lb1->lb_hash_config;
 	      vnet_buffer (p1)->ip.flow_hash =
 		ip6_compute_flow_hash (ip1, flow_hash_config1);
+	      dpo1 =
+		load_balance_get_fwd_bucket (lb1,
+					     (vnet_buffer (p1)->ip.flow_hash &
+					      (lb1->lb_n_buckets_minus_1)));
+	    }
+	  else
+	    {
+	      dpo1 = load_balance_get_bucket_i (lb1, 0);
 	    }
-
-	  ASSERT (lb0->lb_n_buckets > 0);
-	  ASSERT (lb1->lb_n_buckets > 0);
-	  ASSERT (is_pow2 (lb0->lb_n_buckets));
-	  ASSERT (is_pow2 (lb1->lb_n_buckets));
-	  dpo0 = load_balance_get_bucket_i (lb0,
-					    (vnet_buffer (p0)->ip.flow_hash &
-					     lb0->lb_n_buckets_minus_1));
-	  dpo1 = load_balance_get_bucket_i (lb1,
-					    (vnet_buffer (p1)->ip.flow_hash &
-					     lb1->lb_n_buckets_minus_1));
-
 	  next0 = dpo0->dpoi_next_node;
 	  next1 = dpo1->dpoi_next_node;
 
@@ -266,16 +274,24 @@ ip6_lookup_inline (vlib_main_t * vm,
 	  lb0 = load_balance_get (lbi0);
 
 	  vnet_buffer (p0)->ip.flow_hash = 0;
+	  ASSERT (lb0->lb_n_buckets > 0);
+	  ASSERT (is_pow2 (lb0->lb_n_buckets));
 
 	  if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
 	    {
 	      flow_hash_config0 = lb0->lb_hash_config;
 	      vnet_buffer (p0)->ip.flow_hash =
 		ip6_compute_flow_hash (ip0, flow_hash_config0);
+	      dpo0 =
+		load_balance_get_fwd_bucket (lb0,
+					     (vnet_buffer (p0)->ip.flow_hash &
+					      (lb0->lb_n_buckets_minus_1)));
+	    }
+	  else
+	    {
+	      dpo0 = load_balance_get_bucket_i (lb0, 0);
 	    }
 
-	  ASSERT (lb0->lb_n_buckets > 0);
-	  ASSERT (is_pow2 (lb0->lb_n_buckets));
 	  dpo0 = load_balance_get_bucket_i (lb0,
 					    (vnet_buffer (p0)->ip.flow_hash &
 					     lb0->lb_n_buckets_minus_1));
@@ -337,10 +353,18 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
     {
       fib_node_index_t fei;
 
-      fei = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), FIB_PROTOCOL_IP6, NULL,	/* No next-hop address */
-					     sw_if_index, ~0,	// invalid FIB index
-					     1, NULL,	// no label stack
-					     FIB_ROUTE_PATH_FLAG_NONE);
+      fei = fib_table_entry_update_one_path (fib_index,
+					     &pfx,
+					     FIB_SOURCE_INTERFACE,
+					     (FIB_ENTRY_FLAG_CONNECTED |
+					      FIB_ENTRY_FLAG_ATTACHED),
+					     FIB_PROTOCOL_IP6,
+					     /* No next-hop address */
+					     NULL, sw_if_index,
+					     /* invalid FIB index */
+					     ~0, 1,
+					     /* no label stack */
+					     NULL, FIB_ROUTE_PATH_FLAG_NONE);
       a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
     }
 
@@ -366,7 +390,13 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
 	}
     }
 
-  fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), FIB_PROTOCOL_IP6, &pfx.fp_addr, sw_if_index, ~0,	// invalid FIB index
+  fib_table_entry_update_one_path (fib_index, &pfx,
+				   FIB_SOURCE_INTERFACE,
+				   (FIB_ENTRY_FLAG_CONNECTED |
+				    FIB_ENTRY_FLAG_LOCAL),
+				   FIB_PROTOCOL_IP6,
+				   &pfx.fp_addr,
+				   sw_if_index, ~0,
 				   1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
 }
 
@@ -780,6 +810,14 @@ ip6_load_balance (vlib_main_t * vm,
 		  hc0 = vnet_buffer (p0)->ip.flow_hash =
 		    ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
 		}
+	      dpo0 =
+		load_balance_get_fwd_bucket (lb0,
+					     (hc0 &
+					      lb0->lb_n_buckets_minus_1));
+	    }
+	  else
+	    {
+	      dpo0 = load_balance_get_bucket_i (lb0, 0);
 	    }
 	  if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
 	    {
@@ -793,14 +831,15 @@ ip6_load_balance (vlib_main_t * vm,
 		  hc1 = vnet_buffer (p1)->ip.flow_hash =
 		    ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
 		}
+	      dpo1 =
+		load_balance_get_fwd_bucket (lb1,
+					     (hc1 &
+					      lb1->lb_n_buckets_minus_1));
+	    }
+	  else
+	    {
+	      dpo1 = load_balance_get_bucket_i (lb1, 0);
 	    }
-
-	  dpo0 =
-	    load_balance_get_bucket_i (lb0,
-				       hc0 & (lb0->lb_n_buckets_minus_1));
-	  dpo1 =
-	    load_balance_get_bucket_i (lb1,
-				       hc1 & (lb1->lb_n_buckets_minus_1));
 
 	  next0 = dpo0->dpoi_next_node;
 	  next1 = dpo1->dpoi_next_node;
@@ -869,10 +908,15 @@ ip6_load_balance (vlib_main_t * vm,
 		  hc0 = vnet_buffer (p0)->ip.flow_hash =
 		    ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
 		}
+	      dpo0 =
+		load_balance_get_fwd_bucket (lb0,
+					     (hc0 &
+					      lb0->lb_n_buckets_minus_1));
+	    }
+	  else
+	    {
+	      dpo0 = load_balance_get_bucket_i (lb0, 0);
 	    }
-	  dpo0 =
-	    load_balance_get_bucket_i (lb0,
-				       hc0 & (lb0->lb_n_buckets_minus_1));
 
 	  next0 = dpo0->dpoi_next_node;
 	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
index 31182770..ee80ee3d 100644
--- a/src/vnet/ip/ip6_neighbor.c
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -630,7 +630,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
 	  n->fib_entry_index =
 	    fib_table_entry_update_one_path (fib_index, &pfx,
 					     FIB_SOURCE_ADJ,
-					     FIB_ENTRY_FLAG_NONE,
+					     FIB_ENTRY_FLAG_ATTACHED,
 					     FIB_PROTOCOL_IP6, &pfx.fp_addr,
 					     n->key.sw_if_index, ~0, 1, NULL,
 					     FIB_ROUTE_PATH_FLAG_NONE);
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index 3c6be7e8..4b8a3eef 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -19,7 +19,7 @@
 #include <vnet/pg/pg.h>
 #include <vnet/mpls/mpls.h>
 #include <vnet/fib/mpls_fib.h>
-#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
 #include <vnet/dpo/replicate_dpo.h>
 
 /**
@@ -47,7 +47,7 @@ format_mpls_lookup_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *);
 
-  s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %d"
+  s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %#x "
               "label %d eos %d", 
               t->next_index, t->lfib_index, t->lb_index, t->hash,
               vnet_mpls_uc_get_label(
@@ -64,8 +64,15 @@ always_inline u32
 mpls_compute_flow_hash (const mpls_unicast_header_t * hdr,
                         flow_hash_config_t flow_hash_config)
 {
-    // FIXME
-    return (vnet_mpls_uc_get_label(hdr->label_exp_s_ttl));
+    /*
+     * improve this to include:
+     *  - all labels in the stack.
+     *  - recognise entropy labels.
+     *
+     * We need to byte swap so we use the numerical value, i.e. an odd label
+     * leads to an odd bucket, as opposed to a label above and below value X.
+     */
+    return (vnet_mpls_uc_get_label(clib_net_to_host_u32(hdr->label_exp_s_ttl)));
 }
 
 static inline uword
@@ -179,17 +186,21 @@ mpls_lookup (vlib_main_t * vm,
           else
           {
               lb0 = load_balance_get(lbi0);
+              ASSERT (lb0->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb0->lb_n_buckets));
 
               if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
               {
                   hash_c0 = vnet_buffer (b0)->ip.flow_hash =
                       mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+                  dpo0 = load_balance_get_fwd_bucket
+                      (lb0,
+                       (hash_c0 & (lb0->lb_n_buckets_minus_1)));
+              }
+              else
+              {
+                  dpo0 = load_balance_get_bucket_i (lb0, 0);
               }
-              ASSERT (lb0->lb_n_buckets > 0);
-              ASSERT (is_pow2 (lb0->lb_n_buckets));
-              dpo0 = load_balance_get_bucket_i(lb0,
-                                               (hash_c0 &
-                                                (lb0->lb_n_buckets_minus_1)));
               next0 = dpo0->dpoi_next_node;
 
               vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
@@ -207,17 +218,21 @@ mpls_lookup (vlib_main_t * vm,
           else
           {
               lb1 = load_balance_get(lbi1);
+              ASSERT (lb1->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb1->lb_n_buckets));
 
               if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
               {
                   hash_c1 = vnet_buffer (b1)->ip.flow_hash =
                       mpls_compute_flow_hash(h1, lb1->lb_hash_config);
+                  dpo1 = load_balance_get_fwd_bucket
+                      (lb1,
+                       (hash_c1 & (lb1->lb_n_buckets_minus_1)));
+              }
+              else
+              {
+                  dpo1 = load_balance_get_bucket_i (lb1, 0);
               }
-              ASSERT (lb1->lb_n_buckets > 0);
-              ASSERT (is_pow2 (lb1->lb_n_buckets));
-              dpo1 = load_balance_get_bucket_i(lb1,
-                                               (hash_c1 &
-                                                (lb1->lb_n_buckets_minus_1)));
               next1 = dpo1->dpoi_next_node;
 
               vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
@@ -235,17 +250,21 @@ mpls_lookup (vlib_main_t * vm,
           else
           {
               lb2 = load_balance_get(lbi2);
+              ASSERT (lb2->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb2->lb_n_buckets));
 
               if (PREDICT_FALSE(lb2->lb_n_buckets > 1))
               {
                   hash_c2 = vnet_buffer (b2)->ip.flow_hash =
                       mpls_compute_flow_hash(h2, lb2->lb_hash_config);
+                  dpo2 = load_balance_get_fwd_bucket
+                      (lb2,
+                       (hash_c2 & (lb2->lb_n_buckets_minus_1)));
+              }
+              else
+              {
+                  dpo2 = load_balance_get_bucket_i (lb2, 0);
               }
-              ASSERT (lb2->lb_n_buckets > 0);
-              ASSERT (is_pow2 (lb2->lb_n_buckets));
-              dpo2 = load_balance_get_bucket_i(lb2,
-                                               (hash_c2 &
-                                                (lb2->lb_n_buckets_minus_1)));
               next2 = dpo2->dpoi_next_node;
 
               vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
@@ -263,17 +282,21 @@ mpls_lookup (vlib_main_t * vm,
           else
           {
               lb3 = load_balance_get(lbi3);
+              ASSERT (lb3->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb3->lb_n_buckets));
 
               if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
               {
                   hash_c3 = vnet_buffer (b3)->ip.flow_hash =
                       mpls_compute_flow_hash(h3, lb3->lb_hash_config);
+                  dpo3 = load_balance_get_fwd_bucket
+                      (lb3,
+                       (hash_c3 & (lb3->lb_n_buckets_minus_1)));
+              }
+              else
+              {
+                  dpo3 = load_balance_get_bucket_i (lb3, 0);
               }
-              ASSERT (lb3->lb_n_buckets > 0);
-              ASSERT (is_pow2 (lb3->lb_n_buckets));
-              dpo3 = load_balance_get_bucket_i(lb3,
-                                               (hash_c3 &
-                                                (lb3->lb_n_buckets_minus_1)));
               next3 = dpo3->dpoi_next_node;
 
               vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
@@ -393,20 +416,21 @@ mpls_lookup (vlib_main_t * vm,
           else
           {
               lb0 = load_balance_get(lbi0);
+              ASSERT (lb0->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb0->lb_n_buckets));
 
               if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
               {
                   hash_c0 = vnet_buffer (b0)->ip.flow_hash =
                       mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+                  dpo0 = load_balance_get_fwd_bucket
+                      (lb0,
+                       (hash_c0 & (lb0->lb_n_buckets_minus_1)));
+              }
+              else
+              {
+                  dpo0 = load_balance_get_bucket_i (lb0, 0);
               }
-
-              ASSERT (lb0->lb_n_buckets > 0);
-              ASSERT (is_pow2 (lb0->lb_n_buckets));
-
-              dpo0 = load_balance_get_bucket_i(lb0,
-                                               (hash_c0 &
-                                                (lb0->lb_n_buckets_minus_1)));
-
               next0 = dpo0->dpoi_next_node;
               vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
@@ -467,7 +491,7 @@ VLIB_REGISTER_NODE (mpls_lookup_node, static) = {
   .n_errors = MPLS_N_ERROR,
   .error_strings = mpls_error_strings,
 
-  .sibling_of = "ip4-lookup",
+  .sibling_of = "mpls-load-balance",
 
   .format_buffer = format_mpls_header,
   .format_trace = format_mpls_lookup_trace,
@@ -574,6 +598,11 @@ mpls_load_balance (vlib_main_t * vm,
               {
                   hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
               }
+              dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+          }
+          else
+          {
+              dpo0 = load_balance_get_bucket_i (lb0, 0);
           }
           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
           {
@@ -585,10 +614,12 @@ mpls_load_balance (vlib_main_t * vm,
               {
                   hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1);
               }
+              dpo1 = load_balance_get_fwd_bucket(lb1, (hc1 & lb1->lb_n_buckets_minus_1));
+          }
+          else
+          {
+              dpo1 = load_balance_get_bucket_i (lb1, 0);
           }
-
-          dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
-          dpo1 = load_balance_get_bucket_i(lb1, hc1 & (lb1->lb_n_buckets_minus_1));
 
           next0 = dpo0->dpoi_next_node;
           next1 = dpo1->dpoi_next_node;
@@ -650,9 +681,12 @@ mpls_load_balance (vlib_main_t * vm,
               {
                   hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
               }
+               dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+          }
+          else
+          {
+              dpo0 = load_balance_get_bucket_i (lb0, 0);
           }
-
-          dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
 
           next0 = dpo0->dpoi_next_node;
           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
@@ -676,9 +710,13 @@ VLIB_REGISTER_NODE (mpls_load_balance_node) = {
   .function = mpls_load_balance,
   .name = "mpls-load-balance",
   .vector_size = sizeof (u32),
-  .sibling_of = "mpls-lookup",
-
   .format_trace = format_mpls_load_balance_trace,
+  .n_next_nodes = 1,
+  .next_nodes =
+  {
+      [0] = "mpls-drop",
+  },
+
 };
 
 VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance)
diff --git a/test/test_mpls.py b/test/test_mpls.py
index 700b7091..0ad1ee69 100644
--- a/test/test_mpls.py
+++ b/test/test_mpls.py
@@ -1054,5 +1054,364 @@ class TestMPLSDisabled(VppTestCase):
         self.send_and_assert_no_replies(self.pg1, tx, "IPv6 disabled")
 
 
+class TestMPLSPIC(VppTestCase):
+    """ MPLS PIC edge convergence """
+
+    def setUp(self):
+        super(TestMPLSPIC, self).setUp()
+
+        # create 4 pg interfaces
+        self.create_pg_interfaces(range(4))
+
+        # core links
+        self.pg0.admin_up()
+        self.pg0.config_ip4()
+        self.pg0.resolve_arp()
+        self.pg0.enable_mpls()
+        self.pg1.admin_up()
+        self.pg1.config_ip4()
+        self.pg1.resolve_arp()
+        self.pg1.enable_mpls()
+
+        # VRF (customer facing) link
+        self.pg2.admin_up()
+        self.pg2.set_table_ip4(1)
+        self.pg2.config_ip4()
+        self.pg2.resolve_arp()
+        self.pg2.set_table_ip6(1)
+        self.pg2.config_ip6()
+        self.pg2.resolve_ndp()
+        self.pg3.admin_up()
+        self.pg3.set_table_ip4(1)
+        self.pg3.config_ip4()
+        self.pg3.resolve_arp()
+        self.pg3.set_table_ip6(1)
+        self.pg3.config_ip6()
+        self.pg3.resolve_ndp()
+
+    def tearDown(self):
+        super(TestMPLSPIC, self).tearDown()
+        self.pg0.disable_mpls()
+        for i in self.pg_interfaces:
+            i.unconfig_ip4()
+            i.unconfig_ip6()
+            i.set_table_ip4(0)
+            i.set_table_ip6(0)
+            i.admin_down()
+
+    def test_mpls_ibgp_pic(self):
+        """ MPLS iBGP PIC edge convergence
+
+        1) setup many iBGP VPN routes via a pair of iBGP peers.
+        2) Check ECMP forwarding to these peers
+        3) withdraw the IGP route to one of these peers.
+        4) check forwarding continues to the remaining peer
+        """
+
+        #
+        # IGP+LDP core routes
+        #
+        core_10_0_0_45 = VppIpRoute(self, "10.0.0.45", 32,
+                                    [VppRoutePath(self.pg0.remote_ip4,
+                                                  self.pg0.sw_if_index,
+                                                  labels=[45])])
+        core_10_0_0_45.add_vpp_config()
+
+        core_10_0_0_46 = VppIpRoute(self, "10.0.0.46", 32,
+                                    [VppRoutePath(self.pg1.remote_ip4,
+                                                  self.pg1.sw_if_index,
+                                                  labels=[46])])
+        core_10_0_0_46.add_vpp_config()
+
+        #
+        # Lots of VPN routes. We need more than 64 so VPP will build
+        # the fast convergence indirection
+        #
+        vpn_routes = []
+        pkts = []
+        for ii in range(64):
+            dst = "192.168.1.%d" % ii
+            vpn_routes.append(VppIpRoute(self, dst, 32,
+                                         [VppRoutePath("10.0.0.45",
+                                                       0xffffffff,
+                                                       labels=[145],
+                                                       is_resolve_host=1),
+                                          VppRoutePath("10.0.0.46",
+                                                       0xffffffff,
+                                                       labels=[146],
+                                                       is_resolve_host=1)],
+                                         table_id=1))
+            vpn_routes[ii].add_vpp_config()
+
+            pkts.append(Ether(dst=self.pg2.local_mac,
+                              src=self.pg2.remote_mac) /
+                        IP(src=self.pg2.remote_ip4, dst=dst) /
+                        UDP(sport=1234, dport=1234) /
+                        Raw('\xa5' * 100))
+
+        #
+        # Send the packet stream (one pkt to each VPN route)
+        #  - expect a 50-50 split of the traffic
+        #
+        self.pg2.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg0._get_capture(1)
+        rx1 = self.pg1._get_capture(1)
+
+        # not testing the LB hashing algorithm so we're not concerned
+        # with the split ratio, just as long as neither is 0
+        self.assertNotEqual(0, len(rx0))
+        self.assertNotEqual(0, len(rx1))
+
+        #
+        # use a test CLI command to stop the FIB walk process, this
+        # will prevent the FIB converging the VPN routes and thus allow
+        # us to probe the interim (post-fail, pre-converge) state
+        #
+        self.vapi.ppcli("test fib-walk-process disable")
+
+        #
+        # Withdraw one of the IGP routes
+        #
+        core_10_0_0_46.remove_vpp_config()
+
+        #
+        # now all packets should be forwarded through the remaining peer
+        #
+        self.vapi.ppcli("clear trace")
+        self.pg2.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg0.get_capture(len(pkts))
+
+        #
+        # enable the FIB walk process to converge the FIB
+        #
+        self.vapi.ppcli("test fib-walk-process enable")
+
+        #
+        # packets should still be forwarded through the remaining peer
+        #
+        self.pg2.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg0.get_capture(64)
+
+        #
+        # Add the IGP route back and we return to load-balancing
+        #
+        core_10_0_0_46.add_vpp_config()
+
+        self.pg2.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg0._get_capture(1)
+        rx1 = self.pg1._get_capture(1)
+        self.assertNotEqual(0, len(rx0))
+        self.assertNotEqual(0, len(rx1))
+
+    def test_mpls_ebgp_pic(self):
+        """ MPLS eBGP PIC edge convergence
+
+        1) setup many eBGP VPN routes via a pair of eBGP peers
+        2) Check ECMP forwarding to these peers
+        3) withdraw one eBGP path - expect LB across remaining eBGP
+        """
+
+        #
+        # Lots of VPN routes. We need more than 64 so VPP will build
+        # the fast convergence indirection
+        #
+        vpn_routes = []
+        vpn_bindings = []
+        pkts = []
+        for ii in range(64):
+            dst = "192.168.1.%d" % ii
+            local_label = 1600 + ii
+            vpn_routes.append(VppIpRoute(self, dst, 32,
+                                         [VppRoutePath(self.pg2.remote_ip4,
+                                                       0xffffffff,
+                                                       nh_table_id=1,
+                                                       is_resolve_attached=1),
+                                          VppRoutePath(self.pg3.remote_ip4,
+                                                       0xffffffff,
+                                                       nh_table_id=1,
+                                                       is_resolve_attached=1)],
+                                         table_id=1))
+            vpn_routes[ii].add_vpp_config()
+
+            vpn_bindings.append(VppMplsIpBind(self, local_label, dst, 32,
+                                              ip_table_id=1))
+            vpn_bindings[ii].add_vpp_config()
+
+            pkts.append(Ether(dst=self.pg0.local_mac,
+                              src=self.pg0.remote_mac) /
+                        MPLS(label=local_label, ttl=64) /
+                        IP(src=self.pg0.remote_ip4, dst=dst) /
+                        UDP(sport=1234, dport=1234) /
+                        Raw('\xa5' * 100))
+
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg2._get_capture(1)
+        rx1 = self.pg3._get_capture(1)
+        self.assertNotEqual(0, len(rx0))
+        self.assertNotEqual(0, len(rx1))
+
+        #
+        # use a test CLI command to stop the FIB walk process, this
+        # will prevent the FIB converging the VPN routes and thus allow
+        # us to probe the interim (post-fail, pre-converge) state
+        #
+        self.vapi.ppcli("test fib-walk-process disable")
+
+        #
+        # withdraw the connected prefix on the interface.
+        #
+        self.pg2.unconfig_ip4()
+
+        #
+        # now all packets should be forwarded through the remaining peer
+        #
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg3.get_capture(len(pkts))
+
+        #
+        # enable the FIB walk process to converge the FIB
+        #
+        self.vapi.ppcli("test fib-walk-process enable")
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg3.get_capture(len(pkts))
+
+        #
+        # put the connecteds back
+        #
+        self.pg2.config_ip4()
+
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg2._get_capture(1)
+        rx1 = self.pg3._get_capture(1)
+        self.assertNotEqual(0, len(rx0))
+        self.assertNotEqual(0, len(rx1))
+
+    def test_mpls_v6_ebgp_pic(self):
+        """ MPLSv6 eBGP PIC edge convergence
+
+        1) setup many eBGP VPNv6 routes via a pair of eBGP peers
+        2) Check ECMP forwarding to these peers
+        3) withdraw one eBGP path - expect LB across remaining eBGP
+        """
+
+        #
+        # Lots of VPN routes. We need more than 64 so VPP will build
+        # the fast convergence indirection
+        #
+        vpn_routes = []
+        vpn_bindings = []
+        pkts = []
+        for ii in range(64):
+            dst = "3000::%d" % ii
+            local_label = 1600 + ii
+            vpn_routes.append(VppIpRoute(self, dst, 128,
+                                         [VppRoutePath(self.pg2.remote_ip6,
+                                                       0xffffffff,
+                                                       nh_table_id=1,
+                                                       is_resolve_attached=1,
+                                                       is_ip6=1),
+                                          VppRoutePath(self.pg3.remote_ip6,
+                                                       0xffffffff,
+                                                       nh_table_id=1,
+                                                       is_ip6=1,
+                                                       is_resolve_attached=1)],
+                                         table_id=1,
+                                         is_ip6=1))
+            vpn_routes[ii].add_vpp_config()
+
+            vpn_bindings.append(VppMplsIpBind(self, local_label, dst, 128,
+                                              ip_table_id=1,
+                                              is_ip6=1))
+            vpn_bindings[ii].add_vpp_config()
+
+            pkts.append(Ether(dst=self.pg0.local_mac,
+                              src=self.pg0.remote_mac) /
+                        MPLS(label=local_label, ttl=64) /
+                        IPv6(src=self.pg0.remote_ip6, dst=dst) /
+                        UDP(sport=1234, dport=1234) /
+                        Raw('\xa5' * 100))
+
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg2._get_capture(1)
+        rx1 = self.pg3._get_capture(1)
+        self.assertNotEqual(0, len(rx0))
+        self.assertNotEqual(0, len(rx1))
+
+        #
+        # use a test CLI command to stop the FIB walk process, this
+        # will prevent the FIB converging the VPN routes and thus allow
+        # us to probe the interim (post-fail, pre-converge) state
+        #
+        self.vapi.ppcli("test fib-walk-process disable")
+
+        #
+        # withdraw the connected prefix on the interface.
+        # and shutdown the interface so the ND cache is flushed.
+        #
+        self.pg2.unconfig_ip6()
+        self.pg2.admin_down()
+
+        #
+        # now all packets should be forwarded through the remaining peer
+        #
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg3.get_capture(len(pkts))
+
+        #
+        # enable the FIB walk process to converge the FIB
+        #
+        self.vapi.ppcli("test fib-walk-process enable")
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg3.get_capture(len(pkts))
+
+        #
+        # put the connecteds back
+        #
+        self.pg2.admin_up()
+        self.pg2.config_ip6()
+
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg2._get_capture(1)
+        rx1 = self.pg3._get_capture(1)
+        self.assertNotEqual(0, len(rx0))
+        self.assertNotEqual(0, len(rx1))
+
+
 if __name__ == '__main__':
     unittest.main(testRunner=VppTestRunner)
diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py
index d6146f28..b68e2105 100644
--- a/test/vpp_ip_route.py
+++ b/test/vpp_ip_route.py
@@ -57,7 +57,9 @@ class VppRoutePath(object):
             nh_via_label=MPLS_LABEL_INVALID,
             is_ip6=0,
             rpf_id=0,
-            is_interface_rx=0):
+            is_interface_rx=0,
+            is_resolve_host=0,
+            is_resolve_attached=0):
         self.nh_itf = nh_sw_if_index
         self.nh_table_id = nh_table_id
         self.nh_via_label = nh_via_label
@@ -68,6 +70,8 @@ class VppRoutePath(object):
             self.nh_addr = inet_pton(AF_INET6, nh_addr)
         else:
             self.nh_addr = inet_pton(AF_INET, nh_addr)
+        self.is_resolve_host = is_resolve_host
+        self.is_resolve_attached = is_resolve_attached
         self.is_interface_rx = is_interface_rx
         self.is_rpf_id = 0
         if rpf_id != 0:
@@ -136,7 +140,11 @@ class VppIpRoute(VppObject):
                     next_hop_n_out_labels=len(
                         path.nh_labels),
                     next_hop_via_label=path.nh_via_label,
-                    is_ipv6=self.is_ip6)
+                    next_hop_table_id=path.nh_table_id,
+                    is_ipv6=self.is_ip6,
+                    is_resolve_host=path.is_resolve_host,
+                    is_resolve_attached=path.is_resolve_attached,
+                    is_multipath=1 if len(self.paths) > 1 else 0)
         self._test.registry.register(self, self._test.logger)
 
     def remove_vpp_config(self):
@@ -154,13 +162,16 @@ class VppIpRoute(VppObject):
                 is_ipv6=self.is_ip6)
         else:
             for path in self.paths:
-                self._test.vapi.ip_add_del_route(self.dest_addr,
-                                                 self.dest_addr_len,
-                                                 path.nh_addr,
-                                                 path.nh_itf,
-                                                 table_id=self.table_id,
-                                                 is_add=0,
-                                                 is_ipv6=self.is_ip6)
+                self._test.vapi.ip_add_del_route(
+                    self.dest_addr,
+                    self.dest_addr_len,
+                    path.nh_addr,
+                    path.nh_itf,
+                    table_id=self.table_id,
+                    next_hop_table_id=path.nh_table_id,
+                    next_hop_via_label=path.nh_via_label,
+                    is_add=0,
+                    is_ipv6=self.is_ip6)
 
     def query_vpp_config(self):
         return find_route(self._test,
@@ -318,33 +329,41 @@ class VppMplsIpBind(VppObject):
     """
 
     def __init__(self, test, local_label, dest_addr, dest_addr_len,
-                 table_id=0, ip_table_id=0):
+                 table_id=0, ip_table_id=0, is_ip6=0):
         self._test = test
-        self.dest_addr = inet_pton(AF_INET, dest_addr)
         self.dest_addr_len = dest_addr_len
+        self.dest_addr = dest_addr
         self.local_label = local_label
         self.table_id = table_id
         self.ip_table_id = ip_table_id
+        self.is_ip6 = is_ip6
+        if is_ip6:
+            self.dest_addrn = inet_pton(AF_INET6, dest_addr)
+        else:
+            self.dest_addrn = inet_pton(AF_INET, dest_addr)
 
     def add_vpp_config(self):
         self._test.vapi.mpls_ip_bind_unbind(self.local_label,
-                                            self.dest_addr,
+                                            self.dest_addrn,
                                             self.dest_addr_len,
                                             table_id=self.table_id,
-                                            ip_table_id=self.ip_table_id)
+                                            ip_table_id=self.ip_table_id,
+                                            is_ip4=(self.is_ip6 == 0))
         self._test.registry.register(self, self._test.logger)
 
     def remove_vpp_config(self):
         self._test.vapi.mpls_ip_bind_unbind(self.local_label,
-                                            self.dest_addr,
+                                            self.dest_addrn,
                                             self.dest_addr_len,
-                                            is_bind=0)
+                                            table_id=self.table_id,
+                                            ip_table_id=self.ip_table_id,
+                                            is_bind=0,
+                                            is_ip4=(self.is_ip6 == 0))
 
     def query_vpp_config(self):
         dump = self._test.vapi.mpls_fib_dump()
         for e in dump:
             if self.local_label == e.label \
-               and self.eos_bit == e.eos_bit \
                and self.table_id == e.table_id:
                 return True
         return False
@@ -357,7 +376,7 @@ class VppMplsIpBind(VppObject):
                 % (self.table_id,
                    self.local_label,
                    self.ip_table_id,
-                   inet_ntop(AF_INET, self.dest_addr),
+                   self.dest_addr,
                    self.dest_addr_len))
 
 
-- 
cgit 1.2.3-korg


From 5d73eecd63018db69b10bf56adeec9cc5cf92790 Mon Sep 17 00:00:00 2001
From: Pablo Camarillo <pcamaril@cisco.com>
Date: Mon, 24 Apr 2017 17:51:56 +0200
Subject: First commit SR MPLS

Change-Id: I961685a2a0e4c314049444c64eb6ccf877c278dd
Signed-off-by: Pablo Camarillo <pcamaril@cisco.com>
---
 MAINTAINERS                                        |    6 +-
 doxygen/user_doc.md                                |    1 +
 .../srv6-sample-localsid/srv6_localsid_sample.h    |    4 +-
 .../srv6_sample_localsid_doc.md                    |    2 +-
 src/plugins/ioam/ip6/ioam_cache.h                  |    2 +-
 .../ioam/ip6/ioam_cache_tunnel_select_node.c       |    2 +-
 src/plugins/ioam/udp-ping/udp_ping_node.c          |    2 +-
 src/scripts/vnet/sr/left-linux-ping.sh             |    3 -
 src/scripts/vnet/sr/leftpeer.conf                  |   27 -
 src/scripts/vnet/sr/mcast                          |   58 -
 src/scripts/vnet/sr/right-linux-ping.sh            |    4 -
 src/scripts/vnet/sr/rightpeer.conf                 |   22 -
 src/scripts/vnet/sr/sr_mpls                        |   11 +
 src/scripts/vnet/sr/srlocal.sh                     |    4 -
 src/vnet.am                                        |   32 +-
 src/vnet/dpo/mpls_label_dpo.c                      |    2 +-
 src/vnet/fib/fib_entry.h                           |    8 +-
 src/vnet/sr/dir.dox                                |   25 -
 src/vnet/sr/ietf_draft_05.txt                      | 1564 ----------
 src/vnet/sr/sr.api                                 |  168 -
 src/vnet/sr/sr.c                                   |   57 -
 src/vnet/sr/sr.h                                   |  323 --
 src/vnet/sr/sr_api.c                               |  244 --
 src/vnet/sr/sr_doc.md                              |   55 -
 src/vnet/sr/sr_localsid.c                          | 1492 ---------
 src/vnet/sr/sr_localsid.md                         |   58 -
 src/vnet/sr/sr_packet.h                            |  159 -
 src/vnet/sr/sr_policy.md                           |   56 -
 src/vnet/sr/sr_policy_rewrite.c                    | 3227 --------------------
 src/vnet/sr/sr_steering.c                          |  573 ----
 src/vnet/sr/sr_steering.md                         |   11 -
 src/vnet/srmpls/dir.dox                            |   22 +
 src/vnet/srmpls/sr.h                               |  152 +
 src/vnet/srmpls/sr_doc.md                          |   87 +
 src/vnet/srmpls/sr_mpls_policy.c                   |  569 ++++
 src/vnet/srmpls/sr_mpls_steering.c                 |  453 +++
 src/vnet/srv6/dir.dox                              |   25 +
 src/vnet/srv6/ietf_draft_05.txt                    | 1564 ++++++++++
 src/vnet/srv6/sr.api                               |  168 +
 src/vnet/srv6/sr.c                                 |   57 +
 src/vnet/srv6/sr.h                                 |  325 ++
 src/vnet/srv6/sr_api.c                             |  244 ++
 src/vnet/srv6/sr_doc.md                            |   55 +
 src/vnet/srv6/sr_localsid.c                        | 1492 +++++++++
 src/vnet/srv6/sr_localsid.md                       |   58 +
 src/vnet/srv6/sr_packet.h                          |  159 +
 src/vnet/srv6/sr_policy.md                         |   56 +
 src/vnet/srv6/sr_policy_rewrite.c                  | 3227 ++++++++++++++++++++
 src/vnet/srv6/sr_steering.c                        |  573 ++++
 src/vnet/srv6/sr_steering.md                       |   11 +
 src/vnet/vnet_all_api_h.h                          |    2 +-
 src/vpp/api/api.c                                  |    2 +-
 src/vpp/api/custom_dump.c                          |    2 +-
 src/vpp/api/vpe.api                                |    2 +-
 54 files changed, 9349 insertions(+), 8158 deletions(-)
 delete mode 100755 src/scripts/vnet/sr/left-linux-ping.sh
 delete mode 100644 src/scripts/vnet/sr/leftpeer.conf
 delete mode 100644 src/scripts/vnet/sr/mcast
 delete mode 100755 src/scripts/vnet/sr/right-linux-ping.sh
 delete mode 100644 src/scripts/vnet/sr/rightpeer.conf
 create mode 100644 src/scripts/vnet/sr/sr_mpls
 delete mode 100755 src/scripts/vnet/sr/srlocal.sh
 delete mode 100755 src/vnet/sr/dir.dox
 delete mode 100755 src/vnet/sr/ietf_draft_05.txt
 delete mode 100644 src/vnet/sr/sr.api
 delete mode 100755 src/vnet/sr/sr.c
 delete mode 100755 src/vnet/sr/sr.h
 delete mode 100644 src/vnet/sr/sr_api.c
 delete mode 100644 src/vnet/sr/sr_doc.md
 delete mode 100755 src/vnet/sr/sr_localsid.c
 delete mode 100644 src/vnet/sr/sr_localsid.md
 delete mode 100755 src/vnet/sr/sr_packet.h
 delete mode 100644 src/vnet/sr/sr_policy.md
 delete mode 100755 src/vnet/sr/sr_policy_rewrite.c
 delete mode 100755 src/vnet/sr/sr_steering.c
 delete mode 100644 src/vnet/sr/sr_steering.md
 create mode 100755 src/vnet/srmpls/dir.dox
 create mode 100755 src/vnet/srmpls/sr.h
 create mode 100644 src/vnet/srmpls/sr_doc.md
 create mode 100755 src/vnet/srmpls/sr_mpls_policy.c
 create mode 100755 src/vnet/srmpls/sr_mpls_steering.c
 create mode 100755 src/vnet/srv6/dir.dox
 create mode 100755 src/vnet/srv6/ietf_draft_05.txt
 create mode 100644 src/vnet/srv6/sr.api
 create mode 100755 src/vnet/srv6/sr.c
 create mode 100755 src/vnet/srv6/sr.h
 create mode 100644 src/vnet/srv6/sr_api.c
 create mode 100644 src/vnet/srv6/sr_doc.md
 create mode 100755 src/vnet/srv6/sr_localsid.c
 create mode 100644 src/vnet/srv6/sr_localsid.md
 create mode 100755 src/vnet/srv6/sr_packet.h
 create mode 100644 src/vnet/srv6/sr_policy.md
 create mode 100755 src/vnet/srv6/sr_policy_rewrite.c
 create mode 100755 src/vnet/srv6/sr_steering.c
 create mode 100644 src/vnet/srv6/sr_steering.md

(limited to 'src/vnet/dpo')

diff --git a/MAINTAINERS b/MAINTAINERS
index bdc33abe..2f198319 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -76,9 +76,11 @@ VNET IPv4 and IPv6 LPM
 M:	Dave Barach <dave@barachs.net>
 F:	src/vnet/ip/
 
-VNET IPv6 Segment Routing
+VNET Segment Routing (IPv6 and MPLS)
 M:	Pablo Camarillo <pcamaril@cisco.com>
-F:	src/vnet/sr/
+F:	src/vnet/srv6/
+F:	src/vnet/srmpls/
+F:	src/examples/srv6-sample-localsid/
 
 VNET IPSec
 M:	Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
diff --git a/doxygen/user_doc.md b/doxygen/user_doc.md
index 29df6156..d052c53b 100644
--- a/doxygen/user_doc.md
+++ b/doxygen/user_doc.md
@@ -15,3 +15,4 @@ Several modules provide operational, dataplane-user focused documentation.
 - @subpage qos_doc
 - @subpage span_doc
 - @subpage srv6_doc
+- @subpage srmpls_doc
diff --git a/src/examples/srv6-sample-localsid/srv6_localsid_sample.h b/src/examples/srv6-sample-localsid/srv6_localsid_sample.h
index 474b5de2..ef74ea3e 100644
--- a/src/examples/srv6-sample-localsid/srv6_localsid_sample.h
+++ b/src/examples/srv6-sample-localsid/srv6_localsid_sample.h
@@ -17,8 +17,8 @@
 
 #include <vnet/vnet.h>
 #include <vnet/ip/ip.h>
-#include <vnet/sr/sr.h>
-#include <vnet/sr/sr_packet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/srv6/sr_packet.h>
 
 #include <vppinfra/hash.h>
 #include <vppinfra/error.h>
diff --git a/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md b/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md
index 78e91ab3..cd717db8 100644
--- a/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md
+++ b/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md
@@ -26,5 +26,5 @@ Notice that the plugin only 'defines' a new SRv6 LocalSID behavior, but the exis
 ## Graph node
 
 The current graph node uses the function 'end_srh_processing' to do the Segment Routing Endpoint behavior. Notice that it does not allow the cleanup of a Segment Routing header (as per the SRv6 behavior specs).
-This function is identical to the one found in /src/vnet/sr/sr_localsid.c
+This function is identical to the one found in /src/vnet/srv6/sr_localsid.c
 In case that by some other reason you want to do decapsulation, or SRH clean_up you can use the functions 'end_decaps_srh_processing' or 'end_psp_srh_processing' respectively.
diff --git a/src/plugins/ioam/ip6/ioam_cache.h b/src/plugins/ioam/ip6/ioam_cache.h
index 3f69fa72..e668ad7f 100644
--- a/src/plugins/ioam/ip6/ioam_cache.h
+++ b/src/plugins/ioam/ip6/ioam_cache.h
@@ -20,7 +20,7 @@
 #include <vnet/ip/ip_packet.h>
 #include <vnet/ip/ip4_packet.h>
 #include <vnet/ip/ip6_packet.h>
-#include <vnet/sr/sr.h>
+#include <vnet/srv6/sr.h>
 
 #include <vppinfra/pool.h>
 #include <vppinfra/hash.h>
diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
index 0cf742c9..ca06607d 100644
--- a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
+++ b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
@@ -43,7 +43,7 @@
 #include <vnet/pg/pg.h>
 #include <vppinfra/error.h>
 #include <vnet/ip/ip.h>
-#include <vnet/sr/sr.h>
+#include <vnet/srv6/sr.h>
 #include <ioam/ip6/ioam_cache.h>
 #include <vnet/ip/ip6_hop_by_hop.h>
 #include <vnet/ip/ip6_hop_by_hop_packet.h>
diff --git a/src/plugins/ioam/udp-ping/udp_ping_node.c b/src/plugins/ioam/udp-ping/udp_ping_node.c
index 84759b0f..e1a57955 100644
--- a/src/plugins/ioam/udp-ping/udp_ping_node.c
+++ b/src/plugins/ioam/udp-ping/udp_ping_node.c
@@ -25,7 +25,7 @@
 #include <ioam/udp-ping/udp_ping_packet.h>
 #include <ioam/udp-ping/udp_ping.h>
 #include <ioam/udp-ping/udp_ping_util.h>
-#include <vnet/sr/sr_packet.h>
+#include <vnet/srv6/sr_packet.h>
 
 typedef enum
 {
diff --git a/src/scripts/vnet/sr/left-linux-ping.sh b/src/scripts/vnet/sr/left-linux-ping.sh
deleted file mode 100755
index 55b83506..00000000
--- a/src/scripts/vnet/sr/left-linux-ping.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-ifconfig eth2 inet6 add db02::1/64
-route -A inet6 add db04::1/128 gw db02::2
diff --git a/src/scripts/vnet/sr/leftpeer.conf b/src/scripts/vnet/sr/leftpeer.conf
deleted file mode 100644
index 9591d968..00000000
--- a/src/scripts/vnet/sr/leftpeer.conf
+++ /dev/null
@@ -1,27 +0,0 @@
-comment { test sr segment chunk-offset on }
-test sr hmac validate on
-
-comment { trunk to rightpeer }
-set int ip address GigabitEthernet2/3/0 db03::2/64
-enable ip6 interface GigabitEthernet2/3/0
-set int state GigabitEthernet2/3/0 up
-
-comment { subscriber left-linux-ping }
-set int ip address GigabitEthernet2/2/0 db02::2/64
-enable ip6 interface GigabitEthernet2/2/0
-set int state GigabitEthernet2/2/0 up
-
-sr hmac id 2 key Gozzer
-sr hmac id 3 key Hoser
-
-sr tunnel src db01::1 dst db04::1/128 next db03::1 next db04::1 tag db02::2 clean key Gozzer InPE 1
-
-comment { sr unaware service chaining to db03::5 }
-comment { sr tunnel src db01::1 dst db04::1/128 next db03::1 next db03::5 next db04::1 tag db02::2 clean key Gozzer InPE 1 }
-
-comment { tap connect srlocal hwaddr random }
-comment { set int ip6 table tap-0 1 }
-comment { set int ip address tap-0 db04::99/64 }
-comment { enable ip6 interface tap-0 }
-comment { set int state tap-0 up }
-comment { ip route add table 1 db02::0/64 lookup in table 0 }
diff --git a/src/scripts/vnet/sr/mcast b/src/scripts/vnet/sr/mcast
deleted file mode 100644
index 50e73efa..00000000
--- a/src/scripts/vnet/sr/mcast
+++ /dev/null
@@ -1,58 +0,0 @@
-
-loop create
-loop create
-loop create
-loop create
-
-set int state loop0 up
-set int state loop1 up
-set int state loop2 up
-set int state loop3 up
-
-set int ip address loop0 2001::1/64
-set int ip address loop1 2001:1::1/64
-set int ip address loop2 2001:2::1/64
-set int ip address loop3 2001:3::1/64
-
-set ip6 neighbor loop1 2001:1::2 00:00:dd:ee:cc:d1
-set ip6 neighbor loop2 2001:2::2 00:00:dd:ee:cc:d2
-set ip6 neighbor loop3 2001:3::2 00:00:dd:ee:cc:d3
-
-ip route 3001::1/128 via 2001:1::2 loop1
-ip route 3001::2/128 via 2001:2::2 loop2
-ip route 3001::3/128 via 2001:3::2 loop3
-
-sr tunnel name SR1 src aaaa::2:1 dst ff19::1/128 next 3001::1 clean
-sr tunnel name SR2 src aaaa::2:2 dst ff19::2/128 next 3001::2 clean
-sr tunnel name SR3 src aaaa::2:3 dst ff19::3/128 next 3001::3 clean
-
-sr policy name MCAST1 tunnel SR1 tunnel SR2 tunnel SR3
-
-sr multicast-map address ff18::1 sr-policy MCAST1
-
-packet-generator new {
-  name x
-  limit 1
-  node ethernet-input
-  size 64-64
-  no-recycle
-  data {
-    IP6: 1.2.3 -> 4.5.6
-    ICMP: 3002::2 -> ff18::1
-    ICMP echo_request
-    incrementing 100
-  }
-}
-trace add pg-input 100
-
-sr multicast-map del address ff18::1 sr-policy MCAST1
-sr policy del name MCAST1 tunnel SR1 tunnel SR2 tunnel SR3
-
-ip route del 3001::1/128 via 2001:1::2 loop1
-ip route del 3001::2/128 via 2001:2::2 loop2
-ip route del 3001::3/128 via 2001:3::2 loop3
-
-sr tunnel del name SR1 src aaaa::2:1 dst ff19::1/128 next 3001::1 clean
-sr tunnel del name SR2 src aaaa::2:2 dst ff19::2/128 next 3001::2 clean
-sr tunnel del name SR3 src aaaa::2:3 dst ff19::3/128 next 3001::3 clean
-
diff --git a/src/scripts/vnet/sr/right-linux-ping.sh b/src/scripts/vnet/sr/right-linux-ping.sh
deleted file mode 100755
index 029368db..00000000
--- a/src/scripts/vnet/sr/right-linux-ping.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-
-ifconfig eth1 inet6 add db04::1/64
-route -A inet6 add db02::1/128 gw db04::2
diff --git a/src/scripts/vnet/sr/rightpeer.conf b/src/scripts/vnet/sr/rightpeer.conf
deleted file mode 100644
index 6da7a7af..00000000
--- a/src/scripts/vnet/sr/rightpeer.conf
+++ /dev/null
@@ -1,22 +0,0 @@
-comment { trunk to leftpeer }
-set int ip address GigabitEthernet2/0/0 db03::1/64
-enable ip6 interface GigabitEthernet2/0/0
-set int state GigabitEthernet2/0/0 up
-
-comment { subscriber right-linux-ping }
-set int ip address GigabitEthernet2/2/0 db04::2/64
-
-comment { next address to fake out ND on shared LAN segment }
-set int ip address GigabitEthernet2/2/0 db02::13/64
-
-enable ip6 interface GigabitEthernet2/2/0
-set int state GigabitEthernet2/2/0 up
-
-sr tunnel src db04::1 dst db02::1/128 next db03::2 next db02::1 tag db04::2 clean
-
-tap connect srlocal hwaddr random
-set int ip6 table tap-0 1
-set int ip address tap-0 db04::99/64
-enable ip6 interface tap-0
-set int state tap-0 up
-ip route add table 1 db02::0/64 lookup in table 0
diff --git a/src/scripts/vnet/sr/sr_mpls b/src/scripts/vnet/sr/sr_mpls
new file mode 100644
index 00000000..4646372a
--- /dev/null
+++ b/src/scripts/vnet/sr/sr_mpls
@@ -0,0 +1,11 @@
+set interface mpls local0 enable
+sr mpls policy add bsid 20001 next 16001 next 16002 next 16003
+sr mpls steer l3 a::/112 via sr policy bsid 20001
+
+loop create
+set int state loop0 up
+
+set int ip address loop0 11.0.0.1/24
+set ip arp loop0 11.0.0.2 00:00:11:aa:bb:cc
+
+mpls local-label 16001 via 11.0.0.2 loop0 out-label 16001
diff --git a/src/scripts/vnet/sr/srlocal.sh b/src/scripts/vnet/sr/srlocal.sh
deleted file mode 100755
index 2f568408..00000000
--- a/src/scripts/vnet/sr/srlocal.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-
-ifconfig srlocal inet6 add db04::1/64
-route -6 add db02::0/64 gw db04::99
diff --git a/src/vnet.am b/src/vnet.am
index 6e35df87..121d1a9c 100644
--- a/src/vnet.am
+++ b/src/vnet.am
@@ -697,21 +697,31 @@ nobase_include_HEADERS +=			\
 # ipv6 segment routing
 ########################################
 
-if WITH_LIBSSL
 libvnet_la_SOURCES +=				\
- vnet/sr/sr.c						\
- vnet/sr/sr_localsid.c				\
- vnet/sr/sr_policy_rewrite.c		\
- vnet/sr/sr_steering.c				\
- vnet/sr/sr_api.c
-endif
+ vnet/srv6/sr.c						\
+ vnet/srv6/sr_localsid.c			\
+ vnet/srv6/sr_policy_rewrite.c		\
+ vnet/srv6/sr_steering.c			\
+ vnet/srv6/sr_api.c
 
 nobase_include_HEADERS +=			\
- vnet/sr/sr_packet.h				\
- vnet/sr/sr.h						\
- vnet/sr/sr.api.h
+ vnet/srv6/sr_packet.h				\
+ vnet/srv6/sr.h 					\
+ vnet/srv6/sr.api.h
+
+API_FILES += vnet/srv6/sr.api
+
+########################################
+# mpls segment routing
+########################################
+
+libvnet_la_SOURCES +=				\
+ vnet/srmpls/sr_mpls_policy.c		\
+ vnet/srmpls/sr_mpls_steering.c
 
-API_FILES += vnet/sr/sr.api
+
+nobase_include_HEADERS +=			\
+ vnet/srmpls/sr.h
 
 ########################################
 # IPFIX / netflow v10
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
index 4d84b900..18479531 100644
--- a/src/vnet/dpo/mpls_label_dpo.c
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -171,7 +171,7 @@ mpls_label_paint (vlib_buffer_t * b0,
 
     hdr0 = vlib_buffer_get_current(b0);
 
-    if (PREDICT_TRUE(1 == mld0->mld_n_labels))
+    if (1 == mld0->mld_n_labels)
     {
         /* optimise for the common case of one label */
         *hdr0 = mld0->mld_hdr[0];
diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h
index 2196079b..aa1000e0 100644
--- a/src/vnet/fib/fib_entry.h
+++ b/src/vnet/fib/fib_entry.h
@@ -48,6 +48,10 @@ typedef enum fib_source_t_ {
      * that is from confiiguration on an interface, not a 'ip route' command
      */
     FIB_SOURCE_INTERFACE,
+    /**
+     * SRv6 and SR-MPLS
+     */
+    FIB_SOURCE_SR,
     /**
      * A high priority source a plugin can use
      */
@@ -64,10 +68,6 @@ typedef enum fib_source_t_ {
      * LISP
      */
     FIB_SOURCE_LISP,
-    /**
-     * SRv6
-     */
-    FIB_SOURCE_SR,
     /**
      * IPv[46] Mapping
      */
diff --git a/src/vnet/sr/dir.dox b/src/vnet/sr/dir.dox
deleted file mode 100755
index 3f539a58..00000000
--- a/src/vnet/sr/dir.dox
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- *
- * Copyright (c) 2013 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- @dir
- @brief Segment Routing code
-
- An implementation of Segment Routing as per:
- draft-ietf-6man-segment-routing-header-05
-
- @see ietf_draft_05.txt 
- 
-*/
\ No newline at end of file
diff --git a/src/vnet/sr/ietf_draft_05.txt b/src/vnet/sr/ietf_draft_05.txt
deleted file mode 100755
index e9bff04f..00000000
--- a/src/vnet/sr/ietf_draft_05.txt
+++ /dev/null
@@ -1,1564 +0,0 @@
-Network Working Group                                    S. Previdi, Ed.
-Internet-Draft                                               C. Filsfils
-Intended status: Standards Track                     Cisco Systems, Inc.
-Expires: August 5, 2017                                         B. Field
-                                                                 Comcast
-                                                                I. Leung
-                                                   Rogers Communications
-                                                              J. Linkova
-                                                                  Google
-                                                                E. Aries
-                                                                Facebook
-                                                               T. Kosugi
-                                                                     NTT
-                                                               E. Vyncke
-                                                     Cisco Systems, Inc.
-                                                               D. Lebrun
-                                        Universite Catholique de Louvain
-                                                        February 1, 2017
-
-
-                   IPv6 Segment Routing Header (SRH)
-               draft-ietf-6man-segment-routing-header-05
-
-Abstract
-
-   Segment Routing (SR) allows a node to steer a packet through a
-   controlled set of instructions, called segments, by prepending an SR
-   header to the packet.  A segment can represent any instruction,
-   topological or service-based.  SR allows to enforce a flow through
-   any path (topological, or application/service based) while
-   maintaining per-flow state only at the ingress node to the SR domain.
-
-   Segment Routing can be applied to the IPv6 data plane with the
-   addition of a new type of Routing Extension Header.  This draft
-   describes the Segment Routing Extension Header Type and how it is
-   used by SR capable nodes.
-
-Requirements Language
-
-   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
-   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
-   document are to be interpreted as described in RFC 2119 [RFC2119].
-
-Status of This Memo
-
-   This Internet-Draft is submitted in full conformance with the
-   provisions of BCP 78 and BCP 79.
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                 [Page 1]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   Internet-Drafts are working documents of the Internet Engineering
-   Task Force (IETF).  Note that other groups may also distribute
-   working documents as Internet-Drafts.  The list of current Internet-
-   Drafts is at http://datatracker.ietf.org/drafts/current/.
-
-   Internet-Drafts are draft documents valid for a maximum of six months
-   and may be updated, replaced, or obsoleted by other documents at any
-   time.  It is inappropriate to use Internet-Drafts as reference
-   material or to cite them other than as "work in progress."
-
-   This Internet-Draft will expire on August 5, 2017.
-
-Copyright Notice
-
-   Copyright (c) 2017 IETF Trust and the persons identified as the
-   document authors.  All rights reserved.
-
-   This document is subject to BCP 78 and the IETF Trust's Legal
-   Provisions Relating to IETF Documents
-   (http://trustee.ietf.org/license-info) in effect on the date of
-   publication of this document.  Please review these documents
-   carefully, as they describe your rights and restrictions with respect
-   to this document.  Code Components extracted from this document must
-   include Simplified BSD License text as described in Section 4.e of
-   the Trust Legal Provisions and are provided without warranty as
-   described in the Simplified BSD License.
-
-Table of Contents
-
-   1.  Segment Routing Documents . . . . . . . . . . . . . . . . . .   3
-   2.  Introduction  . . . . . . . . . . . . . . . . . . . . . . . .   3
-     2.1.  Data Planes supporting Segment Routing  . . . . . . . . .   4
-     2.2.  Segment Routing (SR) Domain . . . . . . . . . . . . . . .   4
-       2.2.1.  SR Domain in a Service Provider Network . . . . . . .   5
-       2.2.2.  SR Domain in a Overlay Network  . . . . . . . . . . .   6
-   3.  Segment Routing Extension Header (SRH)  . . . . . . . . . . .   7
-     3.1.  SRH TLVs  . . . . . . . . . . . . . . . . . . . . . . . .   9
-       3.1.1.  Ingress Node TLV  . . . . . . . . . . . . . . . . . .  10
-       3.1.2.  Egress Node TLV . . . . . . . . . . . . . . . . . . .  11
-       3.1.3.  Opaque Container TLV  . . . . . . . . . . . . . . . .  11
-       3.1.4.  Padding TLV . . . . . . . . . . . . . . . . . . . . .  12
-       3.1.5.  HMAC TLV  . . . . . . . . . . . . . . . . . . . . . .  13
-     3.2.  SRH and RFC2460 behavior  . . . . . . . . . . . . . . . .  14
-   4.  SRH Procedures  . . . . . . . . . . . . . . . . . . . . . . .  14
-     4.1.  Source SR Node  . . . . . . . . . . . . . . . . . . . . .  14
-     4.2.  Transit Node  . . . . . . . . . . . . . . . . . . . . . .  15
-     4.3.  SR Segment Endpoint Node  . . . . . . . . . . . . . . . .  16
-   5.  Security Considerations . . . . . . . . . . . . . . . . . . .  16
-
-
-
-Previdi, et al.          Expires August 5, 2017                 [Page 2]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-     5.1.  Threat model  . . . . . . . . . . . . . . . . . . . . . .  17
-       5.1.1.  Source routing threats  . . . . . . . . . . . . . . .  17
-       5.1.2.  Applicability of RFC 5095 to SRH  . . . . . . . . . .  17
-       5.1.3.  Service stealing threat . . . . . . . . . . . . . . .  18
-       5.1.4.  Topology disclosure . . . . . . . . . . . . . . . . .  18
-       5.1.5.  ICMP Generation . . . . . . . . . . . . . . . . . . .  18
-     5.2.  Security fields in SRH  . . . . . . . . . . . . . . . . .  19
-       5.2.1.  Selecting a hash algorithm  . . . . . . . . . . . . .  20
-       5.2.2.  Performance impact of HMAC  . . . . . . . . . . . . .  21
-       5.2.3.  Pre-shared key management . . . . . . . . . . . . . .  21
-     5.3.  Deployment Models . . . . . . . . . . . . . . . . . . . .  22
-       5.3.1.  Nodes within the SR domain  . . . . . . . . . . . . .  22
-       5.3.2.  Nodes outside of the SR domain  . . . . . . . . . . .  22
-       5.3.3.  SR path exposure  . . . . . . . . . . . . . . . . . .  23
-       5.3.4.  Impact of BCP-38  . . . . . . . . . . . . . . . . . .  23
-   6.  IANA Considerations . . . . . . . . . . . . . . . . . . . . .  24
-   7.  Manageability Considerations  . . . . . . . . . . . . . . . .  24
-   8.  Contributors  . . . . . . . . . . . . . . . . . . . . . . . .  24
-   9.  Acknowledgements  . . . . . . . . . . . . . . . . . . . . . .  24
-   10. References  . . . . . . . . . . . . . . . . . . . . . . . . .  25
-     10.1.  Normative References . . . . . . . . . . . . . . . . . .  25
-     10.2.  Informative References . . . . . . . . . . . . . . . . .  25
-   Authors' Addresses  . . . . . . . . . . . . . . . . . . . . . . .  27
-
-1.  Segment Routing Documents
-
-   Segment Routing terminology is defined in
-   [I-D.ietf-spring-segment-routing].
-
-   Segment Routing use cases are described in [RFC7855] and
-   [I-D.ietf-spring-ipv6-use-cases].
-
-   Segment Routing protocol extensions are defined in
-   [I-D.ietf-isis-segment-routing-extensions], and
-   [I-D.ietf-ospf-ospfv3-segment-routing-extensions].
-
-2.  Introduction
-
-   Segment Routing (SR), defined in [I-D.ietf-spring-segment-routing],
-   allows a node to steer a packet through a controlled set of
-   instructions, called segments, by prepending an SR header to the
-   packet.  A segment can represent any instruction, topological or
-   service-based.  SR allows to enforce a flow through any path
-   (topological or service/application based) while maintaining per-flow
-   state only at the ingress node to the SR domain.  Segments can be
-   derived from different components: IGP, BGP, Services, Contexts,
-   Locators, etc.  The list of segment forming the path is called the
-   Segment List and is encoded in the packet header.
-
-
-
-Previdi, et al.          Expires August 5, 2017                 [Page 3]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   SR allows the use of strict and loose source based routing paradigms
-   without requiring any additional signaling protocols in the
-   infrastructure hence delivering an excellent scalability property.
-
-   The source based routing model described in
-   [I-D.ietf-spring-segment-routing] is inherited from the ones proposed
-   by [RFC1940] and [RFC2460].  The source based routing model offers
-   the support for explicit routing capability.
-
-2.1.  Data Planes supporting Segment Routing
-
-   Segment Routing (SR), can be instantiated over MPLS
-   ([I-D.ietf-spring-segment-routing-mpls]) and IPv6.  This document
-   defines its instantiation over the IPv6 data-plane based on the use-
-   cases defined in [I-D.ietf-spring-ipv6-use-cases].
-
-   This document defines a new type of Routing Header (originally
-   defined in [RFC2460]) called the Segment Routing Header (SRH) in
-   order to convey the Segment List in the packet header as defined in
-   [I-D.ietf-spring-segment-routing].  Mechanisms through which segment
-   are known and advertised are outside the scope of this document.
-
-   A segment is materialized by an IPv6 address.  A segment identifies a
-   topological instruction or a service instruction.  A segment can be
-   either:
-
-   o  global: a global segment represents an instruction supported by
-      all nodes in the SR domain and it is instantiated through an IPv6
-      address globally known in the SR domain.
-
-   o  local: a local segment represents an instruction supported only by
-      the node who originates it and it is instantiated through an IPv6
-      address that is known only by the local node.
-
-2.2.  Segment Routing (SR) Domain
-
-   We define the concept of the Segment Routing Domain (SR Domain) as
-   the set of nodes participating into the source based routing model.
-   These nodes may be connected to the same physical infrastructure
-   (e.g.: a Service Provider's network) as well as nodes remotely
-   connected to each other (e.g.: an enterprise VPN or an overlay).
-
-   A non-exhaustive list of examples of SR Domains is:
-
-   o  The network of an operator, service provider, content provider,
-      enterprise including nodes, links and Autonomous Systems.
-
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                 [Page 4]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   o  A set of nodes connected as an overlay over one or more transit
-      providers.  The overlay nodes exchange SR-enabled traffic with
-      segments belonging solely to the overlay routers (the SR domain).
-      None of the segments in the SR-enabled packets exchanged by the
-      overlay belong to the transit networks
-
-   The source based routing model through its instantiation of the
-   Segment Routing Header (SRH) defined in this document equally applies
-   to all the above examples.
-
-   It is assumed in this document that the SRH is added to the packet by
-   its source, consistently with the source routing model defined in
-   [RFC2460].  For example:
-
-   o  At the node originating the packet (host, server).
-
-   o  At the ingress node of an SR domain where the ingress node
-      receives an IPv6 packet and encapsulates it into an outer IPv6
-      header followed by a Segment Routing header.
-
-2.2.1.  SR Domain in a Service Provider Network
-
-   The following figure illustrates an SR domain consisting of an
-   operator's network infrastructure.
-
-     (-------------------------- Operator 1 -----------------------)
-     (                                                             )
-     (  (-----AS 1-----)  (-------AS 2-------)  (----AS 3-------)  )
-     (  (              )  (                  )  (               )  )
- A1--(--(--11---13--14-)--(-21---22---23--24-)--(-31---32---34--)--)--Z1
-     (  ( /|\  /|\  /| )  ( |\  /|\  /|\  /| )  ( |\  /|\  /| \ )  )
- A2--(--(/ | \/ | \/ | )  ( | \/ | \/ | \/ | )  ( | \/ | \/ |  \)--)--Z2
-     (  (  | /\ | /\ | )  ( | /\ | /\ | /\ | )  ( | /\ | /\ |   )  )
-     (  (  |/  \|/  \| )  ( |/  \|/  \|/  \| )  ( |/  \|/  \|   )  )
- A3--(--(--15---17--18-)--(-25---26---27--28-)--(-35---36---38--)--)--Z3
-     (  (              )  (                  )  (               )  )
-     (  (--------------)  (------------------)  (---------------)  )
-     (                                                             )
-     (-------------------------------------------------------------)
-
-                   Figure 1: Service Provider SR Domain
-
-   Figure 1 describes an operator network including several ASes and
-   delivering connectivity between endpoints.  In this scenario, Segment
-   Routing is used within the operator networks and across the ASes
-   boundaries (all being under the control of the same operator).  In
-   this case segment routing can be used in order to address use cases
-   such as end-to-end traffic engineering, fast re-route, egress peer
-
-
-
-Previdi, et al.          Expires August 5, 2017                 [Page 5]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   engineering, data-center traffic engineering as described in
-   [RFC7855], [I-D.ietf-spring-ipv6-use-cases] and
-   [I-D.ietf-spring-resiliency-use-cases].
-
-   Typically, an IPv6 packet received at ingress (i.e.: from outside the
-   SR domain), is classified according to network operator policies and
-   such classification results into an outer header with an SRH applied
-   to the incoming packet.  The SRH contains the list of segments
-   representing the path the packet must take inside the SR domain.
-   Thus, the SA of the packet is the ingress node, the DA (due to SRH
-   procedures described in Section 4) is set as the first segment of the
-   path and the last segment of the path is the egress node of the SR
-   domain.
-
-   The path may include intra-AS as well as inter-AS segments.  It has
-   to be noted that all nodes within the SR domain are under control of
-   the same administration.  When the packet reaches the egress point of
-   the SR domain, the outer header and its SRH are removed so that the
-   destination of the packet is unaware of the SR domain the packet has
-   traversed.
-
-   The outer header with the SRH is no different from any other
-   tunneling encapsulation mechanism and allows a network operator to
-   implement traffic engineering mechanisms so as to efficiently steer
-   traffic across its infrastructure.
-
-2.2.2.  SR Domain in an Overlay Network
-
-   The following figure illustrates an SR domain consisting of an
-   overlay network over multiple operators' networks.
-
-       (--Operator 1---)  (-----Operator 2-----)  (--Operator 3---)
-       (               )  (                    )  (               )
-   A1--(--11---13--14--)--(--21---22---23--24--)--(-31---32---34--)--C1
-       ( /|\  /|\  /|  )  (  |\  /|\  /|\  /|  )  ( |\  /|\  /| \ )
-   A2--(/ | \/ | \/ |  )  (  | \/ | \/ | \/ |  )  ( | \/ | \/ |  \)--C2
-       (  | /\ | /\ |  )  (  | /\ | /\ | /\ |  )  ( | /\ | /\ |   )
-       (  |/  \|/  \|  )  (  |/  \|/  \|/  \|  )  ( |/  \|/  \|   )
-   A3--(--15---17--18--)--(--25---26---27--28--)--(-35---36---38--)--C3
-       (               )  (  |    |         |  )  (               )
-       (---------------)  (--|----|---------|--)  (---------------)
-                             |    |         |
-                             B1   B2        B3
-
-                        Figure 2: Overlay SR Domain
-
-
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                 [Page 6]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   Figure 2 describes an overlay consisting of nodes connected to three
-   different network operators and forming a single overlay network
-   where Segment routing packets are exchanged.
-
-   The overlay consists of nodes A1, A2, A3, B1, B2, B3, C1, C2 and C3.
-   These nodes are connected to their respective network operator and
-   form an overlay network.
-
-   Each node may originate packets with an SRH which contains, in the
-   segment list of the SRH or in the DA, segments identifying other
-   overlay nodes.  This implies that packets with an SRH may traverse
-   operator's networks but, obviously, these SRHs cannot contain an
-   address/segment of the transit operators 1, 2 and 3.  The SRH
-   originated by the overlay can only contain address/segment under the
-   administration of the overlay (e.g. address/segments supported by A1,
-   A2, A3, B1, B2, B3, C1,C2 or C3).
-
-   In this model, the operator network nodes are transit nodes and,
-   according to [RFC2460], MUST NOT inspect the routing extension header
-   since they are not the DA of the packet.
-
-   It is a common practice in operators' networks to filter out, at
-   ingress, any packet whose DA is the address of an internal node and
-   it is also possible that an operator would filter out any packet
-   destined to an internal address and having an extension header in it.
-
-   This common practice does not impact the SR-enabled traffic between
-   the overlay nodes as the intermediate transit networks never see a
-   destination address belonging to their infrastructure.  These SR-
-   enabled overlay packets will thus never be filtered by the transit
-   operators.
-
-   In all cases, transit packets (i.e.: packets whose DA is outside the
-   domain of the operator's network) will be forwarded accordingly
-   without introducing any security concern in the operator's network.
-   This is similar to tunneled packets.
-
-3.  Segment Routing Extension Header (SRH)
-
-   A new type of the Routing Header (originally defined in [RFC2460]) is
-   defined: the Segment Routing Header (SRH) which has a new Routing
-   Type, (suggested value 4) to be assigned by IANA.
-
-   The Segment Routing Header (SRH) is defined as follows:
-
-
-
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                 [Page 7]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-     0                   1                   2                   3
-     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-    | Next Header   |  Hdr Ext Len  | Routing Type  | Segments Left |
-    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-    | First Segment |     Flags     |           RESERVED            |
-    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-    |                                                               |
-    |            Segment List[0] (128 bits IPv6 address)            |
-    |                                                               |
-    |                                                               |
-    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-    |                                                               |
-    |                                                               |
-                                  ...
-    |                                                               |
-    |                                                               |
-    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-    |                                                               |
-    |            Segment List[n] (128 bits IPv6 address)            |
-    |                                                               |
-    |                                                               |
-    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-    //                                                             //
-    //         Optional Type Length Value objects (variable)       //
-    //                                                             //
-    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-   where:
-
-   o  Next Header: 8-bit selector.  Identifies the type of header
-      immediately following the SRH.
-
-   o  Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH
-      header in 8-octet units, not including the first 8 octets.
-
-   o  Routing Type: TBD, to be assigned by IANA (suggested value: 4).
-
-   o  Segments Left.  Defined in [RFC2460], it contains the index, in
-      the Segment List, of the next segment to inspect.  Segments Left
-      is decremented at each segment.
-
-   o  First Segment: contains the index, in the Segment List, of the
-      first segment of the path which is in fact the last element of the
-      Segment List.
-
-   o  Flags: 8 bits of flags.  The following flags are defined:
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                 [Page 8]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-          0 1 2 3 4 5 6 7
-         +-+-+-+-+-+-+-+-+
-         |U|P|O|A|H|  U  |
-         +-+-+-+-+-+-+-+-+
-
-         U: Unused and for future use.  SHOULD be unset on transmission
-         and MUST be ignored on receipt.
-
-         P-flag: Protected flag.  Set when the packet has been rerouted
-         through FRR mechanism by an SR endpoint node.
-
-         O-flag: OAM flag.  When set, it indicates that this packet is
-         an operations and management (OAM) packet.
-
-         A-flag: Alert flag.  If present, it means important Type Length
-         Value (TLV) objects are present.  See Section 3.1 for details
-         on TLV objects.
-
-         H-flag: HMAC flag.  If set, the HMAC TLV is present and is
-         encoded as the last TLV of the SRH.  In other words, the last
-         36 octets of the SRH represent the HMAC information.  See
-         Section 3.1.5 for details on the HMAC TLV.
-
-   o  RESERVED: SHOULD be unset on transmission and MUST be ignored on
-      receipt.
-
-   o  Segment List[n]: 128 bit IPv6 addresses representing the nth
-      segment in the Segment List.  The Segment List is encoded starting
-      from the last segment of the path.  I.e., the first element of the
-      segment list (Segment List [0]) contains the last segment of the
-      path while the last segment of the Segment List (Segment List[n])
-      contains the first segment of the path.  The index contained in
-      "Segments Left" identifies the current active segment.
-
-   o  Type Length Value (TLV) objects are described in Section 3.1.
-
-3.1.  SRH TLVs
-
-   This section defines TLVs of the Segment Routing Header.
-
-   Type Length Value (TLV) contain optional information that may be used
-   by the node identified in the DA of the packet.  It has to be noted
-   that the information carried in the TLVs is not intended to be used
-   by the routing layer.  Typically, TLVs carry information that is
-   consumed by other components (e.g.: OAM) than the routing function.
-
-   Each TLV has its own length, format and semantic.  The code-point
-   allocated (by IANA) to each TLV defines both the format and the
-
-
-
-Previdi, et al.          Expires August 5, 2017                 [Page 9]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   semantic of the information carried in the TLV.  Multiple TLVs may be
-   encoded in the same SRH.
-
-   The "Length" field of the TLV is primarily used to skip the TLV while
-   inspecting the SRH in case the node doesn't support or recognize the
-   TLV codepoint.  The "Length" defines the TLV length in octets and not
-   including the "Type" and "Length" fields.
-
-   The primary scope of TLVs is to give the receiver of the packet
-   information related to the source routed path (e.g.: where the packet
-   entered in the SR domain and where it is expected to exit).
-
-   Additional TLVs may be defined in the future.
-
-3.1.1.  Ingress Node TLV
-
-   The Ingress Node TLV is optional and identifies the node this packet
-   traversed when it entered the SR domain.  The Ingress Node TLV has
-   the following format:
-
-    0                   1                   2                   3
-    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   |      Type     |    Length     |   RESERVED    |     Flags     |
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   |                                                               |
-   |                 Ingress Node (16 octets)                      |
-   |                                                               |
-   |                                                               |
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-   where:
-
-   o  Type: to be assigned by IANA (suggested value 1).
-
-   o  Length: 18.
-
-   o  RESERVED: 8 bits.  SHOULD be unset on transmission and MUST be
-      ignored on receipt.
-
-   o  Flags: 8 bits.  No flags are defined in this document.
-
-   o  Ingress Node: 128 bits.  Defines the node where the packet is
-      expected to enter the SR domain.  In the encapsulation case
-      described in Section 2.2.1, this information corresponds to the SA
-      of the encapsulating header.
-
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 10]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-3.1.2.  Egress Node TLV
-
-   The Egress Node TLV is optional and identifies the node this packet
-   is expected to traverse when exiting the SR domain.  The Egress Node
-   TLV has the following format:
-
-    0                   1                   2                   3
-    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   |      Type     |    Length     |   RESERVED    |     Flags     |
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   |                                                               |
-   |                  Egress Node (16 octets)                      |
-   |                                                               |
-   |                                                               |
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-   where:
-
-   o  Type: to be assigned by IANA (suggested value 2).
-
-   o  Length: 18.
-
-   o  RESERVED: 8 bits.  SHOULD be unset on transmission and MUST be
-      ignored on receipt.
-
-   o  Flags: 8 bits.  No flags are defined in this document.
-
-   o  Egress Node: 128 bits.  Defines the node where the packet is
-      expected to exit the SR domain.  In the encapsulation case
-      described in Section 2.2.1, this information corresponds to the
-      last segment of the SRH in the encapsulating header.
-
-3.1.3.  Opaque Container TLV
-
-   The Opaque Container TLV is optional and has the following format:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 11]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-    0                   1                   2                   3
-    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   |      Type     |    Length     |   RESERVED    |     Flags     |
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   |                                                               |
-   |             Opaque Container (16 octets)                      |
-   |                                                               |
-   |                                                               |
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-   where:
-
-   o  Type: to be assigned by IANA (suggested value 3).
-
-   o  Length: 18.
-
-   o  RESERVED: 8 bits.  SHOULD be unset on transmission and MUST be
-      ignored on receipt.
-
-   o  Flags: 8 bits.  No flags are defined in this document.
-
-   o  Opaque Container: 128 bits of opaque data not relevant for the
-      routing layer.  Typically, this information is consumed by a non-
-      routing component of the node receiving the packet (i.e.: the node
-      in the DA).
-
-3.1.4.  Padding TLV
-
-   The Padding TLV is optional and serves to align the SRH on an
-   8-octet boundary.  The Padding TLV has the following format:
-
-    0                   1                   2                   3
-    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   |     Type      |    Length     |      Padding (variable)       |
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   //                    Padding (variable)                       //
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-   where:
-
-   o  Type: to be assigned by IANA (suggested value 4).
-
-   o  Length: 1 to 7
-
-
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 12]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   o  Padding: from 1 to 7 octets of padding.  Padding bits have no
-      semantic.  They SHOULD be set to 0 on transmission and MUST be
-      ignored on receipt.
-
-   The following applies to the Padding TLV:
-
-   o  Padding TLV is optional and MAY only appear once in the SRH.  If
-      present, it MUST have a length between 1 and 7 octets.
-
-   o  The Padding TLV is used in order to align the SRH total length on
-      the 8 octet boundary.
-
-   o  When present, the Padding TLV MUST appear as the last TLV before
-      the HMAC TLV (if HMAC TLV is present).
-
-   o  When present, the Padding TLV MUST have a length from 1 to 7 in
-      order to align the SRH total length on an 8-octet boundary.
-
-   o  When a router inspecting the SRH encounters the Padding TLV, it
-      MUST assume that no other TLV (other than the HMAC) follows the
-      Padding TLV.
-
-3.1.5.  HMAC TLV
-
-   HMAC TLV is optional and contains the HMAC information.  The HMAC TLV
-   has the following format:
-
-    0                   1                   2                   3
-    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   |      Type     |     Length    |          RESERVED             |
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   |                      HMAC Key ID (4 octets)                   |
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   |                                                              //
-   |                      HMAC (32 octets)                        //
-   |                                                              //
-   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-   where:
-
-   o  Type: to be assigned by IANA (suggested value 5).
-
-   o  Length: 38.
-
-   o  RESERVED: 2 octets.  SHOULD be unset on transmission and MUST be
-      ignored on receipt.
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 13]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   o  HMAC Key ID: 4 octets.
-
-   o  HMAC: 32 octets.
-
-   o  HMAC and HMAC Key ID usage is described in Section 5.
-
-   The following applies to the HMAC TLV:
-
-   o  When present, the HMAC TLV MUST be encoded as the last TLV of the
-      SRH.
-
-   o  If the HMAC TLV is present, the SRH H-Flag (Figure 4) MUST be set.
-
-   o  When the H-flag is set in the SRH, the router inspecting the SRH
-      MUST find the HMAC TLV in the last 38 octets of the SRH.
-
-3.2.  SRH and RFC2460 behavior
-
-   The SRH being a new type of the Routing Header, it also has the same
-   properties:
-
-      SHOULD only appear once in the packet.
-
-      Only the router whose address is in the DA field of the packet
-      header MUST inspect the SRH.
-
-   Therefore, Segment Routing in IPv6 networks implies that the segment
-   identifier (i.e.: the IPv6 address of the segment) is moved into the
-   DA of the packet.
-
-   The DA of the packet changes at each segment termination/completion
-   and therefore the final DA of the packet MUST be encoded as the last
-   segment of the path.
-
-4.  SRH Procedures
-
-   In this section we describe the different procedures on the SRH.
-
-4.1.  Source SR Node
-
-   A Source SR Node can be any node originating an IPv6 packet with its
-   IPv6 and Segment Routing Headers.  This includes either:
-
-      A host originating an IPv6 packet.
-
-      An SR domain ingress router encapsulating a received IPv6 packet
-      into an outer IPv6 header followed by an SRH.
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 14]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   The mechanism through which a Segment List is derived is outside of
-   the scope of this document.  As an example, the Segment List may be
-   obtained through:
-
-      Local path computation.
-
-      Local configuration.
-
-      Interaction with a centralized controller delivering the path.
-
-      Any other mechanism.
-
-   The following are the steps of the creation of the SRH:
-
-      Next Header and Hdr Ext Len fields are set according to [RFC2460].
-
-      Routing Type field is set as TBD (to be allocated by IANA,
-      suggested value 4).
-
-      The Segment List is built with the FIRST segment of the path
-      encoded in the LAST element of the Segment List.  Subsequent
-      segments are encoded on top of the first segment.  Finally, the
-      LAST segment of the path is encoded in the FIRST element of the
-      Segment List.  In other words, the Segment List is encoded in the
-      reverse order of the path.
-
-      The final DA of the packet is encoded as the last segment of the
-      path (encoded in the first element of the Segment List).
-
-      The DA of the packet is set with the value of the first segment
-      (found in the last element of the segment list).
-
-      The Segments Left field is set to n-1 where n is the number of
-      elements in the Segment List.
-
-      The First Segment field is set to n-1 where n is the number of
-      elements in the Segment List.
-
-      The packet is sent out towards the first segment (i.e.:
-      represented in the packet DA).
-
-      HMAC TLV may be set according to Section 5.
-
-4.2.  Transit Node
-
-   According to [RFC2460], the only node who is allowed to inspect the
-   Routing Extension Header (and therefore the SRH), is the node
-   corresponding to the DA of the packet.  Any other transit node MUST
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 15]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   NOT inspect the underneath routing header and MUST forward the packet
-   towards the DA and according to the IPv6 routing table.
-
-   In the example case described in Section 2.2.2, when SR capable nodes
-   are connected through an overlay spanning multiple third-party
-   infrastructure, it is safe to send SRH packets (i.e.: packets having
-   a Segment Routing Header) between the overlay/SR-capable nodes
-   as long as the segment list does not include any of the transit
-   provider nodes.  In addition, as a generic security measure, any
-   service provider will block any packet destined to one of its
-   internal routers, especially if these packets have an extension
-   header in them.
-
-4.3.  SR Segment Endpoint Node
-
-   The SR segment endpoint node is the node whose address is in the DA.
-   The segment endpoint node inspects the SRH and does:
-
-   1.   IF DA = myself (segment endpoint)
-   2.      IF Segments Left > 0 THEN
-              decrement Segments Left
-              update DA with Segment List[Segments Left]
-   3.      ELSE continue IPv6 processing of the packet
-                End of processing.
-   4.   Forward the packet out
-
-5.  Security Considerations
-
-   This section analyzes the security threat model, the security issues
-   and proposed solutions related to the new Segment Routing Header.
-
-   The Segment Routing Header (SRH) is simply another type of the
-   routing header as described in RFC 2460 [RFC2460] and is:
-
-   o  Added by an SR edge router when entering the segment routing
-      domain or by the originating host itself.  The source host can
-      even be outside the SR domain;
-
-   o  inspected and acted upon when reaching the destination address of
-      the IP header per RFC 2460 [RFC2460].
-
-   Per RFC2460 [RFC2460], routers on the path that simply forward an
-   IPv6 packet (i.e. the IPv6 destination address is none of theirs)
-   will never inspect and process the content of the SRH.  Routers whose
-   one interface IPv6 address equals the destination address field of
-   the IPv6 packet MUST parse the SRH and, if supported and if the local
-   configuration allows it, MUST act accordingly to the SRH content.
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 16]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   According to RFC2460 [RFC2460], the default behavior of a non SR-
-   capable router upon receipt of an IPv6 packet with SRH destined to
-   one of its addresses is to:
-
-   o  ignore the SRH completely if the Segments Left field is 0 and
-      proceed to process the next header in the IPv6 packet;
-
-   o  discard the IPv6 packet if the Segments Left field is greater
-      than 0; it MAY send a Parameter Problem ICMP message back to the
-      Source Address.
-
-5.1.  Threat model
-
-5.1.1.  Source routing threats
-
-   Using an SRH is similar to source routing, therefore it has some
-   well-known security issues as described in RFC4942 [RFC4942] section
-   2.1.1 and RFC5095 [RFC5095]:
-
-   o  amplification attacks: where a packet could be forged in such a
-      way to cause looping among a set of SR-enabled routers causing
-      unnecessary traffic, hence a Denial of Service (DoS) against
-      bandwidth;
-
-   o  reflection attack: where a hacker could force an intermediate node
-      to appear as the immediate attacker, hence hiding the real
-      attacker from naive forensics;
-
-   o  bypass attack: where an intermediate node could be used as a
-      stepping stone (for example in a De-Militarized Zone) to attack
-      another host (for example in the datacenter or any back-end
-      server).
-
-5.1.2.  Applicability of RFC 5095 to SRH
-
-   First of all, the reader must remember this specific part of section
-   1 of RFC5095 [RFC5095], "A side effect is that this also eliminates
-   benign RH0 use-cases; however, such applications may be facilitated
-   by future Routing Header specifications.".  In short, it is not
-   forbidden to create new secure type of Routing Header; for example,
-   RFC 6554 (RPL) [RFC6554] also creates a new Routing Header type for a
-   specific application confined in a single network.
-
-   In the segment routing architecture described in
-   [I-D.ietf-spring-segment-routing] there are basically two kinds of
-   nodes (routers and hosts):
-
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 17]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   o  nodes within the SR domain, which is within one single
-      administrative domain, i.e., where all nodes are trusted anyway
-      else the damage caused by those nodes could be worse than
-      amplification attacks: traffic interception, man-in-the-middle
-      attacks, more severe DoS by dropping packets, and so on.
-
-   o  nodes outside of the SR domain, which is outside of the
-      administrative segment routing domain hence they cannot be trusted
-      because there is no physical security for those nodes, i.e., they
-      can be replaced by hostile nodes or can be coerced in wrong
-      behaviors.
-
-   The main use case for SR consists of the single administrative domain
-   where only trusted nodes with SR enabled and configured participate
-   in SR: this is the same model as in RFC6554 [RFC6554].  All non-
-   trusted nodes do not participate as either SR processing is not
-   enabled by default or because they only process SRH from nodes within
-   their domain.
-
-   Moreover, all SR nodes ignore SRH created by outsiders based on
-   topology information (received on a peering or internal interface) or
-   on presence and validity of the HMAC field.  Therefore, if
-   intermediate nodes ONLY act on valid and authorized SRH (such as
-   within a single administrative domain), then there is no security
-   threat similar to RH-0.  Hence, the RFC 5095 [RFC5095] attacks are
-   not applicable.
-
-5.1.3.  Service stealing threat
-
-   As segment routing is used for added-value services, there is also a
-   need to prevent non-participating nodes from using those services;
-   this is called 'service stealing prevention'.
-
-5.1.4.  Topology disclosure
-
-   The SRH may also contain IPv6 addresses of some intermediate SR-
-   nodes in the path towards the destination, this obviously reveals
-   those addresses to the potentially hostile attackers if those
-   attackers are able to intercept packets containing SRH.  On the other
-   hand, if the attacker can do a traceroute whose probes will be
-   forwarded along the SR path, then there is little learned by
-   intercepting the SRH itself.
-
-5.1.5.  ICMP Generation
-
-   Per section 4.4 of RFC2460 [RFC2460], when destination nodes (i.e.
-   where the destination address is one of theirs) receive a Routing
-   Header with unsupported Routing Type, the required behavior is:
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 18]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   o  If Segments Left is zero, the node must ignore the Routing header
-      and proceed to process the next header in the packet.
-
-   o  If Segments Left is non-zero, the node must discard the packet and
-      send an ICMP Parameter Problem, Code 0, message to the packet's
-      Source Address, pointing to the unrecognized Routing Type.
-
-   This required behavior could be used by an attacker to force the
-   generation of ICMP message by any node.  The attacker could send
-   packets with SRH (with non-zero Segments Left) destined to a node not
-   supporting SRH.  Per RFC2460 [RFC2460], the destination node could
-   generate an ICMP message, causing a local CPU utilization and if the
-   source of the offending packet with SRH was spoofed could lead to a
-   reflection attack without any amplification.
-
-   It must be noted that this is a required behavior for any unsupported
-   Routing Type and not limited to SRH packets.  So, it is not specific
-   to SRH and the usual rate limiting for ICMP generation is required
-   anyway for any IPv6 implementation and has been implemented and
-   deployed for many years.
-
-5.2.  Security fields in SRH
-
-   This section summarizes the use of specific fields in the SRH.  They
-   are based on a key-hashed message authentication code (HMAC).
-
-   The security-related fields in the SRH are instantiated by the HMAC
-   TLV, containing:
-
-   o  HMAC Key-id, 32 bits wide;
-
-   o  HMAC, 256 bits wide (optional, exists only if HMAC Key-id is not
-      0).
-
-   The HMAC field is the output of the HMAC computation (per RFC 2104
-   [RFC2104]) using a pre-shared key identified by HMAC Key-id and of
-   the text which consists of the concatenation of:
-
-   o  the source IPv6 address;
-
-   o  First Segment field;
-
-   o  an octet of bit flags;
-
-   o  HMAC Key-id;
-
-   o  all addresses in the Segment List.
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 19]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   The purpose of the HMAC TLV is to verify the validity, the integrity
-   and the authorization of the SRH itself.  If an outsider of the SR
-   domain does not have access to a current pre-shared secret, then it
-   cannot compute the right HMAC field and the first SR router on the
-   path processing the SRH and configured to check the validity of the
-   HMAC will simply reject the packet.
-
-   The HMAC TLV is located at the end of the SRH simply because only the
-   router on the ingress of the SR domain needs to process it, then all
-   other SR nodes can ignore it (based on local policy) because they
-   trust the upstream router.  This is to speed up forwarding operations
-   because SR routers which do not validate the SRH do not need to parse
-   the SRH until the end.
-
-   The HMAC Key-id field allows for the simultaneous existence of
-   several hash algorithms (SHA-256, SHA3-256 ... or future ones) as
-   well as pre-shared keys.  The HMAC Key-id field is opaque, i.e., it
-   has neither syntax nor semantic except as an index to the right
-   combination of pre-shared key and hash algorithm and except that a
-   value of 0 means that there is no HMAC field.  Having an HMAC Key-id
-   field allows for pre-shared key roll-over when two pre-shared keys
-   are supported for a while when all SR nodes converged to a fresher
-   pre-shared key.  It could also allow for interoperation among
-   different SR domains if allowed by local policy and assuming a
-   collision-free HMAC Key Id allocation.
-
-   When a specific SRH is linked to a time-related service (such as
-   turbo-QoS for a 1-hour period) where the DA, Segment ID (SID) are
-   identical, then it is important to refresh the shared-secret
-   frequently as the HMAC validity period expires only when the HMAC
-   Key-id and its associated shared-secret expires.
-
-5.2.1.  Selecting a hash algorithm
-
-   The HMAC field in the HMAC TLV is 256 bit wide.  Therefore, the HMAC
-   MUST be based on a hash function whose output is at least 256 bits.
-   If the output of the hash function is 256, then this output is simply
-   inserted in the HMAC field.  If the output of the hash function is
-   larger than 256 bits, then the output value is truncated to 256 by
-   taking the least-significant 256 bits and inserting them in the HMAC
-   field.
-
-   SRH implementations can support multiple hash functions but MUST
-   implement SHA-2 [FIPS180-4] in its SHA-256 variant.
-
-   NOTE: SHA-1 is currently used by some early implementations used for
-   quick interoperations testing, the 160-bit hash value must then be
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 20]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   right-hand padded with 96 bits set to 0.  The authors understand that
-   this is not secure but is ok for limited tests.
-
-5.2.2.  Performance impact of HMAC
-
-   While adding an HMAC to each and every SR packet increases the
-   security, it has a performance impact.  Nevertheless, it must be
-   noted that:
-
-   o  the HMAC field is used only when SRH is added by a device (such as
-      a home set-up box) which is outside of the segment routing domain.
-      If the SRH is added by a router in the trusted segment routing
-      domain, then, there is no need for an HMAC field, hence no
-      performance impact.
-
-   o  when present, the HMAC field MUST only be checked and validated by
-      the first router of the segment routing domain, this router is
-      named 'validating SR router'.  Downstream routers may not inspect
-      the HMAC field.
-
-   o  this validating router can also have a cache of <IPv6 header +
-      SRH, HMAC field value> to improve the performance.  It is not the
-      same use case as in IPsec where HMAC value was unique per packet,
-      in SRH, the HMAC value is unique per flow.
-
-   o  Last point, hash functions such as SHA-2 have been optimized for
-      security and performance and there are multiple implementations
-      with good performance.
-
-   With the above points in mind, the performance impact of using HMAC
-   is minimized.
-
-5.2.3.  Pre-shared key management
-
-   The field HMAC Key-id allows for:
-
-   o  key roll-over: when there is a need to change the key (the hash
-      pre-shared secret), then multiple pre-shared keys can be used
-      simultaneously.  The validating routing can have a table of <HMAC
-      Key-id, pre-shared secret> for the currently active and future
-      keys.
-
-   o  different algorithms: by extending the previous table to <HMAC
-      Key-id, hash function, pre-shared secret>, the validating router
-      can also support simultaneously several hash algorithms (see
-      section Section 5.2.1)
-
-   The pre-shared secret distribution can be done:
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 21]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   o  in the configuration of the validating routers, either by static
-      configuration or any SDN oriented approach;
-
-   o  dynamically using a trusted key distribution such as [RFC6407]
-
-   The intent of this document is NOT to define yet-another-key-
-   distribution-protocol.
-
-5.3.  Deployment Models
-
-5.3.1.  Nodes within the SR domain
-
-   An SR domain is defined as a set of interconnected routers where all
-   routers at the perimeter are configured to add and act on SRH.  Some
-   routers inside the SR domain can also act on SRH or simply forward
-   IPv6 packets.
-
-   The routers inside an SR domain can be trusted to generate SRH and to
-   process SRH received on interfaces that are part of the SR domain.
-   These nodes MUST drop all SRH packets received on an interface that
-   is not part of the SR domain and containing an SRH whose HMAC field
-   cannot be validated by local policies.  This includes obviously
-   packet with an SRH generated by a non-cooperative SR domain.
-
-   If the validation fails, then these packets MUST be dropped, ICMP
-   error messages (parameter problem) SHOULD be generated (but rate
-   limited) and SHOULD be logged.
-
-5.3.2.  Nodes outside of the SR domain
-
-   Nodes outside of the SR domain cannot be trusted for physical
-   security; hence, they need to request by some trusted means (outside
-   of the scope of this document) a complete SRH for each new connection
-   (i.e. new destination address).  The received SRH MUST include an
-   HMAC TLV which is computed correctly (see Section 5.2).
-
-   When an outside node sends a packet with an SRH and towards an SR
-   domain ingress node, the packet MUST contain the HMAC TLV (with a
-   Key-id and HMAC fields) and the the destination address MUST be an
-   address of an SR domain ingress node .
-
-   The ingress SR router, i.e., the router with an interface address
-   equals to the destination address, MUST verify the HMAC TLV.
-
-   If the validation is successful, then the packet is simply forwarded
-   as usual for an SR packet.  As long as the packet travels within the
-   SR domain, no further HMAC check needs to be done.  Subsequent
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 22]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   routers in the SR domain MAY verify the HMAC TLV when they process
-   the SRH (i.e. when they are the destination).
-
-   If the validation fails, then this packet MUST be dropped, an ICMP
-   error message (parameter problem) SHOULD be generated (but rate
-   limited) and SHOULD be logged.
-
-5.3.3.  SR path exposure
-
-   As the intermediate SR nodes addresses appears in the SRH, if this
-   SRH is visible to an outsider then he/she could reuse this knowledge
-   to launch an attack on the intermediate SR nodes or get some insider
-   knowledge on the topology.  This is especially applicable when the
-   path between the source node and the first SR domain ingress router
-   is on the public Internet.
-
-   The first remark is to state that 'security by obscurity' is never
-   enough; in other words, the security policy of the SR domain MUST
-   assume that the internal topology and addressing is known by the
-   attacker.  A simple traceroute will also give the same information
-   (with even more information as all intermediate nodes between SID
-   will also be exposed).  IPsec Encapsulating Security Payload
-   [RFC4303] cannot be use to protect the SRH as per RFC4303 the ESP
-   header must appear after any routing header (including SRH).
-
-   To prevent a user to leverage the gained knowledge by intercepting
-   SRH, it it recommended to apply an infrastructure Access Control List
-   (iACL) at the edge of the SR domain.  This iACL will drop all packets
-   from outside the SR-domain whose destination is any address of any
-   router inside the domain.  This security policy should be tuned for
-   local operations.
-
-5.3.4.  Impact of BCP-38
-
-   BCP-38 [RFC2827], also known as "Network Ingress Filtering", checks
-   whether the source address of packets received on an interface is
-   valid for this interface.  The use of loose source routing such as
-   SRH forces packets to follow a path which differs from the expected
-   routing.  Therefore, if BCP-38 was implemented in all routers inside
-   the SR domain, then SR packets could be received by an interface
-   which is not expected one and the packets could be dropped.
-
-   As an SR domain is usually a subset of one administrative domain, and
-   as BCP-38 is only deployed at the ingress routers of this
-   administrative domain and as packets arriving at those ingress
-   routers have been normally forwarded using the normal routing
-   information, then there is no reason why this ingress router should
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 23]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   drop the SRH packet based on BCP-38.  Routers inside the domain
-   commonly do not apply BCP-38; so, this is not a problem.
-
-6.  IANA Considerations
-
-   This document makes the following registrations in the Internet
-   Protocol Version 6 (IPv6) Parameters "Routing Type" registry
-   maintained by IANA:
-
-   Suggested            Description             Reference
-     Value
-   ----------------------------------------------------------
-      4         Segment Routing Header (SRH)    This document
-
-   In addition, this document request IANA to create and maintain a new
-   Registry: "Segment Routing Header Type-Value Objects".  The following
-   code-points are requested from the registry:
-
-   Registry: Segment Routing Header Type-Value Objects
-
-   Suggested         Description            Reference
-     Value
-   -----------------------------------------------------
-      1         Ingress Node TLV          This document
-      2         Egress Node  TLV          This document
-      3         Opaque Container TLV      This document
-      4         Padding TLV               This document
-      5         HMAC TLV                  This document
-
-7.  Manageability Considerations
-
-   TBD
-
-8.  Contributors
-
-   Dave Barach, John Leddy, John Brzozowski, Pierre Francois, Nagendra
-   Kumar, Mark Townsley, Christian Martin, Roberta Maglione, James
-   Connolly, Aloys Augustin contributed to the content of this document.
-
-9.  Acknowledgements
-
-   The authors would like to thank Ole Troan, Bob Hinden, Fred Baker,
-   Brian Carpenter, Alexandru Petrescu and Punit Kumar Jaiswal for their
-   comments to this document.
-
-
-
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 24]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-10.  References
-
-10.1.  Normative References
-
-   [FIPS180-4]
-              National Institute of Standards and Technology, "FIPS
-              180-4 Secure Hash Standard (SHS)", March 2012,
-              <http://csrc.nist.gov/publications/fips/fips180-4/
-              fips-180-4.pdf>.
-
-   [RFC2119]  Bradner, S., "Key words for use in RFCs to Indicate
-              Requirement Levels", BCP 14, RFC 2119,
-              DOI 10.17487/RFC2119, March 1997,
-              <http://www.rfc-editor.org/info/rfc2119>.
-
-   [RFC2460]  Deering, S. and R. Hinden, "Internet Protocol, Version 6
-              (IPv6) Specification", RFC 2460, DOI 10.17487/RFC2460,
-              December 1998, <http://www.rfc-editor.org/info/rfc2460>.
-
-   [RFC4303]  Kent, S., "IP Encapsulating Security Payload (ESP)",
-              RFC 4303, DOI 10.17487/RFC4303, December 2005,
-              <http://www.rfc-editor.org/info/rfc4303>.
-
-   [RFC5095]  Abley, J., Savola, P., and G. Neville-Neil, "Deprecation
-              of Type 0 Routing Headers in IPv6", RFC 5095,
-              DOI 10.17487/RFC5095, December 2007,
-              <http://www.rfc-editor.org/info/rfc5095>.
-
-   [RFC6407]  Weis, B., Rowles, S., and T. Hardjono, "The Group Domain
-              of Interpretation", RFC 6407, DOI 10.17487/RFC6407,
-              October 2011, <http://www.rfc-editor.org/info/rfc6407>.
-
-10.2.  Informative References
-
-   [I-D.ietf-isis-segment-routing-extensions]
-              Previdi, S., Filsfils, C., Bashandy, A., Gredler, H.,
-              Litkowski, S., Decraene, B., and j. jefftant@gmail.com,
-              "IS-IS Extensions for Segment Routing", draft-ietf-isis-
-              segment-routing-extensions-09 (work in progress), October
-              2016.
-
-   [I-D.ietf-ospf-ospfv3-segment-routing-extensions]
-              Psenak, P., Previdi, S., Filsfils, C., Gredler, H.,
-              Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3
-              Extensions for Segment Routing", draft-ietf-ospf-ospfv3-
-              segment-routing-extensions-07 (work in progress), October
-              2016.
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 25]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   [I-D.ietf-spring-ipv6-use-cases]
-              Brzozowski, J., Leddy, J., Townsley, W., Filsfils, C., and
-              R. Maglione, "IPv6 SPRING Use Cases", draft-ietf-spring-
-              ipv6-use-cases-08 (work in progress), January 2017.
-
-   [I-D.ietf-spring-resiliency-use-cases]
-              Filsfils, C., Previdi, S., Decraene, B., and R. Shakir,
-              "Resiliency use cases in SPRING networks", draft-ietf-
-              spring-resiliency-use-cases-08 (work in progress), October
-              2016.
-
-   [I-D.ietf-spring-segment-routing]
-              Filsfils, C., Previdi, S., Decraene, B., Litkowski, S.,
-              and R. Shakir, "Segment Routing Architecture", draft-ietf-
-              spring-segment-routing-10 (work in progress), November
-              2016.
-
-   [I-D.ietf-spring-segment-routing-mpls]
-              Filsfils, C., Previdi, S., Bashandy, A., Decraene, B.,
-              Litkowski, S., Horneffer, M., Shakir, R.,
-              jefftant@gmail.com, j., and E. Crabbe, "Segment Routing
-              with MPLS data plane", draft-ietf-spring-segment-routing-
-              mpls-06 (work in progress), January 2017.
-
-   [RFC1940]  Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D.
-              Zappala, "Source Demand Routing: Packet Format and
-              Forwarding Specification (Version 1)", RFC 1940,
-              DOI 10.17487/RFC1940, May 1996,
-              <http://www.rfc-editor.org/info/rfc1940>.
-
-   [RFC2104]  Krawczyk, H., Bellare, M., and R. Canetti, "HMAC: Keyed-
-              Hashing for Message Authentication", RFC 2104,
-              DOI 10.17487/RFC2104, February 1997,
-              <http://www.rfc-editor.org/info/rfc2104>.
-
-   [RFC2827]  Ferguson, P. and D. Senie, "Network Ingress Filtering:
-              Defeating Denial of Service Attacks which employ IP Source
-              Address Spoofing", BCP 38, RFC 2827, DOI 10.17487/RFC2827,
-              May 2000, <http://www.rfc-editor.org/info/rfc2827>.
-
-   [RFC4942]  Davies, E., Krishnan, S., and P. Savola, "IPv6 Transition/
-              Co-existence Security Considerations", RFC 4942,
-              DOI 10.17487/RFC4942, September 2007,
-              <http://www.rfc-editor.org/info/rfc4942>.
-
-
-
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 26]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   [RFC6554]  Hui, J., Vasseur, JP., Culler, D., and V. Manral, "An IPv6
-              Routing Header for Source Routes with the Routing Protocol
-              for Low-Power and Lossy Networks (RPL)", RFC 6554,
-              DOI 10.17487/RFC6554, March 2012,
-              <http://www.rfc-editor.org/info/rfc6554>.
-
-   [RFC7855]  Previdi, S., Ed., Filsfils, C., Ed., Decraene, B.,
-              Litkowski, S., Horneffer, M., and R. Shakir, "Source
-              Packet Routing in Networking (SPRING) Problem Statement
-              and Requirements", RFC 7855, DOI 10.17487/RFC7855, May
-              2016, <http://www.rfc-editor.org/info/rfc7855>.
-
-Authors' Addresses
-
-   Stefano Previdi (editor)
-   Cisco Systems, Inc.
-   Via Del Serafico, 200
-   Rome  00142
-   Italy
-
-   Email: sprevidi@cisco.com
-
-
-   Clarence Filsfils
-   Cisco Systems, Inc.
-   Brussels
-   BE
-
-   Email: cfilsfil@cisco.com
-
-
-   Brian Field
-   Comcast
-   4100 East Dry Creek Road
-   Centennial, CO  80122
-   US
-
-   Email: Brian_Field@cable.comcast.com
-
-
-   Ida Leung
-   Rogers Communications
-   8200 Dixie Road
-   Brampton, ON  L6T 0C1
-   CA
-
-   Email: Ida.Leung@rci.rogers.com
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 27]
- 
-Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
-
-
-   Jen Linkova
-   Google
-   1600 Amphitheatre Parkway
-   Mountain View, CA 94043
-   US
-
-   Email: furry@google.com
-
-
-   Ebben Aries
-   Facebook
-   US
-
-   Email: exa@fb.com
-
-
-   Tomoya Kosugi
-   NTT
-   3-9-11, Midori-Cho Musashino-Shi,
-   Tokyo  180-8585
-   JP
-
-   Email: kosugi.tomoya@lab.ntt.co.jp
-
-
-   Eric Vyncke
-   Cisco Systems, Inc.
-   De Kleetlaann 6A
-   Diegem  1831
-   Belgium
-
-   Email: evyncke@cisco.com
-
-
-   David Lebrun
-   Universite Catholique de Louvain
-   Place Ste Barbe, 2
-   Louvain-la-Neuve, 1348
-   Belgium
-
-   Email: david.lebrun@uclouvain.be
-
-
-
-
-
-
-
-
-
-
-Previdi, et al.          Expires August 5, 2017                [Page 28]
\ No newline at end of file
diff --git a/src/vnet/sr/sr.api b/src/vnet/sr/sr.api
deleted file mode 100644
index 9e900741..00000000
--- a/src/vnet/sr/sr.api
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2015-2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/** \brief IPv6 SR LocalSID add/del request
-    @param client_index - opaque cookie to identify the sender
-    @param context - sender context, to match reply w/ request
-    @param is_del Boolean of whether its a delete instruction
-    @param localsid_addr IPv6 address of the localsid
-    @param end_psp Boolean of whether decapsulation is allowed in this function
-    @param behavior Type of behavior (function) for this localsid
-    @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table.
-    @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
-    @param fib_table  FIB table in which we should install the localsid entry
-    @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
-*/
-autoreply define sr_localsid_add_del
-{
-  u32 client_index;
-  u32 context;
-  u8 is_del;
-  u8 localsid_addr[16];
-  u8 end_psp;
-  u8 behavior;
-  u32 sw_if_index;
-  u32 vlan_index;
-  u32 fib_table;
-  u8 nh_addr[16];
-};
-
-/** \brief IPv6 SR policy add
-    @param client_index - opaque cookie to identify the sender
-    @param context - sender context, to match reply w/ request
-    @param bsid is the bindingSID of the SR Policy
-    @param weight is the weight of the sid list. optional.
-    @param is_encap is the behavior of the SR policy. (0.SRH insert // 1.Encapsulation)
-    @param type is the type of the SR policy. (0.Default // 1.Spray)
-    @param fib_table is the VRF where to install the FIB entry for the BSID
-    @param segments is a vector of IPv6 address composing the segment list
-*/
-autoreply define sr_policy_add
-{
-  u32 client_index;
-  u32 context;
-  u8 bsid_addr[16];
-  u32 weight;
-  u8 is_encap;
-  u8 type;
-  u32 fib_table;
-  u8 n_segments;
-  u8 segments[0];
-};
-
-/** \brief IPv6 SR policy modification
-    @param client_index - opaque cookie to identify the sender
-    @param context - sender context, to match reply w/ request
-    @param bsid is the bindingSID of the SR Policy
-    @param sr_policy_index is the index of the SR policy
-    @param fib_table is the VRF where to install the FIB entry for the BSID
-    @param operation is the operation to perform (among the top ones)
-    @param segments is a vector of IPv6 address composing the segment list
-    @param sl_index is the index of the Segment List to modify/delete
-    @param weight is the weight of the sid list. optional.
-    @param is_encap Mode. Encapsulation or SRH insertion.
-*/
-autoreply define sr_policy_mod
-{
-  u32 client_index;
-  u32 context;
-  u8 bsid_addr[16];
-  u32 sr_policy_index;
-  u32 fib_table;
-  u8 operation;
-  u32 sl_index;
-  u32 weight;
-  u8 n_segments;
-  u8 segments[0];
-};
-
-/** \brief IPv6 SR policy deletion
-    @param client_index - opaque cookie to identify the sender
-    @param context - sender context, to match reply w/ request
-    @param bsid is the bindingSID of the SR Policy
-    @param index is the index of the SR policy
-*/
-autoreply define sr_policy_del
-{
-  u32 client_index;
-  u32 context;
-  u8 bsid_addr[16];
-  u32 sr_policy_index;
-};
-
-/** \brief IPv6 SR steering add/del
-    @param client_index - opaque cookie to identify the sender
-    @param context - sender context, to match reply w/ request
-    @param is_del
-    @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index)
-    @param sr_policy is the index of the SR Policy (alt to bsid)
-    @param table_id is the VRF where to install the FIB entry for the BSID
-    @param prefix is the IPv4/v6 address for L3 traffic type
-    @param mask_width is the mask for L3 traffic type
-    @param sw_if_index is the incoming interface for L2 traffic
-    @param traffic_type describes the type of traffic
-*/
-autoreply define sr_steering_add_del
-{
-  u32 client_index;
-  u32 context;
-  u8 is_del;
-  u8 bsid_addr[16];
-  u32 sr_policy_index;
-  u32 table_id;
-  u8 prefix_addr[16];
-  u32 mask_width;
-  u32 sw_if_index;
-  u8 traffic_type;
-};
-
-/** \brief Dump the list of SR LocalSIDs
-    @param client_index - opaque cookie to identify the sender
-    @param context - sender context, to match reply w/ request
-*/
-/**define sr_localsids_dump
-{
-  u32 client_index;
-  u32 context;
-};*/
-
-/** \brief Details about a single SR LocalSID
-    @param context - returned sender context, to match reply w/ request
-    @param localsid_addr IPv6 address of the localsid
-    @param behavior Type of behavior (function) for this localsid
-    @param end_psp Boolean of whether decapsulation is allowed in this function
-    @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table.
-    @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
-    @param fib_table  FIB table in which we should install the localsid entry
-    @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
-*/
-/**manual_endian define sr_localsid_details
-{
-  u32 context;
-  u8 localsid_addr[16];
-  u8 behavior;
-  u8 end_psp;
-  u32 sw_if_index;
-  u32 vlan_index;
-  u32 fib_table;
-  u8 nh_addr[16];
-};*/
-
-/*
- * fd.io coding-style-patch-verification: ON
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/sr/sr.c b/src/vnet/sr/sr.c
deleted file mode 100755
index 34344fce..00000000
--- a/src/vnet/sr/sr.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * sr.c: ipv6 segment routing
- *
- * Copyright (c) 2013 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file
- * @brief Segment Routing initialization
- *
- */
-
-#include <vnet/vnet.h>
-#include <vnet/sr/sr.h>
-#include <vnet/fib/ip6_fib.h>
-#include <vnet/mfib/mfib_table.h>
-#include <vnet/dpo/dpo.h>
-#include <vnet/dpo/replicate_dpo.h>
-
-ip6_sr_main_t sr_main;
-
-/**
- * @brief no-op lock function.
- * The lifetime of the SR entry is managed by the control plane
- */
-void
-sr_dpo_lock (dpo_id_t * dpo)
-{
-}
-
-/**
- * @brief no-op unlock function.
- * The lifetime of the SR entry is managed by the control plane
- */
-void
-sr_dpo_unlock (dpo_id_t * dpo)
-{
-}
-
-/*
-* fd.io coding-style-patch-verification: ON
-*
-* Local Variables:
-* eval: (c-set-style "gnu")
-* End:
-*/
diff --git a/src/vnet/sr/sr.h b/src/vnet/sr/sr.h
deleted file mode 100755
index b832c0fc..00000000
--- a/src/vnet/sr/sr.h
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file
- * @brief Segment Routing data structures definitions
- *
- */
-
-#ifndef included_vnet_sr_h
-#define included_vnet_sr_h
-
-#include <vnet/vnet.h>
-#include <vnet/sr/sr_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/ethernet/ethernet.h>
-
-#include <stdlib.h>
-#include <string.h>
-
-#define IPv6_DEFAULT_HEADER_LENGTH 40
-#define IPv6_DEFAULT_HOP_LIMIT 64
-#define IPv6_DEFAULT_MAX_MASK_WIDTH 128
-
-#define SR_BEHAVIOR_END 1
-#define SR_BEHAVIOR_X 2
-#define SR_BEHAVIOR_D_FIRST 3	/* Unused. Separator in between regular and D */
-#define SR_BEHAVIOR_DX2 4
-#define SR_BEHAVIOR_DX6 5
-#define SR_BEHAVIOR_DX4 6
-#define SR_BEHAVIOR_DT6 7
-#define SR_BEHAVIOR_DT4 8
-#define SR_BEHAVIOR_LAST 9	/* Must always be the last one */
-
-#define SR_STEER_L2 2
-#define SR_STEER_IPV4 4
-#define SR_STEER_IPV6 6
-
-#define SR_FUNCTION_SIZE 4
-#define SR_ARGUMENT_SIZE 4
-
-#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1
-
-/**
- * @brief SR Segment List (SID list)
- */
-typedef struct
-{
-  ip6_address_t *segments;		/**< SIDs (key) */
-
-  u32 weight;						/**< SID list weight (wECMP / UCMP) */
-
-  u8 *rewrite;					/**< Precomputed rewrite header */
-  u8 *rewrite_bsid;				/**< Precomputed rewrite header for bindingSID */
-
-  dpo_id_t bsid_dpo;				/**< DPO for Encaps/Insert for BSID */
-  dpo_id_t ip6_dpo;				/**< DPO for Encaps/Insert IPv6 */
-  dpo_id_t ip4_dpo;				/**< DPO for Encaps IPv6 */
-} ip6_sr_sl_t;
-
-/* SR policy types */
-#define SR_POLICY_TYPE_DEFAULT 0
-#define SR_POLICY_TYPE_SPRAY 1
-/**
- * @brief SR Policy
- */
-typedef struct
-{
-  u32 *segments_lists;		/**< SID lists indexes (vector) */
-
-  ip6_address_t bsid;			/**< BindingSID (key) */
-
-  u8 type;					/**< Type (default is 0) */
-  /* SR Policy specific DPO                                                                               */
-  /* IF Type = DEFAULT Then Load Balancer DPO among SID lists     */
-  /* IF Type = SPRAY then Spray DPO with all SID lists                    */
-  dpo_id_t bsid_dpo;			/**< SR Policy specific DPO - BSID */
-  dpo_id_t ip4_dpo;			/**< SR Policy specific DPO - IPv6 */
-  dpo_id_t ip6_dpo;			/**< SR Policy specific DPO - IPv4 */
-
-  u32 fib_table;			/**< FIB table */
-
-  u8 is_encap;				/**< Mode (0 is SRH insert, 1 Encaps) */
-} ip6_sr_policy_t;
-
-/**
- * @brief SR LocalSID
- */
-typedef struct
-{
-  ip6_address_t localsid;		/**< LocalSID IPv6 address */
-
-  char end_psp;					/**< Combined with End.PSP? */
-
-  u16 behavior;					/**< Behavior associated to this localsid */
-
-  union
-  {
-    u32 sw_if_index;				/**< xconnect only */
-    u32 vrf_index;				/**< vrf only */
-  };
-
-  u32 fib_table;				/**< FIB table where localsid is registered */
-
-  u32 vlan_index;				/**< VLAN tag (not an index) */
-
-  ip46_address_t next_hop;		/**< Next_hop for xconnect usage only */
-
-  u32 nh_adj;						/**< Next_adj for xconnect usage only */
-
-  void *plugin_mem;				/**< Memory to be used by the plugin callback functions */
-} ip6_sr_localsid_t;
-
-typedef int (sr_plugin_callback_t) (ip6_sr_localsid_t * localsid);
-
-/**
- * @brief SR LocalSID behavior registration
- */
-typedef struct
-{
-  u16 sr_localsid_function_number;			/**< SR LocalSID plugin function (>SR_BEHAVIOR_LAST) */
-
-  u8 *function_name;							/**< Function name. (key). */
-
-  u8 *keyword_str;							/**< Behavior keyword (i.e. End.X) */
-
-  u8 *def_str;								/**< Behavior definition (i.e. Endpoint with cross-connect) */
-
-  u8 *params_str;							/**< Behavior parameters (i.e. <oif> <IP46next_hop>) */
-
-  dpo_type_t dpo;							/**< DPO type registration */
-
-  format_function_t *ls_format;				/**< LocalSID format function */
-
-  unformat_function_t *ls_unformat;			/**< LocalSID unformat function */
-
-  sr_plugin_callback_t *creation;			/**< Function within plugin that will be called after localsid creation*/
-
-  sr_plugin_callback_t *removal;			/**< Function within plugin that will be called before localsid removal */
-} sr_localsid_fn_registration_t;
-
-/**
- * @brief Steering db key
- *
- * L3 is IPv4/IPv6 + mask
- * L2 is sf_if_index + vlan
- */
-typedef struct
-{
-  union
-  {
-    struct
-    {
-      ip46_address_t prefix;			/**< IP address of the prefix */
-      u32 mask_width;					/**< Mask width of the prefix */
-      u32 fib_table;					/**< VRF of the prefix */
-    } l3;
-    struct
-    {
-      u32 sw_if_index;					/**< Incoming software interface */
-    } l2;
-  };
-  u8 traffic_type;					/**< Traffic type (IPv4, IPv6, L2) */
-  u8 padding[3];
-} sr_steering_key_t;
-
-typedef struct
-{
-  sr_steering_key_t classify;		/**< Traffic classification */
-  u32 sr_policy;					/**< SR Policy index */
-} ip6_sr_steering_policy_t;
-
-/**
- * @brief Segment Routing main datastructure
- */
-typedef struct
-{
-  /* L2-input -> SR rewrite next index */
-  u32 l2_sr_policy_rewrite_index;
-
-  /* SR SID lists */
-  ip6_sr_sl_t *sid_lists;
-
-  /* SR policies */
-  ip6_sr_policy_t *sr_policies;
-
-  /* Hash table mapping BindingSID to SR policy */
-  mhash_t sr_policies_index_hash;
-
-  /* Pool of SR localsid instances */
-  ip6_sr_localsid_t *localsids;
-
-  /* Hash table mapping LOC:FUNC to SR LocalSID instance */
-  mhash_t sr_localsids_index_hash;
-
-  /* Pool of SR steer policies instances */
-  ip6_sr_steering_policy_t *steer_policies;
-
-  /* Hash table mapping steering rules to SR steer instance */
-  mhash_t sr_steer_policies_hash;
-
-  /* L2 steering ifaces - sr_policies */
-  u32 *sw_iface_sr_policies;
-
-  /* Spray DPO */
-  dpo_type_t sr_pr_spray_dpo_type;
-
-  /* Plugin functions */
-  sr_localsid_fn_registration_t *plugin_functions;
-
-  /* Find plugin function by name */
-  uword *plugin_functions_by_key;
-
-  /* Counters */
-  vlib_combined_counter_main_t sr_ls_valid_counters;
-  vlib_combined_counter_main_t sr_ls_invalid_counters;
-
-  /* SR Policies FIBs */
-  u32 fib_table_ip6;
-  u32 fib_table_ip4;
-
-  /* convenience */
-  vlib_main_t *vlib_main;
-  vnet_main_t *vnet_main;
-} ip6_sr_main_t;
-
-ip6_sr_main_t sr_main;
-
-extern vlib_node_registration_t sr_policy_rewrite_encaps_node;
-extern vlib_node_registration_t sr_policy_rewrite_insert_node;
-extern vlib_node_registration_t sr_localsid_node;
-extern vlib_node_registration_t sr_localsid_d_node;
-
-void sr_dpo_lock (dpo_id_t * dpo);
-void sr_dpo_unlock (dpo_id_t * dpo);
-
-int sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name,
-				   u8 * keyword_str, u8 * def_str,
-				   u8 * params_str, dpo_type_t * dpo,
-				   format_function_t * ls_format,
-				   unformat_function_t * ls_unformat,
-				   sr_plugin_callback_t * creation_fn,
-				   sr_plugin_callback_t * removal_fn);
-
-int
-sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
-	       u32 weight, u8 behavior, u32 fib_table, u8 is_encap);
-int
-sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
-	       u8 operation, ip6_address_t * segments, u32 sl_index,
-	       u32 weight);
-int sr_policy_del (ip6_address_t * bsid, u32 index);
-
-int sr_cli_localsid (char is_del, ip6_address_t * localsid_addr,
-		     char end_psp, u8 behavior, u32 sw_if_index,
-		     u32 vlan_index, u32 fib_table, ip46_address_t * nh_addr,
-		     void *ls_plugin_mem);
-
-int
-sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index,
-		    u32 table_id, ip46_address_t * prefix, u32 mask_width,
-		    u32 sw_if_index, u8 traffic_type);
-
-/**
- * @brief SR rewrite string computation for SRH insertion (inline)
- *
- * @param sl is a vector of IPv6 addresses composing the Segment List
- *
- * @return precomputed rewrite string for SRH insertion
- */
-static inline u8 *
-ip6_sr_compute_rewrite_string_insert (ip6_address_t * sl)
-{
-  ip6_sr_header_t *srh;
-  ip6_address_t *addrp, *this_address;
-  u32 header_length = 0;
-  u8 *rs = NULL;
-
-  header_length = 0;
-  header_length += sizeof (ip6_sr_header_t);
-  header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t);
-
-  vec_validate (rs, header_length - 1);
-
-  srh = (ip6_sr_header_t *) rs;
-  srh->type = ROUTING_HEADER_TYPE_SR;
-  srh->segments_left = vec_len (sl);
-  srh->first_segment = vec_len (sl);
-  srh->length = ((sizeof (ip6_sr_header_t) +
-		  ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1;
-  srh->flags = 0x00;
-  srh->reserved = 0x0000;
-  addrp = srh->segments + vec_len (sl);
-  vec_foreach (this_address, sl)
-  {
-    clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
-    addrp--;
-  }
-  return rs;
-}
-
-
-#endif /* included_vnet_sr_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/sr/sr_api.c b/src/vnet/sr/sr_api.c
deleted file mode 100644
index f4e1c346..00000000
--- a/src/vnet/sr/sr_api.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- *------------------------------------------------------------------
- * sr_api.c - ipv6 segment routing api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vnet/sr/sr.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/feature/feature.h>
-
-#include <vnet/vnet_msg_enum.h>
-
-#define vl_typedefs		/* define message structures */
-#include <vnet/vnet_all_api_h.h>
-#undef vl_typedefs
-
-#define vl_endianfun		/* define message structures */
-#include <vnet/vnet_all_api_h.h>
-#undef vl_endianfun
-
-/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define vl_printfun
-#include <vnet/vnet_all_api_h.h>
-#undef vl_printfun
-
-#include <vlibapi/api_helper_macros.h>
-
-#define foreach_vpe_api_msg                             \
-_(SR_LOCALSID_ADD_DEL, sr_localsid_add_del)             \
-_(SR_POLICY_DEL, sr_policy_del)                         \
-_(SR_STEERING_ADD_DEL, sr_steering_add_del)
-//_(SR_LOCALSIDS, sr_localsids_dump)
-//_(SR_LOCALSID_BEHAVIORS, sr_localsid_behaviors_dump)
-
-static void vl_api_sr_localsid_add_del_t_handler
-  (vl_api_sr_localsid_add_del_t * mp)
-{
-  vl_api_sr_localsid_add_del_reply_t *rmp;
-  int rv = 0;
-/*
- * int sr_cli_localsid (char is_del, ip6_address_t *localsid_addr,
- *  char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, u32 fib_table,
- *  ip46_address_t *nh_addr, void *ls_plugin_mem)
- */
-  rv = sr_cli_localsid (mp->is_del,
-			(ip6_address_t *) & mp->localsid_addr,
-			mp->end_psp,
-			mp->behavior,
-			ntohl (mp->sw_if_index),
-			ntohl (mp->vlan_index),
-			ntohl (mp->fib_table),
-			(ip46_address_t *) & mp->nh_addr, NULL);
-
-  REPLY_MACRO (VL_API_SR_LOCALSID_ADD_DEL_REPLY);
-}
-
-static void
-vl_api_sr_policy_add_t_handler (vl_api_sr_policy_add_t * mp)
-{
-  vl_api_sr_policy_add_reply_t *rmp;
-  ip6_address_t *segments = 0, *seg;
-  ip6_address_t *this_address = (ip6_address_t *) mp->segments;
-
-  int i;
-  for (i = 0; i < mp->n_segments; i++)
-    {
-      vec_add2 (segments, seg, 1);
-      clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address));
-      this_address++;
-    }
-
-/*
- * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
- *                u32 weight, u8 behavior, u32 fib_table, u8 is_encap)
- */
-  int rv = 0;
-  rv = sr_policy_add ((ip6_address_t *) & mp->bsid_addr,
-		      segments,
-		      ntohl (mp->weight),
-		      mp->type, ntohl (mp->fib_table), mp->is_encap);
-
-  REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY);
-}
-
-static void
-vl_api_sr_policy_mod_t_handler (vl_api_sr_policy_mod_t * mp)
-{
-  vl_api_sr_policy_mod_reply_t *rmp;
-
-  ip6_address_t *segments = 0, *seg;
-  ip6_address_t *this_address = (ip6_address_t *) mp->segments;
-
-  int i;
-  for (i = 0; i < mp->n_segments; i++)
-    {
-      vec_add2 (segments, seg, 1);
-      clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address));
-      this_address++;
-    }
-
-  int rv = 0;
-/*
- * int
- * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table,
- *               u8 operation, ip6_address_t *segments, u32 sl_index,
- *               u32 weight, u8 is_encap)
- */
-  rv = sr_policy_mod ((ip6_address_t *) & mp->bsid_addr,
-		      ntohl (mp->sr_policy_index),
-		      ntohl (mp->fib_table),
-		      mp->operation,
-		      segments, ntohl (mp->sl_index), ntohl (mp->weight));
-
-  REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY);
-}
-
-static void
-vl_api_sr_policy_del_t_handler (vl_api_sr_policy_del_t * mp)
-{
-  vl_api_sr_policy_del_reply_t *rmp;
-  int rv = 0;
-/*
- * int
- * sr_policy_del (ip6_address_t *bsid, u32 index)
- */
-  rv = sr_policy_del ((ip6_address_t *) & mp->bsid_addr,
-		      ntohl (mp->sr_policy_index));
-
-  REPLY_MACRO (VL_API_SR_POLICY_DEL_REPLY);
-}
-
-static void vl_api_sr_steering_add_del_t_handler
-  (vl_api_sr_steering_add_del_t * mp)
-{
-  vl_api_sr_steering_add_del_reply_t *rmp;
-  int rv = 0;
-/*
- * int
- * sr_steering_policy(int is_del, ip6_address_t *bsid, u32 sr_policy_index,
- *  u32 table_id, ip46_address_t *prefix, u32 mask_width, u32 sw_if_index,
- *  u8 traffic_type)
- */
-  rv = sr_steering_policy (mp->is_del,
-			   (ip6_address_t *) & mp->bsid_addr,
-			   ntohl (mp->sr_policy_index),
-			   ntohl (mp->table_id),
-			   (ip46_address_t *) & mp->prefix_addr,
-			   ntohl (mp->mask_width),
-			   ntohl (mp->sw_if_index), mp->traffic_type);
-
-  REPLY_MACRO (VL_API_SR_STEERING_ADD_DEL_REPLY);
-}
-
-/*
- * sr_api_hookup
- * Add vpe's API message handlers to the table.
- * vlib has alread mapped shared memory and
- * added the client registration handlers.
- * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
- */
-#define vl_msg_name_crc_list
-#include <vnet/vnet_all_api_h.h>
-#undef vl_msg_name_crc_list
-
-static void
-setup_message_id_table (api_main_t * am)
-{
-#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
-  foreach_vl_msg_name_crc_sr;
-#undef _
-}
-
-static clib_error_t *
-sr_api_hookup (vlib_main_t * vm)
-{
-  api_main_t *am = &api_main;
-
-#define _(N,n)                                                  \
-    vl_msg_api_set_handlers(VL_API_##N, #n,                     \
-                           vl_api_##n##_t_handler,              \
-                           vl_noop_handler,                     \
-                           vl_api_##n##_t_endian,               \
-                           vl_api_##n##_t_print,                \
-                           sizeof(vl_api_##n##_t), 1);
-  foreach_vpe_api_msg;
-#undef _
-
-  /*
-   * Manually register the sr policy add msg, so we trace
-   * enough bytes to capture a typical segment list
-   */
-  vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD,
-			   "sr_policy_add",
-			   vl_api_sr_policy_add_t_handler,
-			   vl_noop_handler,
-			   vl_api_sr_policy_add_t_endian,
-			   vl_api_sr_policy_add_t_print, 256, 1);
-
-  /*
-   * Manually register the sr policy mod msg, so we trace
-   * enough bytes to capture a typical segment list
-   */
-  vl_msg_api_set_handlers (VL_API_SR_POLICY_MOD,
-			   "sr_policy_mod",
-			   vl_api_sr_policy_mod_t_handler,
-			   vl_noop_handler,
-			   vl_api_sr_policy_mod_t_endian,
-			   vl_api_sr_policy_mod_t_print, 256, 1);
-
-  /*
-   * Set up the (msg_name, crc, message-id) table
-   */
-  setup_message_id_table (am);
-
-  return 0;
-}
-
-VLIB_API_INIT_FUNCTION (sr_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/sr/sr_doc.md b/src/vnet/sr/sr_doc.md
deleted file mode 100644
index fd92bdf2..00000000
--- a/src/vnet/sr/sr_doc.md
+++ /dev/null
@@ -1,55 +0,0 @@
-# SRv6: Segment Routing for IPv6    {#srv6_doc}
-
-This is a memo intended to contain documentation of the VPP SRv6 implementation.
-Everything that is not directly obvious should come here.
-For any feedback on content that should be explained please mailto:pcamaril@cisco.com
-
-## Segment Routing
-
-Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application engineered routing as it prepares the networks for new business models where applications can control the network behavior.
-
-Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. It was built for the software-defined networking (SDN) era.
-
-Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements.
-
-Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a local semantic to a segment-routing node or global within a segment-routing network. Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design.
-
-Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
-
-**The implementation of Segment Routing in VPP only covers the IPv6 data plane (SRv6).**
-
-## Segment Routing terminology
-
-* Segment Routing Header (SRH): IPv6 routing extension header of type 'Segment Routing'. (draft-ietf-6man-segment-routing-header-05)
-* SegmentID (SID): is an IPv6 address.
-* Segment List (SL) (SID List): is the sequence of SIDs that the packet will traverse.
-* SR Policy: defines the SRH that will be applied to a packet. A packet steered into an SR policy may either receive the SRH by IPv6 header encapsulation (as recommended in draft-ietf-6man-rfc2460bis) or it could be inserted within an existing IPv6 header. An SR policy is uniquely identified by its Binding SID and associated with a weighted set of Segment Lists. In case several SID lists are defined, traffic steered into the policy is unevenly load-balanced among them according to their respective weights.
-* Local SID: is a SID associated with a processing function on the local node, which may go from advancing to the next SID in the SRH, to complex user-defined behaviors. When a FIB lookup, either in the main FIB or in a specific VRF, returns a match on a local SID, the associated function is performed.
-* BindingSID: a BindingSID is a SID (only one) associated one-one with an SR Policy. If a packet arrives with an IPv6 DA corresponding to a BindingSID, then the SR policy will be applied to such packet.
-
-## SRv6 Features in VPP
-
-The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-srv6-network-programming/">SRv6 Network Programming (*draft-filsfils-spring-srv6-network-programming*)</a> defines the SRv6 architecture.
-
-VPP supports the following SRv6 LocalSID functions: End, End.X, End.DX6, End.DT6, End.DX4, End.DT4, End.DX2, End.B6, End.B6.Encaps.
-
-For further information and how to configure each specific function: @subpage srv6_localsid_doc
-
-
-The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-segment-routing-policy/">Segment Routing Policy (*draft-filsfils-spring-segment-routing-policy*)</a> defines SR Policies.
-
-VPP supports SRv6 Policies with T.Insert and T.Encaps behaviors.
-
-For further information on how to create SR Policies: @subpage srv6_policy_doc
-
-For further information on how to steer traffic into SR Policies: @subpage srv6_steering_doc
-
-## SRv6 LocalSID development framework
-
-One of the *'key'* concepts about SRv6 is network programmability. This is why an SRv6 LocalSID is associated with an specific function. 
-
-However, the trully way to enable network programmability is allowing any developer **easily** create his own SRv6 LocalSID function. That is the reason why we have added some API calls such that any developer can code his own SRv6 LocalSID behaviors as plugins an add them to the running SRv6 code.
-
-The principle is that the developer only codes the behavior -the graph node-. However all the FIB handling, SR LocalSID instantiation and so on are done by the VPP SRv6 code.
-
-For more information please refer to: @subpage srv6_plugin_doc
diff --git a/src/vnet/sr/sr_localsid.c b/src/vnet/sr/sr_localsid.c
deleted file mode 100755
index 32fc5f82..00000000
--- a/src/vnet/sr/sr_localsid.c
+++ /dev/null
@@ -1,1492 +0,0 @@
-/*
- * sr_localsid.c: ipv6 segment routing Endpoint behaviors
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file
- * @brief Processing of packets with a SRH
- *
- * CLI to define new Segment Routing End processing functions.
- * Graph node to support such functions.
- *
- * Each function associates an SRv6 segment (IPv6 address) with an specific
- * Segment Routing function.
- *
- */
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/sr/sr.h>
-#include <vnet/ip/ip.h>
-#include <vnet/sr/sr_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/fib/ip6_fib.h>
-#include <vnet/dpo/dpo.h>
-#include <vnet/adj/adj.h>
-
-#include <vppinfra/error.h>
-#include <vppinfra/elog.h>
-
-/**
- * @brief Dynamically added SR localsid DPO type
- */
-static dpo_type_t sr_localsid_dpo_type;
-static dpo_type_t sr_localsid_d_dpo_type;
-
-/**
- * @brief SR localsid add/del
- *
- * Function to add or delete SR LocalSIDs.
- *
- * @param is_del Boolean of whether its a delete instruction
- * @param localsid_addr IPv6 address of the localsid
- * @param is_decap Boolean of whether decapsulation is allowed in this function
- * @param behavior Type of behavior (function) for this localsid
- * @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table.
- * @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
- * @param fib_table  FIB table in which we should install the localsid entry
- * @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
- *
- * @return 0 on success, error otherwise.
- */
-int
-sr_cli_localsid (char is_del, ip6_address_t * localsid_addr,
-		 char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index,
-		 u32 fib_table, ip46_address_t * nh_addr, void *ls_plugin_mem)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  uword *p;
-  int rv;
-
-  ip6_sr_localsid_t *ls = 0;
-
-  dpo_id_t dpo = DPO_INVALID;
-
-  /* Search for the item */
-  p = mhash_get (&sm->sr_localsids_index_hash, localsid_addr);
-
-  if (p)
-    {
-      if (is_del)
-	{
-	  /* Retrieve localsid */
-	  ls = pool_elt_at_index (sm->localsids, p[0]);
-	  /* Delete FIB entry */
-	  fib_prefix_t pfx = {
-	    .fp_proto = FIB_PROTOCOL_IP6,
-	    .fp_len = 128,
-	    .fp_addr = {
-			.ip6 = *localsid_addr,
-			}
-	  };
-
-	  fib_table_entry_delete (fib_table_find (FIB_PROTOCOL_IP6,
-						  fib_table),
-				  &pfx, FIB_SOURCE_SR);
-
-	  /* In case it is a Xconnect iface remove the (OIF, NHOP) adj */
-	  if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX6
-	      || ls->behavior == SR_BEHAVIOR_DX4)
-	    adj_unlock (ls->nh_adj);
-
-	  if (ls->behavior >= SR_BEHAVIOR_LAST)
-	    {
-	      sr_localsid_fn_registration_t *plugin = 0;
-	      plugin = pool_elt_at_index (sm->plugin_functions,
-					  ls->behavior - SR_BEHAVIOR_LAST);
-
-	      /* Callback plugin removal function */
-	      rv = plugin->removal (ls);
-	    }
-
-	  /* Delete localsid registry */
-	  pool_put (sm->localsids, ls);
-	  mhash_unset (&sm->sr_localsids_index_hash, localsid_addr, NULL);
-	  return 1;
-	}
-      else			/* create with function already existing; complain */
-	return -1;
-    }
-  else
-    /* delete; localsid does not exist; complain */
-  if (is_del)
-    return -2;
-
-  /* Check whether there exists a FIB entry with such address */
-  fib_prefix_t pfx = {
-    .fp_proto = FIB_PROTOCOL_IP6,
-    .fp_len = 128,
-  };
-
-  pfx.fp_addr.as_u64[0] = localsid_addr->as_u64[0];
-  pfx.fp_addr.as_u64[1] = localsid_addr->as_u64[1];
-
-  /* Lookup the FIB index associated to the table id provided */
-  u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6, fib_table);
-  if (fib_index == ~0)
-    return -3;
-
-  /* Lookup the localsid in such FIB table */
-  fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx);
-  if (FIB_NODE_INDEX_INVALID != fei)
-    return -4;			//There is an entry for such address (the localsid addr)
-
-  /* Create a new localsid registry */
-  pool_get (sm->localsids, ls);
-  memset (ls, 0, sizeof (*ls));
-
-  clib_memcpy (&ls->localsid, localsid_addr, sizeof (ip6_address_t));
-  ls->end_psp = end_psp;
-  ls->behavior = behavior;
-  ls->nh_adj = (u32) ~ 0;
-  ls->fib_table = fib_table;
-  switch (behavior)
-    {
-    case SR_BEHAVIOR_END:
-      break;
-    case SR_BEHAVIOR_X:
-      ls->sw_if_index = sw_if_index;
-      clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t));
-      break;
-    case SR_BEHAVIOR_DX4:
-      ls->sw_if_index = sw_if_index;
-      clib_memcpy (&ls->next_hop.ip4, &nh_addr->ip4, sizeof (ip4_address_t));
-      break;
-    case SR_BEHAVIOR_DX6:
-      ls->sw_if_index = sw_if_index;
-      clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t));
-      break;
-    case SR_BEHAVIOR_DT6:
-      ls->vrf_index = sw_if_index;
-      break;
-    case SR_BEHAVIOR_DX2:
-      ls->sw_if_index = sw_if_index;
-      ls->vlan_index = vlan_index;
-      break;
-    }
-
-  /* Figure out the adjacency magic for Xconnect variants */
-  if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX4
-      || ls->behavior == SR_BEHAVIOR_DX6)
-    {
-      adj_index_t nh_adj_index = ADJ_INDEX_INVALID;
-
-      /* Retrieve the adjacency corresponding to the (OIF, next_hop) */
-      if (ls->behavior == SR_BEHAVIOR_DX6 || ls->behavior == SR_BEHAVIOR_X)
-	nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6,
-					    nh_addr, sw_if_index);
-
-      else if (ls->behavior == SR_BEHAVIOR_DX4)
-	nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4,
-					    nh_addr, sw_if_index);
-
-      /* Check for ADJ creation error. If so panic */
-      if (nh_adj_index == ADJ_INDEX_INVALID)
-	{
-	  pool_put (sm->localsids, ls);
-	  return -5;
-	}
-
-      ls->nh_adj = nh_adj_index;
-    }
-
-  /* Set DPO */
-  if (ls->behavior == SR_BEHAVIOR_END || ls->behavior == SR_BEHAVIOR_X)
-    dpo_set (&dpo, sr_localsid_dpo_type, DPO_PROTO_IP6, ls - sm->localsids);
-  else if (ls->behavior > SR_BEHAVIOR_D_FIRST
-	   && ls->behavior < SR_BEHAVIOR_LAST)
-    dpo_set (&dpo, sr_localsid_d_dpo_type, DPO_PROTO_IP6, ls - sm->localsids);
-  else if (ls->behavior >= SR_BEHAVIOR_LAST)
-    {
-      sr_localsid_fn_registration_t *plugin = 0;
-      plugin = pool_elt_at_index (sm->plugin_functions,
-				  ls->behavior - SR_BEHAVIOR_LAST);
-      /* Copy the unformat memory result */
-      ls->plugin_mem = ls_plugin_mem;
-      /* Callback plugin creation function */
-      rv = plugin->creation (ls);
-      if (rv)
-	{
-	  pool_put (sm->localsids, ls);
-	  return -6;
-	}
-      dpo_set (&dpo, plugin->dpo, DPO_PROTO_IP6, ls - sm->localsids);
-    }
-
-  /* Set hash key for searching localsid by address */
-  mhash_set (&sm->sr_localsids_index_hash, localsid_addr, ls - sm->localsids,
-	     NULL);
-
-  fib_table_entry_special_dpo_add (fib_index, &pfx, FIB_SOURCE_SR,
-				   FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
-  dpo_reset (&dpo);
-
-  /* Set counter to zero */
-  vlib_validate_combined_counter (&(sm->sr_ls_valid_counters),
-				  ls - sm->localsids);
-  vlib_validate_combined_counter (&(sm->sr_ls_invalid_counters),
-				  ls - sm->localsids);
-
-  vlib_zero_combined_counter (&(sm->sr_ls_valid_counters),
-			      ls - sm->localsids);
-  vlib_zero_combined_counter (&(sm->sr_ls_invalid_counters),
-			      ls - sm->localsids);
-
-  return 0;
-}
-
-/**
- * @brief SR LocalSID CLI function.
- *
- * @see sr_cli_localsid
- */
-static clib_error_t *
-sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
-			    vlib_cli_command_t * cmd)
-{
-  vnet_main_t *vnm = vnet_get_main ();
-  ip6_sr_main_t *sm = &sr_main;
-  u32 sw_if_index = (u32) ~ 0, vlan_index = (u32) ~ 0, fib_index = 0;
-  int is_del = 0;
-  int end_psp = 0;
-  ip6_address_t resulting_address;
-  ip46_address_t next_hop;
-  char address_set = 0;
-  char behavior = 0;
-  void *ls_plugin_mem = 0;
-
-  int rv;
-
-  memset (&resulting_address, 0, sizeof (ip6_address_t));
-  ip46_address_reset (&next_hop);
-
-  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
-    {
-      if (unformat (input, "del"))
-	is_del = 1;
-      else if (!address_set
-	       && unformat (input, "address %U", unformat_ip6_address,
-			    &resulting_address))
-	address_set = 1;
-      else if (!address_set
-	       && unformat (input, "addr %U", unformat_ip6_address,
-			    &resulting_address))
-	address_set = 1;
-      else if (unformat (input, "fib-table %u", &fib_index));
-      else if (vlan_index == (u32) ~ 0
-	       && unformat (input, "vlan %u", &vlan_index));
-      else if (!behavior && unformat (input, "behavior"))
-	{
-	  if (unformat (input, "end.x %U %U",
-			unformat_vnet_sw_interface, vnm, &sw_if_index,
-			unformat_ip6_address, &next_hop.ip6))
-	    behavior = SR_BEHAVIOR_X;
-	  else if (unformat (input, "end.dx6 %U %U",
-			     unformat_vnet_sw_interface, vnm, &sw_if_index,
-			     unformat_ip6_address, &next_hop.ip6))
-	    behavior = SR_BEHAVIOR_DX6;
-	  else if (unformat (input, "end.dx4 %U %U",
-			     unformat_vnet_sw_interface, vnm, &sw_if_index,
-			     unformat_ip4_address, &next_hop.ip4))
-	    behavior = SR_BEHAVIOR_DX4;
-	  else if (unformat (input, "end.dx2 %U",
-			     unformat_vnet_sw_interface, vnm, &sw_if_index))
-	    behavior = SR_BEHAVIOR_DX2;
-	  else if (unformat (input, "end.dt6 %u", &sw_if_index))
-	    behavior = SR_BEHAVIOR_DT6;
-	  else if (unformat (input, "end.dt4 %u", &sw_if_index))
-	    behavior = SR_BEHAVIOR_DT4;
-	  else
-	    {
-	      /* Loop over all the plugin behavior format functions */
-	      sr_localsid_fn_registration_t *plugin = 0, **vec_plugins = 0;
-	      sr_localsid_fn_registration_t **plugin_it = 0;
-
-	      /* Create a vector out of the plugin pool as recommended */
-        /* *INDENT-OFF* */
-        pool_foreach (plugin, sm->plugin_functions,
-        {
-          vec_add1 (vec_plugins, plugin);
-        });
-        /* *INDENT-ON* */
-
-	      vec_foreach (plugin_it, vec_plugins)
-	      {
-		if (unformat
-		    (input, "%U", (*plugin_it)->ls_unformat, &ls_plugin_mem))
-		  {
-		    behavior = (*plugin_it)->sr_localsid_function_number;
-		    break;
-		  }
-	      }
-	    }
-
-	  if (!behavior)
-	    {
-	      if (unformat (input, "end"))
-		behavior = SR_BEHAVIOR_END;
-	      else
-		break;
-	    }
-	}
-      else if (!end_psp && unformat (input, "psp"))
-	end_psp = 1;
-      else
-	break;
-    }
-
-  if (!behavior && end_psp)
-    behavior = SR_BEHAVIOR_END;
-
-  if (!address_set)
-    return clib_error_return (0,
-			      "Error: SRv6 LocalSID address is mandatory.");
-  if (!is_del && !behavior)
-    return clib_error_return (0,
-			      "Error: SRv6 LocalSID behavior is mandatory.");
-  if (vlan_index != (u32) ~ 0)
-    return clib_error_return (0,
-			      "Error: SRv6 End.DX2 with rewrite VLAN tag not supported by now.");
-  if (end_psp && !(behavior == SR_BEHAVIOR_END || behavior == SR_BEHAVIOR_X))
-    return clib_error_return (0,
-			      "Error: SRv6 PSP only compatible with End and End.X");
-
-  rv = sr_cli_localsid (is_del, &resulting_address, end_psp, behavior,
-			sw_if_index, vlan_index, fib_index, &next_hop,
-			ls_plugin_mem);
-
-  switch (rv)
-    {
-    case 0:
-      break;
-    case 1:
-      return 0;
-    case -1:
-      return clib_error_return (0,
-				"Identical localsid already exists. Requested localsid not created.");
-    case -2:
-      return clib_error_return (0,
-				"The requested localsid could not be deleted. SR localsid not found");
-    case -3:
-      return clib_error_return (0, "FIB table %u does not exist", fib_index);
-    case -4:
-      return clib_error_return (0, "There is already one FIB entry for the"
-				"requested localsid non segment routing related");
-    case -5:
-      return clib_error_return (0,
-				"Could not create ARP/ND entry for such next_hop. Internal error.");
-    case -6:
-      return clib_error_return (0,
-				"Error on the plugin based localsid creation.");
-    default:
-      return clib_error_return (0, "BUG: sr localsid returns %d", rv);
-    }
-  return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (sr_localsid_command, static) = {
-  .path = "sr localsid",
-  .short_help = "sr localsid (del) address XX:XX::YY:YY"
-      "(fib-table 8) behavior STRING",
-  .long_help =
-    "Create SR LocalSID and binds it to a particular behavior\n"
-    "Arguments:\n"
-    "\tlocalSID IPv6_addr(128b)   LocalSID IPv6 address\n"
-    "\t(fib-table X)              Optional. VRF where to install SRv6 localsid\n"
-    "\tbehavior STRING            Specifies the behavior\n"
-    "\n\tBehaviors:\n"
-    "\tEnd\t-> Endpoint.\n"
-    "\tEnd.X\t-> Endpoint with decapsulation and Layer-3 cross-connect.\n"
-    "\t\tParameters: '<iface> <ip6_next_hop>'\n"
-    "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n"
-    "\t\tParameters: '<iface>'\n"
-    "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n"
-    "\t\tParameters: '<iface> <ip6_next_hop>'\n"
-    "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n"
-    "\t\tParameters: '<iface> <ip4_next_hop>'\n"
-    "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n"
-    "\t\tParameters: '<ip6_fib_table>'\n"
-    "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n"
-    "\t\tParameters: '<ip4_fib_table>'\n",
-  .function = sr_cli_localsid_command_fn,
-};
-/* *INDENT-ON* */
-
-/**
- * @brief CLI function to 'show' all SR LocalSIDs on console.
- */
-static clib_error_t *
-show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
-			     vlib_cli_command_t * cmd)
-{
-  vnet_main_t *vnm = vnet_get_main ();
-  ip6_sr_main_t *sm = &sr_main;
-  ip6_sr_localsid_t **localsid_list = 0;
-  ip6_sr_localsid_t *ls;
-  int i;
-
-  vlib_cli_output (vm, "SRv6 - My LocalSID Table:");
-  vlib_cli_output (vm, "=========================");
-  /* *INDENT-OFF* */
-  pool_foreach (ls, sm->localsids, ({ vec_add1 (localsid_list, ls); }));
-  /* *INDENT-ON* */
-  for (i = 0; i < vec_len (localsid_list); i++)
-    {
-      ls = localsid_list[i];
-      switch (ls->behavior)
-	{
-	case SR_BEHAVIOR_END:
-	  vlib_cli_output (vm, "\tAddress: \t%U\n\tBehavior: \tEnd",
-			   format_ip6_address, &ls->localsid);
-	  break;
-	case SR_BEHAVIOR_X:
-	  vlib_cli_output (vm,
-			   "\tAddress: \t%U\n\tBehavior: \tX (Endpoint with Layer-3 cross-connect)"
-			   "\n\tIface:  \t%U\n\tNext hop: \t%U",
-			   format_ip6_address, &ls->localsid,
-			   format_vnet_sw_if_index_name, vnm, ls->sw_if_index,
-			   format_ip6_address, &ls->next_hop.ip6);
-	  break;
-	case SR_BEHAVIOR_DX4:
-	  vlib_cli_output (vm,
-			   "\tAddress: \t%U\n\tBehavior: \tDX4 (Endpoint with decapsulation and IPv4 cross-connect)"
-			   "\n\tIface:  \t%U\n\tNext hop: \t%U",
-			   format_ip6_address, &ls->localsid,
-			   format_vnet_sw_if_index_name, vnm, ls->sw_if_index,
-			   format_ip4_address, &ls->next_hop.ip4);
-	  break;
-	case SR_BEHAVIOR_DX6:
-	  vlib_cli_output (vm,
-			   "\tAddress: \t%U\n\tBehavior: \tDX6 (Endpoint with decapsulation and IPv6 cross-connect)"
-			   "\n\tIface:  \t%U\n\tNext hop: \t%U",
-			   format_ip6_address, &ls->localsid,
-			   format_vnet_sw_if_index_name, vnm, ls->sw_if_index,
-			   format_ip6_address, &ls->next_hop.ip6);
-	  break;
-	case SR_BEHAVIOR_DX2:
-	  if (ls->vlan_index == (u32) ~ 0)
-	    vlib_cli_output (vm,
-			     "\tAddress: \t%U\n\tBehavior: \tDX2 (Endpoint with decapulation and Layer-2 cross-connect)"
-			     "\n\tIface:  \t%U", format_ip6_address,
-			     &ls->localsid, format_vnet_sw_if_index_name, vnm,
-			     ls->sw_if_index);
-	  else
-	    vlib_cli_output (vm,
-			     "Unsupported yet. (DX2 with egress VLAN rewrite)");
-	  break;
-	case SR_BEHAVIOR_DT6:
-	  vlib_cli_output (vm,
-			   "\tAddress: \t%U\n\tBehavior: \tDT6 (Endpoint with decapsulation and specific IPv6 table lookup)"
-			   "\n\tTable: %u", format_ip6_address, &ls->localsid,
-			   ls->fib_table);
-	  break;
-	case SR_BEHAVIOR_DT4:
-	  vlib_cli_output (vm,
-			   "\tAddress: \t%U\n\tBehavior: \tDT4 (Endpoint with decapsulation and specific IPv4 table lookup)"
-			   "\n\tTable: \t%u", format_ip6_address,
-			   &ls->localsid, ls->fib_table);
-	  break;
-	default:
-	  if (ls->behavior >= SR_BEHAVIOR_LAST)
-	    {
-	      sr_localsid_fn_registration_t *plugin =
-		pool_elt_at_index (sm->plugin_functions,
-				   ls->behavior - SR_BEHAVIOR_LAST);
-
-	      vlib_cli_output (vm, "\tAddress: \t%U\n"
-			       "\tBehavior: \t%s (%s)\n\t%U",
-			       format_ip6_address, &ls->localsid,
-			       plugin->keyword_str, plugin->def_str,
-			       plugin->ls_format, ls->plugin_mem);
-	    }
-	  else
-	    //Should never get here...
-	    vlib_cli_output (vm, "Internal error");
-	  break;
-	}
-      if (ls->end_psp)
-	vlib_cli_output (vm, "\tPSP: \tTrue\n");
-
-      /* Print counters */
-      vlib_counter_t valid, invalid;
-      vlib_get_combined_counter (&(sm->sr_ls_valid_counters), i, &valid);
-      vlib_get_combined_counter (&(sm->sr_ls_invalid_counters), i, &invalid);
-      vlib_cli_output (vm, "\tGood traffic: \t[%Ld packets : %Ld bytes]\n",
-		       valid.packets, valid.bytes);
-      vlib_cli_output (vm, "\tBad traffic:  \t[%Ld packets : %Ld bytes]\n",
-		       invalid.packets, invalid.bytes);
-      vlib_cli_output (vm, "--------------------");
-    }
-  return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_sr_localsid_command, static) = {
-  .path = "show sr localsids",
-  .short_help = "show sr localsids",
-  .function = show_sr_localsid_command_fn,
-};
-/* *INDENT-ON* */
-
-/**
- * @brief Function to 'clear' ALL SR localsid counters
- */
-static clib_error_t *
-clear_sr_localsid_counters_command_fn (vlib_main_t * vm,
-				       unformat_input_t * input,
-				       vlib_cli_command_t * cmd)
-{
-  ip6_sr_main_t *sm = &sr_main;
-
-  vlib_clear_combined_counters (&(sm->sr_ls_valid_counters));
-  vlib_clear_combined_counters (&(sm->sr_ls_invalid_counters));
-
-  return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (clear_sr_localsid_counters_command, static) = {
-  .path = "clear sr localsid counters",
-  .short_help = "clear sr localsid counters",
-  .function = clear_sr_localsid_counters_command_fn,
-};
-/* *INDENT-ON* */
-
-/************************ SR LocalSID graphs node ****************************/
-/**
- * @brief SR localsid node trace
- */
-typedef struct
-{
-  u32 localsid_index;
-  ip6_address_t src, out_dst;
-  u8 sr[256];
-  u8 num_segments;
-  u8 segments_left;
-  //With SRv6 header update include flags here.
-} sr_localsid_trace_t;
-
-#define foreach_sr_localsid_error                                   \
-_(NO_INNER_HEADER, "(SR-Error) No inner IP header")                 \
-_(NO_MORE_SEGMENTS, "(SR-Error) No more segments")                  \
-_(NO_SRH, "(SR-Error) No SR header")                                \
-_(NO_PSP, "(SR-Error) PSP Not available (segments left > 0)")       \
-_(NOT_LS, "(SR-Error) Decaps not available (segments left > 0)")    \
-_(L2, "(SR-Error) SRv6 decapsulated a L2 frame without dest")
-
-typedef enum
-{
-#define _(sym,str) SR_LOCALSID_ERROR_##sym,
-  foreach_sr_localsid_error
-#undef _
-    SR_LOCALSID_N_ERROR,
-} sr_localsid_error_t;
-
-static char *sr_localsid_error_strings[] = {
-#define _(sym,string) string,
-  foreach_sr_localsid_error
-#undef _
-};
-
-#define foreach_sr_localsid_next        \
-_(ERROR, "error-drop")                  \
-_(IP6_LOOKUP, "ip6-lookup")             \
-_(IP4_LOOKUP, "ip4-lookup")             \
-_(IP6_REWRITE, "ip6-rewrite")           \
-_(IP4_REWRITE, "ip4-rewrite")           \
-_(INTERFACE_OUTPUT, "interface-output")
-
-typedef enum
-{
-#define _(s,n) SR_LOCALSID_NEXT_##s,
-  foreach_sr_localsid_next
-#undef _
-    SR_LOCALSID_N_NEXT,
-} sr_localsid_next_t;
-
-/**
- * @brief SR LocalSID graph node trace function
- *
- * @see sr_localsid
- */
-u8 *
-format_sr_localsid_trace (u8 * s, va_list * args)
-{
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  ip6_sr_main_t *sm = &sr_main;
-  sr_localsid_trace_t *t = va_arg (*args, sr_localsid_trace_t *);
-
-  ip6_sr_localsid_t *ls =
-    pool_elt_at_index (sm->localsids, t->localsid_index);
-
-  s =
-    format (s, "SR-LOCALSID:\n\tLocalsid: %U\n", format_ip6_address,
-	    &ls->localsid);
-  switch (ls->behavior)
-    {
-    case SR_BEHAVIOR_END:
-      s = format (s, "\tBehavior: End\n");
-      break;
-    case SR_BEHAVIOR_DX6:
-      s = format (s, "\tBehavior: Decapsulation with IPv6 L3 xconnect\n");
-      break;
-    case SR_BEHAVIOR_DX4:
-      s = format (s, "\tBehavior: Decapsulation with IPv4 L3 xconnect\n");
-      break;
-    case SR_BEHAVIOR_X:
-      s = format (s, "\tBehavior: IPv6 L3 xconnect\n");
-      break;
-    case SR_BEHAVIOR_DT6:
-      s = format (s, "\tBehavior: Decapsulation with IPv6 Table lookup\n");
-      break;
-    case SR_BEHAVIOR_DT4:
-      s = format (s, "\tBehavior: Decapsulation with IPv4 Table lookup\n");
-      break;
-    case SR_BEHAVIOR_DX2:
-      s = format (s, "\tBehavior: Decapsulation with L2 xconnect\n");
-      break;
-    default:
-      s = format (s, "\tBehavior: defined in plugin\n");	//TODO
-      break;
-    }
-  if (t->num_segments != 0xFF)
-    {
-      if (t->num_segments > 0)
-	{
-	  s = format (s, "\tSegments left: %d\n", t->num_segments);
-	  s = format (s, "\tSID list: [in ietf order]");
-	  int i = 0;
-	  for (i = 0; i < t->num_segments; i++)
-	    {
-	      s = format (s, "\n\t-> %U", format_ip6_address,
-			  (ip6_address_t *) & t->sr[i *
-						    sizeof (ip6_address_t)]);
-	    }
-	}
-    }
-  return s;
-}
-
-/**
- * @brief Function doing End processing.
- */
-static_always_inline void
-end_srh_processing (vlib_node_runtime_t * node,
-		    vlib_buffer_t * b0,
-		    ip6_header_t * ip0,
-		    ip6_sr_header_t * sr0,
-		    ip6_sr_localsid_t * ls0, u32 * next0)
-{
-  ip6_address_t *new_dst0;
-
-  if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR))
-    {
-      if (PREDICT_TRUE (sr0->segments_left != 0))
-	{
-	  sr0->segments_left -= 1;
-	  new_dst0 = (ip6_address_t *) (sr0->segments);
-	  new_dst0 += sr0->segments_left;
-	  ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
-	  ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
-
-	  if (ls0->behavior == SR_BEHAVIOR_X)
-	    {
-	      vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
-	      *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
-	    }
-	}
-      else
-	{
-	  *next0 = SR_LOCALSID_NEXT_ERROR;
-	  b0->error = node->errors[SR_LOCALSID_ERROR_NO_MORE_SEGMENTS];
-	}
-    }
-  else
-    {
-      /* Error. Routing header of type != SR */
-      *next0 = SR_LOCALSID_NEXT_ERROR;
-      b0->error = node->errors[SR_LOCALSID_ERROR_NO_SRH];
-    }
-}
-
-/*
- * @brief Function doing SRH processing for D* variants
- */
-//FixME. I must crosscheck that next_proto matches the localsid
-static_always_inline void
-end_decaps_srh_processing (vlib_node_runtime_t * node,
-			   vlib_buffer_t * b0,
-			   ip6_header_t * ip0,
-			   ip6_sr_header_t * sr0,
-			   ip6_sr_localsid_t * ls0, u32 * next0)
-{
-  /* Compute the size of the IPv6 header with all Ext. headers */
-  u8 next_proto;
-  ip6_ext_header_t *next_ext_header;
-  u16 total_size = 0;
-
-  next_proto = ip0->protocol;
-  next_ext_header = (void *) (ip0 + 1);
-  total_size = sizeof (ip6_header_t);
-  while (ip6_ext_hdr (next_proto))
-    {
-      total_size += ip6_ext_header_len (next_ext_header);
-      next_proto = next_ext_header->next_hdr;
-      next_ext_header = ip6_ext_next_header (next_ext_header);
-    }
-
-  /* Ensure this is the last segment. Otherwise drop. */
-  if (sr0 && sr0->segments_left != 0)
-    {
-      *next0 = SR_LOCALSID_NEXT_ERROR;
-      b0->error = node->errors[SR_LOCALSID_ERROR_NOT_LS];
-      return;
-    }
-
-  switch (next_proto)
-    {
-    case IP_PROTOCOL_IPV6:
-      /* Encap-End IPv6. Pop outer IPv6 header. */
-      if (ls0->behavior == SR_BEHAVIOR_DX6)
-	{
-	  vlib_buffer_advance (b0, total_size);
-	  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
-	  *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
-	  return;
-	}
-      else if (ls0->behavior == SR_BEHAVIOR_DT6)
-	{
-	  vlib_buffer_advance (b0, total_size);
-	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table;
-	  return;
-	}
-      break;
-    case IP_PROTOCOL_IP_IN_IP:
-      /* Encap-End IPv4. Pop outer IPv6 header */
-      if (ls0->behavior == SR_BEHAVIOR_DX4)
-	{
-	  vlib_buffer_advance (b0, total_size);
-	  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
-	  *next0 = SR_LOCALSID_NEXT_IP4_REWRITE;
-	  return;
-	}
-      else if (ls0->behavior == SR_BEHAVIOR_DT4)
-	{
-	  vlib_buffer_advance (b0, total_size);
-	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table;
-	  *next0 = SR_LOCALSID_NEXT_IP4_LOOKUP;
-	  return;
-	}
-      break;
-    case IP_PROTOCOL_IP6_NONXT:
-      /* L2 encaps */
-      if (ls0->behavior == SR_BEHAVIOR_DX2)
-	{
-	  vlib_buffer_advance (b0, total_size);
-	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->sw_if_index;
-	  *next0 = SR_LOCALSID_NEXT_INTERFACE_OUTPUT;
-	  return;
-	}
-      break;
-    }
-  *next0 = SR_LOCALSID_NEXT_ERROR;
-  b0->error = node->errors[SR_LOCALSID_ERROR_NO_INNER_HEADER];
-  return;
-}
-
-/**
- * @brief Function doing End processing with PSP
- */
-static_always_inline void
-end_psp_srh_processing (vlib_node_runtime_t * node,
-			vlib_buffer_t * b0,
-			ip6_header_t * ip0,
-			ip6_ext_header_t * prev0,
-			ip6_sr_header_t * sr0,
-			ip6_sr_localsid_t * ls0, u32 * next0)
-{
-  u32 new_l0, sr_len;
-  u64 *copy_dst0, *copy_src0;
-  u32 copy_len_u64s0 = 0;
-  int i;
-
-  if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR))
-    {
-      if (PREDICT_TRUE (sr0->segments_left == 1))
-	{
-	  ip0->dst_address.as_u64[0] = sr0->segments->as_u64[0];
-	  ip0->dst_address.as_u64[1] = sr0->segments->as_u64[1];
-
-	  /* Remove the SRH taking care of the rest of IPv6 ext header */
-	  if (prev0)
-	    prev0->next_hdr = sr0->protocol;
-	  else
-	    ip0->protocol = sr0->protocol;
-
-	  sr_len = ip6_ext_header_len (sr0);
-	  vlib_buffer_advance (b0, sr_len);
-	  new_l0 = clib_net_to_host_u16 (ip0->payload_length) - sr_len;
-	  ip0->payload_length = clib_host_to_net_u16 (new_l0);
-	  copy_src0 = (u64 *) ip0;
-	  copy_dst0 = copy_src0 + (sr0->length + 1);
-	  /* number of 8 octet units to copy
-	   * By default in absence of extension headers it is equal to length of ip6 header
-	   * With extension headers it number of 8 octet units of ext headers preceding
-	   * SR header
-	   */
-	  copy_len_u64s0 =
-	    (((u8 *) sr0 - (u8 *) ip0) - sizeof (ip6_header_t)) >> 3;
-	  copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0];
-	  copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0];
-	  copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0];
-	  copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0];
-	  copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0];
-
-	  for (i = copy_len_u64s0 - 1; i >= 0; i--)
-	    {
-	      copy_dst0[i] = copy_src0[i];
-	    }
-
-	  if (ls0->behavior == SR_BEHAVIOR_X)
-	    {
-	      vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
-	      *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
-	    }
-	  return;
-	}
-    }
-  /* Error. Routing header of type != SR */
-  *next0 = SR_LOCALSID_NEXT_ERROR;
-  b0->error = node->errors[SR_LOCALSID_ERROR_NO_PSP];
-}
-
-/**
- * @brief SR LocalSID graph node. Supports all default SR Endpoint variants
- */
-static uword
-sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
-		  vlib_frame_t * from_frame)
-{
-  u32 n_left_from, next_index, *from, *to_next;
-  ip6_sr_main_t *sm = &sr_main;
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-  next_index = node->cached_next_index;
-  u32 thread_index = vlib_get_thread_index ();
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      /* Quad - Loop */
-      while (n_left_from >= 8 && n_left_to_next >= 4)
-	{
-	  u32 bi0, bi1, bi2, bi3;
-	  vlib_buffer_t *b0, *b1, *b2, *b3;
-	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
-	  ip6_ext_header_t *prev0, *prev1, *prev2, *prev3;
-	  ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
-	  u32 next0, next1, next2, next3;
-	  next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP;
-	  ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3;
-
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t *p4, *p5, *p6, *p7;
-
-	    p4 = vlib_get_buffer (vm, from[4]);
-	    p5 = vlib_get_buffer (vm, from[5]);
-	    p6 = vlib_get_buffer (vm, from[6]);
-	    p7 = vlib_get_buffer (vm, from[7]);
-
-	    /* Prefetch the buffer header and packet for the N+4 loop iteration */
-	    vlib_prefetch_buffer_header (p4, LOAD);
-	    vlib_prefetch_buffer_header (p5, LOAD);
-	    vlib_prefetch_buffer_header (p6, LOAD);
-	    vlib_prefetch_buffer_header (p7, LOAD);
-
-	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
-	  }
-
-	  to_next[0] = bi0 = from[0];
-	  to_next[1] = bi1 = from[1];
-	  to_next[2] = bi2 = from[2];
-	  to_next[3] = bi3 = from[3];
-	  from += 4;
-	  to_next += 4;
-	  n_left_from -= 4;
-	  n_left_to_next -= 4;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-	  b2 = vlib_get_buffer (vm, bi2);
-	  b3 = vlib_get_buffer (vm, bi3);
-
-	  ls0 =
-	    pool_elt_at_index (sm->localsids,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  ls1 =
-	    pool_elt_at_index (sm->localsids,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  ls2 =
-	    pool_elt_at_index (sm->localsids,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  ls3 =
-	    pool_elt_at_index (sm->localsids,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-
-	  ip0 = vlib_buffer_get_current (b0);
-	  ip1 = vlib_buffer_get_current (b1);
-	  ip2 = vlib_buffer_get_current (b2);
-	  ip3 = vlib_buffer_get_current (b3);
-
-	  ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
-	  ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE);
-	  ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE);
-	  ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE);
-
-	  end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0);
-	  end_decaps_srh_processing (node, b1, ip1, sr1, ls1, &next1);
-	  end_decaps_srh_processing (node, b2, ip2, sr2, ls2, &next2);
-	  end_decaps_srh_processing (node, b3, ip3, sr3, ls3, &next3);
-
-	  //TODO: trace.
-
-	  vlib_increment_combined_counter
-	    (((next0 ==
-	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
-	     1, vlib_buffer_length_in_chain (vm, b0));
-
-	  vlib_increment_combined_counter
-	    (((next1 ==
-	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids,
-	     1, vlib_buffer_length_in_chain (vm, b1));
-
-	  vlib_increment_combined_counter
-	    (((next2 ==
-	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids,
-	     1, vlib_buffer_length_in_chain (vm, b2));
-
-	  vlib_increment_combined_counter
-	    (((next3 ==
-	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids,
-	     1, vlib_buffer_length_in_chain (vm, b3));
-
-	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, bi1, bi2, bi3,
-					   next0, next1, next2, next3);
-	}
-
-      /* Single loop for potentially the last three packets */
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  u32 bi0;
-	  vlib_buffer_t *b0;
-	  ip6_header_t *ip0;
-	  ip6_ext_header_t *prev0;
-	  ip6_sr_header_t *sr0;
-	  u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP;
-	  ip6_sr_localsid_t *ls0;
-
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  ip0 = vlib_buffer_get_current (b0);
-
-	  /* Lookup the SR End behavior based on IP DA (adj) */
-	  ls0 =
-	    pool_elt_at_index (sm->localsids,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-
-	  /* Find SRH as well as previous header */
-	  ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
-
-	  /* SRH processing and End variants */
-	  end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0);
-
-	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      sr_localsid_trace_t *tr =
-		vlib_add_trace (vm, node, b0, sizeof (*tr));
-	      tr->num_segments = 0;
-	      tr->localsid_index = ls0 - sm->localsids;
-
-	      if (ip0 == vlib_buffer_get_current (b0))
-		{
-		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8,
-			       sizeof (tr->out_dst.as_u8));
-		  if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE
-		      && sr0->type == ROUTING_HEADER_TYPE_SR)
-		    {
-		      clib_memcpy (tr->sr, sr0->segments, sr0->length * 8);
-		      tr->num_segments =
-			sr0->length * 8 / sizeof (ip6_address_t);
-		      tr->segments_left = sr0->segments_left;
-		    }
-		}
-	      else
-		tr->num_segments = 0xFF;
-	    }
-
-	  /* Increase the counters */
-	  vlib_increment_combined_counter
-	    (((next0 ==
-	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
-	     1, vlib_buffer_length_in_chain (vm, b0));
-
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, next0);
-	}
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-  return from_frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (sr_localsid_d_node) = {
-  .function = sr_localsid_d_fn,
-  .name = "sr-localsid-d",
-  .vector_size = sizeof (u32),
-  .format_trace = format_sr_localsid_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = SR_LOCALSID_N_ERROR,
-  .error_strings = sr_localsid_error_strings,
-  .n_next_nodes = SR_LOCALSID_N_NEXT,
-  .next_nodes = {
-#define _(s,n) [SR_LOCALSID_NEXT_##s] = n,
-    foreach_sr_localsid_next
-#undef _
-  },
-};
-/* *INDENT-ON* */
-
-/**
- * @brief SR LocalSID graph node. Supports all default SR Endpoint variants
- */
-static uword
-sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
-		vlib_frame_t * from_frame)
-{
-  u32 n_left_from, next_index, *from, *to_next;
-  ip6_sr_main_t *sm = &sr_main;
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-  next_index = node->cached_next_index;
-  u32 thread_index = vlib_get_thread_index ();
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      /* Quad - Loop */
-      while (n_left_from >= 8 && n_left_to_next >= 4)
-	{
-	  u32 bi0, bi1, bi2, bi3;
-	  vlib_buffer_t *b0, *b1, *b2, *b3;
-	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
-	  ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
-	  ip6_ext_header_t *prev0, *prev1, *prev2, *prev3;
-	  u32 next0, next1, next2, next3;
-	  next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP;
-	  ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3;
-
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t *p4, *p5, *p6, *p7;
-
-	    p4 = vlib_get_buffer (vm, from[4]);
-	    p5 = vlib_get_buffer (vm, from[5]);
-	    p6 = vlib_get_buffer (vm, from[6]);
-	    p7 = vlib_get_buffer (vm, from[7]);
-
-	    /* Prefetch the buffer header and packet for the N+2 loop iteration */
-	    vlib_prefetch_buffer_header (p4, LOAD);
-	    vlib_prefetch_buffer_header (p5, LOAD);
-	    vlib_prefetch_buffer_header (p6, LOAD);
-	    vlib_prefetch_buffer_header (p7, LOAD);
-
-	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
-	  }
-
-	  to_next[0] = bi0 = from[0];
-	  to_next[1] = bi1 = from[1];
-	  to_next[2] = bi2 = from[2];
-	  to_next[3] = bi3 = from[3];
-	  from += 4;
-	  to_next += 4;
-	  n_left_from -= 4;
-	  n_left_to_next -= 4;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-	  b2 = vlib_get_buffer (vm, bi2);
-	  b3 = vlib_get_buffer (vm, bi3);
-
-	  ip0 = vlib_buffer_get_current (b0);
-	  ip1 = vlib_buffer_get_current (b1);
-	  ip2 = vlib_buffer_get_current (b2);
-	  ip3 = vlib_buffer_get_current (b3);
-
-	  ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
-	  ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE);
-	  ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE);
-	  ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE);
-
-	  ls0 =
-	    pool_elt_at_index (sm->localsids,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  ls1 =
-	    pool_elt_at_index (sm->localsids,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  ls2 =
-	    pool_elt_at_index (sm->localsids,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  ls3 =
-	    pool_elt_at_index (sm->localsids,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-
-	  if (ls0->end_psp)
-	    end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0);
-	  else
-	    end_srh_processing (node, b0, ip0, sr0, ls0, &next0);
-
-	  if (ls1->end_psp)
-	    end_psp_srh_processing (node, b1, ip1, prev1, sr1, ls1, &next1);
-	  else
-	    end_srh_processing (node, b1, ip1, sr1, ls1, &next1);
-
-	  if (ls2->end_psp)
-	    end_psp_srh_processing (node, b2, ip2, prev2, sr2, ls2, &next2);
-	  else
-	    end_srh_processing (node, b2, ip2, sr2, ls2, &next2);
-
-	  if (ls3->end_psp)
-	    end_psp_srh_processing (node, b3, ip3, prev3, sr3, ls3, &next3);
-	  else
-	    end_srh_processing (node, b3, ip3, sr3, ls3, &next3);
-
-	  //TODO: proper trace.
-
-	  vlib_increment_combined_counter
-	    (((next0 ==
-	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
-	     1, vlib_buffer_length_in_chain (vm, b0));
-
-	  vlib_increment_combined_counter
-	    (((next1 ==
-	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids,
-	     1, vlib_buffer_length_in_chain (vm, b1));
-
-	  vlib_increment_combined_counter
-	    (((next2 ==
-	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids,
-	     1, vlib_buffer_length_in_chain (vm, b2));
-
-	  vlib_increment_combined_counter
-	    (((next3 ==
-	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids,
-	     1, vlib_buffer_length_in_chain (vm, b3));
-
-	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, bi1, bi2, bi3,
-					   next0, next1, next2, next3);
-	}
-
-      /* Single loop for potentially the last three packets */
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  u32 bi0;
-	  vlib_buffer_t *b0;
-	  ip6_header_t *ip0 = 0;
-	  ip6_ext_header_t *prev0;
-	  ip6_sr_header_t *sr0;
-	  u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP;
-	  ip6_sr_localsid_t *ls0;
-
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  ip0 = vlib_buffer_get_current (b0);
-	  ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
-
-	  /* Lookup the SR End behavior based on IP DA (adj) */
-	  ls0 =
-	    pool_elt_at_index (sm->localsids,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-
-	  /* SRH processing */
-	  if (ls0->end_psp)
-	    end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0);
-	  else
-	    end_srh_processing (node, b0, ip0, sr0, ls0, &next0);
-
-	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      sr_localsid_trace_t *tr =
-		vlib_add_trace (vm, node, b0, sizeof (*tr));
-	      tr->num_segments = 0;
-	      tr->localsid_index = ls0 - sm->localsids;
-
-	      if (ip0 == vlib_buffer_get_current (b0))
-		{
-		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8,
-			       sizeof (tr->out_dst.as_u8));
-		  if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE
-		      && sr0->type == ROUTING_HEADER_TYPE_SR)
-		    {
-		      clib_memcpy (tr->sr, sr0->segments, sr0->length * 8);
-		      tr->num_segments =
-			sr0->length * 8 / sizeof (ip6_address_t);
-		      tr->segments_left = sr0->segments_left;
-		    }
-		}
-	      else
-		{
-		  tr->num_segments = 0xFF;
-		}
-	    }
-
-	  vlib_increment_combined_counter
-	    (((next0 ==
-	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
-	     1, vlib_buffer_length_in_chain (vm, b0));
-
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, next0);
-	}
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-  return from_frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (sr_localsid_node) = {
-  .function = sr_localsid_fn,
-  .name = "sr-localsid",
-  .vector_size = sizeof (u32),
-  .format_trace = format_sr_localsid_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = SR_LOCALSID_N_ERROR,
-  .error_strings = sr_localsid_error_strings,
-  .n_next_nodes = SR_LOCALSID_N_NEXT,
-  .next_nodes = {
-#define _(s,n) [SR_LOCALSID_NEXT_##s] = n,
-    foreach_sr_localsid_next
-#undef _
-  },
-};
-/* *INDENT-ON* */
-
-static u8 *
-format_sr_dpo (u8 * s, va_list * args)
-{
-  index_t index = va_arg (*args, index_t);
-  CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
-
-  return (format (s, "SR: localsid_index:[%d]", index));
-}
-
-const static dpo_vft_t sr_loc_vft = {
-  .dv_lock = sr_dpo_lock,
-  .dv_unlock = sr_dpo_unlock,
-  .dv_format = format_sr_dpo,
-};
-
-const static char *const sr_loc_ip6_nodes[] = {
-  "sr-localsid",
-  NULL,
-};
-
-const static char *const *const sr_loc_nodes[DPO_PROTO_NUM] = {
-  [DPO_PROTO_IP6] = sr_loc_ip6_nodes,
-};
-
-const static char *const sr_loc_d_ip6_nodes[] = {
-  "sr-localsid-d",
-  NULL,
-};
-
-const static char *const *const sr_loc_d_nodes[DPO_PROTO_NUM] = {
-  [DPO_PROTO_IP6] = sr_loc_d_ip6_nodes,
-};
-
-
-/*************************** SR LocalSID plugins ******************************/
-/**
- * @brief SR LocalSID plugin registry
- */
-int
-sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name,
-			       u8 * keyword_str, u8 * def_str,
-			       u8 * params_str, dpo_type_t * dpo,
-			       format_function_t * ls_format,
-			       unformat_function_t * ls_unformat,
-			       sr_plugin_callback_t * creation_fn,
-			       sr_plugin_callback_t * removal_fn)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  uword *p;
-
-  sr_localsid_fn_registration_t *plugin;
-
-  /* Did this function exist? If so update it */
-  p = hash_get_mem (sm->plugin_functions_by_key, fn_name);
-  if (p)
-    {
-      plugin = pool_elt_at_index (sm->plugin_functions, p[0]);
-    }
-  /* Else create a new one and set hash key */
-  else
-    {
-      pool_get (sm->plugin_functions, plugin);
-      hash_set_mem (sm->plugin_functions_by_key, fn_name,
-		    plugin - sm->plugin_functions);
-    }
-
-  memset (plugin, 0, sizeof (*plugin));
-
-  plugin->sr_localsid_function_number = (plugin - sm->plugin_functions);
-  plugin->sr_localsid_function_number += SR_BEHAVIOR_LAST;
-  plugin->ls_format = ls_format;
-  plugin->ls_unformat = ls_unformat;
-  plugin->creation = creation_fn;
-  plugin->removal = removal_fn;
-  clib_memcpy (&plugin->dpo, dpo, sizeof (dpo_type_t));
-  plugin->function_name = format (0, "%s%c", fn_name, 0);
-  plugin->keyword_str = format (0, "%s%c", keyword_str, 0);
-  plugin->def_str = format (0, "%s%c", def_str, 0);
-  plugin->params_str = format (0, "%s%c", params_str, 0);
-
-  return plugin->sr_localsid_function_number;
-}
-
-/**
- * @brief CLI function to 'show' all available SR LocalSID behaviors
- */
-static clib_error_t *
-show_sr_localsid_behaviors_command_fn (vlib_main_t * vm,
-				       unformat_input_t * input,
-				       vlib_cli_command_t * cmd)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  sr_localsid_fn_registration_t *plugin;
-  sr_localsid_fn_registration_t **plugins_vec = 0;
-  int i;
-
-  vlib_cli_output (vm,
-		   "SR LocalSIDs behaviors:\n-----------------------\n\n");
-
-  /* *INDENT-OFF* */
-  pool_foreach (plugin, sm->plugin_functions,
-    ({ vec_add1 (plugins_vec, plugin); }));
-  /* *INDENT-ON* */
-
-  /* Print static behaviors */
-  vlib_cli_output (vm, "Default behaviors:\n"
-		   "\tEnd\t-> Endpoint.\n"
-		   "\tEnd.X\t-> Endpoint with decapsulation and Layer-3 cross-connect.\n"
-		   "\t\tParameters: '<iface> <ip6_next_hop>'\n"
-		   "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n"
-		   "\t\tParameters: '<iface>'\n"
-		   "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n"
-		   "\t\tParameters: '<iface> <ip6_next_hop>'\n"
-		   "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n"
-		   "\t\tParameters: '<iface> <ip4_next_hop>'\n"
-		   "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n"
-		   "\t\tParameters: '<ip6_fib_table>'\n"
-		   "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n"
-		   "\t\tParameters: '<ip4_fib_table>'\n");
-  vlib_cli_output (vm, "Plugin behaviors:\n");
-  for (i = 0; i < vec_len (plugins_vec); i++)
-    {
-      plugin = plugins_vec[i];
-      vlib_cli_output (vm, "\t%s\t-> %s.\n", plugin->keyword_str,
-		       plugin->def_str);
-      vlib_cli_output (vm, "\t\tParameters: '%s'\n", plugin->params_str);
-    }
-  return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_sr_localsid_behaviors_command, static) = {
-  .path = "show sr localsids behaviors",
-  .short_help = "show sr localsids behaviors",
-  .function = show_sr_localsid_behaviors_command_fn,
-};
-/* *INDENT-ON* */
-
-/**
- * @brief SR LocalSID initialization
- */
-clib_error_t *
-sr_localsids_init (vlib_main_t * vm)
-{
-  /* Init memory for function keys */
-  ip6_sr_main_t *sm = &sr_main;
-  mhash_init (&sm->sr_localsids_index_hash, sizeof (uword),
-	      sizeof (ip6_address_t));
-  /* Init SR behaviors DPO type */
-  sr_localsid_dpo_type = dpo_register_new_type (&sr_loc_vft, sr_loc_nodes);
-  /* Init SR behaviors DPO type */
-  sr_localsid_d_dpo_type =
-    dpo_register_new_type (&sr_loc_vft, sr_loc_d_nodes);
-  /* Init memory for localsid plugins */
-  sm->plugin_functions_by_key = hash_create_string (0, sizeof (uword));
-  return 0;
-}
-
-VLIB_INIT_FUNCTION (sr_localsids_init);
-/*
-* fd.io coding-style-patch-verification: ON
-*
-* Local Variables:
-* eval: (c-set-style "gnu")
-* End:
-*/
diff --git a/src/vnet/sr/sr_localsid.md b/src/vnet/sr/sr_localsid.md
deleted file mode 100644
index 340af4a3..00000000
--- a/src/vnet/sr/sr_localsid.md
+++ /dev/null
@@ -1,58 +0,0 @@
-# SR LocalSIDs    {#srv6_localsid_doc}
-
-A local SID is associated to a Segment Routing behavior -or function- on the current node.
-
-The most basic behavior is called END. It simply activates the next SID in the current packet, by decrementing the Segments Left value and updating the IPv6 DA.
-
-A local END SID is instantiated using the following CLI:
-
-    sr localsid (del) address XX::YY behavior end
-
-This creates a new entry in the main FIB for IPv6 address XX::YY. All packets whose IPv6 DA matches this FIB entry are redirected to the sr-localsid node, where they are processed as described above.
-
-Other examples of local SIDs are the following:
-
-    sr localsid (del) address XX::YY behavior end
-    sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a
-    sr localsid (del) address XX::YY behavior end.dx6 GE0/1/0 2001::a
-    sr localsid (del) address XX::YY behavior end.dx4 GE0/1/0 10.0.0.1
-    sr localsid (del) address XX::YY behavior end.dx2 GigabitE0/11/0
-    sr localsid (del) address XX::YY behavior end.dt6 5
-    sr localsid (del) address XX::YY behavior end.dt6 5
-
-Note that all of these behaviors match the definitions of the SRv6 architecture (*draft-filsfils-spring-srv6-network-programming*). Please refer to this document for a detailed description of each behavior.
-
-Note also that you can configure the PSP flavor of the End and End.X behaviors by typing:
-    
-    sr localsid (del) address XX::YY behavior end psp
-    sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a psp
-
-Help on the available local SID behaviors and their usage can be obtained with:
-    
-    help sr localsid
-
-Alternatively they can be obtained using.
-
-    show sr localsids behavior
-
-The difference in between those two commands is that the first one will only display the SR LocalSID behaviors that are built-in VPP, while the latter will display those behaviors plus the ones added with the SR LocalSID Development Framework.
-
-
-VPP keeps a 'My LocalSID Table' where it stores all the SR local SIDs instantiated as well as their parameters. Every time a new local SID is instantiated, a new entry is added to this table. In addition, counters for correctly and incorrectly processed traffic are maintained for each local SID. The counters store both the number of packets and bytes.
-
-The contents of the 'My LocalSID Table' is shown with:
-
-    vpp# show sr localsid
-    SRv6 - My LocalSID Table:
-    =========================
-            Address:        c3::1
-            Behavior:       DX6 (Endpoint with decapsulation and IPv6 cross-connect)
-            Iface:          GigabitEthernet0/5/0
-            Next hop:       b:c3::b
-            Good traffic:   [51277 packets : 5332808 bytes]
-            Bad traffic:    [0 packets : 0 bytes]
-    --------------------
-
-The traffic counters can be reset with:
-
-    vpp# clear sr localsid counters
diff --git a/src/vnet/sr/sr_packet.h b/src/vnet/sr/sr_packet.h
deleted file mode 100755
index 7af4ad4d..00000000
--- a/src/vnet/sr/sr_packet.h
+++ /dev/null
@@ -1,159 +0,0 @@
-#ifndef included_vnet_sr_packet_h
-#define included_vnet_sr_packet_h
-
-#include <vnet/ip/ip.h>
-
-/*
- * ipv6 segment-routing header format
- *
- * Copyright (c) 2013 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- *   The Segment Routing Header (SRH) is defined as follows:
- *
- *    0                   1                   2                   3
- *    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *   | Next Header   |  Hdr Ext Len  | Routing Type  | Segments Left |
- *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *   | First Segment |     Flags     |           RESERVED            |
- *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *   |                                                               |
- *   |            Segment List[0] (128 bits IPv6 address)            |
- *   |                                                               |
- *   |                                                               |
- *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *   |                                                               |
- *   |                                                               |
- *                                 ...
- *   |                                                               |
- *   |                                                               |
- *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *   |                                                               |
- *   |            Segment List[n] (128 bits IPv6 address)            |
- *   |                                                               |
- *   |                                                               |
- *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *   //                                                             //
- *   //         Optional Type Length Value objects (variable)       //
- *   //                                                             //
- *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- *   where:
- *
- *   o  Next Header: 8-bit selector.  Identifies the type of header
- *      immediately following the SRH.
- *
- *   o  Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH
- *      header in 8-octet units, not including the first 8 octets.
- *
- *   o  Routing Type: TBD, to be assigned by IANA (suggested value: 4).
- *
- *   o  Segments Left.  Defined in [RFC2460], it contains the index, in
- *      the Segment List, of the next segment to inspect.  Segments Left
- *      is decremented at each segment.
- *
- *   o  First Segment: contains the index, in the Segment List, of the
- *      first segment of the path which is in fact the last element of the
- *      Segment List.
- *
- *   o  Flags: 8 bits of flags.  Following flags are defined:
- *
- *         0 1 2 3 4 5 6 7
- *        +-+-+-+-+-+-+-+-+
- *        |U|P|O|A|H|  U  |
- *        +-+-+-+-+-+-+-+-+
- *
- *        U: Unused and for future use.  SHOULD be unset on transmission
- *        and MUST be ignored on receipt.
- *
- *        P-flag: Protected flag.  Set when the packet has been rerouted
- *        through FRR mechanism by an SR endpoint node.
- *
- *        O-flag: OAM flag.  When set, it indicates that this packet is
- *        an operations and management (OAM) packet.
- *
- *        A-flag: Alert flag.  If present, it means important Type Length
- *        Value (TLV) objects are present.  See Section 3.1 for details
- *        on TLVs objects.
- *
- *        H-flag: HMAC flag.  If set, the HMAC TLV is present and is
- *        encoded as the last TLV of the SRH.  In other words, the last
- *        36 octets of the SRH represent the HMAC information.  See
- *        Section 3.1.5 for details on the HMAC TLV.
- *
- *   o  RESERVED: SHOULD be unset on transmission and MUST be ignored on
- *      receipt.
- *
- *   o  Segment List[n]: 128 bit IPv6 addresses representing the nth
- *      segment in the Segment List.  The Segment List is encoded starting
- *      from the last segment of the path.  I.e., the first element of the
- *      segment list (Segment List [0]) contains the last segment of the
- *      path while the last segment of the Segment List (Segment List[n])
- *      contains the first segment of the path.  The index contained in
- *      "Segments Left" identifies the current active segment.
- *
- *   o  Type Length Value (TLV) are described in Section 3.1.
- *
- */
-
-#ifndef IPPROTO_IPV6_ROUTE
-#define IPPROTO_IPV6_ROUTE        43
-#endif
-
-#define ROUTING_HEADER_TYPE_SR    4
-
-typedef struct
-{
-  /* Protocol for next header. */
-  u8 protocol;
-  /*
-   * Length of routing header in 8 octet units,
-   * not including the first 8 octets
-   */
-  u8 length;
-
-  /* Type of routing header; type 4 = segement routing */
-  u8 type;
-
-  /* Next segment in the segment list */
-  u8 segments_left;
-
-  /* Pointer to the first segment in the header */
-  u8 first_segment;
-
-  /* Flag bits */
-#define IP6_SR_HEADER_FLAG_PROTECTED  (0x40)
-#define IP6_SR_HEADER_FLAG_OAM        (0x20)
-#define IP6_SR_HEADER_FLAG_ALERT      (0x10)
-#define IP6_SR_HEADER_FLAG_HMAC       (0x80)
-
-  /* values 0x0, 0x4 - 0x7 are reserved */
-  u8 flags;
-  u16 reserved;
-
-  /* The segment elts */
-  ip6_address_t segments[0];
-} __attribute__ ((packed)) ip6_sr_header_t;
-
-/*
-* fd.io coding-style-patch-verification: ON
-*
-* Local Variables:
-* eval: (c-set-style "gnu")
-* End:
-*/
-
-#endif /* included_vnet_sr_packet_h */
diff --git a/src/vnet/sr/sr_policy.md b/src/vnet/sr/sr_policy.md
deleted file mode 100644
index 521b8461..00000000
--- a/src/vnet/sr/sr_policy.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# Creating a SR Policy    {#srv6_policy_doc}
-
-An SR Policy is defined by a Binding SID and a weighted set of Segment Lists.
-
-A new SR policy is created with a first SID list using:
-
-    sr policy add bsid 2001::1 next A1:: next B1:: next C1:: (weight 5) (fib-table 3)
-
-* The weight parameter is only used if more than one SID list is associated with the policy.
-* The fib-table parameter specifies in which table (VRF) the Binding SID is to be installed.
-
-An SR policy is deleted with:
-
-    sr policy del bsid 2001::1
-    sr policy del index 1
-
-The existing SR policies are listed with:
-
-    show sr policies
-
-## Adding/Removing SID Lists from an SR policy
-
-An additional SID list is associated with an existing SR policy with:
-
-    sr policy mod bsid 2001::1 add sl next A2:: next B2:: next C2:: (weight 3)
-    sr policy mod index 3      add sl next A2:: next B2:: next C2:: (weight 3)
-
-Conversely, a SID list can be removed from an SR policy with:
-
-    sr policy mod bsid 2001::1 del sl index 1
-    sr policy mod index 3      del sl index 1
-
-Note that this cannot be used to remove the last SID list of a policy.
-
-The weight of a SID list can also be modified with:
-
-    sr policy mod bsid 2001::1 mod sl index 1 weight 4
-    sr policy mod index 3      mod sl index 1 weight 4
-
-## SR Policies: Spray policies
-
-Spray policies are a specific type of SR policies where the packet is replicated on all the SID lists, rather than load-balanced among them.
-
-SID list weights are ignored with this type of policies.
-
-A Spray policy is instantiated by appending the keyword **spray** to a regular SR policy command, as in:
-
-    sr policy add bsid 2001::1 next A1:: next B1:: next C1:: spray
-
-Spray policies are used for removing multicast state from a network core domain, and instead send a linear unicast copy to every access node. The last SID in each list accesses the multicast tree within the access node.  
-
-## Encapsulation SR policies
-
-In case the user decides to create an SR policy an IPv6 Source Address must be specified for the encapsulated traffic. In order to do so the user might use the following command:
-    
-    set sr encaps source addr XXXX::YYYY
diff --git a/src/vnet/sr/sr_policy_rewrite.c b/src/vnet/sr/sr_policy_rewrite.c
deleted file mode 100755
index c4024070..00000000
--- a/src/vnet/sr/sr_policy_rewrite.c
+++ /dev/null
@@ -1,3227 +0,0 @@
-/*
- * sr_policy_rewrite.c: ipv6 sr policy creation
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file
- * @brief SR policy creation and application
- *
- * Create an SR policy.
- * An SR policy can be either of 'default' type or 'spray' type
- * An SR policy has attached a list of SID lists.
- * In case the SR policy is a default one it will load balance among them.
- * An SR policy has associated a BindingSID.
- * In case any packet arrives with IPv6 DA == BindingSID then the SR policy
- * associated to such bindingSID will be applied to such packet.
- *
- * SR policies can be applied either by using IPv6 encapsulation or
- * SRH insertion. Both methods can be found on this file.
- *
- * Traffic input usually is IPv6 packets. However it is possible to have
- * IPv4 packets or L2 frames. (that are encapsulated into IPv6 with SRH)
- *
- * This file provides the appropiates VPP graph nodes to do any of these
- * methods.
- *
- */
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/sr/sr.h>
-#include <vnet/ip/ip.h>
-#include <vnet/sr/sr_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/fib/ip6_fib.h>
-#include <vnet/dpo/dpo.h>
-#include <vnet/dpo/replicate_dpo.h>
-
-#include <vppinfra/error.h>
-#include <vppinfra/elog.h>
-
-/**
- * @brief SR policy rewrite trace
- */
-typedef struct
-{
-  ip6_address_t src, dst;
-} sr_policy_rewrite_trace_t;
-
-/* Graph arcs */
-#define foreach_sr_policy_rewrite_next     \
-_(IP6_LOOKUP, "ip6-lookup")         \
-_(ERROR, "error-drop")
-
-typedef enum
-{
-#define _(s,n) SR_POLICY_REWRITE_NEXT_##s,
-  foreach_sr_policy_rewrite_next
-#undef _
-    SR_POLICY_REWRITE_N_NEXT,
-} sr_policy_rewrite_next_t;
-
-/* SR rewrite errors */
-#define foreach_sr_policy_rewrite_error                     \
-_(INTERNAL_ERROR, "Segment Routing undefined error")        \
-_(BSID_ZERO, "BSID with SL = 0")                            \
-_(COUNTER_TOTAL, "SR steered IPv6 packets")                 \
-_(COUNTER_ENCAP, "SR: Encaps packets")                      \
-_(COUNTER_INSERT, "SR: SRH inserted packets")               \
-_(COUNTER_BSID, "SR: BindingSID steered packets")
-
-typedef enum
-{
-#define _(sym,str) SR_POLICY_REWRITE_ERROR_##sym,
-  foreach_sr_policy_rewrite_error
-#undef _
-    SR_POLICY_REWRITE_N_ERROR,
-} sr_policy_rewrite_error_t;
-
-static char *sr_policy_rewrite_error_strings[] = {
-#define _(sym,string) string,
-  foreach_sr_policy_rewrite_error
-#undef _
-};
-
-/**
- * @brief Dynamically added SR SL DPO type
- */
-static dpo_type_t sr_pr_encaps_dpo_type;
-static dpo_type_t sr_pr_insert_dpo_type;
-static dpo_type_t sr_pr_bsid_encaps_dpo_type;
-static dpo_type_t sr_pr_bsid_insert_dpo_type;
-
-/**
- * @brief IPv6 SA for encapsulated packets
- */
-static ip6_address_t sr_pr_encaps_src;
-
-/******************* SR rewrite set encaps IPv6 source addr *******************/
-/* Note:  This is temporal. We don't know whether to follow this path or
-          take the ip address of a loopback interface or even the OIF         */
-
-static clib_error_t *
-set_sr_src_command_fn (vlib_main_t * vm, unformat_input_t * input,
-		       vlib_cli_command_t * cmd)
-{
-  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
-    {
-      if (unformat
-	  (input, "addr %U", unformat_ip6_address, &sr_pr_encaps_src))
-	return 0;
-      else
-	return clib_error_return (0, "No address specified");
-    }
-  return clib_error_return (0, "No address specified");
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_sr_src_command, static) = {
-  .path = "set sr encaps source",
-  .short_help = "set sr encaps source addr <ip6_addr>",
-  .function = set_sr_src_command_fn,
-};
-/* *INDENT-ON* */
-
-/*********************** SR rewrite string computation ************************/
-/**
- * @brief SR rewrite string computation for IPv6 encapsulation (inline)
- *
- * @param sl is a vector of IPv6 addresses composing the Segment List
- *
- * @return precomputed rewrite string for encapsulation
- */
-static inline u8 *
-compute_rewrite_encaps (ip6_address_t * sl)
-{
-  ip6_header_t *iph;
-  ip6_sr_header_t *srh;
-  ip6_address_t *addrp, *this_address;
-  u32 header_length = 0;
-  u8 *rs = NULL;
-
-  header_length = 0;
-  header_length += IPv6_DEFAULT_HEADER_LENGTH;
-  if (vec_len (sl) > 1)
-    {
-      header_length += sizeof (ip6_sr_header_t);
-      header_length += vec_len (sl) * sizeof (ip6_address_t);
-    }
-
-  vec_validate (rs, header_length - 1);
-
-  iph = (ip6_header_t *) rs;
-  iph->ip_version_traffic_class_and_flow_label =
-    clib_host_to_net_u32 (0 | ((6 & 0xF) << 28));
-  iph->src_address.as_u64[0] = sr_pr_encaps_src.as_u64[0];
-  iph->src_address.as_u64[1] = sr_pr_encaps_src.as_u64[1];
-  iph->payload_length = header_length - IPv6_DEFAULT_HEADER_LENGTH;
-  iph->protocol = IP_PROTOCOL_IPV6;
-  iph->hop_limit = IPv6_DEFAULT_HOP_LIMIT;
-
-  srh = (ip6_sr_header_t *) (iph + 1);
-  iph->protocol = IP_PROTOCOL_IPV6_ROUTE;
-  srh->protocol = IP_PROTOCOL_IPV6;
-  srh->type = ROUTING_HEADER_TYPE_SR;
-  srh->segments_left = vec_len (sl) - 1;
-  srh->first_segment = vec_len (sl) - 1;
-  srh->length = ((sizeof (ip6_sr_header_t) +
-		  (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1;
-  srh->flags = 0x00;
-  srh->reserved = 0x00;
-  addrp = srh->segments + vec_len (sl) - 1;
-  vec_foreach (this_address, sl)
-  {
-    clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
-    addrp--;
-  }
-  iph->dst_address.as_u64[0] = sl->as_u64[0];
-  iph->dst_address.as_u64[1] = sl->as_u64[1];
-  return rs;
-}
-
-/**
- * @brief SR rewrite string computation for SRH insertion (inline)
- *
- * @param sl is a vector of IPv6 addresses composing the Segment List
- *
- * @return precomputed rewrite string for SRH insertion
- */
-static inline u8 *
-compute_rewrite_insert (ip6_address_t * sl)
-{
-  ip6_sr_header_t *srh;
-  ip6_address_t *addrp, *this_address;
-  u32 header_length = 0;
-  u8 *rs = NULL;
-
-  header_length = 0;
-  header_length += sizeof (ip6_sr_header_t);
-  header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t);
-
-  vec_validate (rs, header_length - 1);
-
-  srh = (ip6_sr_header_t *) rs;
-  srh->type = ROUTING_HEADER_TYPE_SR;
-  srh->segments_left = vec_len (sl);
-  srh->first_segment = vec_len (sl);
-  srh->length = ((sizeof (ip6_sr_header_t) +
-		  ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1;
-  srh->flags = 0x00;
-  srh->reserved = 0x0000;
-  addrp = srh->segments + vec_len (sl);
-  vec_foreach (this_address, sl)
-  {
-    clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
-    addrp--;
-  }
-  return rs;
-}
-
-/**
- * @brief SR rewrite string computation for SRH insertion with BSID (inline)
- *
- * @param sl is a vector of IPv6 addresses composing the Segment List
- *
- * @return precomputed rewrite string for SRH insertion with BSID
- */
-static inline u8 *
-compute_rewrite_bsid (ip6_address_t * sl)
-{
-  ip6_sr_header_t *srh;
-  ip6_address_t *addrp, *this_address;
-  u32 header_length = 0;
-  u8 *rs = NULL;
-
-  header_length = 0;
-  header_length += sizeof (ip6_sr_header_t);
-  header_length += vec_len (sl) * sizeof (ip6_address_t);
-
-  vec_validate (rs, header_length - 1);
-
-  srh = (ip6_sr_header_t *) rs;
-  srh->type = ROUTING_HEADER_TYPE_SR;
-  srh->segments_left = vec_len (sl) - 1;
-  srh->first_segment = vec_len (sl) - 1;
-  srh->length = ((sizeof (ip6_sr_header_t) +
-		  (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1;
-  srh->flags = 0x00;
-  srh->reserved = 0x0000;
-  addrp = srh->segments + vec_len (sl) - 1;
-  vec_foreach (this_address, sl)
-  {
-    clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
-    addrp--;
-  }
-  return rs;
-}
-
-/***************************  SR LB helper functions **************************/
-/**
- * @brief Creates a Segment List and adds it to an SR policy
- *
- * Creates a Segment List and adds it to the SR policy. Notice that the SL are
- * not necessarily unique. Hence there might be two Segment List within the
- * same SR Policy with exactly the same segments and same weight.
- *
- * @param sr_policy is the SR policy where the SL will be added
- * @param sl is a vector of IPv6 addresses composing the Segment List
- * @param weight is the weight of the SegmentList (for load-balancing purposes)
- * @param is_encap represents the mode (SRH insertion vs Encapsulation)
- *
- * @return pointer to the just created segment list
- */
-static inline ip6_sr_sl_t *
-create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight,
-	   u8 is_encap)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  ip6_sr_sl_t *segment_list;
-
-  pool_get (sm->sid_lists, segment_list);
-  memset (segment_list, 0, sizeof (*segment_list));
-
-  vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists);
-
-  /* Fill in segment list */
-  segment_list->weight =
-    (weight != (u32) ~ 0 ? weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT);
-  segment_list->segments = vec_dup (sl);
-
-  if (is_encap)
-    {
-      segment_list->rewrite = compute_rewrite_encaps (sl);
-      segment_list->rewrite_bsid = segment_list->rewrite;
-    }
-  else
-    {
-      segment_list->rewrite = compute_rewrite_insert (sl);
-      segment_list->rewrite_bsid = compute_rewrite_bsid (sl);
-    }
-
-  /* Create DPO */
-  dpo_reset (&segment_list->bsid_dpo);
-  dpo_reset (&segment_list->ip6_dpo);
-  dpo_reset (&segment_list->ip4_dpo);
-
-  if (is_encap)
-    {
-      dpo_set (&segment_list->ip6_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP6,
-	       segment_list - sm->sid_lists);
-      dpo_set (&segment_list->ip4_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP4,
-	       segment_list - sm->sid_lists);
-      dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_encaps_dpo_type,
-	       DPO_PROTO_IP6, segment_list - sm->sid_lists);
-    }
-  else
-    {
-      dpo_set (&segment_list->ip6_dpo, sr_pr_insert_dpo_type, DPO_PROTO_IP6,
-	       segment_list - sm->sid_lists);
-      dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_insert_dpo_type,
-	       DPO_PROTO_IP6, segment_list - sm->sid_lists);
-    }
-
-  return segment_list;
-}
-
-/**
- * @brief Updates the Load Balancer after an SR Policy change
- *
- * @param sr_policy is the modified SR Policy
- */
-static inline void
-update_lb (ip6_sr_policy_t * sr_policy)
-{
-  flow_hash_config_t fhc;
-  u32 *sl_index;
-  ip6_sr_sl_t *segment_list;
-  ip6_sr_main_t *sm = &sr_main;
-  load_balance_path_t path;
-  path.path_index = FIB_NODE_INDEX_INVALID;
-  load_balance_path_t *ip4_path_vector = 0;
-  load_balance_path_t *ip6_path_vector = 0;
-  load_balance_path_t *b_path_vector = 0;
-
-  /* In case LB does not exist, create it */
-  if (!dpo_id_is_valid (&sr_policy->bsid_dpo))
-    {
-      fib_prefix_t pfx = {
-	.fp_proto = FIB_PROTOCOL_IP6,
-	.fp_len = 128,
-	.fp_addr = {
-		    .ip6 = sr_policy->bsid,
-		    }
-      };
-
-      /* Add FIB entry for BSID */
-      fhc = fib_table_get_flow_hash_config (sr_policy->fib_table,
-					    dpo_proto_to_fib (DPO_PROTO_IP6));
-
-      dpo_set (&sr_policy->bsid_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6,
-	       load_balance_create (0, DPO_PROTO_IP6, fhc));
-
-      dpo_set (&sr_policy->ip6_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6,
-	       load_balance_create (0, DPO_PROTO_IP6, fhc));
-
-      /* Update FIB entry's to point to the LB DPO in the main FIB and hidden one */
-      fib_table_entry_special_dpo_update (fib_table_find (FIB_PROTOCOL_IP6,
-							  sr_policy->fib_table),
-					  &pfx, FIB_SOURCE_SR,
-					  FIB_ENTRY_FLAG_EXCLUSIVE,
-					  &sr_policy->bsid_dpo);
-
-      fib_table_entry_special_dpo_update (sm->fib_table_ip6,
-					  &pfx,
-					  FIB_SOURCE_SR,
-					  FIB_ENTRY_FLAG_EXCLUSIVE,
-					  &sr_policy->ip6_dpo);
-
-      if (sr_policy->is_encap)
-	{
-	  dpo_set (&sr_policy->ip4_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP4,
-		   load_balance_create (0, DPO_PROTO_IP4, fhc));
-
-	  fib_table_entry_special_dpo_update (sm->fib_table_ip4,
-					      &pfx,
-					      FIB_SOURCE_SR,
-					      FIB_ENTRY_FLAG_EXCLUSIVE,
-					      &sr_policy->ip4_dpo);
-	}
-
-    }
-
-  /* Create the LB path vector */
-  //path_vector = vec_new(load_balance_path_t, vec_len(sr_policy->segments_lists));
-  vec_foreach (sl_index, sr_policy->segments_lists)
-  {
-    segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
-    path.path_dpo = segment_list->bsid_dpo;
-    path.path_weight = segment_list->weight;
-    vec_add1 (b_path_vector, path);
-    path.path_dpo = segment_list->ip6_dpo;
-    vec_add1 (ip6_path_vector, path);
-    if (sr_policy->is_encap)
-      {
-	path.path_dpo = segment_list->ip4_dpo;
-	vec_add1 (ip4_path_vector, path);
-      }
-  }
-
-  /* Update LB multipath */
-  load_balance_multipath_update (&sr_policy->bsid_dpo, b_path_vector,
-				 LOAD_BALANCE_FLAG_NONE);
-  load_balance_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector,
-				 LOAD_BALANCE_FLAG_NONE);
-  if (sr_policy->is_encap)
-    load_balance_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector,
-				   LOAD_BALANCE_FLAG_NONE);
-
-  /* Cleanup */
-  vec_free (b_path_vector);
-  vec_free (ip6_path_vector);
-  vec_free (ip4_path_vector);
-
-}
-
-/**
- * @brief Updates the Replicate DPO after an SR Policy change
- *
- * @param sr_policy is the modified SR Policy (type spray)
- */
-static inline void
-update_replicate (ip6_sr_policy_t * sr_policy)
-{
-  u32 *sl_index;
-  ip6_sr_sl_t *segment_list;
-  ip6_sr_main_t *sm = &sr_main;
-  load_balance_path_t path;
-  path.path_index = FIB_NODE_INDEX_INVALID;
-  load_balance_path_t *b_path_vector = 0;
-  load_balance_path_t *ip6_path_vector = 0;
-  load_balance_path_t *ip4_path_vector = 0;
-
-  /* In case LB does not exist, create it */
-  if (!dpo_id_is_valid (&sr_policy->bsid_dpo))
-    {
-      dpo_set (&sr_policy->bsid_dpo, DPO_REPLICATE,
-	       DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6));
-
-      dpo_set (&sr_policy->ip6_dpo, DPO_REPLICATE,
-	       DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6));
-
-      /* Update FIB entry's DPO to point to SR without LB */
-      fib_prefix_t pfx = {
-	.fp_proto = FIB_PROTOCOL_IP6,
-	.fp_len = 128,
-	.fp_addr = {
-		    .ip6 = sr_policy->bsid,
-		    }
-      };
-      fib_table_entry_special_dpo_update (fib_table_find (FIB_PROTOCOL_IP6,
-							  sr_policy->fib_table),
-					  &pfx, FIB_SOURCE_SR,
-					  FIB_ENTRY_FLAG_EXCLUSIVE,
-					  &sr_policy->bsid_dpo);
-
-      fib_table_entry_special_dpo_update (sm->fib_table_ip6,
-					  &pfx,
-					  FIB_SOURCE_SR,
-					  FIB_ENTRY_FLAG_EXCLUSIVE,
-					  &sr_policy->ip6_dpo);
-
-      if (sr_policy->is_encap)
-	{
-	  dpo_set (&sr_policy->ip4_dpo, DPO_REPLICATE, DPO_PROTO_IP4,
-		   replicate_create (0, DPO_PROTO_IP4));
-
-	  fib_table_entry_special_dpo_update (sm->fib_table_ip4,
-					      &pfx,
-					      FIB_SOURCE_SR,
-					      FIB_ENTRY_FLAG_EXCLUSIVE,
-					      &sr_policy->ip4_dpo);
-	}
-
-    }
-
-  /* Create the replicate path vector */
-  path.path_weight = 1;
-  vec_foreach (sl_index, sr_policy->segments_lists)
-  {
-    segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
-    path.path_dpo = segment_list->bsid_dpo;
-    vec_add1 (b_path_vector, path);
-    path.path_dpo = segment_list->ip6_dpo;
-    vec_add1 (ip6_path_vector, path);
-    if (sr_policy->is_encap)
-      {
-	path.path_dpo = segment_list->ip4_dpo;
-	vec_add1 (ip4_path_vector, path);
-      }
-  }
-
-  /* Update replicate multipath */
-  replicate_multipath_update (&sr_policy->bsid_dpo, b_path_vector);
-  replicate_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector);
-  if (sr_policy->is_encap)
-    replicate_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector);
-}
-
-/******************************* SR rewrite API *******************************/
-/* Three functions for handling sr policies:
- *   -> sr_policy_add
- *   -> sr_policy_del
- *   -> sr_policy_mod
- * All of them are API. CLI function on sr_policy_command_fn                  */
-
-/**
- * @brief Create a new SR policy
- *
- * @param bsid is the bindingSID of the SR Policy
- * @param segments is a vector of IPv6 address composing the segment list
- * @param weight is the weight of the sid list. optional.
- * @param behavior is the behavior of the SR policy. (default//spray)
- * @param fib_table is the VRF where to install the FIB entry for the BSID
- * @param is_encap (bool) whether SR policy should behave as Encap/SRH Insertion
- *
- * @return 0 if correct, else error
- */
-int
-sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
-	       u32 weight, u8 behavior, u32 fib_table, u8 is_encap)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  ip6_sr_policy_t *sr_policy = 0;
-  uword *p;
-
-  /* Search for existing keys (BSID) */
-  p = mhash_get (&sm->sr_policies_index_hash, bsid);
-  if (p)
-    {
-      /* Add SR policy that already exists; complain */
-      return -12;
-    }
-
-  /* Search collision in FIB entries */
-  /* Explanation: It might be possible that some other entity has already
-   * created a route for the BSID. This in theory is impossible, but in
-   * practise we could see it. Assert it and scream if needed */
-  fib_prefix_t pfx = {
-    .fp_proto = FIB_PROTOCOL_IP6,
-    .fp_len = 128,
-    .fp_addr = {
-		.ip6 = *bsid,
-		}
-  };
-
-  /* Lookup the FIB index associated to the table selected */
-  u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6,
-				  (fib_table != (u32) ~ 0 ? fib_table : 0));
-  if (fib_index == ~0)
-    return -13;
-
-  /* Lookup whether there exists an entry for the BSID */
-  fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx);
-  if (FIB_NODE_INDEX_INVALID != fei)
-    return -12;			//There is an entry for such lookup
-
-  /* Add an SR policy object */
-  pool_get (sm->sr_policies, sr_policy);
-  memset (sr_policy, 0, sizeof (*sr_policy));
-  clib_memcpy (&sr_policy->bsid, bsid, sizeof (ip6_address_t));
-  sr_policy->type = behavior;
-  sr_policy->fib_table = (fib_table != (u32) ~ 0 ? fib_table : 0);	//Is default FIB 0 ?
-  sr_policy->is_encap = is_encap;
-
-  /* Copy the key */
-  mhash_set (&sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies,
-	     NULL);
-
-  /* Create a segment list and add the index to the SR policy */
-  create_sl (sr_policy, segments, weight, is_encap);
-
-  /* If FIB doesnt exist, create them */
-  if (sm->fib_table_ip6 == (u32) ~ 0)
-    {
-      sm->fib_table_ip6 = fib_table_create_and_lock (FIB_PROTOCOL_IP6,
-						     "SRv6 steering of IP6 prefixes through BSIDs");
-      sm->fib_table_ip4 = fib_table_create_and_lock (FIB_PROTOCOL_IP6,
-						     "SRv6 steering of IP4 prefixes through BSIDs");
-    }
-
-  /* Create IPv6 FIB for the BindingSID attached to the DPO of the only SL */
-  if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
-    update_lb (sr_policy);
-  else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
-    update_replicate (sr_policy);
-  return 0;
-}
-
-/**
- * @brief Delete a SR policy
- *
- * @param bsid is the bindingSID of the SR Policy
- * @param index is the index of the SR policy
- *
- * @return 0 if correct, else error
- */
-int
-sr_policy_del (ip6_address_t * bsid, u32 index)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  ip6_sr_policy_t *sr_policy = 0;
-  ip6_sr_sl_t *segment_list;
-  u32 *sl_index;
-  uword *p;
-
-  if (bsid)
-    {
-      p = mhash_get (&sm->sr_policies_index_hash, bsid);
-      if (p)
-	sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
-      else
-	return -1;
-    }
-  else
-    {
-      sr_policy = pool_elt_at_index (sm->sr_policies, index);
-      if (!sr_policy)
-	return -1;
-    }
-
-  /* Remove BindingSID FIB entry */
-  fib_prefix_t pfx = {
-    .fp_proto = FIB_PROTOCOL_IP6,
-    .fp_len = 128,
-    .fp_addr = {
-		.ip6 = sr_policy->bsid,
-		}
-    ,
-  };
-
-  fib_table_entry_special_remove (fib_table_find (FIB_PROTOCOL_IP6,
-						  sr_policy->fib_table),
-				  &pfx, FIB_SOURCE_SR);
-
-  fib_table_entry_special_remove (sm->fib_table_ip6, &pfx, FIB_SOURCE_SR);
-
-  if (sr_policy->is_encap)
-    fib_table_entry_special_remove (sm->fib_table_ip4, &pfx, FIB_SOURCE_SR);
-
-  if (dpo_id_is_valid (&sr_policy->bsid_dpo))
-    {
-      dpo_reset (&sr_policy->bsid_dpo);
-      dpo_reset (&sr_policy->ip4_dpo);
-      dpo_reset (&sr_policy->ip6_dpo);
-    }
-
-  /* Clean SID Lists */
-  vec_foreach (sl_index, sr_policy->segments_lists)
-  {
-    segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
-    vec_free (segment_list->segments);
-    vec_free (segment_list->rewrite);
-    vec_free (segment_list->rewrite_bsid);
-    pool_put_index (sm->sid_lists, *sl_index);
-  }
-
-  /* Remove SR policy entry */
-  mhash_unset (&sm->sr_policies_index_hash, &sr_policy->bsid, NULL);
-  pool_put (sm->sr_policies, sr_policy);
-
-  /* If FIB empty unlock it */
-  if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies))
-    {
-      fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6);
-      fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6);
-      sm->fib_table_ip6 = (u32) ~ 0;
-      sm->fib_table_ip4 = (u32) ~ 0;
-    }
-
-  return 0;
-}
-
-/**
- * @brief Modify an existing SR policy
- *
- * The possible modifications are adding a new Segment List, modifying an
- * existing Segment List (modify the weight only) and delete a given
- * Segment List from the SR Policy.
- *
- * @param bsid is the bindingSID of the SR Policy
- * @param index is the index of the SR policy
- * @param fib_table is the VRF where to install the FIB entry for the BSID
- * @param operation is the operation to perform (among the top ones)
- * @param segments is a vector of IPv6 address composing the segment list
- * @param sl_index is the index of the Segment List to modify/delete
- * @param weight is the weight of the sid list. optional.
- * @param is_encap Mode. Encapsulation or SRH insertion.
- *
- * @return 0 if correct, else error
- */
-int
-sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
-	       u8 operation, ip6_address_t * segments, u32 sl_index,
-	       u32 weight)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  ip6_sr_policy_t *sr_policy = 0;
-  ip6_sr_sl_t *segment_list;
-  u32 *sl_index_iterate;
-  uword *p;
-
-  if (bsid)
-    {
-      p = mhash_get (&sm->sr_policies_index_hash, bsid);
-      if (p)
-	sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
-      else
-	return -1;
-    }
-  else
-    {
-      sr_policy = pool_elt_at_index (sm->sr_policies, index);
-      if (!sr_policy)
-	return -1;
-    }
-
-  if (operation == 1)		/* Add SR List to an existing SR policy */
-    {
-      /* Create the new SL */
-      segment_list =
-	create_sl (sr_policy, segments, weight, sr_policy->is_encap);
-
-      /* Create a new LB DPO */
-      if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
-	update_lb (sr_policy);
-      else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
-	update_replicate (sr_policy);
-    }
-  else if (operation == 2)	/* Delete SR List from an existing SR policy */
-    {
-      /* Check that currently there are more than one SID list */
-      if (vec_len (sr_policy->segments_lists) == 1)
-	return -21;
-
-      /* Check that the SR list does exist and is assigned to the sr policy */
-      vec_foreach (sl_index_iterate, sr_policy->segments_lists)
-	if (*sl_index_iterate == sl_index)
-	break;
-
-      if (*sl_index_iterate != sl_index)
-	return -22;
-
-      /* Remove the lucky SR list that is being kicked out */
-      segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
-      vec_free (segment_list->segments);
-      vec_free (segment_list->rewrite);
-      vec_free (segment_list->rewrite_bsid);
-      pool_put_index (sm->sid_lists, sl_index);
-      vec_del1 (sr_policy->segments_lists,
-		sl_index_iterate - sr_policy->segments_lists);
-
-      /* Create a new LB DPO */
-      if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
-	update_lb (sr_policy);
-      else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
-	update_replicate (sr_policy);
-    }
-  else if (operation == 3)	/* Modify the weight of an existing SR List */
-    {
-      /* Find the corresponding SL */
-      vec_foreach (sl_index_iterate, sr_policy->segments_lists)
-	if (*sl_index_iterate == sl_index)
-	break;
-
-      if (*sl_index_iterate != sl_index)
-	return -32;
-
-      /* Change the weight */
-      segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
-      segment_list->weight = weight;
-
-      /* Update LB */
-      if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
-	update_lb (sr_policy);
-    }
-  else				/* Incorrect op. */
-    return -1;
-
-  return 0;
-}
-
-/**
- * @brief CLI for 'sr policies' command family
- */
-static clib_error_t *
-sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
-		      vlib_cli_command_t * cmd)
-{
-  int rv = -1;
-  char is_del = 0, is_add = 0, is_mod = 0;
-  char policy_set = 0;
-  ip6_address_t bsid, next_address;
-  u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0;
-  u32 weight = (u32) ~ 0, fib_table = (u32) ~ 0;
-  ip6_address_t *segments = 0, *this_seg;
-  u8 operation = 0;
-  char is_encap = 1;
-  char is_spray = 0;
-
-  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
-    {
-      if (!is_add && !is_mod && !is_del && unformat (input, "add"))
-	is_add = 1;
-      else if (!is_add && !is_mod && !is_del && unformat (input, "del"))
-	is_del = 1;
-      else if (!is_add && !is_mod && !is_del && unformat (input, "mod"))
-	is_mod = 1;
-      else if (!policy_set
-	       && unformat (input, "bsid %U", unformat_ip6_address, &bsid))
-	policy_set = 1;
-      else if (!is_add && !policy_set
-	       && unformat (input, "index %d", &sr_policy_index))
-	policy_set = 1;
-      else if (unformat (input, "weight %d", &weight));
-      else
-	if (unformat (input, "next %U", unformat_ip6_address, &next_address))
-	{
-	  vec_add2 (segments, this_seg, 1);
-	  clib_memcpy (this_seg->as_u8, next_address.as_u8,
-		       sizeof (*this_seg));
-	}
-      else if (unformat (input, "add sl"))
-	operation = 1;
-      else if (unformat (input, "del sl index %d", &sl_index))
-	operation = 2;
-      else if (unformat (input, "mod sl index %d", &sl_index))
-	operation = 3;
-      else if (fib_table == (u32) ~ 0
-	       && unformat (input, "fib-table %d", &fib_table));
-      else if (unformat (input, "encap"))
-	is_encap = 1;
-      else if (unformat (input, "insert"))
-	is_encap = 0;
-      else if (unformat (input, "spray"))
-	is_spray = 1;
-      else
-	break;
-    }
-
-  if (!is_add && !is_mod && !is_del)
-    return clib_error_return (0, "Incorrect CLI");
-
-  if (!policy_set)
-    return clib_error_return (0, "No SR policy BSID or index specified");
-
-  if (is_add)
-    {
-      if (vec_len (segments) == 0)
-	return clib_error_return (0, "No Segment List specified");
-      rv = sr_policy_add (&bsid, segments, weight,
-			  (is_spray ? SR_POLICY_TYPE_SPRAY :
-			   SR_POLICY_TYPE_DEFAULT), fib_table, is_encap);
-    }
-  else if (is_del)
-    rv = sr_policy_del ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid),
-			sr_policy_index);
-  else if (is_mod)
-    {
-      if (!operation)
-	return clib_error_return (0, "No SL modification specified");
-      if (operation != 1 && sl_index == (u32) ~ 0)
-	return clib_error_return (0, "No Segment List index specified");
-      if (operation == 1 && vec_len (segments) == 0)
-	return clib_error_return (0, "No Segment List specified");
-      if (operation == 3 && weight == (u32) ~ 0)
-	return clib_error_return (0, "No new weight for the SL specified");
-      rv = sr_policy_mod ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid),
-			  sr_policy_index, fib_table, operation, segments,
-			  sl_index, weight);
-    }
-
-  switch (rv)
-    {
-    case 0:
-      break;
-    case 1:
-      return 0;
-    case -12:
-      return clib_error_return (0,
-				"There is already a FIB entry for the BindingSID address.\n"
-				"The SR policy could not be created.");
-    case -13:
-      return clib_error_return (0, "The specified FIB table does not exist.");
-    case -21:
-      return clib_error_return (0,
-				"The selected SR policy only contains ONE segment list. "
-				"Please remove the SR policy instead");
-    case -22:
-      return clib_error_return (0,
-				"Could not delete the segment list. "
-				"It is not associated with that SR policy.");
-    case -32:
-      return clib_error_return (0,
-				"Could not modify the segment list. "
-				"The given SL is not associated with such SR policy.");
-    default:
-      return clib_error_return (0, "BUG: sr policy returns %d", rv);
-    }
-  return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (sr_policy_command, static) = {
-  .path = "sr policy",
-  .short_help = "sr policy [add||del||mod] [bsid 2001::1||index 5] "
-    "next A:: next B:: next C:: (weight 1) (fib-table 2) (encap|insert)",
-  .long_help =
-    "Manipulation of SR policies.\n"
-    "A Segment Routing policy may contain several SID lists. Each SID list has\n"
-    "an associated weight (default 1), which will result in wECMP (uECMP).\n"
-    "Segment Routing policies might be of type encapsulation or srh insertion\n"
-    "Each SR policy will be associated with a unique BindingSID.\n"
-    "A BindingSID is a locally allocated SegmentID. For every packet that arrives\n"
-    "with IPv6_DA:BSID such traffic will be steered into the SR policy.\n"
-    "The add command will create a SR policy with its first segment list (sl)\n"
-    "The mod command allows you to add, remove, or modify the existing segment lists\n"
-    "within an SR policy.\n"
-    "The del command allows you to delete a SR policy along with all its associated\n"
-    "SID lists.\n",
-  .function = sr_policy_command_fn,
-};
-/* *INDENT-ON* */
-
-/**
- * @brief CLI to display onscreen all the SR policies
- */
-static clib_error_t *
-show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
-			     vlib_cli_command_t * cmd)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  u32 *sl_index;
-  ip6_sr_sl_t *segment_list = 0;
-  ip6_sr_policy_t *sr_policy = 0;
-  ip6_sr_policy_t **vec_policies = 0;
-  ip6_address_t *addr;
-  u8 *s;
-  int i = 0;
-
-  vlib_cli_output (vm, "SR policies:");
-
-  /* *INDENT-OFF* */
-  pool_foreach  (sr_policy, sm->sr_policies,
-                {vec_add1 (vec_policies, sr_policy); } );
-  /* *INDENT-ON* */
-
-  vec_foreach_index (i, vec_policies)
-  {
-    sr_policy = vec_policies[i];
-    vlib_cli_output (vm, "[%u].-\tBSID: %U",
-		     (u32) (sr_policy - sm->sr_policies),
-		     format_ip6_address, &sr_policy->bsid);
-    vlib_cli_output (vm, "\tBehavior: %s",
-		     (sr_policy->is_encap ? "Encapsulation" :
-		      "SRH insertion"));
-    vlib_cli_output (vm, "\tType: %s",
-		     (sr_policy->type ==
-		      SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray"));
-    vlib_cli_output (vm, "\tFIB table: %u",
-		     (sr_policy->fib_table !=
-		      (u32) ~ 0 ? sr_policy->fib_table : 0));
-    vlib_cli_output (vm, "\tSegment Lists:");
-    vec_foreach (sl_index, sr_policy->segments_lists)
-    {
-      s = NULL;
-      s = format (s, "\t[%u].- ", *sl_index);
-      segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
-      s = format (s, "< ");
-      vec_foreach (addr, segment_list->segments)
-      {
-	s = format (s, "%U, ", format_ip6_address, addr);
-      }
-      s = format (s, "\b\b > ");
-      s = format (s, "weight: %u", segment_list->weight);
-      vlib_cli_output (vm, "  %s", s);
-    }
-    vlib_cli_output (vm, "-----------");
-  }
-  return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_sr_policies_command, static) = {
-  .path = "show sr policies",
-  .short_help = "show sr policies",
-  .function = show_sr_policies_command_fn,
-};
-/* *INDENT-ON* */
-
-/*************************** SR rewrite graph node ****************************/
-/**
- * @brief Trace for the SR Policy Rewrite graph node
- */
-static u8 *
-format_sr_policy_rewrite_trace (u8 * s, va_list * args)
-{
-  //TODO
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  sr_policy_rewrite_trace_t *t = va_arg (*args, sr_policy_rewrite_trace_t *);
-
-  s = format
-    (s, "SR-policy-rewrite: src %U dst %U",
-     format_ip6_address, &t->src, format_ip6_address, &t->dst);
-
-  return s;
-}
-
-/**
- * @brief IPv6 encapsulation processing as per RFC2473
- */
-static_always_inline void
-encaps_processing_v6 (vlib_node_runtime_t * node,
-		      vlib_buffer_t * b0,
-		      ip6_header_t * ip0, ip6_header_t * ip0_encap)
-{
-  u32 new_l0;
-
-  ip0_encap->hop_limit -= 1;
-  new_l0 =
-    ip0->payload_length + sizeof (ip6_header_t) +
-    clib_net_to_host_u16 (ip0_encap->payload_length);
-  ip0->payload_length = clib_host_to_net_u16 (new_l0);
-  ip0->ip_version_traffic_class_and_flow_label =
-    ip0_encap->ip_version_traffic_class_and_flow_label;
-}
-
-/**
- * @brief Graph node for applying a SR policy into an IPv6 packet. Encapsulation
- */
-static uword
-sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
-			  vlib_frame_t * from_frame)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  u32 n_left_from, next_index, *from, *to_next;
-
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-
-  next_index = node->cached_next_index;
-
-  int encap_pkts = 0, bsid_pkts = 0;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      /* Quad - Loop */
-      while (n_left_from >= 8 && n_left_to_next >= 4)
-	{
-	  u32 bi0, bi1, bi2, bi3;
-	  vlib_buffer_t *b0, *b1, *b2, *b3;
-	  u32 next0, next1, next2, next3;
-	  next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
-	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
-	  ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap;
-	  ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
-
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t *p4, *p5, *p6, *p7;
-
-	    p4 = vlib_get_buffer (vm, from[4]);
-	    p5 = vlib_get_buffer (vm, from[5]);
-	    p6 = vlib_get_buffer (vm, from[6]);
-	    p7 = vlib_get_buffer (vm, from[7]);
-
-	    /* Prefetch the buffer header and packet for the N+2 loop iteration */
-	    vlib_prefetch_buffer_header (p4, LOAD);
-	    vlib_prefetch_buffer_header (p5, LOAD);
-	    vlib_prefetch_buffer_header (p6, LOAD);
-	    vlib_prefetch_buffer_header (p7, LOAD);
-
-	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
-	  }
-
-	  to_next[0] = bi0 = from[0];
-	  to_next[1] = bi1 = from[1];
-	  to_next[2] = bi2 = from[2];
-	  to_next[3] = bi3 = from[3];
-	  from += 4;
-	  to_next += 4;
-	  n_left_from -= 4;
-	  n_left_to_next -= 4;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-	  b2 = vlib_get_buffer (vm, bi2);
-	  b3 = vlib_get_buffer (vm, bi3);
-
-	  sl0 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  sl1 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
-	  sl2 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
-	  sl3 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
-
-	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl0->rewrite));
-	  ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl1->rewrite));
-	  ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl2->rewrite));
-	  ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl3->rewrite));
-
-	  ip0_encap = vlib_buffer_get_current (b0);
-	  ip1_encap = vlib_buffer_get_current (b1);
-	  ip2_encap = vlib_buffer_get_current (b2);
-	  ip3_encap = vlib_buffer_get_current (b3);
-
-	  clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
-		       sl0->rewrite, vec_len (sl0->rewrite));
-	  clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite),
-		       sl1->rewrite, vec_len (sl1->rewrite));
-	  clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite),
-		       sl2->rewrite, vec_len (sl2->rewrite));
-	  clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite),
-		       sl3->rewrite, vec_len (sl3->rewrite));
-
-	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
-	  vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
-	  vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
-	  vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
-
-	  ip0 = vlib_buffer_get_current (b0);
-	  ip1 = vlib_buffer_get_current (b1);
-	  ip2 = vlib_buffer_get_current (b2);
-	  ip3 = vlib_buffer_get_current (b3);
-
-	  encaps_processing_v6 (node, b0, ip0, ip0_encap);
-	  encaps_processing_v6 (node, b1, ip1, ip1_encap);
-	  encaps_processing_v6 (node, b2, ip2, ip2_encap);
-	  encaps_processing_v6 (node, b3, ip3, ip3_encap);
-
-	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
-	    {
-	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b0, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b1, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b2, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b3, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-	    }
-
-	  encap_pkts += 4;
-	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, bi1, bi2, bi3,
-					   next0, next1, next2, next3);
-	}
-
-      /* Single loop for potentially the last three packets */
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  u32 bi0;
-	  vlib_buffer_t *b0;
-	  ip6_header_t *ip0 = 0, *ip0_encap = 0;
-	  ip6_sr_sl_t *sl0;
-	  u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
-
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-	  b0 = vlib_get_buffer (vm, bi0);
-
-	  sl0 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl0->rewrite));
-
-	  ip0_encap = vlib_buffer_get_current (b0);
-
-	  clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
-		       sl0->rewrite, vec_len (sl0->rewrite));
-	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
-
-	  ip0 = vlib_buffer_get_current (b0);
-
-	  encaps_processing_v6 (node, b0, ip0, ip0_encap);
-
-	  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
-	      PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      sr_policy_rewrite_trace_t *tr =
-		vlib_add_trace (vm, node, b0, sizeof (*tr));
-	      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			   sizeof (tr->src.as_u8));
-	      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
-			   sizeof (tr->dst.as_u8));
-	    }
-
-	  encap_pkts++;
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, next0);
-	}
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  /* Update counters */
-  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
-			       SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
-			       encap_pkts);
-  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
-			       SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
-			       bsid_pkts);
-
-  return from_frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = {
-  .function = sr_policy_rewrite_encaps,
-  .name = "sr-pl-rewrite-encaps",
-  .vector_size = sizeof (u32),
-  .format_trace = format_sr_policy_rewrite_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = SR_POLICY_REWRITE_N_ERROR,
-  .error_strings = sr_policy_rewrite_error_strings,
-  .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
-  .next_nodes = {
-#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
-    foreach_sr_policy_rewrite_next
-#undef _
-  },
-};
-/* *INDENT-ON* */
-
-/**
- * @brief IPv4 encapsulation processing as per RFC2473
- */
-static_always_inline void
-encaps_processing_v4 (vlib_node_runtime_t * node,
-		      vlib_buffer_t * b0,
-		      ip6_header_t * ip0, ip4_header_t * ip0_encap)
-{
-  u32 new_l0;
-  ip6_sr_header_t *sr0;
-
-  u32 checksum0;
-
-  /* Inner IPv4: Decrement TTL & update checksum */
-  ip0_encap->ttl -= 1;
-  checksum0 = ip0_encap->checksum + clib_host_to_net_u16 (0x0100);
-  checksum0 += checksum0 >= 0xffff;
-  ip0_encap->checksum = checksum0;
-
-  /* Outer IPv6: Update length, FL, proto */
-  new_l0 = ip0->payload_length + clib_net_to_host_u16 (ip0_encap->length);
-  ip0->payload_length = clib_host_to_net_u16 (new_l0);
-  ip0->ip_version_traffic_class_and_flow_label =
-    clib_host_to_net_u32 (0 | ((6 & 0xF) << 28) |
-			  ((ip0_encap->tos & 0xFF) << 20));
-  sr0 = (void *) (ip0 + 1);
-  sr0->protocol = IP_PROTOCOL_IP_IN_IP;
-}
-
-/**
- * @brief Graph node for applying a SR policy into an IPv4 packet. Encapsulation
- */
-static uword
-sr_policy_rewrite_encaps_v4 (vlib_main_t * vm, vlib_node_runtime_t * node,
-			     vlib_frame_t * from_frame)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  u32 n_left_from, next_index, *from, *to_next;
-
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-
-  next_index = node->cached_next_index;
-
-  int encap_pkts = 0, bsid_pkts = 0;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      /* Quad - Loop */
-      while (n_left_from >= 8 && n_left_to_next >= 4)
-	{
-	  u32 bi0, bi1, bi2, bi3;
-	  vlib_buffer_t *b0, *b1, *b2, *b3;
-	  u32 next0, next1, next2, next3;
-	  next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
-	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
-	  ip4_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap;
-	  ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
-
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t *p4, *p5, *p6, *p7;
-
-	    p4 = vlib_get_buffer (vm, from[4]);
-	    p5 = vlib_get_buffer (vm, from[5]);
-	    p6 = vlib_get_buffer (vm, from[6]);
-	    p7 = vlib_get_buffer (vm, from[7]);
-
-	    /* Prefetch the buffer header and packet for the N+2 loop iteration */
-	    vlib_prefetch_buffer_header (p4, LOAD);
-	    vlib_prefetch_buffer_header (p5, LOAD);
-	    vlib_prefetch_buffer_header (p6, LOAD);
-	    vlib_prefetch_buffer_header (p7, LOAD);
-
-	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
-	  }
-
-	  to_next[0] = bi0 = from[0];
-	  to_next[1] = bi1 = from[1];
-	  to_next[2] = bi2 = from[2];
-	  to_next[3] = bi3 = from[3];
-	  from += 4;
-	  to_next += 4;
-	  n_left_from -= 4;
-	  n_left_to_next -= 4;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-	  b2 = vlib_get_buffer (vm, bi2);
-	  b3 = vlib_get_buffer (vm, bi3);
-
-	  sl0 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  sl1 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
-	  sl2 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
-	  sl3 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
-	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl0->rewrite));
-	  ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl1->rewrite));
-	  ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl2->rewrite));
-	  ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl3->rewrite));
-
-	  ip0_encap = vlib_buffer_get_current (b0);
-	  ip1_encap = vlib_buffer_get_current (b1);
-	  ip2_encap = vlib_buffer_get_current (b2);
-	  ip3_encap = vlib_buffer_get_current (b3);
-
-	  clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
-		       sl0->rewrite, vec_len (sl0->rewrite));
-	  clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite),
-		       sl1->rewrite, vec_len (sl1->rewrite));
-	  clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite),
-		       sl2->rewrite, vec_len (sl2->rewrite));
-	  clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite),
-		       sl3->rewrite, vec_len (sl3->rewrite));
-
-	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
-	  vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
-	  vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
-	  vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
-
-	  ip0 = vlib_buffer_get_current (b0);
-	  ip1 = vlib_buffer_get_current (b1);
-	  ip2 = vlib_buffer_get_current (b2);
-	  ip3 = vlib_buffer_get_current (b3);
-
-	  encaps_processing_v4 (node, b0, ip0, ip0_encap);
-	  encaps_processing_v4 (node, b1, ip1, ip1_encap);
-	  encaps_processing_v4 (node, b2, ip2, ip2_encap);
-	  encaps_processing_v4 (node, b3, ip3, ip3_encap);
-
-	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
-	    {
-	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b0, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b1, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b2, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b3, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-	    }
-
-	  encap_pkts += 4;
-	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, bi1, bi2, bi3,
-					   next0, next1, next2, next3);
-	}
-
-      /* Single loop for potentially the last three packets */
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  u32 bi0;
-	  vlib_buffer_t *b0;
-	  ip6_header_t *ip0 = 0;
-	  ip4_header_t *ip0_encap = 0;
-	  ip6_sr_sl_t *sl0;
-	  u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
-
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-	  b0 = vlib_get_buffer (vm, bi0);
-
-	  sl0 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl0->rewrite));
-
-	  ip0_encap = vlib_buffer_get_current (b0);
-
-	  clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
-		       sl0->rewrite, vec_len (sl0->rewrite));
-	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
-
-	  ip0 = vlib_buffer_get_current (b0);
-
-	  encaps_processing_v4 (node, b0, ip0, ip0_encap);
-
-	  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
-	      PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      sr_policy_rewrite_trace_t *tr =
-		vlib_add_trace (vm, node, b0, sizeof (*tr));
-	      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			   sizeof (tr->src.as_u8));
-	      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
-			   sizeof (tr->dst.as_u8));
-	    }
-
-	  encap_pkts++;
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, next0);
-	}
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  /* Update counters */
-  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
-			       SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
-			       encap_pkts);
-  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
-			       SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
-			       bsid_pkts);
-
-  return from_frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = {
-  .function = sr_policy_rewrite_encaps_v4,
-  .name = "sr-pl-rewrite-encaps-v4",
-  .vector_size = sizeof (u32),
-  .format_trace = format_sr_policy_rewrite_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = SR_POLICY_REWRITE_N_ERROR,
-  .error_strings = sr_policy_rewrite_error_strings,
-  .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
-  .next_nodes = {
-#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
-    foreach_sr_policy_rewrite_next
-#undef _
-  },
-};
-/* *INDENT-ON* */
-
-always_inline u32
-ip_flow_hash (void *data)
-{
-  ip4_header_t *iph = (ip4_header_t *) data;
-
-  if ((iph->ip_version_and_header_length & 0xF0) == 0x40)
-    return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT);
-  else
-    return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT);
-}
-
-always_inline u64
-mac_to_u64 (u8 * m)
-{
-  return (*((u64 *) m) & 0xffffffffffff);
-}
-
-always_inline u32
-l2_flow_hash (vlib_buffer_t * b0)
-{
-  ethernet_header_t *eh;
-  u64 a, b, c;
-  uword is_ip, eh_size;
-  u16 eh_type;
-
-  eh = vlib_buffer_get_current (b0);
-  eh_type = clib_net_to_host_u16 (eh->type);
-  eh_size = ethernet_buffer_header_size (b0);
-
-  is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6);
-
-  /* since we have 2 cache lines, use them */
-  if (is_ip)
-    a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size);
-  else
-    a = eh->type;
-
-  b = mac_to_u64 ((u8 *) eh->dst_address);
-  c = mac_to_u64 ((u8 *) eh->src_address);
-  hash_mix64 (a, b, c);
-
-  return (u32) c;
-}
-
-/**
- * @brief Graph node for applying a SR policy into a L2 frame
- */
-static uword
-sr_policy_rewrite_encaps_l2 (vlib_main_t * vm, vlib_node_runtime_t * node,
-			     vlib_frame_t * from_frame)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  u32 n_left_from, next_index, *from, *to_next;
-
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-
-  next_index = node->cached_next_index;
-
-  int encap_pkts = 0, bsid_pkts = 0;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      /* Quad - Loop */
-      while (n_left_from >= 8 && n_left_to_next >= 4)
-	{
-	  u32 bi0, bi1, bi2, bi3;
-	  vlib_buffer_t *b0, *b1, *b2, *b3;
-	  u32 next0, next1, next2, next3;
-	  next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
-	  ethernet_header_t *en0, *en1, *en2, *en3;
-	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
-	  ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
-	  ip6_sr_policy_t *sp0, *sp1, *sp2, *sp3;
-	  ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
-
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t *p4, *p5, *p6, *p7;
-
-	    p4 = vlib_get_buffer (vm, from[4]);
-	    p5 = vlib_get_buffer (vm, from[5]);
-	    p6 = vlib_get_buffer (vm, from[6]);
-	    p7 = vlib_get_buffer (vm, from[7]);
-
-	    /* Prefetch the buffer header and packet for the N+2 loop iteration */
-	    vlib_prefetch_buffer_header (p4, LOAD);
-	    vlib_prefetch_buffer_header (p5, LOAD);
-	    vlib_prefetch_buffer_header (p6, LOAD);
-	    vlib_prefetch_buffer_header (p7, LOAD);
-
-	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
-	  }
-
-	  to_next[0] = bi0 = from[0];
-	  to_next[1] = bi1 = from[1];
-	  to_next[2] = bi2 = from[2];
-	  to_next[3] = bi3 = from[3];
-	  from += 4;
-	  to_next += 4;
-	  n_left_from -= 4;
-	  n_left_to_next -= 4;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-	  b2 = vlib_get_buffer (vm, bi2);
-	  b3 = vlib_get_buffer (vm, bi3);
-
-	  sp0 = pool_elt_at_index (sm->sr_policies,
-				   sm->sw_iface_sr_policies[vnet_buffer
-							    (b0)->sw_if_index
-							    [VLIB_RX]]);
-
-	  sp1 = pool_elt_at_index (sm->sr_policies,
-				   sm->sw_iface_sr_policies[vnet_buffer
-							    (b1)->sw_if_index
-							    [VLIB_RX]]);
-
-	  sp2 = pool_elt_at_index (sm->sr_policies,
-				   sm->sw_iface_sr_policies[vnet_buffer
-							    (b2)->sw_if_index
-							    [VLIB_RX]]);
-
-	  sp3 = pool_elt_at_index (sm->sr_policies,
-				   sm->sw_iface_sr_policies[vnet_buffer
-							    (b3)->sw_if_index
-							    [VLIB_RX]]);
-
-	  if (vec_len (sp0->segments_lists) == 1)
-	    vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0];
-	  else
-	    {
-	      vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0);
-	      vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
-		sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash &
-				     (vec_len (sp0->segments_lists) - 1))];
-	    }
-
-	  if (vec_len (sp1->segments_lists) == 1)
-	    vnet_buffer (b1)->ip.adj_index[VLIB_TX] = sp1->segments_lists[1];
-	  else
-	    {
-	      vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1);
-	      vnet_buffer (b1)->ip.adj_index[VLIB_TX] =
-		sp1->segments_lists[(vnet_buffer (b1)->ip.flow_hash &
-				     (vec_len (sp1->segments_lists) - 1))];
-	    }
-
-	  if (vec_len (sp2->segments_lists) == 1)
-	    vnet_buffer (b2)->ip.adj_index[VLIB_TX] = sp2->segments_lists[2];
-	  else
-	    {
-	      vnet_buffer (b2)->ip.flow_hash = l2_flow_hash (b2);
-	      vnet_buffer (b2)->ip.adj_index[VLIB_TX] =
-		sp2->segments_lists[(vnet_buffer (b2)->ip.flow_hash &
-				     (vec_len (sp2->segments_lists) - 1))];
-	    }
-
-	  if (vec_len (sp3->segments_lists) == 1)
-	    vnet_buffer (b3)->ip.adj_index[VLIB_TX] = sp3->segments_lists[3];
-	  else
-	    {
-	      vnet_buffer (b3)->ip.flow_hash = l2_flow_hash (b3);
-	      vnet_buffer (b3)->ip.adj_index[VLIB_TX] =
-		sp3->segments_lists[(vnet_buffer (b3)->ip.flow_hash &
-				     (vec_len (sp3->segments_lists) - 1))];
-	    }
-
-	  sl0 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  sl1 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
-	  sl2 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
-	  sl3 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
-
-	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl0->rewrite));
-	  ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl1->rewrite));
-	  ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl2->rewrite));
-	  ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl3->rewrite));
-
-	  en0 = vlib_buffer_get_current (b0);
-	  en1 = vlib_buffer_get_current (b1);
-	  en2 = vlib_buffer_get_current (b2);
-	  en3 = vlib_buffer_get_current (b3);
-
-	  clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite,
-		       vec_len (sl0->rewrite));
-	  clib_memcpy (((u8 *) en1) - vec_len (sl1->rewrite), sl1->rewrite,
-		       vec_len (sl1->rewrite));
-	  clib_memcpy (((u8 *) en2) - vec_len (sl2->rewrite), sl2->rewrite,
-		       vec_len (sl2->rewrite));
-	  clib_memcpy (((u8 *) en3) - vec_len (sl3->rewrite), sl3->rewrite,
-		       vec_len (sl3->rewrite));
-
-	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
-	  vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
-	  vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
-	  vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
-
-	  ip0 = vlib_buffer_get_current (b0);
-	  ip1 = vlib_buffer_get_current (b1);
-	  ip2 = vlib_buffer_get_current (b2);
-	  ip3 = vlib_buffer_get_current (b3);
-
-	  ip0->payload_length =
-	    clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));
-	  ip1->payload_length =
-	    clib_host_to_net_u16 (b1->current_length - sizeof (ip6_header_t));
-	  ip2->payload_length =
-	    clib_host_to_net_u16 (b2->current_length - sizeof (ip6_header_t));
-	  ip3->payload_length =
-	    clib_host_to_net_u16 (b3->current_length - sizeof (ip6_header_t));
-
-	  sr0 = (void *) (ip0 + 1);
-	  sr1 = (void *) (ip1 + 1);
-	  sr2 = (void *) (ip2 + 1);
-	  sr3 = (void *) (ip3 + 1);
-
-	  sr0->protocol = sr1->protocol = sr2->protocol = sr3->protocol =
-	    IP_PROTOCOL_IP6_NONXT;
-
-	  /* Which Traffic class and flow label do I set ? */
-	  //ip0->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(0|((6&0xF)<<28)|((ip0_encap->tos&0xFF)<<20));
-
-	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
-	    {
-	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b0, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b1, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b2, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b3, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-	    }
-
-	  encap_pkts += 4;
-	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, bi1, bi2, bi3,
-					   next0, next1, next2, next3);
-	}
-
-      /* Single loop for potentially the last three packets */
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  u32 bi0;
-	  vlib_buffer_t *b0;
-	  ip6_header_t *ip0 = 0;
-	  ip6_sr_header_t *sr0;
-	  ethernet_header_t *en0;
-	  ip6_sr_policy_t *sp0;
-	  ip6_sr_sl_t *sl0;
-	  u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
-
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-	  b0 = vlib_get_buffer (vm, bi0);
-
-	  /* Find the SR policy */
-	  sp0 = pool_elt_at_index (sm->sr_policies,
-				   sm->sw_iface_sr_policies[vnet_buffer
-							    (b0)->sw_if_index
-							    [VLIB_RX]]);
-
-	  /* In case there is more than one SL, LB among them */
-	  if (vec_len (sp0->segments_lists) == 1)
-	    vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0];
-	  else
-	    {
-	      vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0);
-	      vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
-		sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash &
-				     (vec_len (sp0->segments_lists) - 1))];
-	    }
-	  sl0 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl0->rewrite));
-
-	  en0 = vlib_buffer_get_current (b0);
-
-	  clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite,
-		       vec_len (sl0->rewrite));
-
-	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
-
-	  ip0 = vlib_buffer_get_current (b0);
-
-	  ip0->payload_length =
-	    clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));
-
-	  sr0 = (void *) (ip0 + 1);
-	  sr0->protocol = IP_PROTOCOL_IP6_NONXT;
-
-	  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
-	      PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      sr_policy_rewrite_trace_t *tr =
-		vlib_add_trace (vm, node, b0, sizeof (*tr));
-	      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			   sizeof (tr->src.as_u8));
-	      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
-			   sizeof (tr->dst.as_u8));
-	    }
-
-	  encap_pkts++;
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, next0);
-	}
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  /* Update counters */
-  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
-			       SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
-			       encap_pkts);
-  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
-			       SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
-			       bsid_pkts);
-
-  return from_frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = {
-  .function = sr_policy_rewrite_encaps_l2,
-  .name = "sr-pl-rewrite-encaps-l2",
-  .vector_size = sizeof (u32),
-  .format_trace = format_sr_policy_rewrite_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = SR_POLICY_REWRITE_N_ERROR,
-  .error_strings = sr_policy_rewrite_error_strings,
-  .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
-  .next_nodes = {
-#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
-    foreach_sr_policy_rewrite_next
-#undef _
-  },
-};
-/* *INDENT-ON* */
-
-/**
- * @brief Graph node for applying a SR policy into a packet. SRH insertion.
- */
-static uword
-sr_policy_rewrite_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
-			  vlib_frame_t * from_frame)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  u32 n_left_from, next_index, *from, *to_next;
-
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-
-  next_index = node->cached_next_index;
-
-  int insert_pkts = 0, bsid_pkts = 0;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      /* Quad - Loop */
-      while (n_left_from >= 8 && n_left_to_next >= 4)
-	{
-	  u32 bi0, bi1, bi2, bi3;
-	  vlib_buffer_t *b0, *b1, *b2, *b3;
-	  u32 next0, next1, next2, next3;
-	  next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
-	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
-	  ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
-	  ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
-	  u16 new_l0, new_l1, new_l2, new_l3;
-
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t *p4, *p5, *p6, *p7;
-
-	    p4 = vlib_get_buffer (vm, from[4]);
-	    p5 = vlib_get_buffer (vm, from[5]);
-	    p6 = vlib_get_buffer (vm, from[6]);
-	    p7 = vlib_get_buffer (vm, from[7]);
-
-	    /* Prefetch the buffer header and packet for the N+2 loop iteration */
-	    vlib_prefetch_buffer_header (p4, LOAD);
-	    vlib_prefetch_buffer_header (p5, LOAD);
-	    vlib_prefetch_buffer_header (p6, LOAD);
-	    vlib_prefetch_buffer_header (p7, LOAD);
-
-	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
-	  }
-
-	  to_next[0] = bi0 = from[0];
-	  to_next[1] = bi1 = from[1];
-	  to_next[2] = bi2 = from[2];
-	  to_next[3] = bi3 = from[3];
-	  from += 4;
-	  to_next += 4;
-	  n_left_from -= 4;
-	  n_left_to_next -= 4;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-	  b2 = vlib_get_buffer (vm, bi2);
-	  b3 = vlib_get_buffer (vm, bi3);
-
-	  sl0 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  sl1 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
-	  sl2 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
-	  sl3 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
-	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl0->rewrite));
-	  ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl1->rewrite));
-	  ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl2->rewrite));
-	  ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl3->rewrite));
-
-	  ip0 = vlib_buffer_get_current (b0);
-	  ip1 = vlib_buffer_get_current (b1);
-	  ip2 = vlib_buffer_get_current (b2);
-	  ip3 = vlib_buffer_get_current (b3);
-
-	  if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
-	    sr0 =
-	      (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
-				   ip6_ext_header_len (ip0 + 1));
-	  else
-	    sr0 = (ip6_sr_header_t *) (ip0 + 1);
-
-	  if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
-	    sr1 =
-	      (ip6_sr_header_t *) (((void *) (ip1 + 1)) +
-				   ip6_ext_header_len (ip1 + 1));
-	  else
-	    sr1 = (ip6_sr_header_t *) (ip1 + 1);
-
-	  if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
-	    sr2 =
-	      (ip6_sr_header_t *) (((void *) (ip2 + 1)) +
-				   ip6_ext_header_len (ip2 + 1));
-	  else
-	    sr2 = (ip6_sr_header_t *) (ip2 + 1);
-
-	  if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
-	    sr3 =
-	      (ip6_sr_header_t *) (((void *) (ip3 + 1)) +
-				   ip6_ext_header_len (ip3 + 1));
-	  else
-	    sr3 = (ip6_sr_header_t *) (ip3 + 1);
-
-	  clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0,
-		       (void *) sr0 - (void *) ip0);
-	  clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite), (u8 *) ip1,
-		       (void *) sr1 - (void *) ip1);
-	  clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite), (u8 *) ip2,
-		       (void *) sr2 - (void *) ip2);
-	  clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite), (u8 *) ip3,
-		       (void *) sr3 - (void *) ip3);
-
-	  clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite,
-		       vec_len (sl0->rewrite));
-	  clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite)), sl1->rewrite,
-		       vec_len (sl1->rewrite));
-	  clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite)), sl2->rewrite,
-		       vec_len (sl2->rewrite));
-	  clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite)), sl3->rewrite,
-		       vec_len (sl3->rewrite));
-
-	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
-	  vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
-	  vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
-	  vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
-
-	  ip0 = ((void *) ip0) - vec_len (sl0->rewrite);
-	  ip1 = ((void *) ip1) - vec_len (sl1->rewrite);
-	  ip2 = ((void *) ip2) - vec_len (sl2->rewrite);
-	  ip3 = ((void *) ip3) - vec_len (sl3->rewrite);
-
-	  ip0->hop_limit -= 1;
-	  ip1->hop_limit -= 1;
-	  ip2->hop_limit -= 1;
-	  ip3->hop_limit -= 1;
-
-	  new_l0 =
-	    clib_net_to_host_u16 (ip0->payload_length) +
-	    vec_len (sl0->rewrite);
-	  new_l1 =
-	    clib_net_to_host_u16 (ip1->payload_length) +
-	    vec_len (sl1->rewrite);
-	  new_l2 =
-	    clib_net_to_host_u16 (ip2->payload_length) +
-	    vec_len (sl2->rewrite);
-	  new_l3 =
-	    clib_net_to_host_u16 (ip3->payload_length) +
-	    vec_len (sl3->rewrite);
-
-	  ip0->payload_length = clib_host_to_net_u16 (new_l0);
-	  ip1->payload_length = clib_host_to_net_u16 (new_l1);
-	  ip2->payload_length = clib_host_to_net_u16 (new_l2);
-	  ip3->payload_length = clib_host_to_net_u16 (new_l3);
-
-	  sr0 = ((void *) sr0) - vec_len (sl0->rewrite);
-	  sr1 = ((void *) sr1) - vec_len (sl1->rewrite);
-	  sr2 = ((void *) sr2) - vec_len (sl2->rewrite);
-	  sr3 = ((void *) sr3) - vec_len (sl3->rewrite);
-
-	  sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0];
-	  sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1];
-	  sr1->segments->as_u64[0] = ip1->dst_address.as_u64[0];
-	  sr1->segments->as_u64[1] = ip1->dst_address.as_u64[1];
-	  sr2->segments->as_u64[0] = ip2->dst_address.as_u64[0];
-	  sr2->segments->as_u64[1] = ip2->dst_address.as_u64[1];
-	  sr3->segments->as_u64[0] = ip3->dst_address.as_u64[0];
-	  sr3->segments->as_u64[1] = ip3->dst_address.as_u64[1];
-
-	  ip0->dst_address.as_u64[0] =
-	    (sr0->segments + sr0->segments_left)->as_u64[0];
-	  ip0->dst_address.as_u64[1] =
-	    (sr0->segments + sr0->segments_left)->as_u64[1];
-	  ip1->dst_address.as_u64[0] =
-	    (sr1->segments + sr1->segments_left)->as_u64[0];
-	  ip1->dst_address.as_u64[1] =
-	    (sr1->segments + sr1->segments_left)->as_u64[1];
-	  ip2->dst_address.as_u64[0] =
-	    (sr2->segments + sr2->segments_left)->as_u64[0];
-	  ip2->dst_address.as_u64[1] =
-	    (sr2->segments + sr2->segments_left)->as_u64[1];
-	  ip3->dst_address.as_u64[0] =
-	    (sr3->segments + sr3->segments_left)->as_u64[0];
-	  ip3->dst_address.as_u64[1] =
-	    (sr3->segments + sr3->segments_left)->as_u64[1];
-
-	  ip6_ext_header_t *ip_ext;
-	  if (ip0 + 1 == (void *) sr0)
-	    {
-	      sr0->protocol = ip0->protocol;
-	      ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-	  else
-	    {
-	      ip_ext = (void *) (ip0 + 1);
-	      sr0->protocol = ip_ext->next_hdr;
-	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-
-	  if (ip1 + 1 == (void *) sr1)
-	    {
-	      sr1->protocol = ip1->protocol;
-	      ip1->protocol = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-	  else
-	    {
-	      ip_ext = (void *) (ip2 + 1);
-	      sr2->protocol = ip_ext->next_hdr;
-	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-
-	  if (ip2 + 1 == (void *) sr2)
-	    {
-	      sr2->protocol = ip2->protocol;
-	      ip2->protocol = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-	  else
-	    {
-	      ip_ext = (void *) (ip2 + 1);
-	      sr2->protocol = ip_ext->next_hdr;
-	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-
-	  if (ip3 + 1 == (void *) sr3)
-	    {
-	      sr3->protocol = ip3->protocol;
-	      ip3->protocol = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-	  else
-	    {
-	      ip_ext = (void *) (ip3 + 1);
-	      sr3->protocol = ip_ext->next_hdr;
-	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-
-	  insert_pkts += 4;
-
-	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
-	    {
-	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b0, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b1, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b2, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b3, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-	    }
-
-	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, bi1, bi2, bi3,
-					   next0, next1, next2, next3);
-	}
-
-      /* Single loop for potentially the last three packets */
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  u32 bi0;
-	  vlib_buffer_t *b0;
-	  ip6_header_t *ip0 = 0;
-	  ip6_sr_header_t *sr0 = 0;
-	  ip6_sr_sl_t *sl0;
-	  u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
-	  u16 new_l0 = 0;
-
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  sl0 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl0->rewrite));
-
-	  ip0 = vlib_buffer_get_current (b0);
-
-	  if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
-	    sr0 =
-	      (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
-				   ip6_ext_header_len (ip0 + 1));
-	  else
-	    sr0 = (ip6_sr_header_t *) (ip0 + 1);
-
-	  clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0,
-		       (void *) sr0 - (void *) ip0);
-	  clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite,
-		       vec_len (sl0->rewrite));
-
-	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
-
-	  ip0 = ((void *) ip0) - vec_len (sl0->rewrite);
-	  ip0->hop_limit -= 1;
-	  new_l0 =
-	    clib_net_to_host_u16 (ip0->payload_length) +
-	    vec_len (sl0->rewrite);
-	  ip0->payload_length = clib_host_to_net_u16 (new_l0);
-
-	  sr0 = ((void *) sr0) - vec_len (sl0->rewrite);
-	  sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0];
-	  sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1];
-
-	  ip0->dst_address.as_u64[0] =
-	    (sr0->segments + sr0->segments_left)->as_u64[0];
-	  ip0->dst_address.as_u64[1] =
-	    (sr0->segments + sr0->segments_left)->as_u64[1];
-
-	  if (ip0 + 1 == (void *) sr0)
-	    {
-	      sr0->protocol = ip0->protocol;
-	      ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-	  else
-	    {
-	      ip6_ext_header_t *ip_ext = (void *) (ip0 + 1);
-	      sr0->protocol = ip_ext->next_hdr;
-	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-
-	  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
-	      PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      sr_policy_rewrite_trace_t *tr =
-		vlib_add_trace (vm, node, b0, sizeof (*tr));
-	      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			   sizeof (tr->src.as_u8));
-	      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
-			   sizeof (tr->dst.as_u8));
-	    }
-
-	  insert_pkts++;
-
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, next0);
-	}
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  /* Update counters */
-  vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
-			       SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
-			       insert_pkts);
-  vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
-			       SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
-			       bsid_pkts);
-  return from_frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = {
-  .function = sr_policy_rewrite_insert,
-  .name = "sr-pl-rewrite-insert",
-  .vector_size = sizeof (u32),
-  .format_trace = format_sr_policy_rewrite_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = SR_POLICY_REWRITE_N_ERROR,
-  .error_strings = sr_policy_rewrite_error_strings,
-  .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
-  .next_nodes = {
-#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
-    foreach_sr_policy_rewrite_next
-#undef _
-  },
-};
-/* *INDENT-ON* */
-
-/**
- * @brief Graph node for applying a SR policy into a packet. BSID - SRH insertion.
- */
-static uword
-sr_policy_rewrite_b_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
-			    vlib_frame_t * from_frame)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  u32 n_left_from, next_index, *from, *to_next;
-
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-
-  next_index = node->cached_next_index;
-
-  int insert_pkts = 0, bsid_pkts = 0;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      /* Quad - Loop */
-      while (n_left_from >= 8 && n_left_to_next >= 4)
-	{
-	  u32 bi0, bi1, bi2, bi3;
-	  vlib_buffer_t *b0, *b1, *b2, *b3;
-	  u32 next0, next1, next2, next3;
-	  next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
-	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
-	  ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
-	  ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
-	  u16 new_l0, new_l1, new_l2, new_l3;
-
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t *p4, *p5, *p6, *p7;
-
-	    p4 = vlib_get_buffer (vm, from[4]);
-	    p5 = vlib_get_buffer (vm, from[5]);
-	    p6 = vlib_get_buffer (vm, from[6]);
-	    p7 = vlib_get_buffer (vm, from[7]);
-
-	    /* Prefetch the buffer header and packet for the N+2 loop iteration */
-	    vlib_prefetch_buffer_header (p4, LOAD);
-	    vlib_prefetch_buffer_header (p5, LOAD);
-	    vlib_prefetch_buffer_header (p6, LOAD);
-	    vlib_prefetch_buffer_header (p7, LOAD);
-
-	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
-	  }
-
-	  to_next[0] = bi0 = from[0];
-	  to_next[1] = bi1 = from[1];
-	  to_next[2] = bi2 = from[2];
-	  to_next[3] = bi3 = from[3];
-	  from += 4;
-	  to_next += 4;
-	  n_left_from -= 4;
-	  n_left_to_next -= 4;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-	  b2 = vlib_get_buffer (vm, bi2);
-	  b3 = vlib_get_buffer (vm, bi3);
-
-	  sl0 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  sl1 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
-	  sl2 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
-	  sl3 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
-	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl0->rewrite_bsid));
-	  ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl1->rewrite_bsid));
-	  ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl2->rewrite_bsid));
-	  ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl3->rewrite_bsid));
-
-	  ip0 = vlib_buffer_get_current (b0);
-	  ip1 = vlib_buffer_get_current (b1);
-	  ip2 = vlib_buffer_get_current (b2);
-	  ip3 = vlib_buffer_get_current (b3);
-
-	  if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
-	    sr0 =
-	      (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
-				   ip6_ext_header_len (ip0 + 1));
-	  else
-	    sr0 = (ip6_sr_header_t *) (ip0 + 1);
-
-	  if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
-	    sr1 =
-	      (ip6_sr_header_t *) (((void *) (ip1 + 1)) +
-				   ip6_ext_header_len (ip1 + 1));
-	  else
-	    sr1 = (ip6_sr_header_t *) (ip1 + 1);
-
-	  if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
-	    sr2 =
-	      (ip6_sr_header_t *) (((void *) (ip2 + 1)) +
-				   ip6_ext_header_len (ip2 + 1));
-	  else
-	    sr2 = (ip6_sr_header_t *) (ip2 + 1);
-
-	  if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
-	    sr3 =
-	      (ip6_sr_header_t *) (((void *) (ip3 + 1)) +
-				   ip6_ext_header_len (ip3 + 1));
-	  else
-	    sr3 = (ip6_sr_header_t *) (ip3 + 1);
-
-	  clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0,
-		       (void *) sr0 - (void *) ip0);
-	  clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite_bsid), (u8 *) ip1,
-		       (void *) sr1 - (void *) ip1);
-	  clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite_bsid), (u8 *) ip2,
-		       (void *) sr2 - (void *) ip2);
-	  clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite_bsid), (u8 *) ip3,
-		       (void *) sr3 - (void *) ip3);
-
-	  clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)),
-		       sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid));
-	  clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite_bsid)),
-		       sl1->rewrite_bsid, vec_len (sl1->rewrite_bsid));
-	  clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite_bsid)),
-		       sl2->rewrite_bsid, vec_len (sl2->rewrite_bsid));
-	  clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite_bsid)),
-		       sl3->rewrite_bsid, vec_len (sl3->rewrite_bsid));
-
-	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid));
-	  vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite_bsid));
-	  vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite_bsid));
-	  vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite_bsid));
-
-	  ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid);
-	  ip1 = ((void *) ip1) - vec_len (sl1->rewrite_bsid);
-	  ip2 = ((void *) ip2) - vec_len (sl2->rewrite_bsid);
-	  ip3 = ((void *) ip3) - vec_len (sl3->rewrite_bsid);
-
-	  ip0->hop_limit -= 1;
-	  ip1->hop_limit -= 1;
-	  ip2->hop_limit -= 1;
-	  ip3->hop_limit -= 1;
-
-	  new_l0 =
-	    clib_net_to_host_u16 (ip0->payload_length) +
-	    vec_len (sl0->rewrite_bsid);
-	  new_l1 =
-	    clib_net_to_host_u16 (ip1->payload_length) +
-	    vec_len (sl1->rewrite_bsid);
-	  new_l2 =
-	    clib_net_to_host_u16 (ip2->payload_length) +
-	    vec_len (sl2->rewrite_bsid);
-	  new_l3 =
-	    clib_net_to_host_u16 (ip3->payload_length) +
-	    vec_len (sl3->rewrite_bsid);
-
-	  ip0->payload_length = clib_host_to_net_u16 (new_l0);
-	  ip1->payload_length = clib_host_to_net_u16 (new_l1);
-	  ip2->payload_length = clib_host_to_net_u16 (new_l2);
-	  ip3->payload_length = clib_host_to_net_u16 (new_l3);
-
-	  sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid);
-	  sr1 = ((void *) sr1) - vec_len (sl1->rewrite_bsid);
-	  sr2 = ((void *) sr2) - vec_len (sl2->rewrite_bsid);
-	  sr3 = ((void *) sr3) - vec_len (sl3->rewrite_bsid);
-
-	  ip0->dst_address.as_u64[0] =
-	    (sr0->segments + sr0->segments_left)->as_u64[0];
-	  ip0->dst_address.as_u64[1] =
-	    (sr0->segments + sr0->segments_left)->as_u64[1];
-	  ip1->dst_address.as_u64[0] =
-	    (sr1->segments + sr1->segments_left)->as_u64[0];
-	  ip1->dst_address.as_u64[1] =
-	    (sr1->segments + sr1->segments_left)->as_u64[1];
-	  ip2->dst_address.as_u64[0] =
-	    (sr2->segments + sr2->segments_left)->as_u64[0];
-	  ip2->dst_address.as_u64[1] =
-	    (sr2->segments + sr2->segments_left)->as_u64[1];
-	  ip3->dst_address.as_u64[0] =
-	    (sr3->segments + sr3->segments_left)->as_u64[0];
-	  ip3->dst_address.as_u64[1] =
-	    (sr3->segments + sr3->segments_left)->as_u64[1];
-
-	  ip6_ext_header_t *ip_ext;
-	  if (ip0 + 1 == (void *) sr0)
-	    {
-	      sr0->protocol = ip0->protocol;
-	      ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-	  else
-	    {
-	      ip_ext = (void *) (ip0 + 1);
-	      sr0->protocol = ip_ext->next_hdr;
-	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-
-	  if (ip1 + 1 == (void *) sr1)
-	    {
-	      sr1->protocol = ip1->protocol;
-	      ip1->protocol = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-	  else
-	    {
-	      ip_ext = (void *) (ip2 + 1);
-	      sr2->protocol = ip_ext->next_hdr;
-	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-
-	  if (ip2 + 1 == (void *) sr2)
-	    {
-	      sr2->protocol = ip2->protocol;
-	      ip2->protocol = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-	  else
-	    {
-	      ip_ext = (void *) (ip2 + 1);
-	      sr2->protocol = ip_ext->next_hdr;
-	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-
-	  if (ip3 + 1 == (void *) sr3)
-	    {
-	      sr3->protocol = ip3->protocol;
-	      ip3->protocol = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-	  else
-	    {
-	      ip_ext = (void *) (ip3 + 1);
-	      sr3->protocol = ip_ext->next_hdr;
-	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-
-	  insert_pkts += 4;
-
-	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
-	    {
-	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b0, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b1, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b2, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b3, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-	    }
-
-	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, bi1, bi2, bi3,
-					   next0, next1, next2, next3);
-	}
-
-      /* Single loop for potentially the last three packets */
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  u32 bi0;
-	  vlib_buffer_t *b0;
-	  ip6_header_t *ip0 = 0;
-	  ip6_sr_header_t *sr0 = 0;
-	  ip6_sr_sl_t *sl0;
-	  u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
-	  u16 new_l0 = 0;
-
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  sl0 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl0->rewrite_bsid));
-
-	  ip0 = vlib_buffer_get_current (b0);
-
-	  if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
-	    sr0 =
-	      (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
-				   ip6_ext_header_len (ip0 + 1));
-	  else
-	    sr0 = (ip6_sr_header_t *) (ip0 + 1);
-
-	  clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0,
-		       (void *) sr0 - (void *) ip0);
-	  clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)),
-		       sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid));
-
-	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid));
-
-	  ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid);
-	  ip0->hop_limit -= 1;
-	  new_l0 =
-	    clib_net_to_host_u16 (ip0->payload_length) +
-	    vec_len (sl0->rewrite_bsid);
-	  ip0->payload_length = clib_host_to_net_u16 (new_l0);
-
-	  sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid);
-
-	  ip0->dst_address.as_u64[0] =
-	    (sr0->segments + sr0->segments_left)->as_u64[0];
-	  ip0->dst_address.as_u64[1] =
-	    (sr0->segments + sr0->segments_left)->as_u64[1];
-
-	  if (ip0 + 1 == (void *) sr0)
-	    {
-	      sr0->protocol = ip0->protocol;
-	      ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-	  else
-	    {
-	      ip6_ext_header_t *ip_ext = (void *) (ip0 + 1);
-	      sr0->protocol = ip_ext->next_hdr;
-	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
-	    }
-
-	  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
-	      PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      sr_policy_rewrite_trace_t *tr =
-		vlib_add_trace (vm, node, b0, sizeof (*tr));
-	      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			   sizeof (tr->src.as_u8));
-	      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
-			   sizeof (tr->dst.as_u8));
-	    }
-
-	  insert_pkts++;
-
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, next0);
-	}
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  /* Update counters */
-  vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
-			       SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
-			       insert_pkts);
-  vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
-			       SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
-			       bsid_pkts);
-  return from_frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = {
-  .function = sr_policy_rewrite_b_insert,
-  .name = "sr-pl-rewrite-b-insert",
-  .vector_size = sizeof (u32),
-  .format_trace = format_sr_policy_rewrite_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = SR_POLICY_REWRITE_N_ERROR,
-  .error_strings = sr_policy_rewrite_error_strings,
-  .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
-  .next_nodes = {
-#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
-    foreach_sr_policy_rewrite_next
-#undef _
-  },
-};
-/* *INDENT-ON* */
-
-/**
- * @brief Function BSID encapsulation
- */
-static_always_inline void
-end_bsid_encaps_srh_processing (vlib_node_runtime_t * node,
-				vlib_buffer_t * b0,
-				ip6_header_t * ip0,
-				ip6_sr_header_t * sr0, u32 * next0)
-{
-  ip6_address_t *new_dst0;
-
-  if (PREDICT_FALSE (!sr0))
-    goto error_bsid_encaps;
-
-  if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR))
-    {
-      if (PREDICT_TRUE (sr0->segments_left != 0))
-	{
-	  sr0->segments_left -= 1;
-	  new_dst0 = (ip6_address_t *) (sr0->segments);
-	  new_dst0 += sr0->segments_left;
-	  ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
-	  ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
-	  return;
-	}
-    }
-
-error_bsid_encaps:
-  *next0 = SR_POLICY_REWRITE_NEXT_ERROR;
-  b0->error = node->errors[SR_POLICY_REWRITE_ERROR_BSID_ZERO];
-}
-
-/**
- * @brief Graph node for applying a SR policy BSID - Encapsulation
- */
-static uword
-sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
-			    vlib_frame_t * from_frame)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  u32 n_left_from, next_index, *from, *to_next;
-
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-
-  next_index = node->cached_next_index;
-
-  int encap_pkts = 0, bsid_pkts = 0;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      /* Quad - Loop */
-      while (n_left_from >= 8 && n_left_to_next >= 4)
-	{
-	  u32 bi0, bi1, bi2, bi3;
-	  vlib_buffer_t *b0, *b1, *b2, *b3;
-	  u32 next0, next1, next2, next3;
-	  next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
-	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
-	  ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap;
-	  ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
-	  ip6_ext_header_t *prev0, *prev1, *prev2, *prev3;
-	  ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
-
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t *p4, *p5, *p6, *p7;
-
-	    p4 = vlib_get_buffer (vm, from[4]);
-	    p5 = vlib_get_buffer (vm, from[5]);
-	    p6 = vlib_get_buffer (vm, from[6]);
-	    p7 = vlib_get_buffer (vm, from[7]);
-
-	    /* Prefetch the buffer header and packet for the N+2 loop iteration */
-	    vlib_prefetch_buffer_header (p4, LOAD);
-	    vlib_prefetch_buffer_header (p5, LOAD);
-	    vlib_prefetch_buffer_header (p6, LOAD);
-	    vlib_prefetch_buffer_header (p7, LOAD);
-
-	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
-	  }
-
-	  to_next[0] = bi0 = from[0];
-	  to_next[1] = bi1 = from[1];
-	  to_next[2] = bi2 = from[2];
-	  to_next[3] = bi3 = from[3];
-	  from += 4;
-	  to_next += 4;
-	  n_left_from -= 4;
-	  n_left_to_next -= 4;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-	  b2 = vlib_get_buffer (vm, bi2);
-	  b3 = vlib_get_buffer (vm, bi3);
-
-	  sl0 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  sl1 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
-	  sl2 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
-	  sl3 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
-	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl0->rewrite));
-	  ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl1->rewrite));
-	  ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl2->rewrite));
-	  ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl3->rewrite));
-
-	  ip0_encap = vlib_buffer_get_current (b0);
-	  ip1_encap = vlib_buffer_get_current (b1);
-	  ip2_encap = vlib_buffer_get_current (b2);
-	  ip3_encap = vlib_buffer_get_current (b3);
-
-	  ip6_ext_header_find_t (ip0_encap, prev0, sr0,
-				 IP_PROTOCOL_IPV6_ROUTE);
-	  ip6_ext_header_find_t (ip1_encap, prev1, sr1,
-				 IP_PROTOCOL_IPV6_ROUTE);
-	  ip6_ext_header_find_t (ip2_encap, prev2, sr2,
-				 IP_PROTOCOL_IPV6_ROUTE);
-	  ip6_ext_header_find_t (ip3_encap, prev3, sr3,
-				 IP_PROTOCOL_IPV6_ROUTE);
-
-	  end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0);
-	  end_bsid_encaps_srh_processing (node, b1, ip1_encap, sr1, &next1);
-	  end_bsid_encaps_srh_processing (node, b2, ip2_encap, sr2, &next2);
-	  end_bsid_encaps_srh_processing (node, b3, ip3_encap, sr3, &next3);
-
-	  clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
-		       sl0->rewrite, vec_len (sl0->rewrite));
-	  clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite),
-		       sl1->rewrite, vec_len (sl1->rewrite));
-	  clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite),
-		       sl2->rewrite, vec_len (sl2->rewrite));
-	  clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite),
-		       sl3->rewrite, vec_len (sl3->rewrite));
-
-	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
-	  vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
-	  vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
-	  vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
-
-	  ip0 = vlib_buffer_get_current (b0);
-	  ip1 = vlib_buffer_get_current (b1);
-	  ip2 = vlib_buffer_get_current (b2);
-	  ip3 = vlib_buffer_get_current (b3);
-
-	  encaps_processing_v6 (node, b0, ip0, ip0_encap);
-	  encaps_processing_v6 (node, b1, ip1, ip1_encap);
-	  encaps_processing_v6 (node, b2, ip2, ip2_encap);
-	  encaps_processing_v6 (node, b3, ip3, ip3_encap);
-
-	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
-	    {
-	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b0, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b1, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b2, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-
-	      if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  sr_policy_rewrite_trace_t *tr =
-		    vlib_add_trace (vm, node, b3, sizeof (*tr));
-		  clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
-			       sizeof (tr->src.as_u8));
-		  clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
-			       sizeof (tr->dst.as_u8));
-		}
-	    }
-
-	  encap_pkts += 4;
-	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, bi1, bi2, bi3,
-					   next0, next1, next2, next3);
-	}
-
-      /* Single loop for potentially the last three packets */
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  u32 bi0;
-	  vlib_buffer_t *b0;
-	  ip6_header_t *ip0 = 0, *ip0_encap = 0;
-	  ip6_ext_header_t *prev0;
-	  ip6_sr_header_t *sr0;
-	  ip6_sr_sl_t *sl0;
-	  u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
-
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-	  b0 = vlib_get_buffer (vm, bi0);
-
-	  sl0 =
-	    pool_elt_at_index (sm->sid_lists,
-			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
-		  vec_len (sl0->rewrite));
-
-	  ip0_encap = vlib_buffer_get_current (b0);
-	  ip6_ext_header_find_t (ip0_encap, prev0, sr0,
-				 IP_PROTOCOL_IPV6_ROUTE);
-	  end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0);
-
-	  clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
-		       sl0->rewrite, vec_len (sl0->rewrite));
-	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
-
-	  ip0 = vlib_buffer_get_current (b0);
-
-	  encaps_processing_v6 (node, b0, ip0, ip0_encap);
-
-	  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
-	      PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      sr_policy_rewrite_trace_t *tr =
-		vlib_add_trace (vm, node, b0, sizeof (*tr));
-	      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
-			   sizeof (tr->src.as_u8));
-	      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
-			   sizeof (tr->dst.as_u8));
-	    }
-
-	  encap_pkts++;
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-					   n_left_to_next, bi0, next0);
-	}
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  /* Update counters */
-  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
-			       SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
-			       encap_pkts);
-  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
-			       SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
-			       bsid_pkts);
-
-  return from_frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = {
-  .function = sr_policy_rewrite_b_encaps,
-  .name = "sr-pl-rewrite-b-encaps",
-  .vector_size = sizeof (u32),
-  .format_trace = format_sr_policy_rewrite_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = SR_POLICY_REWRITE_N_ERROR,
-  .error_strings = sr_policy_rewrite_error_strings,
-  .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
-  .next_nodes = {
-#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
-    foreach_sr_policy_rewrite_next
-#undef _
-  },
-};
-/* *INDENT-ON* */
-
-/*************************** SR Segment Lists DPOs ****************************/
-static u8 *
-format_sr_segment_list_dpo (u8 * s, va_list * args)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  ip6_address_t *addr;
-  ip6_sr_sl_t *sl;
-
-  index_t index = va_arg (*args, index_t);
-  CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
-  s = format (s, "SR: Segment List index:[%d]", index);
-  s = format (s, "\n\tSegments:");
-
-  sl = pool_elt_at_index (sm->sid_lists, index);
-
-  s = format (s, "< ");
-  vec_foreach (addr, sl->segments)
-  {
-    s = format (s, "%U, ", format_ip6_address, addr);
-  }
-  s = format (s, "\b\b > - ");
-  s = format (s, "Weight: %u", sl->weight);
-
-  return s;
-}
-
-const static dpo_vft_t sr_policy_rewrite_vft = {
-  .dv_lock = sr_dpo_lock,
-  .dv_unlock = sr_dpo_unlock,
-  .dv_format = format_sr_segment_list_dpo,
-};
-
-const static char *const sr_pr_encaps_ip6_nodes[] = {
-  "sr-pl-rewrite-encaps",
-  NULL,
-};
-
-const static char *const sr_pr_encaps_ip4_nodes[] = {
-  "sr-pl-rewrite-encaps-v4",
-  NULL,
-};
-
-const static char *const *const sr_pr_encaps_nodes[DPO_PROTO_NUM] = {
-  [DPO_PROTO_IP6] = sr_pr_encaps_ip6_nodes,
-  [DPO_PROTO_IP4] = sr_pr_encaps_ip4_nodes,
-};
-
-const static char *const sr_pr_insert_ip6_nodes[] = {
-  "sr-pl-rewrite-insert",
-  NULL,
-};
-
-const static char *const *const sr_pr_insert_nodes[DPO_PROTO_NUM] = {
-  [DPO_PROTO_IP6] = sr_pr_insert_ip6_nodes,
-};
-
-const static char *const sr_pr_bsid_insert_ip6_nodes[] = {
-  "sr-pl-rewrite-b-insert",
-  NULL,
-};
-
-const static char *const *const sr_pr_bsid_insert_nodes[DPO_PROTO_NUM] = {
-  [DPO_PROTO_IP6] = sr_pr_bsid_insert_ip6_nodes,
-};
-
-const static char *const sr_pr_bsid_encaps_ip6_nodes[] = {
-  "sr-pl-rewrite-b-encaps",
-  NULL,
-};
-
-const static char *const *const sr_pr_bsid_encaps_nodes[DPO_PROTO_NUM] = {
-  [DPO_PROTO_IP6] = sr_pr_bsid_encaps_ip6_nodes,
-};
-
-/********************* SR Policy Rewrite initialization ***********************/
-/**
- * @brief SR Policy Rewrite initialization
- */
-clib_error_t *
-sr_policy_rewrite_init (vlib_main_t * vm)
-{
-  ip6_sr_main_t *sm = &sr_main;
-
-  /* Init memory for sr policy keys (bsid <-> ip6_address_t) */
-  mhash_init (&sm->sr_policies_index_hash, sizeof (uword),
-	      sizeof (ip6_address_t));
-
-  /* Init SR VPO DPOs type */
-  sr_pr_encaps_dpo_type =
-    dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_encaps_nodes);
-
-  sr_pr_insert_dpo_type =
-    dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_insert_nodes);
-
-  sr_pr_bsid_encaps_dpo_type =
-    dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_encaps_nodes);
-
-  sr_pr_bsid_insert_dpo_type =
-    dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_insert_nodes);
-
-  /* Register the L2 encaps node used in HW redirect */
-  sm->l2_sr_policy_rewrite_index = sr_policy_rewrite_encaps_node.index;
-
-  sm->fib_table_ip6 = (u32) ~ 0;
-  sm->fib_table_ip4 = (u32) ~ 0;
-
-  return 0;
-}
-
-VLIB_INIT_FUNCTION (sr_policy_rewrite_init);
-
-
-/*
-* fd.io coding-style-patch-verification: ON
-*
-* Local Variables:
-* eval: (c-set-style "gnu")
-* End:
-*/
diff --git a/src/vnet/sr/sr_steering.c b/src/vnet/sr/sr_steering.c
deleted file mode 100755
index 04646198..00000000
--- a/src/vnet/sr/sr_steering.c
+++ /dev/null
@@ -1,573 +0,0 @@
-/*
- * sr_steering.c: ipv6 segment routing steering into SR policy
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file
- * @brief Packet steering into SR Policies
- *
- * This file is in charge of handling the FIB appropiatly to steer packets
- * through SR Policies as defined in 'sr_policy_rewrite.c'. Notice that here
- * we are only doing steering. SR policy application is done in
- * sr_policy_rewrite.c
- *
- * Supports:
- *  - Steering of IPv6 traffic Destination Address based
- *  - Steering of IPv4 traffic Destination Address based
- *  - Steering of L2 frames, interface based (sw interface)
- */
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/sr/sr.h>
-#include <vnet/ip/ip.h>
-#include <vnet/sr/sr_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/fib/ip6_fib.h>
-#include <vnet/dpo/dpo.h>
-
-#include <vppinfra/error.h>
-#include <vppinfra/elog.h>
-
-/**
- * @brief Steer traffic L2 and L3 traffic through a given SR policy
- *
- * @param is_del
- * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index)
- * @param sr_policy is the index of the SR Policy (alt to bsid)
- * @param table_id is the VRF where to install the FIB entry for the BSID
- * @param prefix is the IPv4/v6 address for L3 traffic type
- * @param mask_width is the mask for L3 traffic type
- * @param sw_if_index is the incoming interface for L2 traffic
- * @param traffic_type describes the type of traffic
- *
- * @return 0 if correct, else error
- */
-int
-sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index,
-		    u32 table_id, ip46_address_t * prefix, u32 mask_width,
-		    u32 sw_if_index, u8 traffic_type)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  sr_steering_key_t key;
-  ip6_sr_steering_policy_t *steer_pl;
-  fib_prefix_t pfx = { 0 };
-
-  ip6_sr_policy_t *sr_policy = 0;
-  uword *p = 0;
-
-  memset (&key, 0, sizeof (sr_steering_key_t));
-
-  /* Compute the steer policy key */
-  if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6)
-    {
-      key.l3.prefix.as_u64[0] = prefix->as_u64[0];
-      key.l3.prefix.as_u64[1] = prefix->as_u64[1];
-      key.l3.mask_width = mask_width;
-      key.l3.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
-    }
-  else if (traffic_type == SR_STEER_L2)
-    {
-      key.l2.sw_if_index = sw_if_index;
-
-      /* Sanitise the SW_IF_INDEX */
-      if (pool_is_free_index (sm->vnet_main->interface_main.sw_interfaces,
-			      sw_if_index))
-	return -3;
-
-      vnet_sw_interface_t *sw =
-	vnet_get_sw_interface (sm->vnet_main, sw_if_index);
-      if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
-	return -3;
-    }
-  else
-    return -1;
-
-  key.traffic_type = traffic_type;
-
-  /* Search for the item */
-  p = mhash_get (&sm->sr_steer_policies_hash, &key);
-
-  if (p)
-    {
-      /* Retrieve Steer Policy function */
-      steer_pl = pool_elt_at_index (sm->steer_policies, p[0]);
-
-      if (is_del)
-	{
-	  if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
-	    {
-	      /* Remove FIB entry */
-	      pfx.fp_proto = FIB_PROTOCOL_IP6;
-	      pfx.fp_len = steer_pl->classify.l3.mask_width;
-	      pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6;
-
-	      fib_table_entry_delete (fib_table_find
-				      (FIB_PROTOCOL_IP6,
-				       steer_pl->classify.l3.fib_table),
-				      &pfx, FIB_SOURCE_SR);
-	    }
-	  else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
-	    {
-	      /* Remove FIB entry */
-	      pfx.fp_proto = FIB_PROTOCOL_IP4;
-	      pfx.fp_len = steer_pl->classify.l3.mask_width;
-	      pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4;
-
-	      fib_table_entry_delete (fib_table_find
-				      (FIB_PROTOCOL_IP4,
-				       steer_pl->classify.l3.fib_table), &pfx,
-				      FIB_SOURCE_SR);
-	    }
-	  else if (steer_pl->classify.traffic_type == SR_STEER_L2)
-	    {
-	      /* Remove HW redirection */
-	      vnet_feature_enable_disable ("device-input",
-					   "sr-policy-rewrite-encaps-l2",
-					   sw_if_index, 0, 0, 0);
-	      sm->sw_iface_sr_policies[sw_if_index] = ~(u32) 0;
-
-	      /* Remove promiscous mode from interface */
-	      vnet_main_t *vnm = vnet_get_main ();
-	      ethernet_main_t *em = &ethernet_main;
-	      ethernet_interface_t *eif =
-		ethernet_get_interface (em, sw_if_index);
-
-	      if (!eif)
-		goto cleanup_error_redirection;
-
-	      ethernet_set_flags (vnm, sw_if_index, 0);
-	    }
-
-	  /* Delete SR steering policy entry */
-	  pool_put (sm->steer_policies, steer_pl);
-	  mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
-
-	  /* If no more SR policies or steering policies */
-	  if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies))
-	    {
-	      fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6);
-	      fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6);
-	      sm->fib_table_ip6 = (u32) ~ 0;
-	      sm->fib_table_ip4 = (u32) ~ 0;
-	    }
-
-	  return 1;
-	}
-      else			/* It means user requested to update an existing SR steering policy */
-	{
-	  /* Retrieve SR steering policy */
-	  if (bsid)
-	    {
-	      p = mhash_get (&sm->sr_policies_index_hash, bsid);
-	      if (p)
-		sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
-	      else
-		return -2;
-	    }
-	  else
-	    sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
-
-	  if (!sr_policy)
-	    return -2;
-
-	  steer_pl->sr_policy = sr_policy - sm->sr_policies;
-
-	  /* Remove old FIB/hw redirection and create a new one */
-	  if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
-	    {
-	      /* Remove FIB entry */
-	      pfx.fp_proto = FIB_PROTOCOL_IP6;
-	      pfx.fp_len = steer_pl->classify.l3.mask_width;
-	      pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6;
-
-	      fib_table_entry_delete (fib_table_find
-				      (FIB_PROTOCOL_IP6,
-				       steer_pl->classify.l3.fib_table),
-				      &pfx, FIB_SOURCE_SR);
-
-	      /* Create a new one */
-	      goto update_fib;
-	    }
-	  else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
-	    {
-	      /* Remove FIB entry */
-	      pfx.fp_proto = FIB_PROTOCOL_IP4;
-	      pfx.fp_len = steer_pl->classify.l3.mask_width;
-	      pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4;
-
-	      fib_table_entry_delete (fib_table_find
-				      (FIB_PROTOCOL_IP4,
-				       steer_pl->classify.l3.fib_table),
-				      &pfx, FIB_SOURCE_SR);
-
-	      /* Create a new one */
-	      goto update_fib;
-	    }
-	  else if (steer_pl->classify.traffic_type == SR_STEER_L2)
-	    {
-	      /* Update L2-HW redirection */
-	      goto update_fib;
-	    }
-	}
-    }
-  else
-    /* delete; steering policy does not exist; complain */
-  if (is_del)
-    return -4;
-
-  /* Retrieve SR policy */
-  if (bsid)
-    {
-      p = mhash_get (&sm->sr_policies_index_hash, bsid);
-      if (p)
-	sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
-      else
-	return -2;
-    }
-  else
-    sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
-
-  /* Create a new steering policy */
-  pool_get (sm->steer_policies, steer_pl);
-  memset (steer_pl, 0, sizeof (*steer_pl));
-
-  if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6)
-    {
-      clib_memcpy (&steer_pl->classify.l3.prefix, prefix,
-		   sizeof (ip46_address_t));
-      steer_pl->classify.l3.mask_width = mask_width;
-      steer_pl->classify.l3.fib_table =
-	(table_id != (u32) ~ 0 ? table_id : 0);
-      steer_pl->classify.traffic_type = traffic_type;
-    }
-  else if (traffic_type == SR_STEER_L2)
-    {
-      steer_pl->classify.l2.sw_if_index = sw_if_index;
-      steer_pl->classify.traffic_type = traffic_type;
-    }
-  else
-    {
-      /* Incorrect API usage. Should never get here */
-      pool_put (sm->steer_policies, steer_pl);
-      mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
-      return -1;
-    }
-  steer_pl->sr_policy = sr_policy - sm->sr_policies;
-
-  /* Create and store key */
-  mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies,
-	     NULL);
-
-  if (traffic_type == SR_STEER_L2)
-    {
-      if (!sr_policy->is_encap)
-	goto cleanup_error_encap;
-
-      if (vnet_feature_enable_disable
-	  ("device-input", "sr-pl-rewrite-encaps-l2", sw_if_index, 1, 0, 0))
-	goto cleanup_error_redirection;
-
-      /* Set promiscous mode on interface */
-      vnet_main_t *vnm = vnet_get_main ();
-      ethernet_main_t *em = &ethernet_main;
-      ethernet_interface_t *eif = ethernet_get_interface (em, sw_if_index);
-
-      if (!eif)
-	goto cleanup_error_redirection;
-
-      ethernet_set_flags (vnm, sw_if_index,
-			  ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
-    }
-  else if (traffic_type == SR_STEER_IPV4)
-    if (!sr_policy->is_encap)
-      goto cleanup_error_encap;
-
-update_fib:
-  /* FIB API calls - Recursive route through the BindingSID */
-  if (traffic_type == SR_STEER_IPV6)
-    {
-      pfx.fp_proto = FIB_PROTOCOL_IP6;
-      pfx.fp_len = steer_pl->classify.l3.mask_width;
-      pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6;
-
-      fib_table_entry_path_add (fib_table_find (FIB_PROTOCOL_IP6,
-						(table_id !=
-						 (u32) ~ 0 ?
-						 table_id : 0)),
-				&pfx, FIB_SOURCE_SR,
-				FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
-				FIB_PROTOCOL_IP6,
-				(ip46_address_t *) & sr_policy->bsid, ~0,
-				sm->fib_table_ip6, 1, NULL,
-				FIB_ROUTE_PATH_FLAG_NONE);
-    }
-  else if (traffic_type == SR_STEER_IPV4)
-    {
-      pfx.fp_proto = FIB_PROTOCOL_IP4;
-      pfx.fp_len = steer_pl->classify.l3.mask_width;
-      pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4;
-
-      fib_table_entry_path_add (fib_table_find (FIB_PROTOCOL_IP4,
-						(table_id !=
-						 (u32) ~ 0 ?
-						 table_id : 0)),
-				&pfx, FIB_SOURCE_SR,
-				FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
-				FIB_PROTOCOL_IP6,
-				(ip46_address_t *) & sr_policy->bsid, ~0,
-				sm->fib_table_ip4, 1, NULL,
-				FIB_ROUTE_PATH_FLAG_NONE);
-    }
-  else if (traffic_type == SR_STEER_L2)
-    {
-      if (sw_if_index < vec_len (sm->sw_iface_sr_policies))
-	sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy;
-      else
-	{
-	  vec_resize (sm->sw_iface_sr_policies,
-		      (pool_len (sm->vnet_main->interface_main.sw_interfaces)
-		       - vec_len (sm->sw_iface_sr_policies)));
-	  sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy;
-	}
-    }
-
-  return 0;
-
-cleanup_error_encap:
-  pool_put (sm->steer_policies, steer_pl);
-  mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
-  return -5;
-
-cleanup_error_redirection:
-  pool_put (sm->steer_policies, steer_pl);
-  mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
-  return -3;
-}
-
-static clib_error_t *
-sr_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
-			    vlib_cli_command_t * cmd)
-{
-  vnet_main_t *vnm = vnet_get_main ();
-
-  int is_del = 0;
-
-  ip46_address_t prefix;
-  u32 dst_mask_width = 0;
-  u32 sw_if_index = (u32) ~ 0;
-  u8 traffic_type = 0;
-  u32 fib_table = (u32) ~ 0;
-
-  ip6_address_t bsid;
-  u32 sr_policy_index = (u32) ~ 0;
-
-  u8 sr_policy_set = 0;
-
-  memset (&prefix, 0, sizeof (ip46_address_t));
-
-  int rv;
-  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
-    {
-      if (unformat (input, "del"))
-	is_del = 1;
-      else if (!traffic_type
-	       && unformat (input, "l3 %U/%d", unformat_ip6_address,
-			    &prefix.ip6, &dst_mask_width))
-	traffic_type = SR_STEER_IPV6;
-      else if (!traffic_type
-	       && unformat (input, "l3 %U/%d", unformat_ip4_address,
-			    &prefix.ip4, &dst_mask_width))
-	traffic_type = SR_STEER_IPV4;
-      else if (!traffic_type
-	       && unformat (input, "l2 %U", unformat_vnet_sw_interface, vnm,
-			    &sw_if_index))
-	traffic_type = SR_STEER_L2;
-      else if (!sr_policy_set
-	       && unformat (input, "via sr policy index %d",
-			    &sr_policy_index))
-	sr_policy_set = 1;
-      else if (!sr_policy_set
-	       && unformat (input, "via sr policy bsid %U",
-			    unformat_ip6_address, &bsid))
-	sr_policy_set = 1;
-      else if (fib_table == (u32) ~ 0
-	       && unformat (input, "fib-table %d", &fib_table));
-      else
-	break;
-    }
-
-  if (!traffic_type)
-    return clib_error_return (0, "No L2/L3 traffic specified");
-  if (!sr_policy_set)
-    return clib_error_return (0, "No SR policy specified");
-
-  /* Make sure that the prefixes are clean */
-  if (traffic_type == SR_STEER_IPV4)
-    {
-      u32 mask =
-	(dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0);
-      prefix.ip4.as_u32 &= mask;
-    }
-  else if (traffic_type == SR_STEER_IPV6)
-    {
-      ip6_address_t mask;
-      ip6_address_mask_from_width (&mask, dst_mask_width);
-      ip6_address_mask (&prefix.ip6, &mask);
-    }
-
-  rv =
-    sr_steering_policy (is_del, (sr_policy_index == ~(u32) 0 ? &bsid : NULL),
-			sr_policy_index, fib_table, &prefix, dst_mask_width,
-			sw_if_index, traffic_type);
-
-  switch (rv)
-    {
-    case 0:
-      break;
-    case 1:
-      return 0;
-    case -1:
-      return clib_error_return (0, "Incorrect API usage.");
-    case -2:
-      return clib_error_return (0,
-				"The requested SR policy could not be located. Review the BSID/index.");
-    case -3:
-      return clib_error_return (0,
-				"Unable to do SW redirect. Incorrect interface.");
-    case -4:
-      return clib_error_return (0,
-				"The requested SR steering policy could not be deleted.");
-    case -5:
-      return clib_error_return (0,
-				"The SR policy is not an encapsulation one.");
-    default:
-      return clib_error_return (0, "BUG: sr steer policy returns %d", rv);
-    }
-  return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (sr_steer_policy_command, static) = {
-  .path = "sr steer",
-  .short_help = "sr steer (del) [l3 <ip_addr/mask>|l2 <sf_if>]"
-    "via sr policy [index <sr_policy_index>|bsid <bsid_ip6_addr>]"
-    "(fib-table <fib_table_index>)",
-  .long_help =
-    "\tSteer a L2 or L3 traffic through an existing SR policy.\n"
-    "\tExamples:\n"
-    "\t\tsr steer l3 2001::/64 via sr_policy index 5\n"
-    "\t\tsr steer l3 2001::/64 via sr_policy bsid 2010::9999:1\n"
-    "\t\tsr steer l2 GigabitEthernet0/5/0 via sr_policy index 5\n"
-    "\t\tsr steer del l3 2001::/64 via sr_policy index 5\n",
-  .function = sr_steer_policy_command_fn,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_sr_steering_policies_command_fn (vlib_main_t * vm,
-				      unformat_input_t * input,
-				      vlib_cli_command_t * cmd)
-{
-  ip6_sr_main_t *sm = &sr_main;
-  ip6_sr_steering_policy_t **steer_policies = 0;
-  ip6_sr_steering_policy_t *steer_pl;
-
-  vnet_main_t *vnm = vnet_get_main ();
-
-  ip6_sr_policy_t *pl = 0;
-  int i;
-
-  vlib_cli_output (vm, "SR steering policies:");
-  /* *INDENT-OFF* */
-  pool_foreach (steer_pl, sm->steer_policies, ({vec_add1(steer_policies, steer_pl);}));
-  /* *INDENT-ON* */
-  vlib_cli_output (vm, "Traffic\t\tSR policy BSID");
-  for (i = 0; i < vec_len (steer_policies); i++)
-    {
-      steer_pl = steer_policies[i];
-      pl = pool_elt_at_index (sm->sr_policies, steer_pl->sr_policy);
-      if (steer_pl->classify.traffic_type == SR_STEER_L2)
-	{
-	  vlib_cli_output (vm, "L2 %U\t%U",
-			   format_vnet_sw_if_index_name, vnm,
-			   steer_pl->classify.l2.sw_if_index,
-			   format_ip6_address, &pl->bsid);
-	}
-      else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
-	{
-	  vlib_cli_output (vm, "L3 %U/%d\t%U",
-			   format_ip4_address,
-			   &steer_pl->classify.l3.prefix.ip4,
-			   steer_pl->classify.l3.mask_width,
-			   format_ip6_address, &pl->bsid);
-	}
-      else if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
-	{
-	  vlib_cli_output (vm, "L3 %U/%d\t%U",
-			   format_ip6_address,
-			   &steer_pl->classify.l3.prefix.ip6,
-			   steer_pl->classify.l3.mask_width,
-			   format_ip6_address, &pl->bsid);
-	}
-    }
-  return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_sr_steering_policies_command, static) = {
-  .path = "show sr steering policies",
-  .short_help = "show sr steering policies",
-  .function = show_sr_steering_policies_command_fn,
-};
-/* *INDENT-ON* */
-
-clib_error_t *
-sr_steering_init (vlib_main_t * vm)
-{
-  ip6_sr_main_t *sm = &sr_main;
-
-  /* Init memory for function keys */
-  mhash_init (&sm->sr_steer_policies_hash, sizeof (uword),
-	      sizeof (sr_steering_key_t));
-
-  sm->sw_iface_sr_policies = 0;
-
-  sm->vnet_main = vnet_get_main ();
-
-  return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_INIT_FUNCTION (sr_steering_init);
-/* *INDENT-ON* */
-
-/* *INDENT-OFF* */
-VNET_FEATURE_INIT (sr_pl_rewrite_encaps_l2, static) =
-{
-  .arc_name = "device-input",
-  .node_name = "sr-pl-rewrite-encaps-l2",
-  .runs_before = VNET_FEATURES ("ethernet-input"),
-};
-/* *INDENT-ON* */
-
-/*
-* fd.io coding-style-patch-verification: ON
-*
-* Local Variables:
-* eval: (c-set-style "gnu")
-* End:
-*/
diff --git a/src/vnet/sr/sr_steering.md b/src/vnet/sr/sr_steering.md
deleted file mode 100644
index cf446f81..00000000
--- a/src/vnet/sr/sr_steering.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# Steering packets into a SR Policy     {#srv6_steering_doc}
-
-To steer packets in Transit into an SR policy (T.Insert, T.Encaps and T.Encaps.L2 behaviors), the user needs to create an 'sr steering policy'.
-
-    sr steer l3 2001::/64 via sr policy index 1
-    sr steer l3 2001::/64 via sr policy bsid cafe::1
-    sr steer l3 2001::/64 via sr policy bsid cafe::1 fib-table 3
-    sr steer l3 10.0.0.0/16 via sr policy bsid cafe::1
-    sr steer l2 TenGE0/1/0 via sr policy bsid cafe::1
-
-Disclaimer: The T.Encaps.L2 will steer L2 frames into an SR Policy. Notice that creating an SR steering policy for L2 frames will actually automatically *put the interface into promiscous mode*.
diff --git a/src/vnet/srmpls/dir.dox b/src/vnet/srmpls/dir.dox
new file mode 100755
index 00000000..76ec1d6a
--- /dev/null
+++ b/src/vnet/srmpls/dir.dox
@@ -0,0 +1,22 @@
+/*
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief Segment Routing MPLS code
+
+ An implementation of Segment Routing for the MPLS dataplane.
+
+*/
\ No newline at end of file
diff --git a/src/vnet/srmpls/sr.h b/src/vnet/srmpls/sr.h
new file mode 100755
index 00000000..0e106697
--- /dev/null
+++ b/src/vnet/srmpls/sr.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing MPLS data structures definitions
+ *
+ */
+
+#ifndef included_vnet_srmpls_h
+#define included_vnet_srmpls_h
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+/* SR policy types, stored in mpls_sr_policy_t.type */
+#define SR_POLICY_TYPE_DEFAULT 0
+#define SR_POLICY_TYPE_SPRAY 1
+/* Weight given to a SID list when the caller passes (u32) ~0 as weight */
+#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1
+/* Steering traffic types; presumably the values of sr_mpls_steering_key_t.traffic_type */
+#define SR_STEER_IPV4 4
+#define SR_STEER_IPV6 6
+
+/**
+ * @brief SR Segment List (SID list)
+ */
+typedef struct
+{
+  /**
+    * Vector of MPLS labels (SIDs), a private copy of the labels given at creation
+    */
+  mpls_label_t *segments;
+
+  /**
+    * SID list weight (wECMP / UCMP); used as the weight of the FIB path
+    */
+  u32 weight;
+
+} mpls_sr_sl_t;
+
+typedef struct
+{
+  u32 *segments_lists;		/**< Vector of indexes into the SID list pool */
+
+  mpls_label_t bsid;		/**< BindingSID (key) */
+
+  u8 type;					/**< SR_POLICY_TYPE_DEFAULT (0) or SR_POLICY_TYPE_SPRAY */
+  /* The type selects the flags of the FIB entry installed for the BSID: */
+  /* DEFAULT installs the SID list paths with FIB_ENTRY_FLAG_NONE,       */
+  /* any other type installs them with FIB_ENTRY_FLAG_MULTICAST.         */
+
+} mpls_sr_policy_t;
+
+/**
+ * @brief Steering db key
+ *
+ * L3 is IPv4/IPv6 + mask
+ */
+typedef struct
+{
+  ip46_address_t prefix;	/**< IP address of the prefix */
+  u32 mask_width;			/**< Mask width of the prefix */
+  u32 fib_table;			/**< VRF of the prefix */
+  u8 traffic_type;			/**< Traffic type; presumably SR_STEER_IPV4 or SR_STEER_IPV6 (no L2 value is defined in this header) */
+  u8 padding[3];			/**< Explicit padding after traffic_type */
+} sr_mpls_steering_key_t;
+
+typedef struct
+{
+  sr_mpls_steering_key_t classify;		/**< Traffic classification (the steering key) */
+  u32 sr_policy;						/**< SR Policy index; presumably into mpls_sr_main_t.sr_policies - TODO confirm */
+} mpls_sr_steering_policy_t;
+
+/**
+ * @brief Segment Routing main datastructure
+ */
+typedef struct
+{
+  /**
+    * Pool of SR SID lists
+    */
+  mpls_sr_sl_t *sid_lists;
+
+  /**
+    * Pool of SR MPLS policies
+    */
+  mpls_sr_policy_t *sr_policies;
+
+  /**
+    * Hash table mapping BindingSID to the index of its policy in sr_policies
+    */
+  uword *sr_policies_index_hash;
+
+  /**
+    * Pool of SR steer policies instances
+    */
+  mpls_sr_steering_policy_t *steer_policies;
+
+  /**
+    * MHash table mapping steering rules to SR steer instance
+    */
+  mhash_t sr_steer_policies_hash;
+
+  /**
+    * Convenience pointers to the vlib and vnet main structures
+    */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+} mpls_sr_main_t;
+
+extern mpls_sr_main_t sr_mpls_main;
+/* Create a policy for bsid with one SID list; returns 0, or -12 if bsid is in use */
+extern int
+sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments,
+		    u8 behavior, u32 weight);
+/* operation: 1 = add a SID list, 2 = delete SID list sl_index, 3 = set its weight */
+extern int
+sr_mpls_policy_mod (mpls_label_t bsid, u32 index, u8 operation,
+		    mpls_label_t * segments, u32 sl_index, u32 weight);
+/* Delete the policy found by bsid, or by pool index when bsid is 0 */
+extern int sr_mpls_policy_del (mpls_label_t bsid, u32 index);
+
+#endif /* included_vnet_srmpls_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/srmpls/sr_doc.md b/src/vnet/srmpls/sr_doc.md
new file mode 100644
index 00000000..d60592bb
--- /dev/null
+++ b/src/vnet/srmpls/sr_doc.md
@@ -0,0 +1,87 @@
+# SR-MPLS: Segment Routing for MPLS    {#srmpls_doc}
+
+This is a memo intended to contain documentation of the VPP SR-MPLS implementation.
+Everything that is not directly obvious should come here.
+For any feedback on content that should be explained please mailto:pcamaril@cisco.com
+
+## Segment Routing
+
+Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application engineered routing as it prepares the networks for new business models where applications can control the network behavior.
+
+Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. It was built for the software-defined networking (SDN) era.
+
+Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements.
+
+Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a local semantic to a segment-routing node or global within a segment-routing network. Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design.
+
+Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
+
+**The implementation of Segment Routing in VPP covers both the IPv6 data plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page contains the SR-MPLS documentation.**
+
+## Segment Routing terminology
+
+* SegmentID (SID): is an MPLS label.
+* Segment List (SL) (SID List): is the sequence of SIDs that the packet will traverse.
+* SR Policy: is a set of candidate paths (SID list+weight). An SR policy is uniquely identified by its Binding SID and associated with a weighted set of Segment Lists. In case several SID lists are defined, traffic steered into the policy is unevenly load-balanced among them according to their respective weights.
+* BindingSID: a BindingSID is a SID (only one) associated one-one with an SR Policy. If a packet arrives with MPLS label corresponding to a BindingSID, then the SR policy will be applied to such packet. (BindingSID is popped first.)
+
+## SR-MPLS features in VPP
+
+The SR-MPLS implementation focuses on SR policies and on steering traffic into them. Other SR-MPLS features, such as AdjSIDs, can be achieved using the regular VPP MPLS implementation.
+
+The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-segment-routing-policy/">Segment Routing Policy (*draft-filsfils-spring-segment-routing-policy*)</a> defines SR Policies.
+
+## Creating a SR Policy
+
+An SR Policy is defined by a Binding SID and a weighted set of Segment Lists.
+
+A new SR policy is created with a first SID list using:
+
+    sr mpls policy add bsid 40001 next 16001 next 16002 next 16003 (weight 5)
+
+* The weight parameter is only used if more than one SID list is associated with the policy.
+
+An SR policy is deleted with:
+
+    sr mpls policy del bsid 40001
+
+The existing SR policies are listed with:
+
+    show sr mpls policies
+
+### Adding/Removing SID Lists from an SR policy
+
+An additional SID list is associated with an existing SR policy with:
+
+    sr mpls policy mod bsid 40001 add sl next 16001 next 16002 next 16003 (weight 3)
+
+Conversely, a SID list can be removed from an SR policy with:
+
+    sr mpls policy mod bsid 40001 del sl index 1
+
+Note that this CLI cannot be used to remove the last SID list of a policy. Instead the SR policy delete CLI must be used.
+
+The weight of a SID list can also be modified with:
+
+    sr mpls policy mod bsid 40001 mod sl index 1 weight 4
+    sr mpls policy mod index 1    mod sl index 1 weight 4
+
+### SR Policies: Spray policies
+
+Spray policies are a specific type of SR policies where the packet is replicated on all the SID lists, rather than load-balanced among them.
+
+SID list weights are ignored with this type of policies.
+
+A Spray policy is instantiated by appending the keyword **spray** to a regular SR-MPLS policy command, as in:
+
+    sr mpls policy add bsid 40002 next 16001 next 16002 next 16003 spray
+
+Spray policies are used for removing multicast state from a network core domain, and instead send a linear unicast copy to every access node. The last SID in each list accesses the multicast tree within the access node.  
+
+## Steering packets into a SR Policy
+
+To steer packets in Transit into an SR policy, the user needs to create an 'sr steering policy'.
+
+    sr mpls steer l3 2001::/64 via sr policy bsid 40001
+    sr mpls steer l3 2001::/64 via sr policy bsid 40001 fib-table 3
+    sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001
diff --git a/src/vnet/srmpls/sr_mpls_policy.c b/src/vnet/srmpls/sr_mpls_policy.c
new file mode 100755
index 00000000..5ebbc60d
--- /dev/null
+++ b/src/vnet/srmpls/sr_mpls_policy.c
@@ -0,0 +1,569 @@
+/*
+ * sr_mpls_policy.c: SR-MPLS policies
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief SR MPLS policy creation and application
+ *
+ * Create an SR policy.
+ * An SR policy can be either of 'default' type or 'spray' type
+ * An SR policy has attached a list of SID lists.
+ * In case the SR policy is a default one it will load balance among them.
+ * An SR policy has associated a BindingSID.
+ * In case any packet arrives with MPLS_label == BindingSID then the SR policy
+ * associated to such bindingSID will be applied to such packet.
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srmpls/sr.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+mpls_sr_main_t sr_mpls_main;
+
+/***************************  SR LB helper functions **************************/
+/**
+ * @brief Creates a Segment List and adds it to an SR policy
+ *
+ * Creates a Segment List and adds it to the SR policy. Notice that the SL are
+ * not necessarily unique. Hence there might be two Segment List within the
+ * same SR Policy with exactly the same segments and same weight.
+ *
+ * @param sr_policy is the SR policy where the SL will be added
+ * @param sl is a vector of MPLS labels composing the Segment List
+ * @param weight is the weight of the SegmentList; (u32) ~0 selects the default
+ * @note sl[0] is read unconditionally, so sl must hold at least one label
+ *
+ * @return pointer to the just created segment list
+ */
+static inline mpls_sr_sl_t *
+create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+  mpls_sr_sl_t *segment_list;
+
+  pool_get (sm->sid_lists, segment_list);
+  memset (segment_list, 0, sizeof (*segment_list));
+  /* The policy refers to its SID lists by index into the sm->sid_lists pool */
+  vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists);
+
+  /* Fill in segment list */
+  segment_list->weight =
+    (weight != (u32) ~ 0 ? weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT);
+  segment_list->segments = vec_dup (sl);
+  /* FIB path: the first SID is the local label, the rest the label stack */
+  fib_route_path_t path = {
+    .frp_proto = FIB_PROTOCOL_MPLS,
+    .frp_sw_if_index = ~0,
+    .frp_fib_index = 0,
+    .frp_weight = segment_list->weight,
+    .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+    .frp_label_stack = NULL,
+    .frp_local_label = sl[0],
+  };
+  /* Append the SIDs after the first one to the label stack of the path */
+  vec_add (path.frp_label_stack, sl + 1, vec_len (sl) - 1);
+
+  fib_route_path_t *paths = NULL;
+  vec_add1 (paths, path);
+  /* Add the path to the BSID prefix once per EOS bit value */
+  mpls_eos_bit_t eos;
+  FOR_EACH_MPLS_EOS_BIT (eos)
+  {
+    /* *INDENT-OFF* */
+    fib_prefix_t pfx = {
+      .fp_len = 21,
+      .fp_proto = FIB_PROTOCOL_MPLS,
+      .fp_label = sr_policy->bsid,
+      .fp_eos = eos,
+      .fp_payload_proto = DPO_PROTO_MPLS,
+    };
+    /* *INDENT-ON* */
+
+    fib_table_entry_path_add2 (0,
+			       &pfx,
+			       FIB_SOURCE_SR,
+			       (sr_policy->type == SR_POLICY_TYPE_DEFAULT ?
+				FIB_ENTRY_FLAG_NONE :
+				FIB_ENTRY_FLAG_MULTICAST), paths);
+  }
+
+  vec_free (paths);
+
+  return segment_list;
+}
+
+/******************************* SR rewrite API *******************************/
+/* Three functions for handling sr policies:
+ *   -> sr_mpls_policy_add
+ *   -> sr_mpls_policy_del
+ *   -> sr_mpls_policy_mod
+ * All of them are API. The CLI front end is sr_mpls_policy_command_fn.       */
+
+/**
+ * @brief Create a new SR policy
+ *
+ * The policy starts with a single SID list built from segments.
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param segments is a vector of MPLS labels composing the segment list
+ * @param behavior is SR_POLICY_TYPE_DEFAULT or SR_POLICY_TYPE_SPRAY
+ * @param weight is the SID list weight; (u32) ~0 selects the default weight
+ *
+ * @return 0 on success, -12 if bsid is already in use, -1 if segments is empty
+ */
+int
+sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments,
+		    u8 behavior, u32 weight)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+  mpls_sr_policy_t *sr_policy = 0;
+
+  /* create_sl () reads segments[0]; reject an empty SID list up front */
+  if (vec_len (segments) == 0)
+    return -1;
+
+  /* A BSID identifies exactly one policy; refuse duplicates */
+  if (hash_get (sm->sr_policies_index_hash, bsid))
+    return -12;
+
+  /* Add an SR policy object */
+  pool_get (sm->sr_policies, sr_policy);
+  memset (sr_policy, 0, sizeof (*sr_policy));
+  sr_policy->bsid = bsid;
+  sr_policy->type = behavior;
+
+  /* Map the BSID to the pool index of the new policy */
+  hash_set (sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies);
+
+  /* Create the first segment list; this also installs the BSID FIB paths */
+  create_sl (sr_policy, segments, weight);
+
+  return 0;
+}
+
+/**
+ * @brief Delete a SR policy, its SID lists and the FIB paths of its BSID
+ *
+ * @param bsid is the bindingSID of the SR Policy; 0 selects lookup by index
+ * @param index is the pool index of the SR policy (used only if bsid is 0)
+ *
+ * @return 0 if correct, -1 if no such SR policy exists
+ */
+int
+sr_mpls_policy_del (mpls_label_t bsid, u32 index)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+  mpls_sr_policy_t *sr_policy = 0;
+  mpls_sr_sl_t *segment_list;
+  mpls_eos_bit_t eos;
+  u32 *sl_index;
+  uword *p;
+
+  if (bsid)
+    {
+      p = hash_get (sm->sr_policies_index_hash, bsid);
+      if (!p)
+	return -1;
+      sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+    }
+  else
+    {
+      /* pool_elt_at_index () never yields NULL; validate the index itself */
+      if (pool_is_free_index (sm->sr_policies, index))
+	return -1;
+      sr_policy = pool_elt_at_index (sm->sr_policies, index);
+    }
+
+  /* Clean SID Lists */
+  vec_foreach (sl_index, sr_policy->segments_lists)
+  {
+    segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+
+    fib_route_path_t path = {
+      .frp_proto = FIB_PROTOCOL_MPLS,
+      .frp_sw_if_index = ~0,
+      .frp_fib_index = 0,
+      .frp_weight = segment_list->weight,
+      .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+      .frp_local_label = segment_list->segments[0],
+    };
+    fib_route_path_t *paths = NULL;
+    vec_add1 (paths, path);
+
+    /* remove each of the MPLS routes */
+    FOR_EACH_MPLS_EOS_BIT (eos)
+    {
+      /* *INDENT-OFF* */
+      fib_prefix_t pfx = {
+        .fp_len = 21,
+        .fp_proto = FIB_PROTOCOL_MPLS,
+        .fp_label = sr_policy->bsid,
+        .fp_eos = eos,
+        .fp_payload_proto = DPO_PROTO_MPLS,
+      };
+      /* *INDENT-ON* */
+
+      fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
+    }
+    vec_free (paths);
+    vec_free (segment_list->segments);
+    pool_put_index (sm->sid_lists, *sl_index);
+  }
+
+  /* Remove SR policy entry; its vector of SID list indexes is freed too */
+  hash_unset (sm->sr_policies_index_hash, sr_policy->bsid);
+  vec_free (sr_policy->segments_lists);
+  pool_put (sm->sr_policies, sr_policy);
+
+  return 0;
+}
+
+/**
+ * @brief Modify an existing SR policy
+ *
+ * The possible modifications are adding a new Segment List, modifying an
+ * existing Segment List (modify the weight only) and delete a given
+ * Segment List from the SR Policy.
+ *
+ * @param bsid is the bindingSID of the SR Policy; 0 selects lookup by index
+ * @param index is the pool index of the SR policy (used only if bsid is 0)
+ * @param operation is 1 (add SL), 2 (delete SL) or 3 (modify SL weight)
+ * @param segments is a vector of MPLS labels composing the segment list
+ * @param sl_index is the index of the Segment List to modify/delete
+ * @param weight is the weight of the sid list. optional.
+ *
+ * @return 0 if correct; -1 unknown policy or no segments for an add; -21 the
+ *         last SL cannot be deleted; -22 / -32 SL not part of the policy
+ */
+int
+sr_mpls_policy_mod (mpls_label_t bsid, u32 index, u8 operation,
+		    mpls_label_t * segments, u32 sl_index, u32 weight)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+  mpls_sr_policy_t *sr_policy = 0;
+  mpls_sr_sl_t *segment_list;
+  u32 *sl_index_iterate;
+  uword *p;
+
+  if (bsid)
+    {
+      p = hash_get (sm->sr_policies_index_hash, bsid);
+      if (!p)
+	return -1;
+      sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+    }
+  else
+    {
+      /* pool_elt_at_index () never yields NULL; validate the index itself */
+      if (pool_is_free_index (sm->sr_policies, index))
+	return -1;
+      sr_policy = pool_elt_at_index (sm->sr_policies, index);
+    }
+
+  if (operation == 1)		/* Add SR List to an existing SR policy */
+    {
+      /* create_sl () reads segments[0]; an empty list cannot be installed */
+      if (vec_len (segments) == 0)
+	return -1;
+      create_sl (sr_policy, segments, weight);
+    }
+  else if (operation == 2)	/* Delete SR List from an existing SR policy */
+    {
+      /* Check that currently there are more than one SID list */
+      if (vec_len (sr_policy->segments_lists) == 1)
+	return -21;
+
+      /* Check that the SR list does exist and is assigned to the sr policy */
+      vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+	if (*sl_index_iterate == sl_index)
+	  break;
+      /* No match leaves the iterator one past the end: never dereference it */
+      if (sl_index_iterate == vec_end (sr_policy->segments_lists))
+	return -22;
+
+      /* Remove the lucky SR list that is being kicked out */
+      segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+
+      mpls_eos_bit_t eos;
+      fib_route_path_t path = {
+	.frp_proto = FIB_PROTOCOL_MPLS,
+	.frp_sw_if_index = ~0,
+	.frp_fib_index = 0,
+	.frp_weight = segment_list->weight,
+	.frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+	.frp_local_label = segment_list->segments[0],
+      };
+
+      fib_route_path_t *paths = NULL;
+      vec_add1 (paths, path);
+
+      FOR_EACH_MPLS_EOS_BIT (eos)
+      {
+	/* *INDENT-OFF* */
+        fib_prefix_t pfx = {
+          .fp_len = 21,
+          .fp_proto = FIB_PROTOCOL_MPLS,
+          .fp_label = sr_policy->bsid,
+          .fp_eos = eos,
+          .fp_payload_proto = DPO_PROTO_MPLS,
+        };
+	/* *INDENT-ON* */
+
+	fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
+      }
+
+      vec_free (paths);
+      vec_free (segment_list->segments);
+      pool_put_index (sm->sid_lists, sl_index);
+      vec_del1 (sr_policy->segments_lists,
+		sl_index_iterate - sr_policy->segments_lists);
+    }
+  else if (operation == 3)	/* Modify the weight of an existing SR List */
+    {
+      /* Find the corresponding SL */
+      vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+	if (*sl_index_iterate == sl_index)
+	  break;
+      /* No match leaves the iterator one past the end: never dereference it */
+      if (sl_index_iterate == vec_end (sr_policy->segments_lists))
+	return -32;
+
+      /* Change the weight */
+      segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+      segment_list->weight = weight;
+
+      /* FIXME: the FIB path installed for this SL still has the old weight */
+    }
+  return 0;
+}
+
+/**
+ * @brief CLI for 'sr mpls policies' command family
+ */
+static clib_error_t *
+sr_mpls_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+			   vlib_cli_command_t * cmd)
+{
+  clib_error_t *error = 0;
+  int rv = -1;
+  char is_del = 0, is_add = 0, is_mod = 0;
+  char policy_set = 0;
+  mpls_label_t bsid = 0, next_label;
+  u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0;
+  u32 weight = (u32) ~ 0;
+  mpls_label_t *segments = 0;
+  u8 operation = 0;
+  u8 is_spray = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!is_add && !is_mod && !is_del && unformat (input, "add"))
+	is_add = 1;
+      else if (!is_add && !is_mod && !is_del && unformat (input, "del"))
+	is_del = 1;
+      else if (!is_add && !is_mod && !is_del && unformat (input, "mod"))
+	is_mod = 1;
+      else if (!policy_set
+	       && unformat (input, "bsid %U", unformat_mpls_unicast_label,
+			    &bsid))
+	policy_set = 1;
+      else if (!is_add && !policy_set
+	       && unformat (input, "index %d", &sr_policy_index))
+	policy_set = 1;
+      else if (unformat (input, "weight %d", &weight));
+      else if (unformat (input, "next %U", unformat_mpls_unicast_label,
+			 &next_label))
+	vec_add1 (segments, next_label);
+      else if (unformat (input, "add sl"))
+	operation = 1;
+      else if (unformat (input, "del sl index %d", &sl_index))
+	operation = 2;
+      else if (unformat (input, "mod sl index %d", &sl_index))
+	operation = 3;
+      else if (unformat (input, "spray"))
+	is_spray = 1;
+      else
+	break;
+    }
+
+  /* A bsid of 0 makes the API use the policy index. All outcomes fall through
+   * to the common exit, where the parsed SID vector is released */
+  if (!is_add && !is_mod && !is_del)
+    error = clib_error_return (0, "Incorrect CLI");
+  else if (!policy_set)
+    error = clib_error_return (0, "No SR policy BSID or index specified");
+  else if (is_add && vec_len (segments) == 0)
+    error = clib_error_return (0, "No Segment List specified");
+  else if (is_add)
+    rv = sr_mpls_policy_add (bsid, segments,
+			     (is_spray ? SR_POLICY_TYPE_SPRAY :
+			      SR_POLICY_TYPE_DEFAULT), weight);
+  else if (is_del)
+    rv = sr_mpls_policy_del ((sr_policy_index != (u32) ~ 0 ? 0 : bsid),
+			     sr_policy_index);
+  else if (!operation)
+    error = clib_error_return (0, "No SL modification specified");
+  else if (operation != 1 && sl_index == (u32) ~ 0)
+    error = clib_error_return (0, "No Segment List index specified");
+  else if (operation == 1 && vec_len (segments) == 0)
+    error = clib_error_return (0, "No Segment List specified");
+  else if (operation == 3 && weight == (u32) ~ 0)
+    error = clib_error_return (0, "No new weight for the SL specified");
+  else
+    rv = sr_mpls_policy_mod ((sr_policy_index != (u32) ~ 0 ? 0 : bsid),
+			     sr_policy_index, operation, segments,
+			     sl_index, weight);
+
+  /* Translate the API return code, unless the input was already rejected */
+  if (!error)
+    switch (rv)
+      {
+      case 0:
+      case 1:
+	break;
+      case -12:
+	error = clib_error_return (0,
+				   "There is already a FIB entry for the BindingSID address.\n"
+				   "The SR policy could not be created.");
+	break;
+      case -21:
+	error = clib_error_return (0,
+				   "The selected SR policy only contains ONE segment list. "
+				   "Please remove the SR policy instead");
+	break;
+      case -22:
+	error = clib_error_return (0,
+				   "Could not delete the segment list. "
+				   "It is not associated with that SR policy.");
+	break;
+      case -32:
+	error = clib_error_return (0,
+				   "Could not modify the segment list. "
+				   "The given SL is not associated with such SR policy.");
+	break;
+      default:
+	error = clib_error_return (0, "BUG: sr policy returns %d", rv);
+	break;
+      }
+
+  /* create_sl () keeps its own copy of the labels; free the parsed vector */
+  vec_free (segments);
+  return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sr_mpls_policy_command, static) = {
+  .path = "sr mpls policy",
+  .short_help = "sr mpls policy [add||del||mod] bsid 2999 "
+  "next 10 next 20 next 30 (weight 1) (spray)",
+  .long_help = "TBD.\n",	/* TODO: placeholder, no long help written yet */
+  .function = sr_mpls_policy_command_fn,	/* handler for add, del and mod */
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI to display onscreen all the SR MPLS policies
+ */
+static clib_error_t *
+show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
+				  vlib_cli_command_t * cmd)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+  mpls_sr_sl_t *segment_list = 0;
+  mpls_sr_policy_t *sr_policy = 0;
+  mpls_sr_policy_t **vec_policies = 0;
+  mpls_label_t *label;
+  u32 *sl_index;
+  u8 *s;
+  int i = 0;
+
+  vlib_cli_output (vm, "SR MPLS policies:");
+
+  /* Collect the policies first, then print them from the temporary vector */
+  /* *INDENT-OFF* */
+  pool_foreach (sr_policy, sm->sr_policies, {vec_add1 (vec_policies, sr_policy); } );
+  /* *INDENT-ON* */
+
+  vec_foreach_index (i, vec_policies)
+  {
+    sr_policy = vec_policies[i];
+    vlib_cli_output (vm, "[%u].-\tBSID: %U",
+		     (u32) (sr_policy - sm->sr_policies),
+		     format_mpls_unicast_label, sr_policy->bsid);
+    vlib_cli_output (vm, "\tType: %s",
+		     (sr_policy->type ==
+		      SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray"));
+    vlib_cli_output (vm, "\tSegment Lists:");
+    vec_foreach (sl_index, sr_policy->segments_lists)
+    {
+      segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+      s = format (0, "\t[%u].- ", *sl_index);
+      s = format (s, "< ");
+      vec_foreach (label, segment_list->segments)
+	s = format (s, "%U, ", format_mpls_unicast_label, *label);
+      s = format (s, "\b\b > ");
+      /* s is a u8 vector with no NUL terminator, hence %v rather than %s */
+      vlib_cli_output (vm, "  %v", s);
+      vec_free (s);
+    }
+    vlib_cli_output (vm, "-----------");
+  }
+  vec_free (vec_policies);
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_mpls_policies_command, static) = {
+  .path = "show sr mpls policies",
+  .short_help = "show sr mpls policies",
+  .function = show_sr_mpls_policies_command_fn,	/* prints every policy and its SID lists */
+};
+/* *INDENT-ON* */
+
+/********************* SR MPLS Policy initialization ***********************/
+/**
+ * @brief SR MPLS Policy  initialization
+ */
+clib_error_t *
+sr_mpls_policy_rewrite_init (vlib_main_t * vm)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+
+  /* Hash keyed by BSID (mpls_label_t); values are indexes into sm->sr_policies */
+  sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
+
+  return 0;
+}
+/* Register the function above as a vlib init function */
+VLIB_INIT_FUNCTION (sr_mpls_policy_rewrite_init);
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srmpls/sr_mpls_steering.c b/src/vnet/srmpls/sr_mpls_steering.c
new file mode 100755
index 00000000..37707049
--- /dev/null
+++ b/src/vnet/srmpls/sr_mpls_steering.c
@@ -0,0 +1,453 @@
+/*
+ * sr_mpls_steering.c: SR-MPLS steering into SR policy
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Packet steering into SR-MPLS Policies
+ *
+ * This file is in charge of handling the FIB appropriately to steer packets
+ * through SR Policies as defined in 'sr_mpls_policy.c'. Notice that here
+ * we are only doing steering. SR policy application is done in
+ * sr_policy_rewrite.c
+ *
+ * Supports:
+ *  - Steering of IPv6 traffic Destination Address based
+ *  - Steering of IPv4 traffic Destination Address based
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srmpls/sr.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/mpls_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+/**
+ * @brief Steer L3 traffic through a given SR-MPLS policy
+ *
+ * @param is_del non-zero deletes the steering entry, zero adds/updates it
+ * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index)
+ * @param sr_policy_index is the index of the SR Policy (used if bsid is 0)
+ * @param table_id is the FIB table id of the prefix (~0 selects table 0)
+ * @param prefix is the IPv4/v6 address for L3 traffic type
+ * @param mask_width is the mask for L3 traffic type
+ * @param traffic_type is SR_STEER_IPV4 or SR_STEER_IPV6
+ *
+ * @return 0 added/updated, 1 deleted, -1 bad type, -2 no policy, -4 no entry
+ */
+int
+sr_mpls_steering_policy (int is_del, mpls_label_t bsid, u32 sr_policy_index,
+			 u32 table_id, ip46_address_t * prefix,
+			 u32 mask_width, u8 traffic_type)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+  sr_mpls_steering_key_t key;
+  mpls_sr_steering_policy_t *steer_pl;
+  fib_prefix_t pfx = { 0 };
+
+  mpls_sr_policy_t *sr_policy = 0;
+  uword *p = 0;
+
+  memset (&key, 0, sizeof (sr_mpls_steering_key_t));
+
+  /* Compute the steer policy key */
+  if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6)
+    {
+      key.prefix.as_u64[0] = prefix->as_u64[0];
+      key.prefix.as_u64[1] = prefix->as_u64[1];
+      key.mask_width = mask_width;
+      key.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
+    }
+  else
+    return -1;
+
+  key.traffic_type = traffic_type;
+
+  /* Search for the item */
+  p = mhash_get (&sm->sr_steer_policies_hash, &key);
+
+  if (p)
+    {
+      /* Retrieve the existing steering entry */
+      steer_pl = pool_elt_at_index (sm->steer_policies, p[0]);
+
+      if (is_del)
+	{
+	  if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+	    {
+	      /* Remove FIB entry from the IPv6 table it was installed in */
+	      pfx.fp_proto = FIB_PROTOCOL_IP6;
+	      pfx.fp_len = steer_pl->classify.mask_width;
+	      pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+
+	      fib_table_entry_delete (fib_table_find
+				      (FIB_PROTOCOL_IP6,
+				       steer_pl->classify.fib_table), &pfx,
+				      FIB_SOURCE_SR);
+	    }
+	  else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+	    {
+	      /* Remove FIB entry from the IPv4 table it was installed in */
+	      pfx.fp_proto = FIB_PROTOCOL_IP4;
+	      pfx.fp_len = steer_pl->classify.mask_width;
+	      pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+
+	      fib_table_entry_delete (fib_table_find
+				      (FIB_PROTOCOL_IP4,
+				       steer_pl->classify.fib_table), &pfx,
+				      FIB_SOURCE_SR);
+	    }
+
+	  /* Delete SR steering policy entry */
+	  pool_put (sm->steer_policies, steer_pl);
+	  mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+
+	  return 1;
+	}
+      else			/* It means user requested to update an existing SR steering policy */
+	{
+	  /* Retrieve the SR policy to steer into */
+	  if (bsid)		//TODO FIXME
+	    {
+	      p = hash_get (sm->sr_policies_index_hash, bsid);
+	      if (p)
+		sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+	      else
+		return -2;
+	    }
+	  else if (!pool_is_free_index (sm->sr_policies, sr_policy_index))
+	    sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
+
+	  if (!sr_policy)
+	    return -2;
+
+	  steer_pl->sr_policy = sr_policy - sm->sr_policies;
+
+	  /* Remove old FIB/hw redirection and create a new one */
+	  if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+	    {
+	      /* Remove FIB entry */
+	      pfx.fp_proto = FIB_PROTOCOL_IP6;
+	      pfx.fp_len = steer_pl->classify.mask_width;
+	      pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+
+	      fib_table_entry_delete (fib_table_find
+				      (FIB_PROTOCOL_IP6,
+				       steer_pl->classify.fib_table), &pfx,
+				      FIB_SOURCE_SR);
+
+	      /* Create a new one */
+	      goto update_fib;
+	    }
+	  else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+	    {
+	      /* Remove FIB entry */
+	      pfx.fp_proto = FIB_PROTOCOL_IP4;
+	      pfx.fp_len = steer_pl->classify.mask_width;
+	      pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+
+	      fib_table_entry_delete (fib_table_find
+				      (FIB_PROTOCOL_IP4,
+				       steer_pl->classify.fib_table), &pfx,
+				      FIB_SOURCE_SR);
+
+	      /* Create a new one */
+	      goto update_fib;
+	    }
+	}
+    }
+  else
+    /* delete; steering policy does not exist; complain */
+  if (is_del)
+    return -4;
+
+  /* Retrieve SR policy */
+  if (bsid)			//FIX
+    {
+      p = hash_get (sm->sr_policies_index_hash, bsid);
+      if (p)
+	sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+      else
+	return -2;
+    }
+  else if (!pool_is_free_index (sm->sr_policies, sr_policy_index))
+    sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
+
+  /* Refuse to steer into a policy index that is not currently allocated */
+  if (!sr_policy)
+    return -2;
+
+  /* Create a new steering policy */
+  pool_get (sm->steer_policies, steer_pl);
+  memset (steer_pl, 0, sizeof (*steer_pl));
+
+  /*
+   * traffic_type was validated when the key was built, so it is known to
+   * be SR_STEER_IPV4 or SR_STEER_IPV6 here; record the classification
+   * exactly as it was encoded in the key.
+   */
+  clib_memcpy (&steer_pl->classify.prefix, prefix, sizeof (ip46_address_t));
+  steer_pl->classify.mask_width = mask_width;
+  steer_pl->classify.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
+  steer_pl->classify.traffic_type = traffic_type;
+
+  /* Remember which SR policy this entry steers into (pool index) */
+  steer_pl->sr_policy = sr_policy - sm->sr_policies;
+
+  /* Create and store key */
+  mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies,
+	     NULL);
+
+update_fib:;
+
+  fib_route_path_t path = {
+    .frp_proto = FIB_PROTOCOL_MPLS,
+    .frp_local_label = sr_policy->bsid,
+    .frp_eos = MPLS_EOS,
+    .frp_sw_if_index = ~0,
+    .frp_fib_index = 0,
+    .frp_weight = 1,
+    .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+    .frp_label_stack = NULL
+  };
+
+  fib_route_path_t *paths = NULL;
+
+  /* FIB API calls - Recursive route through the BindingSID */
+  if (traffic_type == SR_STEER_IPV6)
+    {
+      pfx.fp_proto = FIB_PROTOCOL_IP6;
+      pfx.fp_len = steer_pl->classify.mask_width;
+      pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+      path.frp_fib_index = 0;
+
+      vec_add1 (paths, path);
+
+      fib_table_entry_path_add2 (fib_table_find
+				 (FIB_PROTOCOL_IP6,
+				  (table_id != (u32) ~ 0 ? table_id : 0)),
+				 &pfx, FIB_SOURCE_SR,
+				 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+      vec_free (paths);
+    }
+  else if (traffic_type == SR_STEER_IPV4)
+    {
+      pfx.fp_proto = FIB_PROTOCOL_IP4;
+      pfx.fp_len = steer_pl->classify.mask_width;
+      pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+      path.frp_fib_index = 0;
+
+      vec_add1 (paths, path);
+
+      fib_table_entry_path_add2 (fib_table_find
+				 (FIB_PROTOCOL_IP4,
+				  (table_id != (u32) ~ 0 ? table_id : 0)),
+				 &pfx, FIB_SOURCE_SR,
+				 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+      vec_free (paths);
+    }
+
+  return 0;
+}
+
+/** @brief CLI handler: steer an IPv4/IPv6 prefix into an SR-MPLS policy */
+static clib_error_t *
+sr_mpls_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+				 vlib_cli_command_t * cmd)
+{
+  int is_del = 0;
+  int rv;
+  ip46_address_t prefix;
+  u32 dst_mask_width = 0;
+  u8 traffic_type = 0;
+  u32 fib_table = (u32) ~ 0;
+  /* bsid == 0 makes sr_mpls_steering_policy() use sr_policy_index instead */
+  mpls_label_t bsid = 0;
+  u32 sr_policy_index = (u32) ~ 0;
+  u8 sr_policy_set = 0;
+
+  memset (&prefix, 0, sizeof (ip46_address_t));
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "del"))
+	is_del = 1;
+      else if (!traffic_type
+	       && unformat (input, "l3 %U/%d", unformat_ip6_address,
+			    &prefix.ip6, &dst_mask_width))
+	traffic_type = SR_STEER_IPV6;
+      else if (!traffic_type
+	       && unformat (input, "l3 %U/%d", unformat_ip4_address,
+			    &prefix.ip4, &dst_mask_width))
+	traffic_type = SR_STEER_IPV4;
+      else if (!sr_policy_set
+	       && unformat (input, "via sr policy index %d",
+			    &sr_policy_index))
+	sr_policy_set = 1;
+      else if (!sr_policy_set
+	       && unformat (input, "via sr policy bsid %U",
+			    unformat_mpls_unicast_label, &bsid))
+	sr_policy_set = 1;
+      else if (fib_table == (u32) ~ 0
+	       && unformat (input, "fib-table %d", &fib_table));
+      else
+	break;
+    }
+
+  if (!traffic_type)
+    return clib_error_return (0, "No L3 traffic specified");
+  if (!sr_policy_set)
+    return clib_error_return (0, "No SR policy specified");
+
+  /* Reject lengths that would make the mask computation below undefined */
+  if (dst_mask_width > (traffic_type == SR_STEER_IPV4 ? 32 : 128))
+    return clib_error_return (0, "Invalid prefix length");
+
+  /* Make sure that the prefixes are clean (addresses are in network order) */
+  if (traffic_type == SR_STEER_IPV4)
+    {
+      u32 mask = dst_mask_width ? (0xFFFFFFFFu << (32 - dst_mask_width)) : 0;
+      prefix.ip4.as_u32 &= clib_host_to_net_u32 (mask);
+    }
+  else if (traffic_type == SR_STEER_IPV6)
+    {
+      ip6_address_t mask;
+      ip6_address_mask_from_width (&mask, dst_mask_width);
+      ip6_address_mask (&prefix.ip6, &mask);
+    }
+
+  rv = sr_mpls_steering_policy (is_del, bsid, sr_policy_index, fib_table,
+				&prefix, dst_mask_width, traffic_type);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+    case 1:
+      return 0;
+    case -1:
+      return clib_error_return (0, "Incorrect API usage.");
+    case -2:
+      return clib_error_return (0,
+				"The requested SR policy could not be located. Review the BSID/index.");
+    case -3:
+      return clib_error_return (0,
+				"Unable to do SW redirect. Incorrect interface.");
+    case -4:
+      return clib_error_return (0,
+				"The requested SR steering policy could not be deleted.");
+    case -5:
+      return clib_error_return (0,
+				"The SR policy is not an encapsulation one.");
+    default:
+      return clib_error_return (0, "BUG: sr steer policy returns %d", rv);
+    }
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration; the help text mirrors the syntax parsed by the handler */
+VLIB_CLI_COMMAND (sr_mpls_steer_policy_command, static) = {
+  .path = "sr mpls steer",
+  .short_help = "sr mpls steer (del) l3 <ip_addr/mask> "
+    "via sr policy bsid <mpls_label> (fib-table <fib_table_index>)",
+  .long_help = "\tSteer L3 traffic through an existing SR policy.\n"
+    "\tExamples:\n"
+    "\t\tsr mpls steer l3 2001::/64 via sr policy index 5\n"
+    "\t\tsr mpls steer l3 2001::/64 via sr policy bsid 29999\n"
+    "\t\tsr mpls steer del l3 2001::/64 via sr policy index 5\n",
+  .function = sr_mpls_steer_policy_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI handler for "show sr mpls steering policies": prints every
+ * steered prefix together with the BSID of the SR policy it points to.
+ */
+static clib_error_t *
+show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm,
+					   unformat_input_t * input,
+					   vlib_cli_command_t * cmd)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+  mpls_sr_steering_policy_t **steer_policies = 0;
+  mpls_sr_steering_policy_t *steer_pl;
+  mpls_sr_policy_t *pl = 0;
+  int i;
+
+  vlib_cli_output (vm, "SR MPLS steering policies:");
+  /* *INDENT-OFF* */
+  pool_foreach (steer_pl, sm->steer_policies, ({vec_add1(steer_policies, steer_pl);}));
+  /* *INDENT-ON* */
+  vlib_cli_output (vm, "Traffic\t\tSR policy BSID");
+  for (i = 0; i < vec_len (steer_policies); i++)
+    {
+      steer_pl = steer_policies[i];
+      pl = pool_elt_at_index (sm->sr_policies, steer_pl->sr_policy);
+      if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+	vlib_cli_output (vm, "L3 %U/%d\t%U",
+			 format_ip4_address, &steer_pl->classify.prefix.ip4,
+			 steer_pl->classify.mask_width,
+			 format_mpls_unicast_label, pl->bsid);
+      else if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+	vlib_cli_output (vm, "L3 %U/%d\t%U",
+			 format_ip6_address, &steer_pl->classify.prefix.ip6,
+			 steer_pl->classify.mask_width,
+			 format_mpls_unicast_label, pl->bsid);
+    }
+
+  /* The pointer vector was built only for this listing; release it */
+  vec_free (steer_policies);
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_mpls_steering_policies_command, static) = {
+  .path = "show sr mpls steering policies",	/* CLI path of the command */
+  .short_help = "show sr mpls steering policies",	/* one-line usage text */
+  .function = show_sr_mpls_steering_policies_command_fn,	/* CLI handler */
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Set up the hash used to find steering entries by their key
+ */
+clib_error_t *
+sr_mpls_steering_init (vlib_main_t * vm)
+{
+  /* Keys are sr_mpls_steering_key_t, stored values are uword-sized */
+  mhash_init (&sr_mpls_main.sr_steer_policies_hash, sizeof (uword),
+	      sizeof (sr_mpls_steering_key_t));
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_INIT_FUNCTION (sr_mpls_steering_init);
+/* *INDENT-ON* */
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/dir.dox b/src/vnet/srv6/dir.dox
new file mode 100755
index 00000000..3f539a58
--- /dev/null
+++ b/src/vnet/srv6/dir.dox
@@ -0,0 +1,25 @@
+/*
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief Segment Routing code
+
+ An implementation of Segment Routing as per:
+ draft-ietf-6man-segment-routing-header-05
+
+ @see ietf_draft_05.txt 
+ 
+*/
\ No newline at end of file
diff --git a/src/vnet/srv6/ietf_draft_05.txt b/src/vnet/srv6/ietf_draft_05.txt
new file mode 100755
index 00000000..e9bff04f
--- /dev/null
+++ b/src/vnet/srv6/ietf_draft_05.txt
@@ -0,0 +1,1564 @@
+Network Working Group                                    S. Previdi, Ed.
+Internet-Draft                                               C. Filsfils
+Intended status: Standards Track                     Cisco Systems, Inc.
+Expires: August 5, 2017                                         B. Field
+                                                                 Comcast
+                                                                I. Leung
+                                                   Rogers Communications
+                                                              J. Linkova
+                                                                  Google
+                                                                E. Aries
+                                                                Facebook
+                                                               T. Kosugi
+                                                                     NTT
+                                                               E. Vyncke
+                                                     Cisco Systems, Inc.
+                                                               D. Lebrun
+                                        Universite Catholique de Louvain
+                                                        February 1, 2017
+
+
+                   IPv6 Segment Routing Header (SRH)
+               draft-ietf-6man-segment-routing-header-05
+
+Abstract
+
+   Segment Routing (SR) allows a node to steer a packet through a
+   controlled set of instructions, called segments, by prepending an SR
+   header to the packet.  A segment can represent any instruction,
+   topological or service-based.  SR allows to enforce a flow through
+   any path (topological, or application/service based) while
+   maintaining per-flow state only at the ingress node to the SR domain.
+
+   Segment Routing can be applied to the IPv6 data plane with the
+   addition of a new type of Routing Extension Header.  This draft
+   describes the Segment Routing Extension Header Type and how it is
+   used by SR capable nodes.
+
+Requirements Language
+
+   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+   document are to be interpreted as described in RFC 2119 [RFC2119].
+
+Status of This Memo
+
+   This Internet-Draft is submitted in full conformance with the
+   provisions of BCP 78 and BCP 79.
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                 [Page 1]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   Internet-Drafts are working documents of the Internet Engineering
+   Task Force (IETF).  Note that other groups may also distribute
+   working documents as Internet-Drafts.  The list of current Internet-
+   Drafts is at http://datatracker.ietf.org/drafts/current/.
+
+   Internet-Drafts are draft documents valid for a maximum of six months
+   and may be updated, replaced, or obsoleted by other documents at any
+   time.  It is inappropriate to use Internet-Drafts as reference
+   material or to cite them other than as "work in progress."
+
+   This Internet-Draft will expire on August 5, 2017.
+
+Copyright Notice
+
+   Copyright (c) 2017 IETF Trust and the persons identified as the
+   document authors.  All rights reserved.
+
+   This document is subject to BCP 78 and the IETF Trust's Legal
+   Provisions Relating to IETF Documents
+   (http://trustee.ietf.org/license-info) in effect on the date of
+   publication of this document.  Please review these documents
+   carefully, as they describe your rights and restrictions with respect
+   to this document.  Code Components extracted from this document must
+   include Simplified BSD License text as described in Section 4.e of
+   the Trust Legal Provisions and are provided without warranty as
+   described in the Simplified BSD License.
+
+Table of Contents
+
+   1.  Segment Routing Documents . . . . . . . . . . . . . . . . . .   3
+   2.  Introduction  . . . . . . . . . . . . . . . . . . . . . . . .   3
+     2.1.  Data Planes supporting Segment Routing  . . . . . . . . .   4
+     2.2.  Segment Routing (SR) Domain . . . . . . . . . . . . . . .   4
+       2.2.1.  SR Domain in a Service Provider Network . . . . . . .   5
+       2.2.2.  SR Domain in a Overlay Network  . . . . . . . . . . .   6
+   3.  Segment Routing Extension Header (SRH)  . . . . . . . . . . .   7
+     3.1.  SRH TLVs  . . . . . . . . . . . . . . . . . . . . . . . .   9
+       3.1.1.  Ingress Node TLV  . . . . . . . . . . . . . . . . . .  10
+       3.1.2.  Egress Node TLV . . . . . . . . . . . . . . . . . . .  11
+       3.1.3.  Opaque Container TLV  . . . . . . . . . . . . . . . .  11
+       3.1.4.  Padding TLV . . . . . . . . . . . . . . . . . . . . .  12
+       3.1.5.  HMAC TLV  . . . . . . . . . . . . . . . . . . . . . .  13
+     3.2.  SRH and RFC2460 behavior  . . . . . . . . . . . . . . . .  14
+   4.  SRH Procedures  . . . . . . . . . . . . . . . . . . . . . . .  14
+     4.1.  Source SR Node  . . . . . . . . . . . . . . . . . . . . .  14
+     4.2.  Transit Node  . . . . . . . . . . . . . . . . . . . . . .  15
+     4.3.  SR Segment Endpoint Node  . . . . . . . . . . . . . . . .  16
+   5.  Security Considerations . . . . . . . . . . . . . . . . . . .  16
+
+
+
+Previdi, et al.          Expires August 5, 2017                 [Page 2]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+     5.1.  Threat model  . . . . . . . . . . . . . . . . . . . . . .  17
+       5.1.1.  Source routing threats  . . . . . . . . . . . . . . .  17
+       5.1.2.  Applicability of RFC 5095 to SRH  . . . . . . . . . .  17
+       5.1.3.  Service stealing threat . . . . . . . . . . . . . . .  18
+       5.1.4.  Topology disclosure . . . . . . . . . . . . . . . . .  18
+       5.1.5.  ICMP Generation . . . . . . . . . . . . . . . . . . .  18
+     5.2.  Security fields in SRH  . . . . . . . . . . . . . . . . .  19
+       5.2.1.  Selecting a hash algorithm  . . . . . . . . . . . . .  20
+       5.2.2.  Performance impact of HMAC  . . . . . . . . . . . . .  21
+       5.2.3.  Pre-shared key management . . . . . . . . . . . . . .  21
+     5.3.  Deployment Models . . . . . . . . . . . . . . . . . . . .  22
+       5.3.1.  Nodes within the SR domain  . . . . . . . . . . . . .  22
+       5.3.2.  Nodes outside of the SR domain  . . . . . . . . . . .  22
+       5.3.3.  SR path exposure  . . . . . . . . . . . . . . . . . .  23
+       5.3.4.  Impact of BCP-38  . . . . . . . . . . . . . . . . . .  23
+   6.  IANA Considerations . . . . . . . . . . . . . . . . . . . . .  24
+   7.  Manageability Considerations  . . . . . . . . . . . . . . . .  24
+   8.  Contributors  . . . . . . . . . . . . . . . . . . . . . . . .  24
+   9.  Acknowledgements  . . . . . . . . . . . . . . . . . . . . . .  24
+   10. References  . . . . . . . . . . . . . . . . . . . . . . . . .  25
+     10.1.  Normative References . . . . . . . . . . . . . . . . . .  25
+     10.2.  Informative References . . . . . . . . . . . . . . . . .  25
+   Authors' Addresses  . . . . . . . . . . . . . . . . . . . . . . .  27
+
+1.  Segment Routing Documents
+
+   Segment Routing terminology is defined in
+   [I-D.ietf-spring-segment-routing].
+
+   Segment Routing use cases are described in [RFC7855] and
+   [I-D.ietf-spring-ipv6-use-cases].
+
+   Segment Routing protocol extensions are defined in
+   [I-D.ietf-isis-segment-routing-extensions], and
+   [I-D.ietf-ospf-ospfv3-segment-routing-extensions].
+
+2.  Introduction
+
+   Segment Routing (SR), defined in [I-D.ietf-spring-segment-routing],
+   allows a node to steer a packet through a controlled set of
+   instructions, called segments, by prepending an SR header to the
+   packet.  A segment can represent any instruction, topological or
+   service-based.  SR allows to enforce a flow through any path
+   (topological or service/application based) while maintaining per-flow
+   state only at the ingress node to the SR domain.  Segments can be
+   derived from different components: IGP, BGP, Services, Contexts,
+   Locators, etc.  The list of segment forming the path is called the
+   Segment List and is encoded in the packet header.
+
+
+
+Previdi, et al.          Expires August 5, 2017                 [Page 3]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   SR allows the use of strict and loose source based routing paradigms
+   without requiring any additional signaling protocols in the
+   infrastructure hence delivering an excellent scalability property.
+
+   The source based routing model described in
+   [I-D.ietf-spring-segment-routing] is inherited from the ones proposed
+   by [RFC1940] and [RFC2460].  The source based routing model offers
+   the support for explicit routing capability.
+
+2.1.  Data Planes supporting Segment Routing
+
+   Segment Routing (SR), can be instantiated over MPLS
+   ([I-D.ietf-spring-segment-routing-mpls]) and IPv6.  This document
+   defines its instantiation over the IPv6 data-plane based on the use-
+   cases defined in [I-D.ietf-spring-ipv6-use-cases].
+
+   This document defines a new type of Routing Header (originally
+   defined in [RFC2460]) called the Segment Routing Header (SRH) in
+   order to convey the Segment List in the packet header as defined in
+   [I-D.ietf-spring-segment-routing].  Mechanisms through which segment
+   are known and advertised are outside the scope of this document.
+
+   A segment is materialized by an IPv6 address.  A segment identifies a
+   topological instruction or a service instruction.  A segment can be
+   either:
+
+   o  global: a global segment represents an instruction supported by
+      all nodes in the SR domain and it is instantiated through an IPv6
+      address globally known in the SR domain.
+
+   o  local: a local segment represents an instruction supported only by
+      the node who originates it and it is instantiated through an IPv6
+      address that is known only by the local node.
+
+2.2.  Segment Routing (SR) Domain
+
+   We define the concept of the Segment Routing Domain (SR Domain) as
+   the set of nodes participating into the source based routing model.
+   These nodes may be connected to the same physical infrastructure
+   (e.g.: a Service Provider's network) as well as nodes remotely
+   connected to each other (e.g.: an enterprise VPN or an overlay).
+
+   A non-exhaustive list of examples of SR Domains is:
+
+   o  The network of an operator, service provider, content provider,
+      enterprise including nodes, links and Autonomous Systems.
+
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                 [Page 4]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   o  A set of nodes connected as an overlay over one or more transit
+      providers.  The overlay nodes exchange SR-enabled traffic with
+      segments belonging solely to the overlay routers (the SR domain).
+      None of the segments in the SR-enabled packets exchanged by the
+      overlay belong to the transit networks
+
+   The source based routing model through its instantiation of the
+   Segment Routing Header (SRH) defined in this document equally applies
+   to all the above examples.
+
+   It is assumed in this document that the SRH is added to the packet by
+   its source, consistently with the source routing model defined in
+   [RFC2460].  For example:
+
+   o  At the node originating the packet (host, server).
+
+   o  At the ingress node of an SR domain where the ingress node
+      receives an IPv6 packet and encapsulates it into an outer IPv6
+      header followed by a Segment Routing header.
+
+2.2.1.  SR Domain in a Service Provider Network
+
+   The following figure illustrates an SR domain consisting of an
+   operator's network infrastructure.
+
+     (-------------------------- Operator 1 -----------------------)
+     (                                                             )
+     (  (-----AS 1-----)  (-------AS 2-------)  (----AS 3-------)  )
+     (  (              )  (                  )  (               )  )
+ A1--(--(--11---13--14-)--(-21---22---23--24-)--(-31---32---34--)--)--Z1
+     (  ( /|\  /|\  /| )  ( |\  /|\  /|\  /| )  ( |\  /|\  /| \ )  )
+ A2--(--(/ | \/ | \/ | )  ( | \/ | \/ | \/ | )  ( | \/ | \/ |  \)--)--Z2
+     (  (  | /\ | /\ | )  ( | /\ | /\ | /\ | )  ( | /\ | /\ |   )  )
+     (  (  |/  \|/  \| )  ( |/  \|/  \|/  \| )  ( |/  \|/  \|   )  )
+ A3--(--(--15---17--18-)--(-25---26---27--28-)--(-35---36---38--)--)--Z3
+     (  (              )  (                  )  (               )  )
+     (  (--------------)  (------------------)  (---------------)  )
+     (                                                             )
+     (-------------------------------------------------------------)
+
+                   Figure 1: Service Provider SR Domain
+
+   Figure 1 describes an operator network including several ASes and
+   delivering connectivity between endpoints.  In this scenario, Segment
+   Routing is used within the operator networks and across the ASes
+   boundaries (all being under the control of the same operator).  In
+   this case segment routing can be used in order to address use cases
+   such as end-to-end traffic engineering, fast re-route, egress peer
+
+
+
+Previdi, et al.          Expires August 5, 2017                 [Page 5]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   engineering, data-center traffic engineering as described in
+   [RFC7855], [I-D.ietf-spring-ipv6-use-cases] and
+   [I-D.ietf-spring-resiliency-use-cases].
+
+   Typically, an IPv6 packet received at ingress (i.e.: from outside the
+   SR domain), is classified according to network operator policies and
+   such classification results into an outer header with an SRH applied
+   to the incoming packet.  The SRH contains the list of segment
+   representing the path the packet must take inside the SR domain.
+   Thus, the SA of the packet is the ingress node, the DA (due to SRH
+   procedures described in Section 4) is set as the first segment of the
+   path and the last segment of the path is the egress node of the SR
+   domain.
+
+   The path may include intra-AS as well as inter-AS segments.  It has
+   to be noted that all nodes within the SR domain are under control of
+   the same administration.  When the packet reaches the egress point of
+   the SR domain, the outer header and its SRH are removed so that the
+   destination of the packet is unaware of the SR domain the packet has
+   traversed.
+
+   The outer header with the SRH is no different from any other
+   tunneling encapsulation mechanism and allows a network operator to
+   implement traffic engineering mechanisms so to efficiently steer
+   traffic across his infrastructure.
+
+2.2.2.  SR Domain in a Overlay Network
+
+   The following figure illustrates an SR domain consisting of an
+   overlay network over multiple operator's networks.
+
+       (--Operator 1---)  (-----Operator 2-----)  (--Operator 3---)
+       (               )  (                    )  (               )
+   A1--(--11---13--14--)--(--21---22---23--24--)--(-31---32---34--)--C1
+       ( /|\  /|\  /|  )  (  |\  /|\  /|\  /|  )  ( |\  /|\  /| \ )
+   A2--(/ | \/ | \/ |  )  (  | \/ | \/ | \/ |  )  ( | \/ | \/ |  \)--C2
+       (  | /\ | /\ |  )  (  | /\ | /\ | /\ |  )  ( | /\ | /\ |   )
+       (  |/  \|/  \|  )  (  |/  \|/  \|/  \|  )  ( |/  \|/  \|   )
+   A3--(--15---17--18--)--(--25---26---27--28--)--(-35---36---38--)--C3
+       (               )  (  |    |         |  )  (               )
+       (---------------)  (--|----|---------|--)  (---------------)
+                             |    |         |
+                             B1   B2        B3
+
+                        Figure 2: Overlay SR Domain
+
+
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                 [Page 6]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   Figure 2 describes an overlay consisting of nodes connected to three
+   different network operators and forming a single overlay network
+   where Segment routing packets are exchanged.
+
+   The overlay consists of nodes A1, A2, A3, B1, B2, B3, C1, C2 and C3.
+   These nodes are connected to their respective network operator and
+   form an overlay network.
+
+   Each node may originate packets with an SRH which contains, in the
+   segment list of the SRH or in the DA, segments identifying other
+   overlay nodes.  This implies that packets with an SRH may traverse
+   operator's networks but, obviously, these SRHs cannot contain an
+   address/segment of the transit operators 1, 2 and 3.  The SRH
+   originated by the overlay can only contain address/segment under the
+   administration of the overlay (e.g. address/segments supported by A1,
+   A2, A3, B1, B2, B3, C1,C2 or C3).
+
+   In this model, the operator network nodes are transit nodes and,
+   according to [RFC2460], MUST NOT inspect the routing extension header
+   since they are not the DA of the packet.
+
+   It is a common practice in operators networks to filter out, at
+   ingress, any packet whose DA is the address of an internal node and
+   it is also possible that an operator would filter out any packet
+   destined to an internal address and having an extension header in it.
+
+   This common practice does not impact the SR-enabled traffic between
+   the overlay nodes as the intermediate transit networks never see a
+   destination address belonging to their infrastructure.  These SR-
+   enabled overlay packets will thus never be filtered by the transit
+   operators.
+
+   In all cases, transit packets (i.e.: packets whose DA is outside the
+   domain of the operator's network) will be forwarded accordingly
+   without introducing any security concern in the operator's network.
+   This is similar to tunneled packets.
+
+3.  Segment Routing Extension Header (SRH)
+
+   A new type of the Routing Header (originally defined in [RFC2460]) is
+   defined: the Segment Routing Header (SRH) which has a new Routing
+   Type, (suggested value 4) to be assigned by IANA.
+
+   The Segment Routing Header (SRH) is defined as follows:
+
+
+
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                 [Page 7]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+     0                   1                   2                   3
+     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    | Next Header   |  Hdr Ext Len  | Routing Type  | Segments Left |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    | First Segment |     Flags     |           RESERVED            |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |                                                               |
+    |            Segment List[0] (128 bits IPv6 address)            |
+    |                                                               |
+    |                                                               |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |                                                               |
+    |                                                               |
+                                  ...
+    |                                                               |
+    |                                                               |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |                                                               |
+    |            Segment List[n] (128 bits IPv6 address)            |
+    |                                                               |
+    |                                                               |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    //                                                             //
+    //         Optional Type Length Value objects (variable)       //
+    //                                                             //
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   where:
+
+   o  Next Header: 8-bit selector.  Identifies the type of header
+      immediately following the SRH.
+
+   o  Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH
+      header in 8-octet units, not including the first 8 octets.
+
+   o  Routing Type: TBD, to be assigned by IANA (suggested value: 4).
+
+   o  Segments Left.  Defined in [RFC2460], it contains the index, in
+      the Segment List, of the next segment to inspect.  Segments Left
+      is decremented at each segment.
+
+   o  First Segment: contains the index, in the Segment List, of the
+      first segment of the path which is in fact the last element of the
+      Segment List.
+
+   o  Flags: 8 bits of flags.  The following flags are defined:
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                 [Page 8]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+          0 1 2 3 4 5 6 7
+         +-+-+-+-+-+-+-+-+
+         |U|P|O|A|H|  U  |
+         +-+-+-+-+-+-+-+-+
+
+         U: Unused and for future use.  SHOULD be unset on transmission
+         and MUST be ignored on receipt.
+
+         P-flag: Protected flag.  Set when the packet has been rerouted
+         through FRR mechanism by an SR endpoint node.
+
+         O-flag: OAM flag.  When set, it indicates that this packet is
+         an operations and management (OAM) packet.
+
+         A-flag: Alert flag.  If present, it means important Type Length
+         Value (TLV) objects are present.  See Section 3.1 for details
+         on TLVs objects.
+
+         H-flag: HMAC flag.  If set, the HMAC TLV is present and is
+         encoded as the last TLV of the SRH.  In other words, the last
+         36 octets of the SRH represent the HMAC information.  See
+         Section 3.1.5 for details on the HMAC TLV.
+
+   o  RESERVED: SHOULD be unset on transmission and MUST be ignored on
+      receipt.
+
+   o  Segment List[n]: 128 bit IPv6 addresses representing the nth
+      segment in the Segment List.  The Segment List is encoded starting
+      from the last segment of the path.  I.e., the first element of the
+      segment list (Segment List [0]) contains the last segment of the
+      path while the last segment of the Segment List (Segment List[n])
+      contains the first segment of the path.  The index contained in
+      "Segments Left" identifies the current active segment.
+
+   o  Type Length Value (TLV) are described in Section 3.1.
+
+3.1.  SRH TLVs
+
+   This section defines TLVs of the Segment Routing Header.
+
+   Type Length Value (TLV) objects contain optional information that may
+   be used by the node identified in the DA of the packet.  It has to be
+   noted that the information carried in the TLVs is not intended to be
+   used by the routing layer.  Typically, TLVs carry information that is
+   consumed by other components (e.g.: OAM) than the routing function.
+
+   Each TLV has its own length, format and semantic.  The code-point
+   allocated (by IANA) to each TLV defines both the format and the
+
+
+
+Previdi, et al.          Expires August 5, 2017                 [Page 9]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   semantic of the information carried in the TLV.  Multiple TLVs may be
+   encoded in the same SRH.
+
+   The "Length" field of the TLV is primarily used to skip the TLV while
+   inspecting the SRH in case the node doesn't support or recognize the
+   TLV codepoint.  The "Length" defines the TLV length in octets and not
+   including the "Type" and "Length" fields.
+
+   The primary scope of TLVs is to give the receiver of the packet
+   information related to the source routed path (e.g.: where the packet
+   entered in the SR domain and where it is expected to exit).
+
+   Additional TLVs may be defined in the future.
+
+3.1.1.  Ingress Node TLV
+
+   The Ingress Node TLV is optional and identifies the node this packet
+   traversed when it entered the SR domain.  The Ingress Node TLV has
+   the following format:
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |      Type     |    Length     |   RESERVED    |     Flags     |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                                                               |
+   |                 Ingress Node (16 octets)                      |
+   |                                                               |
+   |                                                               |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   where:
+
+   o  Type: to be assigned by IANA (suggested value 1).
+
+   o  Length: 18.
+
+   o  RESERVED: 8 bits.  SHOULD be unset on transmission and MUST be
+      ignored on receipt.
+
+   o  Flags: 8 bits.  No flags are defined in this document.
+
+   o  Ingress Node: 128 bits.  Defines the node where the packet is
+      expected to enter the SR domain.  In the encapsulation case
+      described in Section 2.2.1, this information corresponds to the SA
+      of the encapsulating header.
+
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 10]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+3.1.2.  Egress Node TLV
+
+   The Egress Node TLV is optional and identifies the node this packet
+   is expected to traverse when exiting the SR domain.  The Egress Node
+   TLV has the following format:
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |      Type     |    Length     |   RESERVED    |     Flags     |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                                                               |
+   |                  Egress Node (16 octets)                      |
+   |                                                               |
+   |                                                               |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   where:
+
+   o  Type: to be assigned by IANA (suggested value 2).
+
+   o  Length: 18.
+
+   o  RESERVED: 8 bits.  SHOULD be unset on transmission and MUST be
+      ignored on receipt.
+
+   o  Flags: 8 bits.  No flags are defined in this document.
+
+   o  Egress Node: 128 bits.  Defines the node where the packet is
+      expected to exit the SR domain.  In the encapsulation case
+      described in Section 2.2.1, this information corresponds to the
+      last segment of the SRH in the encapsulating header.
+
+3.1.3.  Opaque Container TLV
+
+   The Opaque Container TLV is optional and has the following format:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 11]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |      Type     |    Length     |   RESERVED    |     Flags     |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                                                               |
+   |             Opaque Container (16 octets)                      |
+   |                                                               |
+   |                                                               |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   where:
+
+   o  Type: to be assigned by IANA (suggested value 3).
+
+   o  Length: 18.
+
+   o  RESERVED: 8 bits.  SHOULD be unset on transmission and MUST be
+      ignored on receipt.
+
+   o  Flags: 8 bits.  No flags are defined in this document.
+
+   o  Opaque Container: 128 bits of opaque data not relevant for the
+      routing layer.  Typically, this information is consumed by a non-
+      routing component of the node receiving the packet (i.e.: the node
+      in the DA).
+
+3.1.4.  Padding TLV
+
+   The Padding TLV is optional and has the purpose of aligning the SRH
+   on an 8-octet boundary.  The Padding TLV has the following format:
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Type      |    Length     |      Padding (variable)       |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   //                    Padding (variable)                       //
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   where:
+
+   o  Type: to be assigned by IANA (suggested value 4).
+
+   o  Length: 1 to 7
+
+
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 12]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   o  Padding: from 1 to 7 octets of padding.  Padding bits have no
+      semantic.  They SHOULD be set to 0 on transmission and MUST be
+      ignored on receipt.
+
+   The following applies to the Padding TLV:
+
+   o  Padding TLV is optional and MAY only appear once in the SRH.  If
+      present, it MUST have a length between 1 and 7 octets.
+
+   o  The Padding TLV is used in order to align the SRH total length on
+      the 8 octet boundary.
+
+   o  When present, the Padding TLV MUST appear as the last TLV before
+      the HMAC TLV (if HMAC TLV is present).
+
+   o  When present, the Padding TLV MUST have a length from 1 to 7 in
+      order to align the SRH total length on an 8-octet boundary.
+
+   o  When a router inspecting the SRH encounters the Padding TLV, it
+      MUST assume that no other TLV (other than the HMAC) follows the
+      Padding TLV.
+
+3.1.5.  HMAC TLV
+
+   HMAC TLV is optional and contains the HMAC information.  The HMAC TLV
+   has the following format:
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |      Type     |     Length    |          RESERVED             |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                      HMAC Key ID (4 octets)                   |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                                                              //
+   |                      HMAC (32 octets)                        //
+   |                                                              //
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   where:
+
+   o  Type: to be assigned by IANA (suggested value 5).
+
+   o  Length: 38.
+
+   o  RESERVED: 2 octets.  SHOULD be unset on transmission and MUST be
+      ignored on receipt.
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 13]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   o  HMAC Key ID: 4 octets.
+
+   o  HMAC: 32 octets.
+
+   o  HMAC and HMAC Key ID usage is described in Section 5
+
+   The following applies to the HMAC TLV:
+
+   o  When present, the HMAC TLV MUST be encoded as the last TLV of the
+      SRH.
+
+   o  If the HMAC TLV is present, the SRH H-Flag (Figure 4) MUST be set.
+
+   o  When the H-flag is set in the SRH, the router inspecting the SRH
+      MUST find the HMAC TLV in the last 38 octets of the SRH.
+
+3.2.  SRH and RFC2460 behavior
+
+   The SRH being a new type of the Routing Header, it also has the same
+   properties:
+
+      SHOULD only appear once in the packet.
+
+      Only the router whose address is in the DA field of the packet
+      header MUST inspect the SRH.
+
+   Therefore, Segment Routing in IPv6 networks implies that the segment
+   identifier (i.e.: the IPv6 address of the segment) is moved into the
+   DA of the packet.
+
+   The DA of the packet changes at each segment termination/completion
+   and therefore the final DA of the packet MUST be encoded as the last
+   segment of the path.
+
+4.  SRH Procedures
+
+   In this section we describe the different procedures on the SRH.
+
+4.1.  Source SR Node
+
+   A Source SR Node can be any node originating an IPv6 packet with its
+   IPv6 and Segment Routing Headers.  This includes either:
+
+      A host originating an IPv6 packet.
+
+      An SR domain ingress router encapsulating a received IPv6 packet
+      into an outer IPv6 header followed by an SRH.
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 14]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   The mechanism through which a Segment List is derived is outside of
+   the scope of this document.  As an example, the Segment List may be
+   obtained through:
+
+      Local path computation.
+
+      Local configuration.
+
+      Interaction with a centralized controller delivering the path.
+
+      Any other mechanism.
+
+   The following are the steps of the creation of the SRH:
+
+      Next Header and Hdr Ext Len fields are set according to [RFC2460].
+
+      Routing Type field is set as TBD (to be allocated by IANA,
+      suggested value 4).
+
+      The Segment List is built with the FIRST segment of the path
+      encoded in the LAST element of the Segment List.  Subsequent
+      segments are encoded on top of the first segment.  Finally, the
+      LAST segment of the path is encoded in the FIRST element of the
+      Segment List.  In other words, the Segment List is encoded in the
+      reverse order of the path.
+
+      The final DA of the packet is encoded as the last segment of the
+      path (encoded in the first element of the Segment List).
+
+      The DA of the packet is set with the value of the first segment
+      (found in the last element of the segment list).
+
+      The Segments Left field is set to n-1 where n is the number of
+      elements in the Segment List.
+
+      The First Segment field is set to n-1 where n is the number of
+      elements in the Segment List.
+
+      The packet is sent out towards the first segment (i.e.:
+      represented in the packet DA).
+
+      HMAC TLV may be set according to Section 5.
+
+4.2.  Transit Node
+
+   According to [RFC2460], the only node who is allowed to inspect the
+   Routing Extension Header (and therefore the SRH), is the node
+   corresponding to the DA of the packet.  Any other transit node MUST
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 15]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   NOT inspect the underneath routing header and MUST forward the packet
+   towards the DA and according to the IPv6 routing table.
+
+   In the example case described in Section 2.2.2, when SR capable nodes
+   are connected through an overlay spanning multiple third-party
+   infrastructure, it is safe to send SRH packets (i.e.: packet having a
+   Segment Routing Header) between each other overlay/SR-capable nodes
+   as long as the segment list does not include any of the transit
+   provider nodes.  In addition, as a generic security measure, any
+   service provider will block any packet destined to one of its
+   internal routers, especially if these packets have an extension header
+   in them.
+
+4.3.  SR Segment Endpoint Node
+
+   The SR segment endpoint node is the node whose address is in the DA.
+   The segment endpoint node inspects the SRH and does:
+
+   1.   IF DA = myself (segment endpoint)
+   2.      IF Segments Left > 0 THEN
+              decrement Segments Left
+              update DA with Segment List[Segments Left]
+   3.      ELSE continue IPv6 processing of the packet
+                End of processing.
+   4.   Forward the packet out
+
+5.  Security Considerations
+
+   This section analyzes the security threat model, the security issues
+   and proposed solutions related to the new Segment Routing Header.
+
+   The Segment Routing Header (SRH) is simply another type of the
+   routing header as described in RFC 2460 [RFC2460] and is:
+
+   o  Added by an SR edge router when entering the segment routing
+      domain or by the originating host itself.  The source host can
+      even be outside the SR domain;
+
+   o  inspected and acted upon when reaching the destination address of
+      the IP header per RFC 2460 [RFC2460].
+
+   Per RFC2460 [RFC2460], routers on the path that simply forward an
+   IPv6 packet (i.e. the IPv6 destination address is none of theirs)
+   will never inspect and process the content of the SRH.  Routers whose
+   one interface IPv6 address equals the destination address field of
+   the IPv6 packet MUST parse the SRH and, if supported and if the local
+   configuration allows it, MUST act accordingly to the SRH content.
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 16]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   According to RFC2460 [RFC2460], the default behavior of a non SR-
+   capable router upon receipt of an IPv6 packet with SRH destined to one
+   of its addresses is to:
+
+   o  ignore the SRH completely if the Segment Left field is 0 and
+      proceed to process the next header in the IPv6 packet;
+
+   o  discard the IPv6 packet if Segment Left field is greater than 0,
+      it MAY send a Parameter Problem ICMP message back to the Source
+      Address.
+
+5.1.  Threat model
+
+5.1.1.  Source routing threats
+
+   Using an SRH is similar to source routing, therefore it has some
+   well-known security issues as described in RFC4942 [RFC4942] section
+   2.1.1 and RFC5095 [RFC5095]:
+
+   o  amplification attacks: where a packet could be forged in such a
+      way to cause looping among a set of SR-enabled routers causing
+      unnecessary traffic, hence a Denial of Service (DoS) against
+      bandwidth;
+
+   o  reflection attack: where a hacker could force an intermediate node
+      to appear as the immediate attacker, hence hiding the real
+      attacker from naive forensics;
+
+   o  bypass attack: where an intermediate node could be used as a
+      stepping stone (for example in a De-Militarized Zone) to attack
+      another host (for example in the datacenter or any back-end
+      server).
+
+5.1.2.  Applicability of RFC 5095 to SRH
+
+   First of all, the reader must remember this specific part of section
+   1 of RFC5095 [RFC5095], "A side effect is that this also eliminates
+   benign RH0 use-cases; however, such applications may be facilitated
+   by future Routing Header specifications.".  In short, it is not
+   forbidden to create a new secure type of Routing Header; for example,
+   RFC 6554 (RPL) [RFC6554] also creates a new Routing Header type for a
+   specific application confined in a single network.
+
+   In the segment routing architecture described in
+   [I-D.ietf-spring-segment-routing] there are basically two kinds of
+   nodes (routers and hosts):
+
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 17]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   o  nodes within the SR domain, which is within one single
+      administrative domain, i.e., where all nodes are trusted anyway,
+      or else the damage caused by those nodes could be worse than
+      amplification attacks: traffic interception, man-in-the-middle
+      attacks, more severe DoS by dropping packets, and so on.
+
+   o  nodes outside of the SR domain, which is outside of the
+      administrative segment routing domain hence they cannot be trusted
+      because there is no physical security for those nodes, i.e., they
+      can be replaced by hostile nodes or can be coerced into wrong
+      behaviors.
+
+   The main use case for SR consists of the single administrative domain
+   where only trusted nodes with SR enabled and configured participate
+   in SR: this is the same model as in RFC6554 [RFC6554].  All non-
+   trusted nodes do not participate as either SR processing is not
+   enabled by default or because they only process SRH from nodes within
+   their domain.
+
+   Moreover, all SR nodes ignore SRH created by outsiders based on
+   topology information (received on a peering or internal interface) or
+   on presence and validity of the HMAC field.  Therefore, if
+   intermediate nodes ONLY act on valid and authorized SRH (such as
+   within a single administrative domain), then there is no security
+   threat similar to RH-0.  Hence, the RFC 5095 [RFC5095] attacks are
+   not applicable.
+
+5.1.3.  Service stealing threat
+
+   Since segment routing is used for added-value services, there is also
+   a need to prevent non-participating nodes from using those services;
+   this is called 'service stealing prevention'.
+
+5.1.4.  Topology disclosure
+
+   The SRH may also contain IPv6 addresses of some intermediate SR-
+   nodes in the path towards the destination, this obviously reveals
+   those addresses to the potentially hostile attackers if those
+   attackers are able to intercept packets containing SRH.  On the other
+   hand, if the attacker can do a traceroute whose probes will be
+   forwarded along the SR path, then there is little learned by
+   intercepting the SRH itself.
+
+5.1.5.  ICMP Generation
+
+   Per section 4.4 of RFC2460 [RFC2460], when destination nodes (i.e.
+   where the destination address is one of theirs) receive a Routing
+   Header with unsupported Routing Type, the required behavior is:
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 18]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   o  If Segments Left is zero, the node must ignore the Routing header
+      and proceed to process the next header in the packet.
+
+   o  If Segments Left is non-zero, the node must discard the packet and
+      send an ICMP Parameter Problem, Code 0, message to the packet's
+      Source Address, pointing to the unrecognized Routing Type.
+
+   This required behavior could be used by an attacker to force the
+   generation of ICMP messages by any node.  The attacker could send
+   packets with SRH (with Segments Left set to a non-zero value)
+   destined to a node not supporting SRH.  Per RFC2460 [RFC2460], the
+   destination node could generate an ICMP message, causing local CPU
+   utilization and, if the source of the offending packet with SRH was
+   spoofed, could lead to a reflection attack without any amplification.
+
+   It must be noted that this is a required behavior for any unsupported
+   Routing Type and not limited to SRH packets.  So, it is not specific
+   to SRH and the usual rate limiting for ICMP generation is required
+   anyway for any IPv6 implementation and has been implemented and
+   deployed for many years.
+
+5.2.  Security fields in SRH
+
+   This section summarizes the use of specific fields in the SRH.  They
+   are based on a key-hashed message authentication code (HMAC).
+
+   The security-related fields in the SRH are instantiated by the HMAC
+   TLV, containing:
+
+   o  HMAC Key-id, 32 bits wide;
+
+   o  HMAC, 256 bits wide (optional, exists only if HMAC Key-id is not
+      0).
+
+   The HMAC field is the output of the HMAC computation (per RFC 2104
+   [RFC2104]) using a pre-shared key identified by HMAC Key-id and of
+   the text which consists of the concatenation of:
+
+   o  the source IPv6 address;
+
+   o  First Segment field;
+
+   o  an octet of bit flags;
+
+   o  HMAC Key-id;
+
+   o  all addresses in the Segment List.
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 19]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   The purpose of the HMAC TLV is to verify the validity, the integrity
+   and the authorization of the SRH itself.  If an outsider of the SR
+   domain does not have access to a current pre-shared secret, then it
+   cannot compute the right HMAC field and the first SR router on the
+   path processing the SRH and configured to check the validity of the
+   HMAC will simply reject the packet.
+
+   The HMAC TLV is located at the end of the SRH simply because only the
+   router on the ingress of the SR domain needs to process it, then all
+   other SR nodes can ignore it (based on local policy) because they
+   trust the upstream router.  This is to speed up forwarding operations
+   because SR routers which do not validate the SRH do not need to parse
+   the SRH until the end.
+
+   The HMAC Key-id field allows for the simultaneous existence of
+   several hash algorithms (SHA-256, SHA3-256 ... or future ones) as
+   well as pre-shared keys.  The HMAC Key-id field is opaque, i.e., it
+   has neither syntax nor semantic except as an index to the right
+   combination of pre-shared key and hash algorithm and except that a
+   value of 0 means that there is no HMAC field.  Having an HMAC Key-id
+   field allows for pre-shared key roll-over when two pre-shared keys
+   are supported for a while when all SR nodes converged to a fresher
+   pre-shared key.  It could also allow for interoperation among
+   different SR domains if allowed by local policy and assuming a
+   collision-free HMAC Key Id allocation.
+
+   When a specific SRH is linked to a time-related service (such as
+   turbo-QoS for a 1-hour period) where the DA, Segment ID (SID) are
+   identical, then it is important to refresh the shared-secret
+   frequently as the HMAC validity period expires only when the HMAC
+   Key-id and its associated shared-secret expires.
+
+5.2.1.  Selecting a hash algorithm
+
+   The HMAC field in the HMAC TLV is 256 bits wide.  Therefore, the HMAC
+   MUST be based on a hash function whose output is at least 256 bits.
+   If the output of the hash function is 256, then this output is simply
+   inserted in the HMAC field.  If the output of the hash function is
+   larger than 256 bits, then the output value is truncated to 256 by
+   taking the least-significant 256 bits and inserting them in the HMAC
+   field.
+
+   SRH implementations can support multiple hash functions but MUST
+   implement SHA-2 [FIPS180-4] in its SHA-256 variant.
+
+   NOTE: SHA-1 is currently used by some early implementations used for
+   quick interoperations testing, the 160-bit hash value must then be
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 20]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   right-hand padded with 96 bits set to 0.  The authors understand that
+   this is not secure but is ok for limited tests.
+
+5.2.2.  Performance impact of HMAC
+
+   While adding an HMAC to each and every SR packet increases the
+   security, it has a performance impact.  Nevertheless, it must be
+   noted that:
+
+   o  the HMAC field is used only when SRH is added by a device (such as
+      a home set-top box) which is outside of the segment routing domain.
+      If the SRH is added by a router in the trusted segment routing
+      domain, then, there is no need for an HMAC field, hence no
+      performance impact.
+
+   o  when present, the HMAC field MUST only be checked and validated by
+      the first router of the segment routing domain, this router is
+      named 'validating SR router'.  Downstream routers may not inspect
+      the HMAC field.
+
+   o  this validating router can also have a cache of <IPv6 header +
+      SRH, HMAC field value> to improve the performance.  It is not the
+      same use case as in IPsec where HMAC value was unique per packet,
+      in SRH, the HMAC value is unique per flow.
+
+   o  Last point, hash functions such as SHA-2 have been optimized for
+      security and performance and there are multiple implementations
+      with good performance.
+
+   With the above points in mind, the performance impact of using HMAC
+   is minimized.
+
+5.2.3.  Pre-shared key management
+
+   The field HMAC Key-id allows for:
+
+   o  key roll-over: when there is a need to change the key (the hash
+      pre-shared secret), then multiple pre-shared keys can be used
+      simultaneously.  The validating router can have a table of <HMAC
+      Key-id, pre-shared secret> for the currently active and future
+      keys.
+
+   o  different algorithms: by extending the previous table to <HMAC
+      Key-id, hash function, pre-shared secret>, the validating router
+      can also support several hash algorithms simultaneously (see
+      Section 5.2.1).
+
+   The pre-shared secret distribution can be done:
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 21]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   o  in the configuration of the validating routers, either by static
+      configuration or any SDN oriented approach;
+
+   o  dynamically using a trusted key distribution such as [RFC6407]
+
+   The intent of this document is NOT to define yet-another-key-
+   distribution-protocol.
+
+5.3.  Deployment Models
+
+5.3.1.  Nodes within the SR domain
+
+   An SR domain is defined as a set of interconnected routers where all
+   routers at the perimeter are configured to add and act on SRH.  Some
+   routers inside the SR domain can also act on SRH or simply forward
+   IPv6 packets.
+
+   The routers inside an SR domain can be trusted to generate SRH and to
+   process SRH received on interfaces that are part of the SR domain.
+   These nodes MUST drop all SRH packets received on an interface that
+   is not part of the SR domain and containing an SRH whose HMAC field
+   cannot be validated by local policies.  This obviously includes
+   packets with an SRH generated by a non-cooperative SR domain.
+
+   If the validation fails, then these packets MUST be dropped, ICMP
+   error messages (parameter problem) SHOULD be generated (but rate
+   limited) and SHOULD be logged.
+
+5.3.2.  Nodes outside of the SR domain
+
+   Nodes outside of the SR domain cannot be trusted for physical
+   security; hence, they need to request by some trusted means (outside
+   of the scope of this document) a complete SRH for each new connection
+   (i.e. new destination address).  The received SRH MUST include an
+   HMAC TLV which is computed correctly (see Section 5.2).
+
+   When an outside node sends a packet with an SRH and towards an SR
+   domain ingress node, the packet MUST contain the HMAC TLV (with a
+   Key-id and HMAC fields) and the destination address MUST be an
+   address of an SR domain ingress node.
+
+   The ingress SR router, i.e., the router with an interface address
+   equal to the destination address, MUST verify the HMAC TLV.
+
+   If the validation is successful, then the packet is simply forwarded
+   as usual for an SR packet.  As long as the packet travels within the
+   SR domain, no further HMAC check needs to be done.  Subsequent
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 22]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   routers in the SR domain MAY verify the HMAC TLV when they process
+   the SRH (i.e. when they are the destination).
+
+   If the validation fails, then this packet MUST be dropped, an ICMP
+   error message (parameter problem) SHOULD be generated (but rate
+   limited) and SHOULD be logged.
+
+5.3.3.  SR path exposure
+
+   As the intermediate SR node addresses appear in the SRH, if this
+   SRH is visible to an outsider then he/she could reuse this knowledge
+   to launch an attack on the intermediate SR nodes or get some insider
+   knowledge on the topology.  This is especially applicable when the
+   path between the source node and the first SR domain ingress router
+   is on the public Internet.
+
+   The first remark is to state that 'security by obscurity' is never
+   enough; in other words, the security policy of the SR domain MUST
+   assume that the internal topology and addressing is known by the
+   attacker.  A simple traceroute will also give the same information
+   (with even more information as all intermediate nodes between SIDs
+   will also be exposed).  IPsec Encapsulating Security Payload
+   [RFC4303] cannot be used to protect the SRH as per RFC4303 the ESP
+   header must appear after any routing header (including SRH).
+
+   To prevent a user from leveraging knowledge gained by intercepting
+   SRH, it is recommended to apply an infrastructure Access Control List
+   (iACL) at the edge of the SR domain.  This iACL will drop all packets
+   from outside the SR-domain whose destination is any address of any
+   router inside the domain.  This security policy should be tuned for
+   local operations.
+
+5.3.4.  Impact of BCP-38
+
+   BCP-38 [RFC2827], also known as "Network Ingress Filtering", checks
+   whether the source address of packets received on an interface is
+   valid for this interface.  The use of loose source routing such as
+   SRH forces packets to follow a path which differs from the expected
+   routing.  Therefore, if BCP-38 was implemented in all routers inside
+   the SR domain, then SR packets could be received by an interface
+   which is not the expected one and the packets could be dropped.
+
+   As an SR domain is usually a subset of one administrative domain, and
+   as BCP-38 is only deployed at the ingress routers of this
+   administrative domain and as packets arriving at those ingress
+   routers have been normally forwarded using the normal routing
+   information, then there is no reason why this ingress router should
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 23]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   drop the SRH packet based on BCP-38.  Routers inside the domain
+   commonly do not apply BCP-38; so, this is not a problem.
+
+6.  IANA Considerations
+
+   This document makes the following registrations in the Internet
+   Protocol Version 6 (IPv6) Parameters "Routing Type" registry
+   maintained by IANA:
+
+   Suggested            Description             Reference
+     Value
+   ----------------------------------------------------------
+      4         Segment Routing Header (SRH)    This document
+
+   In addition, this document requests IANA to create and maintain a new
+   Registry: "Segment Routing Header Type-Value Objects".  The following
+   code-points are requested from the registry:
+
+   Registry: Segment Routing Header Type-Value Objects
+
+   Suggested         Description            Reference
+     Value
+   -----------------------------------------------------
+      1         Ingress Node TLV          This document
+      2         Egress Node  TLV          This document
+      3         Opaque Container TLV      This document
+      4         Padding TLV               This document
+      5         HMAC TLV                  This document
+
+7.  Manageability Considerations
+
+   TBD
+
+8.  Contributors
+
+   Dave Barach, John Leddy, John Brzozowski, Pierre Francois, Nagendra
+   Kumar, Mark Townsley, Christian Martin, Roberta Maglione, James
+   Connolly, Aloys Augustin contributed to the content of this document.
+
+9.  Acknowledgements
+
+   The authors would like to thank Ole Troan, Bob Hinden, Fred Baker,
+   Brian Carpenter, Alexandru Petrescu and Punit Kumar Jaiswal for their
+   comments to this document.
+
+
+
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 24]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+10.  References
+
+10.1.  Normative References
+
+   [FIPS180-4]
+              National Institute of Standards and Technology, "FIPS
+              180-4 Secure Hash Standard (SHS)", March 2012,
+              <http://csrc.nist.gov/publications/fips/fips180-4/
+              fips-180-4.pdf>.
+
+   [RFC2119]  Bradner, S., "Key words for use in RFCs to Indicate
+              Requirement Levels", BCP 14, RFC 2119,
+              DOI 10.17487/RFC2119, March 1997,
+              <http://www.rfc-editor.org/info/rfc2119>.
+
+   [RFC2460]  Deering, S. and R. Hinden, "Internet Protocol, Version 6
+              (IPv6) Specification", RFC 2460, DOI 10.17487/RFC2460,
+              December 1998, <http://www.rfc-editor.org/info/rfc2460>.
+
+   [RFC4303]  Kent, S., "IP Encapsulating Security Payload (ESP)",
+              RFC 4303, DOI 10.17487/RFC4303, December 2005,
+              <http://www.rfc-editor.org/info/rfc4303>.
+
+   [RFC5095]  Abley, J., Savola, P., and G. Neville-Neil, "Deprecation
+              of Type 0 Routing Headers in IPv6", RFC 5095,
+              DOI 10.17487/RFC5095, December 2007,
+              <http://www.rfc-editor.org/info/rfc5095>.
+
+   [RFC6407]  Weis, B., Rowles, S., and T. Hardjono, "The Group Domain
+              of Interpretation", RFC 6407, DOI 10.17487/RFC6407,
+              October 2011, <http://www.rfc-editor.org/info/rfc6407>.
+
+10.2.  Informative References
+
+   [I-D.ietf-isis-segment-routing-extensions]
+              Previdi, S., Filsfils, C., Bashandy, A., Gredler, H.,
+              Litkowski, S., Decraene, B., and J. Tantsura,
+              "IS-IS Extensions for Segment Routing", draft-ietf-isis-
+              segment-routing-extensions-09 (work in progress), October
+              2016.
+
+   [I-D.ietf-ospf-ospfv3-segment-routing-extensions]
+              Psenak, P., Previdi, S., Filsfils, C., Gredler, H.,
+              Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3
+              Extensions for Segment Routing", draft-ietf-ospf-ospfv3-
+              segment-routing-extensions-07 (work in progress), October
+              2016.
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 25]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   [I-D.ietf-spring-ipv6-use-cases]
+              Brzozowski, J., Leddy, J., Townsley, W., Filsfils, C., and
+              R. Maglione, "IPv6 SPRING Use Cases", draft-ietf-spring-
+              ipv6-use-cases-08 (work in progress), January 2017.
+
+   [I-D.ietf-spring-resiliency-use-cases]
+              Filsfils, C., Previdi, S., Decraene, B., and R. Shakir,
+              "Resiliency use cases in SPRING networks", draft-ietf-
+              spring-resiliency-use-cases-08 (work in progress), October
+              2016.
+
+   [I-D.ietf-spring-segment-routing]
+              Filsfils, C., Previdi, S., Decraene, B., Litkowski, S.,
+              and R. Shakir, "Segment Routing Architecture", draft-ietf-
+              spring-segment-routing-10 (work in progress), November
+              2016.
+
+   [I-D.ietf-spring-segment-routing-mpls]
+              Filsfils, C., Previdi, S., Bashandy, A., Decraene, B.,
+              Litkowski, S., Horneffer, M., Shakir, R.,
+              Tantsura, J., and E. Crabbe, "Segment Routing
+              with MPLS data plane", draft-ietf-spring-segment-routing-
+              mpls-06 (work in progress), January 2017.
+
+   [RFC1940]  Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D.
+              Zappala, "Source Demand Routing: Packet Format and
+              Forwarding Specification (Version 1)", RFC 1940,
+              DOI 10.17487/RFC1940, May 1996,
+              <http://www.rfc-editor.org/info/rfc1940>.
+
+   [RFC2104]  Krawczyk, H., Bellare, M., and R. Canetti, "HMAC: Keyed-
+              Hashing for Message Authentication", RFC 2104,
+              DOI 10.17487/RFC2104, February 1997,
+              <http://www.rfc-editor.org/info/rfc2104>.
+
+   [RFC2827]  Ferguson, P. and D. Senie, "Network Ingress Filtering:
+              Defeating Denial of Service Attacks which employ IP Source
+              Address Spoofing", BCP 38, RFC 2827, DOI 10.17487/RFC2827,
+              May 2000, <http://www.rfc-editor.org/info/rfc2827>.
+
+   [RFC4942]  Davies, E., Krishnan, S., and P. Savola, "IPv6 Transition/
+              Co-existence Security Considerations", RFC 4942,
+              DOI 10.17487/RFC4942, September 2007,
+              <http://www.rfc-editor.org/info/rfc4942>.
+
+
+
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 26]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   [RFC6554]  Hui, J., Vasseur, JP., Culler, D., and V. Manral, "An IPv6
+              Routing Header for Source Routes with the Routing Protocol
+              for Low-Power and Lossy Networks (RPL)", RFC 6554,
+              DOI 10.17487/RFC6554, March 2012,
+              <http://www.rfc-editor.org/info/rfc6554>.
+
+   [RFC7855]  Previdi, S., Ed., Filsfils, C., Ed., Decraene, B.,
+              Litkowski, S., Horneffer, M., and R. Shakir, "Source
+              Packet Routing in Networking (SPRING) Problem Statement
+              and Requirements", RFC 7855, DOI 10.17487/RFC7855, May
+              2016, <http://www.rfc-editor.org/info/rfc7855>.
+
+Authors' Addresses
+
+   Stefano Previdi (editor)
+   Cisco Systems, Inc.
+   Via Del Serafico, 200
+   Rome  00142
+   Italy
+
+   Email: sprevidi@cisco.com
+
+
+   Clarence Filsfils
+   Cisco Systems, Inc.
+   Brussels
+   BE
+
+   Email: cfilsfil@cisco.com
+
+
+   Brian Field
+   Comcast
+   4100 East Dry Creek Road
+   Centennial, CO  80122
+   US
+
+   Email: Brian_Field@cable.comcast.com
+
+
+   Ida Leung
+   Rogers Communications
+   8200 Dixie Road
+   Brampton, ON  L6T 0C1
+   CA
+
+   Email: Ida.Leung@rci.rogers.com
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 27]
+ 
+Internet-Draft      IPv6 Segment Routing Header (SRH)      February 2017
+
+
+   Jen Linkova
+   Google
+   1600 Amphitheatre Parkway
+   Mountain View, CA 94043
+   US
+
+   Email: furry@google.com
+
+
+   Ebben Aries
+   Facebook
+   US
+
+   Email: exa@fb.com
+
+
+   Tomoya Kosugi
+   NTT
+   3-9-11, Midori-Cho Musashino-Shi,
+   Tokyo  180-8585
+   JP
+
+   Email: kosugi.tomoya@lab.ntt.co.jp
+
+
+   Eric Vyncke
+   Cisco Systems, Inc.
+   De Kleetlaan 6A
+   Diegem  1831
+   Belgium
+
+   Email: evyncke@cisco.com
+
+
+   David Lebrun
+   Universite Catholique de Louvain
+   Place Ste Barbe, 2
+   Louvain-la-Neuve, 1348
+   Belgium
+
+   Email: david.lebrun@uclouvain.be
+
+
+
+
+
+
+
+
+
+
+Previdi, et al.          Expires August 5, 2017                [Page 28]
\ No newline at end of file
diff --git a/src/vnet/srv6/sr.api b/src/vnet/srv6/sr.api
new file mode 100644
index 00000000..9e900741
--- /dev/null
+++ b/src/vnet/srv6/sr.api
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief IPv6 SR LocalSID add/del request
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_del Boolean of whether it is a delete instruction
+    @param localsid_addr IPv6 address of the localsid
+    @param end_psp Boolean of whether decapsulation is allowed in this function
+    @param behavior Type of behavior (function) for this localsid
+    @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table.
+    @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
+    @param fib_table  FIB table in which we should install the localsid entry
+    @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
+*/
+autoreply define sr_localsid_add_del
+{
+  u32 client_index;
+  u32 context;
+  u8 is_del;
+  u8 localsid_addr[16];
+  u8 end_psp;
+  u8 behavior;
+  u32 sw_if_index;
+  u32 vlan_index;
+  u32 fib_table;
+  u8 nh_addr[16];
+};
+
+/** \brief IPv6 SR policy add
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param bsid_addr is the bindingSID of the SR Policy
+    @param weight is the weight of the sid list. optional.
+    @param is_encap is the behavior of the SR policy. (0.SRH insert // 1.Encapsulation)
+    @param type is the type of the SR policy. (0.Default // 1.Spray)
+    @param fib_table is the VRF where to install the FIB entry for the BSID
+    @param segments is a vector of IPv6 addresses (presumably n_segments of them - TODO confirm) composing the segment list
+*/
+autoreply define sr_policy_add
+{
+  u32 client_index;
+  u32 context;
+  u8 bsid_addr[16];
+  u32 weight;
+  u8 is_encap;
+  u8 type;
+  u32 fib_table;
+  u8 n_segments;
+  u8 segments[0];
+};
+
+/** \brief IPv6 SR policy modification
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param bsid_addr is the bindingSID of the SR Policy
+    @param sr_policy_index is the index of the SR policy
+    @param fib_table is the VRF where to install the FIB entry for the BSID
+    @param operation is the operation to perform (among the top ones)
+    @param segments is a vector of IPv6 addresses composing the segment list
+    @param sl_index is the index of the Segment List to modify/delete
+    @param weight is the weight of the sid list. optional.
+    @param n_segments presumably the number of IPv6 addresses in segments - TODO confirm
+*/
+autoreply define sr_policy_mod
+{
+  u32 client_index;
+  u32 context;
+  u8 bsid_addr[16];
+  u32 sr_policy_index;
+  u32 fib_table;
+  u8 operation;
+  u32 sl_index;
+  u32 weight;
+  u8 n_segments;
+  u8 segments[0];
+};
+
+/** \brief IPv6 SR policy deletion
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param bsid_addr is the bindingSID of the SR Policy
+    @param sr_policy_index is the index of the SR policy
+*/
+autoreply define sr_policy_del
+{
+  u32 client_index;
+  u32 context;
+  u8 bsid_addr[16];
+  u32 sr_policy_index;
+};
+
+/** \brief IPv6 SR steering add/del
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_del presumably a Boolean of whether it is a delete instruction - TODO confirm
+    @param bsid_addr is the bindingSID of the SR Policy (alt to sr_policy_index)
+    @param sr_policy_index is the index of the SR Policy (alt to bsid_addr)
+    @param table_id is the VRF where to install the FIB entry for the BSID
+    @param prefix_addr is the IPv4/v6 address for L3 traffic type
+    @param mask_width is the mask for L3 traffic type
+    @param sw_if_index is the incoming interface for L2 traffic
+    @param traffic_type describes the type of traffic
+*/
+autoreply define sr_steering_add_del
+{
+  u32 client_index;
+  u32 context;
+  u8 is_del;
+  u8 bsid_addr[16];
+  u32 sr_policy_index;
+  u32 table_id;
+  u8 prefix_addr[16];
+  u32 mask_width;
+  u32 sw_if_index;
+  u8 traffic_type;
+};
+
+/** \brief Dump the list of SR LocalSIDs
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+/**define sr_localsids_dump
+{
+  u32 client_index;
+  u32 context;
+};*/
+
+/** \brief Details about a single SR LocalSID
+    @param context - returned sender context, to match reply w/ request
+    @param localsid_addr IPv6 address of the localsid
+    @param behavior Type of behavior (function) for this localsid
+    @param end_psp Boolean of whether decapsulation is allowed in this function
+    @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table.
+    @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
+    @param fib_table  FIB table in which we should install the localsid entry
+    @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
+*/
+/**manual_endian define sr_localsid_details
+{
+  u32 context;
+  u8 localsid_addr[16];
+  u8 behavior;
+  u8 end_psp;
+  u32 sw_if_index;
+  u32 vlan_index;
+  u32 fib_table;
+  u8 nh_addr[16];
+};*/
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/srv6/sr.c b/src/vnet/srv6/sr.c
new file mode 100755
index 00000000..eb4f09e7
--- /dev/null
+++ b/src/vnet/srv6/sr.c
@@ -0,0 +1,57 @@
+/*
+ * sr.c: ipv6 segment routing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing initialization
+ *
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+ip6_sr_main_t sr_main;		/* global SR state; declared extern in sr.h */
+
+/**
+ * @brief No-op DPO lock function: @p dpo is ignored and nothing is counted.
+ * The lifetime of the SR entry is managed by the control plane
+ */
+void
+sr_dpo_lock (dpo_id_t * dpo)
+{
+}
+
+/**
+ * @brief No-op DPO unlock function: @p dpo is ignored and nothing is released.
+ * The lifetime of the SR entry is managed by the control plane
+ */
+void
+sr_dpo_unlock (dpo_id_t * dpo)
+{
+}
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/sr.h b/src/vnet/srv6/sr.h
new file mode 100755
index 00000000..2014a23e
--- /dev/null
+++ b/src/vnet/srv6/sr.h
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing data structures definitions
+ *
+ */
+
+#ifndef included_vnet_srv6_h
+#define included_vnet_srv6_h
+
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#define IPv6_DEFAULT_HEADER_LENGTH 40
+#define IPv6_DEFAULT_HOP_LIMIT 64
+#define IPv6_DEFAULT_MAX_MASK_WIDTH 128
+
+#define SR_BEHAVIOR_END 1
+#define SR_BEHAVIOR_X 2
+#define SR_BEHAVIOR_D_FIRST 3	/* Unused. Separator between the regular and the D* behaviors */
+#define SR_BEHAVIOR_DX2 4
+#define SR_BEHAVIOR_DX6 5
+#define SR_BEHAVIOR_DX4 6
+#define SR_BEHAVIOR_DT6 7
+#define SR_BEHAVIOR_DT4 8
+#define SR_BEHAVIOR_LAST 9	/* Must always be the last one */
+
+#define SR_STEER_L2 2		/* SR_STEER_*: presumably traffic_type values - TODO confirm */
+#define SR_STEER_IPV4 4
+#define SR_STEER_IPV6 6
+
+#define SR_FUNCTION_SIZE 4
+#define SR_ARGUMENT_SIZE 4
+
+#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1
+
+/**
+ * @brief SR Segment List (SID list)
+ */
+typedef struct
+{
+  ip6_address_t *segments;		/**< SIDs (key) */
+
+  u32 weight;						/**< SID list weight (wECMP / UCMP) */
+
+  u8 *rewrite;					/**< Precomputed rewrite header */
+  u8 *rewrite_bsid;				/**< Precomputed rewrite header for bindingSID */
+
+  dpo_id_t bsid_dpo;				/**< DPO for Encaps/Insert for BSID */
+  dpo_id_t ip6_dpo;				/**< DPO for Encaps/Insert IPv6 */
+  dpo_id_t ip4_dpo;				/**< DPO for Encaps IPv4 */
+} ip6_sr_sl_t;
+
+/* SR policy types */
+#define SR_POLICY_TYPE_DEFAULT 0
+#define SR_POLICY_TYPE_SPRAY 1
+/**
+ * @brief SR Policy
+ */
+typedef struct
+{
+  u32 *segments_lists;		/**< SID lists indexes (vector) */
+
+  ip6_address_t bsid;			/**< BindingSID (key) */
+
+  u8 type;					/**< Type, SR_POLICY_TYPE_* (default is 0) */
+  /* SR Policy specific DPO                                       */
+  /* IF Type = DEFAULT Then Load Balancer DPO among SID lists     */
+  /* IF Type = SPRAY then Spray DPO with all SID lists            */
+  dpo_id_t bsid_dpo;			/**< SR Policy specific DPO - BSID */
+  dpo_id_t ip4_dpo;			/**< SR Policy specific DPO - IPv4 */
+  dpo_id_t ip6_dpo;			/**< SR Policy specific DPO - IPv6 */
+
+  u32 fib_table;			/**< FIB table */
+
+  u8 is_encap;				/**< Mode (0 is SRH insert, 1 Encaps) */
+} ip6_sr_policy_t;
+
+/**
+ * @brief SR LocalSID
+ */
+typedef struct
+{
+  ip6_address_t localsid;		/**< LocalSID IPv6 address */
+
+  char end_psp;					/**< Combined with End.PSP? */
+
+  u16 behavior;					/**< Behavior associated with this localsid */
+
+  union
+  {
+    u32 sw_if_index;				/**< xconnect only */
+    u32 vrf_index;				/**< vrf only */
+  };
+
+  u32 fib_table;				/**< FIB table where localsid is registered */
+
+  u32 vlan_index;				/**< VLAN tag (not an index) */
+
+  ip46_address_t next_hop;		/**< Next_hop for xconnect usage only */
+
+  u32 nh_adj;						/**< Next_adj for xconnect usage only */
+
+  void *plugin_mem;				/**< Memory to be used by the plugin callback functions */
+} ip6_sr_localsid_t;
+
+typedef int (sr_plugin_callback_t) (ip6_sr_localsid_t * localsid);
+
+/**
+ * @brief SR LocalSID behavior registration
+ */
+typedef struct
+{
+  u16 sr_localsid_function_number;			/**< SR LocalSID plugin function (>SR_BEHAVIOR_LAST) */
+
+  u8 *function_name;							/**< Function name. (key). */
+
+  u8 *keyword_str;							/**< Behavior keyword (e.g. End.X) */
+
+  u8 *def_str;								/**< Behavior definition (e.g. Endpoint with cross-connect) */
+
+  u8 *params_str;							/**< Behavior parameters (e.g. <oif> <IP46next_hop>) */
+
+  dpo_type_t dpo;							/**< DPO type registration */
+
+  format_function_t *ls_format;				/**< LocalSID format function */
+
+  unformat_function_t *ls_unformat;			/**< LocalSID unformat function */
+
+  sr_plugin_callback_t *creation;			/**< Function within plugin that will be called after localsid creation */
+
+  sr_plugin_callback_t *removal;			/**< Function within plugin that will be called before localsid removal */
+} sr_localsid_fn_registration_t;
+
+/**
+ * @brief Steering db key
+ *
+ * L3 is IPv4/IPv6 prefix + mask width + FIB table
+ * L2 is sw_if_index
+ */
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      ip46_address_t prefix;			/**< IP address of the prefix */
+      u32 mask_width;					/**< Mask width of the prefix */
+      u32 fib_table;					/**< VRF of the prefix */
+    } l3;
+    struct
+    {
+      u32 sw_if_index;					/**< Incoming software interface */
+    } l2;
+  };
+  u8 traffic_type;					/**< Traffic type (IPv4, IPv6, L2) */
+  u8 padding[3];
+} sr_steering_key_t;
+
+typedef struct
+{
+  sr_steering_key_t classify;		/**< Traffic classification (steering key) */
+  u32 sr_policy;					/**< SR Policy index (presumably into sr_policies - TODO confirm) */
+} ip6_sr_steering_policy_t;
+
+/**
+ * @brief Segment Routing main data structure (instantiated as sr_main)
+ */
+typedef struct
+{
+  /* L2-input -> SR rewrite next index */
+  u32 l2_sr_policy_rewrite_index;
+
+  /* SR SID lists */
+  ip6_sr_sl_t *sid_lists;
+
+  /* SRv6 policies */
+  ip6_sr_policy_t *sr_policies;
+
+  /* Hash table mapping BindingSID to SRv6 policy */
+  mhash_t sr_policies_index_hash;
+
+  /* Pool of SR localsid instances */
+  ip6_sr_localsid_t *localsids;
+
+  /* Hash table mapping LOC:FUNC to SR LocalSID instance */
+  mhash_t sr_localsids_index_hash;
+
+  /* Pool of SR steer policies instances */
+  ip6_sr_steering_policy_t *steer_policies;
+
+  /* Hash table mapping steering rules to SR steer instance */
+  mhash_t sr_steer_policies_hash;
+
+  /* L2 steering ifaces - sr_policies */
+  u32 *sw_iface_sr_policies;
+
+  /* Spray DPO */
+  dpo_type_t sr_pr_spray_dpo_type;
+
+  /* Plugin functions */
+  sr_localsid_fn_registration_t *plugin_functions;
+
+  /* Find plugin function by name */
+  uword *plugin_functions_by_key;
+
+  /* Counters */
+  vlib_combined_counter_main_t sr_ls_valid_counters;
+  vlib_combined_counter_main_t sr_ls_invalid_counters;
+
+  /* SR Policies FIBs */
+  u32 fib_table_ip6;
+  u32 fib_table_ip4;
+
+  /* convenience */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+} ip6_sr_main_t;
+
+extern ip6_sr_main_t sr_main;
+
+extern vlib_node_registration_t sr_policy_rewrite_encaps_node;
+extern vlib_node_registration_t sr_policy_rewrite_insert_node;
+extern vlib_node_registration_t sr_localsid_node;
+extern vlib_node_registration_t sr_localsid_d_node;
+
+extern void sr_dpo_lock (dpo_id_t * dpo);
+extern void sr_dpo_unlock (dpo_id_t * dpo);
+
+extern int
+sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name,
+			       u8 * keyword_str, u8 * def_str,
+			       u8 * params_str, dpo_type_t * dpo,
+			       format_function_t * ls_format,
+			       unformat_function_t * ls_unformat,
+			       sr_plugin_callback_t * creation_fn,
+			       sr_plugin_callback_t * removal_fn);
+
+extern int
+sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
+	       u32 weight, u8 behavior, u32 fib_table, u8 is_encap);
+extern int
+sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
+	       u8 operation, ip6_address_t * segments, u32 sl_index,
+	       u32 weight);
+extern int sr_policy_del (ip6_address_t * bsid, u32 index);
+
+extern int
+sr_cli_localsid (char is_del, ip6_address_t * localsid_addr,
+		 char end_psp, u8 behavior, u32 sw_if_index,
+		 u32 vlan_index, u32 fib_table, ip46_address_t * nh_addr,
+		 void *ls_plugin_mem);
+
+extern int
+sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index,
+		    u32 table_id, ip46_address_t * prefix, u32 mask_width,
+		    u32 sw_if_index, u8 traffic_type);
+
+/**
+ * @brief Build the precomputed rewrite string for SRH insertion (inline)
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return precomputed rewrite string for SRH insertion
+ */
+static inline u8 *
+ip6_sr_compute_rewrite_string_insert (ip6_address_t * sl)
+{
+  ip6_sr_header_t *srh;
+  ip6_address_t *src, *dst;
+  u8 *rs = NULL;
+  u32 n_sids = vec_len (sl);
+  /* One slot more than the SID list; segments[0] is never written here */
+  u32 header_length =
+    sizeof (ip6_sr_header_t) + (n_sids + 1) * sizeof (ip6_address_t);
+
+  vec_validate (rs, header_length - 1);
+
+  srh = (ip6_sr_header_t *) rs;
+  srh->type = ROUTING_HEADER_TYPE_SR;
+  srh->segments_left = n_sids;
+  srh->first_segment = n_sids;
+  /* length is in 8-octet units and does not count the first 8 octets */
+  srh->length = (header_length / 8) - 1;
+  srh->flags = 0x00;
+  srh->reserved = 0x0000;
+  /* Copy the SIDs in reverse order: sl[0] lands in the last slot */
+  dst = srh->segments + n_sids;
+  vec_foreach (src, sl)
+  {
+    clib_memcpy (dst->as_u8, src->as_u8, sizeof (ip6_address_t));
+    dst--;
+  }
+  return rs;
+}
+
+
+#endif /* included_vnet_srv6_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c
new file mode 100644
index 00000000..925b50a1
--- /dev/null
+++ b/src/vnet/srv6/sr_api.c
@@ -0,0 +1,244 @@
+/*
+ *------------------------------------------------------------------
+ * sr_api.c - ipv6 segment routing api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs		/* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun		/* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg                             \
+_(SR_LOCALSID_ADD_DEL, sr_localsid_add_del)             \
+_(SR_POLICY_DEL, sr_policy_del)                         \
+_(SR_STEERING_ADD_DEL, sr_steering_add_del)
+/* Disabled entries: SR_LOCALSIDS (sr_localsids_dump), SR_LOCALSID_BEHAVIORS
+ * (sr_localsid_behaviors_dump). SR_POLICY_ADD/MOD: see sr_api_hookup (). */
+
+static void vl_api_sr_localsid_add_del_t_handler
+  (vl_api_sr_localsid_add_del_t * mp)
+{
+  vl_api_sr_localsid_add_del_reply_t *rmp;
+  int rv = 0;
+/*
+ * Forward the request to sr_cli_localsid (). u32 fields go through ntohl ();
+ * ls_plugin_mem is always NULL here. NOTE(review): sr_cli_localsid ()
+ * returns 1 after a successful delete -- confirm how rv reaches the client.
+ */
+  rv = sr_cli_localsid (mp->is_del,
+			(ip6_address_t *) & mp->localsid_addr,
+			mp->end_psp,
+			mp->behavior,
+			ntohl (mp->sw_if_index),
+			ntohl (mp->vlan_index),
+			ntohl (mp->fib_table),
+			(ip46_address_t *) & mp->nh_addr, NULL);
+
+  REPLY_MACRO (VL_API_SR_LOCALSID_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_sr_policy_add_t_handler (vl_api_sr_policy_add_t * mp)
+{
+  vl_api_sr_policy_add_reply_t *rmp;
+  ip6_address_t *segments = 0, *seg;
+  ip6_address_t *msg_segments = (ip6_address_t *) mp->segments;
+  int rv = 0;
+  int i;
+
+  /* API handler for sr_policy_add: stage the mp->n_segments addresses
+   * carried in mp->segments into a temporary vector for sr_policy_add () */
+  for (i = 0; i < mp->n_segments; i++)
+    {
+      vec_add2 (segments, seg, 1);
+      clib_memcpy (seg->as_u8, msg_segments[i].as_u8, sizeof (*seg));
+    }
+
+  rv = sr_policy_add ((ip6_address_t *) & mp->bsid_addr,
+		      segments,
+		      ntohl (mp->weight),
+		      mp->type, ntohl (mp->fib_table), mp->is_encap);
+
+  /* Staging copy only: free it so it does not leak on every request.
+   * NOTE(review): relies on sr_policy_add () not keeping the pointer. */
+  vec_free (segments);
+  REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY);
+}
+
+static void
+vl_api_sr_policy_mod_t_handler (vl_api_sr_policy_mod_t * mp)
+{
+  vl_api_sr_policy_mod_reply_t *rmp;
+  ip6_address_t *segments = 0, *seg;
+  ip6_address_t *msg_segments = (ip6_address_t *) mp->segments;
+  int rv = 0;
+  int i;
+
+  /* API handler for sr_policy_mod: stage the mp->n_segments addresses
+   * carried in mp->segments into a temporary vector for sr_policy_mod () */
+  for (i = 0; i < mp->n_segments; i++)
+    {
+      vec_add2 (segments, seg, 1);
+      clib_memcpy (seg->as_u8, msg_segments[i].as_u8, sizeof (*seg));
+    }
+
+  /* Arguments follow the sr_policy_mod () prototype (no is_encap argument);
+   * the u32 message fields are converted with ntohl () */
+  rv = sr_policy_mod ((ip6_address_t *) & mp->bsid_addr,
+		      ntohl (mp->sr_policy_index),
+		      ntohl (mp->fib_table),
+		      mp->operation,
+		      segments, ntohl (mp->sl_index), ntohl (mp->weight));
+
+  /* Staging copy only: free it so it does not leak on every request.
+   * NOTE(review): relies on sr_policy_mod () not keeping the pointer. */
+  vec_free (segments);
+
+  REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY);
+}
+
+static void
+vl_api_sr_policy_del_t_handler (vl_api_sr_policy_del_t * mp)
+{
+  vl_api_sr_policy_del_reply_t *rmp;
+  int rv = 0;
+/*
+ * API handler for sr_policy_del: calls sr_policy_del (bsid, index) with
+ * the index converted by ntohl (), then replies through REPLY_MACRO.
+ */
+  rv = sr_policy_del ((ip6_address_t *) & mp->bsid_addr,
+		      ntohl (mp->sr_policy_index));
+
+  REPLY_MACRO (VL_API_SR_POLICY_DEL_REPLY);
+}
+
+static void vl_api_sr_steering_add_del_t_handler
+  (vl_api_sr_steering_add_del_t * mp)
+{
+  vl_api_sr_steering_add_del_reply_t *rmp;
+  int rv = 0;
+/*
+ * API handler for sr_steering_add_del: forwards to sr_steering_policy ()
+ * in prototype order (is_del, bsid, sr_policy_index, table_id, prefix,
+ * mask_width, sw_if_index, traffic_type). The u32 message fields are
+ * converted with ntohl (); the u8 fields are passed unchanged.
+ */
+  rv = sr_steering_policy (mp->is_del,
+			   (ip6_address_t *) & mp->bsid_addr,
+			   ntohl (mp->sr_policy_index),
+			   ntohl (mp->table_id),
+			   (ip46_address_t *) & mp->prefix_addr,
+			   ntohl (mp->mask_width),
+			   ntohl (mp->sw_if_index), mp->traffic_type);
+
+  REPLY_MACRO (VL_API_SR_STEERING_ADD_DEL_REPLY);
+}
+
+/*
+ * sr_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+  foreach_vl_msg_name_crc_sr;	/* one "<name>_<crc>" -> id entry per SR msg */
+#undef _
+}
+
+static clib_error_t *
+sr_api_hookup (vlib_main_t * vm)
+{
+  api_main_t *am = &api_main;
+
+#define _(N,n)                                                  \
+    vl_msg_api_set_handlers(VL_API_##N, #n,                     \
+                           vl_api_##n##_t_handler,              \
+                           vl_noop_handler,                     \
+                           vl_api_##n##_t_endian,               \
+                           vl_api_##n##_t_print,                \
+                           sizeof(vl_api_##n##_t), 1);
+  foreach_vpe_api_msg;
+#undef _
+
+  /*
+   * Manually register the sr policy add msg (256 instead of sizeof), so we
+   * trace enough bytes to capture a typical segment list
+   */
+  vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD,
+			   "sr_policy_add",
+			   vl_api_sr_policy_add_t_handler,
+			   vl_noop_handler,
+			   vl_api_sr_policy_add_t_endian,
+			   vl_api_sr_policy_add_t_print, 256, 1);
+
+  /*
+   * Manually register the sr policy mod msg (256 instead of sizeof), so we
+   * trace enough bytes to capture a typical segment list
+   */
+  vl_msg_api_set_handlers (VL_API_SR_POLICY_MOD,
+			   "sr_policy_mod",
+			   vl_api_sr_policy_mod_t_handler,
+			   vl_noop_handler,
+			   vl_api_sr_policy_mod_t_endian,
+			   vl_api_sr_policy_mod_t_print, 256, 1);
+
+  /*
+   * Set up the (msg_name, crc, message-id) table
+   */
+  setup_message_id_table (am);
+
+  return 0;			/* this function always reports success */
+}
+
+VLIB_API_INIT_FUNCTION (sr_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/srv6/sr_doc.md b/src/vnet/srv6/sr_doc.md
new file mode 100644
index 00000000..5cdfc906
--- /dev/null
+++ b/src/vnet/srv6/sr_doc.md
@@ -0,0 +1,55 @@
+# SRv6: Segment Routing for IPv6    {#srv6_doc}
+
+This is a memo intended to contain documentation of the VPP SRv6 implementation.
+Everything that is not directly obvious should come here.
+For any feedback on content that should be explained please mailto:pcamaril@cisco.com
+
+## Segment Routing
+
+Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application engineered routing as it prepares the networks for new business models where applications can control the network behavior.
+
+Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. It was built for the software-defined networking (SDN) era.
+
+Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements.
+
+Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a local semantic to a segment-routing node or global within a segment-routing network. Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design.
+
+Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
+
+**The implementation of Segment Routing in VPP covers both the IPv6 data plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page contains the SRv6 documentation.**
+
+## Segment Routing terminology
+
+* Segment Routing Header (SRH): IPv6 routing extension header of type 'Segment Routing'. (draft-ietf-6man-segment-routing-header-05)
+* SegmentID (SID): is an IPv6 address.
+* Segment List (SL) (SID List): is the sequence of SIDs that the packet will traverse.
+* SR Policy: defines the SRH that will be applied to a packet. A packet steered into an SR policy may either receive the SRH by IPv6 header encapsulation (as recommended in draft-ietf-6man-rfc2460bis) or it could be inserted within an existing IPv6 header. An SR policy is uniquely identified by its Binding SID and associated with a weighted set of Segment Lists. In case several SID lists are defined, traffic steered into the policy is unevenly load-balanced among them according to their respective weights.
+* Local SID: is a SID associated with a processing function on the local node, which may go from advancing to the next SID in the SRH, to complex user-defined behaviors. When a FIB lookup, either in the main FIB or in a specific VRF, returns a match on a local SID, the associated function is performed.
+* BindingSID: a BindingSID is a SID (only one) associated one-to-one with an SR Policy. If a packet arrives with an IPv6 DA corresponding to a BindingSID, then the SR policy will be applied to that packet.
+
+## SRv6 Features in VPP
+
+The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-srv6-network-programming/">SRv6 Network Programming (*draft-filsfils-spring-srv6-network-programming*)</a> defines the SRv6 architecture.
+
+VPP supports the following SRv6 LocalSID functions: End, End.X, End.DX6, End.DT6, End.DX4, End.DT4, End.DX2, End.B6, End.B6.Encaps.
+
+For further information and how to configure each specific function: @subpage srv6_localsid_doc
+
+
+The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-segment-routing-policy/">Segment Routing Policy (*draft-filsfils-spring-segment-routing-policy*)</a> defines SR Policies.
+
+VPP supports SRv6 Policies with T.Insert and T.Encaps behaviors.
+
+For further information on how to create SR Policies: @subpage srv6_policy_doc
+
+For further information on how to steer traffic into SR Policies: @subpage srv6_steering_doc
+
+## SRv6 LocalSID development framework
+
+One of the *'key'* concepts about SRv6 is network programmability. This is why an SRv6 LocalSID is associated with a specific function.
+
+However, the true way to enable network programmability is to allow any developer to **easily** create their own SRv6 LocalSID function. That is why we have added some API calls so that any developer can code their own SRv6 LocalSID behaviors as plugins and add them to the running SRv6 code.
+
+The principle is that the developer only codes the behavior (the graph node), while all the FIB handling, SR LocalSID instantiation and so on are done by the VPP SRv6 code.
+
+For more information please refer to: @subpage srv6_plugin_doc
diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c
new file mode 100755
index 00000000..bdc66386
--- /dev/null
+++ b/src/vnet/srv6/sr_localsid.c
@@ -0,0 +1,1492 @@
+/*
+ * sr_localsid.c: ipv6 segment routing Endpoint behaviors
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Processing of packets with a SRH
+ *
+ * CLI to define new Segment Routing End processing functions.
+ * Graph node to support such functions.
+ *
+ * Each function associates an SRv6 segment (IPv6 address) with a specific
+ * Segment Routing function.
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/adj/adj.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+/**
+ * @brief Dynamically added SR localsid DPO types
+ */
+static dpo_type_t sr_localsid_dpo_type;	/* set for End and End.X */
+static dpo_type_t sr_localsid_d_dpo_type;	/* set for D_FIRST < b < LAST */
+
+/**
+ * @brief SR localsid add/del
+ *
+ * Function to add or delete SR LocalSIDs.
+ *
+ * @param is_del Boolean of whether its a delete instruction
+ * @param localsid_addr IPv6 address of the localsid
+ * @param end_psp Boolean stored in the localsid as its PSP flag
+ * @param behavior Type of behavior (function) for this localsid
+ * @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table.
+ * @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
+ * @param fib_table  FIB table in which we should install the localsid entry
+ * @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
+ * @param ls_plugin_mem Opaque data stored in the localsid for plugin behaviors
+ *
+ * @return 0 when created, 1 when deleted; negative on error: -1 already
+ *   exists, -2 not found on delete, -3 unknown FIB table, -4 address already
+ *   in the FIB, -5 adjacency creation failed, -6 plugin creation failed.
+ */
+int
+sr_cli_localsid (char is_del, ip6_address_t * localsid_addr,
+		 char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index,
+		 u32 fib_table, ip46_address_t * nh_addr, void *ls_plugin_mem)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  uword *p;
+  int rv;
+  ip6_sr_localsid_t *ls = 0;
+  dpo_id_t dpo = DPO_INVALID;
+
+  /* Search for the item */
+  p = mhash_get (&sm->sr_localsids_index_hash, localsid_addr);
+
+  if (p)
+    {
+      if (is_del)
+	{
+	  /* Retrieve localsid */
+	  ls = pool_elt_at_index (sm->localsids, p[0]);
+	  /* Delete FIB entry */
+	  fib_prefix_t pfx = {
+	    .fp_proto = FIB_PROTOCOL_IP6,
+	    .fp_len = 128,
+	    .fp_addr = {
+			.ip6 = *localsid_addr,
+			}
+	  };
+	  /* Use the table recorded at creation, not the caller's fib_table */
+	  fib_table_entry_delete (fib_table_find (FIB_PROTOCOL_IP6,
+						  ls->fib_table),
+				  &pfx, FIB_SOURCE_SR);
+
+	  /* In case it is a Xconnect iface remove the (OIF, NHOP) adj */
+	  if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX6
+	      || ls->behavior == SR_BEHAVIOR_DX4)
+	    adj_unlock (ls->nh_adj);
+
+	  if (ls->behavior >= SR_BEHAVIOR_LAST)
+	    {
+	      sr_localsid_fn_registration_t *plugin = 0;
+	      plugin = pool_elt_at_index (sm->plugin_functions,
+					  ls->behavior - SR_BEHAVIOR_LAST);
+
+	      /* Callback plugin removal function */
+	      rv = plugin->removal (ls);
+	    }
+
+	  /* Delete localsid registry */
+	  pool_put (sm->localsids, ls);
+	  mhash_unset (&sm->sr_localsids_index_hash, localsid_addr, NULL);
+	  return 1;
+	}
+      else			/* create with function already existing; complain */
+	return -1;
+    }
+  else if (is_del)
+    return -2;			/* delete; localsid does not exist; complain */
+
+  /* Check whether there exists a FIB entry with such address */
+  fib_prefix_t pfx = {
+    .fp_proto = FIB_PROTOCOL_IP6,
+    .fp_len = 128,
+  };
+
+  pfx.fp_addr.as_u64[0] = localsid_addr->as_u64[0];
+  pfx.fp_addr.as_u64[1] = localsid_addr->as_u64[1];
+
+  /* Lookup the FIB index associated to the table id provided */
+  u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6, fib_table);
+  if (fib_index == ~0)
+    return -3;
+
+  /* Lookup the localsid in such FIB table */
+  fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx);
+  if (FIB_NODE_INDEX_INVALID != fei)
+    return -4;			//There is an entry for such address (the localsid addr)
+
+  /* Create a new localsid registry */
+  pool_get (sm->localsids, ls);
+  memset (ls, 0, sizeof (*ls));
+
+  clib_memcpy (&ls->localsid, localsid_addr, sizeof (ip6_address_t));
+  ls->end_psp = end_psp;
+  ls->behavior = behavior;
+  ls->nh_adj = (u32) ~ 0;
+  ls->fib_table = fib_table;
+  switch (behavior)
+    {
+    case SR_BEHAVIOR_END:
+      break;
+    case SR_BEHAVIOR_X:
+      ls->sw_if_index = sw_if_index;
+      clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t));
+      break;
+    case SR_BEHAVIOR_DX4:
+      ls->sw_if_index = sw_if_index;
+      clib_memcpy (&ls->next_hop.ip4, &nh_addr->ip4, sizeof (ip4_address_t));
+      break;
+    case SR_BEHAVIOR_DX6:
+      ls->sw_if_index = sw_if_index;
+      clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t));
+      break;
+    case SR_BEHAVIOR_DT6:
+      ls->vrf_index = sw_if_index;
+      break;
+      /* NOTE(review): SR_BEHAVIOR_DT4 stores no table here -- confirm */
+    case SR_BEHAVIOR_DX2:
+      ls->sw_if_index = sw_if_index;
+      ls->vlan_index = vlan_index;
+      break;
+    }
+
+  /* Figure out the adjacency magic for Xconnect variants */
+  if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX4
+      || ls->behavior == SR_BEHAVIOR_DX6)
+    {
+      adj_index_t nh_adj_index = ADJ_INDEX_INVALID;
+
+      /* Retrieve the adjacency corresponding to the (OIF, next_hop) */
+      if (ls->behavior == SR_BEHAVIOR_DX6 || ls->behavior == SR_BEHAVIOR_X)
+	nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6,
+					    nh_addr, sw_if_index);
+
+      else if (ls->behavior == SR_BEHAVIOR_DX4)
+	nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4,
+					    nh_addr, sw_if_index);
+
+      /* Check for ADJ creation error. If so panic */
+      if (nh_adj_index == ADJ_INDEX_INVALID)
+	{
+	  pool_put (sm->localsids, ls);
+	  return -5;
+	}
+
+      ls->nh_adj = nh_adj_index;
+    }
+
+  /* Set DPO */
+  if (ls->behavior == SR_BEHAVIOR_END || ls->behavior == SR_BEHAVIOR_X)
+    dpo_set (&dpo, sr_localsid_dpo_type, DPO_PROTO_IP6, ls - sm->localsids);
+  else if (ls->behavior > SR_BEHAVIOR_D_FIRST
+	   && ls->behavior < SR_BEHAVIOR_LAST)
+    dpo_set (&dpo, sr_localsid_d_dpo_type, DPO_PROTO_IP6, ls - sm->localsids);
+  else if (ls->behavior >= SR_BEHAVIOR_LAST)
+    {
+      sr_localsid_fn_registration_t *plugin = 0;
+      plugin = pool_elt_at_index (sm->plugin_functions,
+				  ls->behavior - SR_BEHAVIOR_LAST);
+      /* Copy the unformat memory result */
+      ls->plugin_mem = ls_plugin_mem;
+      /* Callback plugin creation function */
+      rv = plugin->creation (ls);
+      if (rv)
+	{
+	  pool_put (sm->localsids, ls);
+	  return -6;
+	}
+      dpo_set (&dpo, plugin->dpo, DPO_PROTO_IP6, ls - sm->localsids);
+    }
+
+  /* Set hash key for searching localsid by address */
+  mhash_set (&sm->sr_localsids_index_hash, localsid_addr, ls - sm->localsids,
+	     NULL);
+
+  fib_table_entry_special_dpo_add (fib_index, &pfx, FIB_SOURCE_SR,
+				   FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+  dpo_reset (&dpo);
+
+  /* Set counter to zero */
+  vlib_validate_combined_counter (&(sm->sr_ls_valid_counters),
+				  ls - sm->localsids);
+  vlib_validate_combined_counter (&(sm->sr_ls_invalid_counters),
+				  ls - sm->localsids);
+
+  vlib_zero_combined_counter (&(sm->sr_ls_valid_counters),
+			      ls - sm->localsids);
+  vlib_zero_combined_counter (&(sm->sr_ls_invalid_counters),
+			      ls - sm->localsids);
+
+  return 0;
+}
+
+/**
+ * @brief SR LocalSID CLI function: parses 'sr localsid' and applies it.
+ *
+ * @see sr_cli_localsid
+ */
+static clib_error_t *
+sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
+			    vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_sr_main_t *sm = &sr_main;
+  u32 sw_if_index = (u32) ~ 0, vlan_index = (u32) ~ 0, fib_index = 0;
+  int is_del = 0, end_psp = 0;
+  ip6_address_t resulting_address;
+  ip46_address_t next_hop;
+  char address_set = 0;
+  char behavior = 0;
+  void *ls_plugin_mem = 0;
+
+  int rv;
+
+  memset (&resulting_address, 0, sizeof (ip6_address_t));
+  ip46_address_reset (&next_hop);
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "del"))
+	is_del = 1;
+      else if (!address_set
+	       && unformat (input, "address %U", unformat_ip6_address,
+			    &resulting_address))
+	address_set = 1;
+      else if (!address_set
+	       && unformat (input, "addr %U", unformat_ip6_address,
+			    &resulting_address))
+	address_set = 1;
+      else if (unformat (input, "fib-table %u", &fib_index));
+      else if (vlan_index == (u32) ~ 0
+	       && unformat (input, "vlan %u", &vlan_index));
+      else if (!behavior && unformat (input, "behavior"))
+	{
+	  if (unformat (input, "end.x %U %U",
+			unformat_vnet_sw_interface, vnm, &sw_if_index,
+			unformat_ip6_address, &next_hop.ip6))
+	    behavior = SR_BEHAVIOR_X;
+	  else if (unformat (input, "end.dx6 %U %U",
+			     unformat_vnet_sw_interface, vnm, &sw_if_index,
+			     unformat_ip6_address, &next_hop.ip6))
+	    behavior = SR_BEHAVIOR_DX6;
+	  else if (unformat (input, "end.dx4 %U %U",
+			     unformat_vnet_sw_interface, vnm, &sw_if_index,
+			     unformat_ip4_address, &next_hop.ip4))
+	    behavior = SR_BEHAVIOR_DX4;
+	  else if (unformat (input, "end.dx2 %U",
+			     unformat_vnet_sw_interface, vnm, &sw_if_index))
+	    behavior = SR_BEHAVIOR_DX2;
+	  else if (unformat (input, "end.dt6 %u", &sw_if_index))
+	    behavior = SR_BEHAVIOR_DT6;
+	  else if (unformat (input, "end.dt4 %u", &sw_if_index))
+	    behavior = SR_BEHAVIOR_DT4;
+	  else
+	    {
+	      /* Loop over all the plugin behavior format functions */
+	      sr_localsid_fn_registration_t *plugin = 0, **vec_plugins = 0;
+	      sr_localsid_fn_registration_t **plugin_it = 0;
+
+	      /* Create a vector out of the plugin pool as recommended */
+        /* *INDENT-OFF* */
+        pool_foreach (plugin, sm->plugin_functions,
+        {
+          vec_add1 (vec_plugins, plugin);
+        });
+        /* *INDENT-ON* */
+
+	      vec_foreach (plugin_it, vec_plugins)
+	      {
+		if (unformat
+		    (input, "%U", (*plugin_it)->ls_unformat, &ls_plugin_mem))
+		  {
+		    behavior = (*plugin_it)->sr_localsid_function_number;
+		    break;
+		  }
+	      }
+	      vec_free (vec_plugins);	/* only a temporary list of pointers */
+	    }
+
+	  if (!behavior)
+	    {
+	      if (unformat (input, "end"))
+		behavior = SR_BEHAVIOR_END;
+	      else
+		break;
+	    }
+	}
+      else if (!end_psp && unformat (input, "psp"))
+	end_psp = 1;
+      else
+	break;
+    }
+
+  if (!behavior && end_psp)
+    behavior = SR_BEHAVIOR_END;
+
+  if (!address_set)
+    return clib_error_return (0,
+			      "Error: SRv6 LocalSID address is mandatory.");
+  if (!is_del && !behavior)
+    return clib_error_return (0,
+			      "Error: SRv6 LocalSID behavior is mandatory.");
+  if (vlan_index != (u32) ~ 0)
+    return clib_error_return (0,
+			      "Error: SRv6 End.DX2 with rewrite VLAN tag not supported by now.");
+  if (end_psp && !(behavior == SR_BEHAVIOR_END || behavior == SR_BEHAVIOR_X))
+    return clib_error_return (0,
+			      "Error: SRv6 PSP only compatible with End and End.X");
+
+  rv = sr_cli_localsid (is_del, &resulting_address, end_psp, behavior,
+			sw_if_index, vlan_index, fib_index, &next_hop,
+			ls_plugin_mem);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+    case 1:
+      return 0;
+    case -1:
+      return clib_error_return (0,
+				"Identical localsid already exists. Requested localsid not created.");
+    case -2:
+      return clib_error_return (0,
+				"The requested localsid could not be deleted. SR localsid not found");
+    case -3:
+      return clib_error_return (0, "FIB table %u does not exist", fib_index);
+    case -4:
+      return clib_error_return (0, "There is already one FIB entry for the "
+				"requested localsid non segment routing related");
+    case -5:
+      return clib_error_return (0,
+				"Could not create ARP/ND entry for such next_hop. Internal error.");
+    case -6:
+      return clib_error_return (0,
+				"Error on the plugin based localsid creation.");
+    default:
+      return clib_error_return (0, "BUG: sr localsid returns %d", rv);
+    }
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sr_localsid_command, static) = {
+  .path = "sr localsid",
+  .short_help = "sr localsid (del) address XX:XX::YY:YY "
+      "(fib-table 8) behavior STRING",
+  .long_help =
+    "Create SR LocalSID and binds it to a particular behavior\n"
+    "Arguments:\n"
+    "\tlocalSID IPv6_addr(128b)   LocalSID IPv6 address\n"
+    "\t(fib-table X)              Optional. VRF where to install SRv6 localsid\n"
+    "\tbehavior STRING            Specifies the behavior\n"
+    "\n\tBehaviors:\n"
+    "\tEnd\t-> Endpoint.\n"
+    "\tEnd.X\t-> Endpoint with Layer-3 cross-connect.\n"
+    "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+    "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n"
+    "\t\tParameters: '<iface>'\n"
+    "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n"
+    "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+    "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n"
+    "\t\tParameters: '<iface> <ip4_next_hop>'\n"
+    "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n"
+    "\t\tParameters: '<ip6_fib_table>'\n"
+    "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n"
+    "\t\tParameters: '<ip4_fib_table>'\n",
+  .function = sr_cli_localsid_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI function to 'show' all SR LocalSIDs and their counters.
+ */
+static clib_error_t *
+show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
+			     vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_sr_main_t *sm = &sr_main;
+  ip6_sr_localsid_t **localsid_list = 0;
+  ip6_sr_localsid_t *ls;
+  int i;
+
+  vlib_cli_output (vm, "SRv6 - My LocalSID Table:");
+  vlib_cli_output (vm, "=========================");
+  /* *INDENT-OFF* */
+  pool_foreach (ls, sm->localsids, ({ vec_add1 (localsid_list, ls); }));
+  /* *INDENT-ON* */
+  for (i = 0; i < vec_len (localsid_list); i++)
+    {
+      ls = localsid_list[i];
+      u32 idx = ls - sm->localsids;	/* pool index, used by the counters */
+      switch (ls->behavior)
+	{
+	case SR_BEHAVIOR_END:
+	  vlib_cli_output (vm, "\tAddress: \t%U\n\tBehavior: \tEnd",
+			   format_ip6_address, &ls->localsid);
+	  break;
+	case SR_BEHAVIOR_X:
+	  vlib_cli_output (vm,
+			   "\tAddress: \t%U\n\tBehavior: \tX (Endpoint with Layer-3 cross-connect)"
+			   "\n\tIface:  \t%U\n\tNext hop: \t%U",
+			   format_ip6_address, &ls->localsid,
+			   format_vnet_sw_if_index_name, vnm, ls->sw_if_index,
+			   format_ip6_address, &ls->next_hop.ip6);
+	  break;
+	case SR_BEHAVIOR_DX4:
+	  vlib_cli_output (vm,
+			   "\tAddress: \t%U\n\tBehavior: \tDX4 (Endpoint with decapsulation and IPv4 cross-connect)"
+			   "\n\tIface:  \t%U\n\tNext hop: \t%U",
+			   format_ip6_address, &ls->localsid,
+			   format_vnet_sw_if_index_name, vnm, ls->sw_if_index,
+			   format_ip4_address, &ls->next_hop.ip4);
+	  break;
+	case SR_BEHAVIOR_DX6:
+	  vlib_cli_output (vm,
+			   "\tAddress: \t%U\n\tBehavior: \tDX6 (Endpoint with decapsulation and IPv6 cross-connect)"
+			   "\n\tIface:  \t%U\n\tNext hop: \t%U",
+			   format_ip6_address, &ls->localsid,
+			   format_vnet_sw_if_index_name, vnm, ls->sw_if_index,
+			   format_ip6_address, &ls->next_hop.ip6);
+	  break;
+	case SR_BEHAVIOR_DX2:
+	  if (ls->vlan_index == (u32) ~ 0)
+	    vlib_cli_output (vm,
+			     "\tAddress: \t%U\n\tBehavior: \tDX2 (Endpoint with decapsulation and Layer-2 cross-connect)"
+			     "\n\tIface:  \t%U", format_ip6_address,
+			     &ls->localsid, format_vnet_sw_if_index_name, vnm,
+			     ls->sw_if_index);
+	  else
+	    vlib_cli_output (vm,
+			     "Unsupported yet. (DX2 with egress VLAN rewrite)");
+	  break;
+	case SR_BEHAVIOR_DT6:
+	  vlib_cli_output (vm,
+			   "\tAddress: \t%U\n\tBehavior: \tDT6 (Endpoint with decapsulation and specific IPv6 table lookup)"
+			   "\n\tTable: %u", format_ip6_address, &ls->localsid,
+			   ls->fib_table);
+	  break;
+	case SR_BEHAVIOR_DT4:
+	  vlib_cli_output (vm,
+			   "\tAddress: \t%U\n\tBehavior: \tDT4 (Endpoint with decapsulation and specific IPv4 table lookup)"
+			   "\n\tTable: \t%u", format_ip6_address,
+			   &ls->localsid, ls->fib_table);
+	  break;
+	default:
+	  if (ls->behavior >= SR_BEHAVIOR_LAST)
+	    {
+	      sr_localsid_fn_registration_t *plugin =
+		pool_elt_at_index (sm->plugin_functions,
+				   ls->behavior - SR_BEHAVIOR_LAST);
+
+	      vlib_cli_output (vm, "\tAddress: \t%U\n"
+			       "\tBehavior: \t%s (%s)\n\t%U",
+			       format_ip6_address, &ls->localsid,
+			       plugin->keyword_str, plugin->def_str,
+			       plugin->ls_format, ls->plugin_mem);
+	    }
+	  else
+	    vlib_cli_output (vm, "Internal error");	/* should never get here */
+	  break;
+	}
+      if (ls->end_psp)
+	vlib_cli_output (vm, "\tPSP: \tTrue\n");
+
+      vlib_counter_t valid, invalid;	/* counters live at the pool index */
+      vlib_get_combined_counter (&(sm->sr_ls_valid_counters), idx, &valid);
+      vlib_get_combined_counter (&(sm->sr_ls_invalid_counters), idx, &invalid);
+      vlib_cli_output (vm, "\tGood traffic: \t[%Ld packets : %Ld bytes]\n",
+		       valid.packets, valid.bytes);
+      vlib_cli_output (vm, "\tBad traffic:  \t[%Ld packets : %Ld bytes]\n",
+		       invalid.packets, invalid.bytes);
+      vlib_cli_output (vm, "--------------------");
+    }
+  vec_free (localsid_list);	/* temporary list of pool element pointers */
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_localsid_command, static) = {
+  .path = "show sr localsids",	/* the handler reads no arguments */
+  .short_help = "show sr localsids",
+  .function = show_sr_localsid_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI function to 'clear' ALL SR localsid counters (valid and invalid)
+ */
+static clib_error_t *
+clear_sr_localsid_counters_command_fn (vlib_main_t * vm,
+				       unformat_input_t * input,
+				       vlib_cli_command_t * cmd)
+{
+  ip6_sr_main_t *sm = &sr_main;
+
+  vlib_clear_combined_counters (&(sm->sr_ls_valid_counters));
+  vlib_clear_combined_counters (&(sm->sr_ls_invalid_counters));
+
+  return 0;			/* input is not parsed; always succeeds */
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_sr_localsid_counters_command, static) = {
+  .path = "clear sr localsid counters",	/* the handler reads no arguments */
+  .short_help = "clear sr localsid counters",
+  .function = clear_sr_localsid_counters_command_fn,
+};
+/* *INDENT-ON* */
+
+/************************ SR LocalSID graphs node ****************************/
+/**
+ * @brief SR localsid node trace
+ */
+typedef struct
+{
+  u32 localsid_index;		/* index into the sr_main.localsids pool */
+  ip6_address_t src, out_dst;
+  u8 sr[256];			/* NOTE(review): presumably a copy of the SRH */
+  u8 num_segments;
+  u8 segments_left;
+  //With SRv6 header update include flags here.
+} sr_localsid_trace_t;
+
+#define foreach_sr_localsid_error	/* X-macro: _(symbol, text) per error */ \
+_(NO_INNER_HEADER, "(SR-Error) No inner IP header")                 \
+_(NO_MORE_SEGMENTS, "(SR-Error) No more segments")                  \
+_(NO_SRH, "(SR-Error) No SR header")                                \
+_(NO_PSP, "(SR-Error) PSP Not available (segments left > 0)")       \
+_(NOT_LS, "(SR-Error) Decaps not available (segments left > 0)")    \
+_(L2, "(SR-Error) SRv6 decapsulated a L2 frame without dest")
+
+typedef enum
+{
+#define _(sym,str) SR_LOCALSID_ERROR_##sym,
+  foreach_sr_localsid_error
+#undef _
+    SR_LOCALSID_N_ERROR,	/* number of error codes; used as .n_errors */
+} sr_localsid_error_t;
+
+static char *sr_localsid_error_strings[] = {	/* same order as the enum */
+#define _(sym,string) string,
+  foreach_sr_localsid_error
+#undef _
+};
+
+#define foreach_sr_localsid_next	/* X-macro: _(symbol, node name) */ \
+_(ERROR, "error-drop")                  \
+_(IP6_LOOKUP, "ip6-lookup")             \
+_(IP4_LOOKUP, "ip4-lookup")             \
+_(IP6_REWRITE, "ip6-rewrite")           \
+_(IP4_REWRITE, "ip4-rewrite")           \
+_(INTERFACE_OUTPUT, "interface-output")
+
+typedef enum
+{
+#define _(s,n) SR_LOCALSID_NEXT_##s,
+  foreach_sr_localsid_next
+#undef _
+    SR_LOCALSID_N_NEXT,		/* number of next nodes; used as .n_next_nodes */
+} sr_localsid_next_t;
+
+/**
+ * @brief Format one sr_localsid_trace_t record for the packet trace output
+ *
+ * @see sr_localsid
+ */
+u8 *
+format_sr_localsid_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip6_sr_main_t *sm = &sr_main;
+  sr_localsid_trace_t *t = va_arg (*args, sr_localsid_trace_t *);
+
+  ip6_sr_localsid_t *ls =
+    pool_elt_at_index (sm->localsids, t->localsid_index);
+
+  s =
+    format (s, "SR-LOCALSID:\n\tLocalsid: %U\n", format_ip6_address,
+	    &ls->localsid);
+  switch (ls->behavior)
+    {
+    case SR_BEHAVIOR_END:
+      s = format (s, "\tBehavior: End\n");
+      break;
+    case SR_BEHAVIOR_DX6:
+      s = format (s, "\tBehavior: Decapsulation with IPv6 L3 xconnect\n");
+      break;
+    case SR_BEHAVIOR_DX4:
+      s = format (s, "\tBehavior: Decapsulation with IPv4 L3 xconnect\n");
+      break;
+    case SR_BEHAVIOR_X:
+      s = format (s, "\tBehavior: IPv6 L3 xconnect\n");
+      break;
+    case SR_BEHAVIOR_DT6:
+      s = format (s, "\tBehavior: Decapsulation with IPv6 Table lookup\n");
+      break;
+    case SR_BEHAVIOR_DT4:
+      s = format (s, "\tBehavior: Decapsulation with IPv4 Table lookup\n");
+      break;
+    case SR_BEHAVIOR_DX2:
+      s = format (s, "\tBehavior: Decapsulation with L2 xconnect\n");
+      break;
+    default:
+      s = format (s, "\tBehavior: defined in plugin\n");	//TODO
+      break;
+    }
+  if (t->num_segments != 0xFF)	/* 0xFF: the node could not capture the SRH */
+    {
+      if (t->num_segments > 0)
+	{
+	  s = format (s, "\tSegments left: %d\n", t->segments_left);
+	  s = format (s, "\tSID list: [in ietf order]");
+	  int i = 0;
+	  for (i = 0; i < t->num_segments; i++)
+	    {
+	      s = format (s, "\n\t-> %U", format_ip6_address,
+			  (ip6_address_t *) & t->sr[i *
+						    sizeof (ip6_address_t)]);
+	    }
+	}
+    }
+  return s;
+}
+
+/**
+ * @brief End / End.X processing: make the next SRH segment the IPv6 DA.
+ */
+static_always_inline void
+end_srh_processing (vlib_node_runtime_t * node,
+		    vlib_buffer_t * b0,
+		    ip6_header_t * ip0,
+		    ip6_sr_header_t * sr0,
+		    ip6_sr_localsid_t * ls0, u32 * next0)
+{
+  ip6_address_t *new_dst0;
+
+  if (PREDICT_TRUE (sr0 && sr0->type == ROUTING_HEADER_TYPE_SR))
+    {
+      if (PREDICT_TRUE (sr0->segments_left != 0))
+	{
+	  /* Activate the next segment and copy it into the IPv6 DA */
+	  sr0->segments_left -= 1;
+	  new_dst0 = (ip6_address_t *) (sr0->segments);
+	  new_dst0 += sr0->segments_left;
+	  ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
+	  ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
+	  if (ls0->behavior == SR_BEHAVIOR_X)
+	    {
+	      vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+	      *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
+	    }
+	}
+      else
+	{
+	  *next0 = SR_LOCALSID_NEXT_ERROR;
+	  b0->error = node->errors[SR_LOCALSID_ERROR_NO_MORE_SEGMENTS];
+	}
+    }
+  else
+    {
+      /* Error. No SRH found (sr0 == NULL) or routing header type != SR */
+      *next0 = SR_LOCALSID_NEXT_ERROR;
+      b0->error = node->errors[SR_LOCALSID_ERROR_NO_SRH];
+    }
+}
+
+/**
+ * @brief SRH processing for the D* variants: pop outer headers, pick next node
+ */
+//FixME. I must crosscheck that next_proto matches the localsid
+static_always_inline void
+end_decaps_srh_processing (vlib_node_runtime_t * node,
+			   vlib_buffer_t * b0,
+			   ip6_header_t * ip0,
+			   ip6_sr_header_t * sr0,
+			   ip6_sr_localsid_t * ls0, u32 * next0)
+{
+  /* Compute the size of the IPv6 header with all Ext. headers */
+  u8 next_proto;
+  ip6_ext_header_t *next_ext_header;
+  u16 total_size = 0;
+
+  next_proto = ip0->protocol;
+  next_ext_header = (void *) (ip0 + 1);
+  total_size = sizeof (ip6_header_t);
+  while (ip6_ext_hdr (next_proto))
+    {
+      total_size += ip6_ext_header_len (next_ext_header);
+      next_proto = next_ext_header->next_hdr;
+      next_ext_header = ip6_ext_next_header (next_ext_header);
+    }
+
+  /* Ensure this is the last segment. Otherwise drop. */
+  if (sr0 && sr0->segments_left != 0)
+    {
+      *next0 = SR_LOCALSID_NEXT_ERROR;
+      b0->error = node->errors[SR_LOCALSID_ERROR_NOT_LS];
+      return;
+    }
+
+  switch (next_proto)
+    {
+    case IP_PROTOCOL_IPV6:
+      /* Encap-End IPv6. Pop outer IPv6 header. */
+      if (ls0->behavior == SR_BEHAVIOR_DX6)
+	{
+	  vlib_buffer_advance (b0, total_size);
+	  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+	  *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
+	  return;
+	}
+      else if (ls0->behavior == SR_BEHAVIOR_DT6)
+	{
+	  vlib_buffer_advance (b0, total_size);
+	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table;
+	  *next0 = SR_LOCALSID_NEXT_IP6_LOOKUP;	/* do not rely on caller default */
+	  return;
+	}
+      break;
+    case IP_PROTOCOL_IP_IN_IP:
+      /* Encap-End IPv4. Pop outer IPv6 header */
+      if (ls0->behavior == SR_BEHAVIOR_DX4)
+	{
+	  vlib_buffer_advance (b0, total_size);
+	  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+	  *next0 = SR_LOCALSID_NEXT_IP4_REWRITE;
+	  return;
+	}
+      else if (ls0->behavior == SR_BEHAVIOR_DT4)
+	{
+	  vlib_buffer_advance (b0, total_size);
+	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table;
+	  *next0 = SR_LOCALSID_NEXT_IP4_LOOKUP;
+	  return;
+	}
+      break;
+    case IP_PROTOCOL_IP6_NONXT:
+      /* L2 encaps */
+      if (ls0->behavior == SR_BEHAVIOR_DX2)
+	{
+	  vlib_buffer_advance (b0, total_size);
+	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->sw_if_index;
+	  *next0 = SR_LOCALSID_NEXT_INTERFACE_OUTPUT;
+	  return;
+	}
+      break;
+    }
+  /* Inner protocol absent or not matching the localsid behavior: drop */
+  *next0 = SR_LOCALSID_NEXT_ERROR;
+  b0->error = node->errors[SR_LOCALSID_ERROR_NO_INNER_HEADER];
+}
+
+/**
+ * @brief End processing with PSP: set the last segment as DA and strip the SRH
+ */
+static_always_inline void
+end_psp_srh_processing (vlib_node_runtime_t * node,
+			vlib_buffer_t * b0,
+			ip6_header_t * ip0,
+			ip6_ext_header_t * prev0,
+			ip6_sr_header_t * sr0,
+			ip6_sr_localsid_t * ls0, u32 * next0)
+{
+  u32 new_l0, sr_len;
+  u64 *copy_dst0, *copy_src0;
+  u32 copy_len_u64s0 = 0;
+  int i;
+
+  if (PREDICT_TRUE (sr0 && sr0->type == ROUTING_HEADER_TYPE_SR))
+    {
+      if (PREDICT_TRUE (sr0->segments_left == 1))
+	{
+	  ip0->dst_address.as_u64[0] = sr0->segments->as_u64[0];
+	  ip0->dst_address.as_u64[1] = sr0->segments->as_u64[1];
+
+	  /* Remove the SRH taking care of the rest of IPv6 ext header */
+	  if (prev0)
+	    prev0->next_hdr = sr0->protocol;
+	  else
+	    ip0->protocol = sr0->protocol;
+
+	  sr_len = ip6_ext_header_len (sr0);
+	  vlib_buffer_advance (b0, sr_len);
+	  new_l0 = clib_net_to_host_u16 (ip0->payload_length) - sr_len;
+	  ip0->payload_length = clib_host_to_net_u16 (new_l0);
+	  copy_src0 = (u64 *) ip0;
+	  copy_dst0 = copy_src0 + (sr0->length + 1);
+	  /* number of 8 octet units to copy
+	   * By default in absence of extension headers it is equal to length of ip6 header
+	   * With extension headers it number of 8 octet units of ext headers preceding
+	   * SR header
+	   */
+	  copy_len_u64s0 =
+	    (((u8 *) sr0 - (u8 *) ip0) - sizeof (ip6_header_t)) >> 3;
+	  copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0];
+	  copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0];
+	  copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0];
+	  copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0];
+	  copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0];
+
+	  for (i = copy_len_u64s0 - 1; i >= 0; i--)
+	    {
+	      copy_dst0[i] = copy_src0[i];
+	    }
+
+	  if (ls0->behavior == SR_BEHAVIOR_X)
+	    {
+	      vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+	      *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
+	    }
+	  return;
+	}
+    }
+  /* Error. No SRH, routing header type != SR, or segments_left != 1 */
+  *next0 = SR_LOCALSID_NEXT_ERROR;
+  b0->error = node->errors[SR_LOCALSID_ERROR_NO_PSP];
+}
+
+/**
+ * @brief SR LocalSID graph node for the decapsulating (End.D*) behaviors
+ */
+static uword
+sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+		  vlib_frame_t * from_frame)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+  ip6_sr_main_t *sm = &sr_main;
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+  next_index = node->cached_next_index;
+  u32 thread_index = vlib_get_thread_index ();
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Quad - Loop */
+      while (n_left_from >= 8 && n_left_to_next >= 4)
+	{
+	  u32 bi0, bi1, bi2, bi3;
+	  vlib_buffer_t *b0, *b1, *b2, *b3;
+	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
+	  ip6_ext_header_t *prev0, *prev1, *prev2, *prev3;
+	  ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+	  u32 next0, next1, next2, next3;
+	  next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP;
+	  ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3;
+
+	  /* Prefetch next iteration. */
+	  {
+	    vlib_buffer_t *p4, *p5, *p6, *p7;
+
+	    p4 = vlib_get_buffer (vm, from[4]);
+	    p5 = vlib_get_buffer (vm, from[5]);
+	    p6 = vlib_get_buffer (vm, from[6]);
+	    p7 = vlib_get_buffer (vm, from[7]);
+
+	    /* Prefetch the buffer headers and data of the next four packets */
+	    vlib_prefetch_buffer_header (p4, LOAD);
+	    vlib_prefetch_buffer_header (p5, LOAD);
+	    vlib_prefetch_buffer_header (p6, LOAD);
+	    vlib_prefetch_buffer_header (p7, LOAD);
+
+	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+	  }
+
+	  to_next[0] = bi0 = from[0];
+	  to_next[1] = bi1 = from[1];
+	  to_next[2] = bi2 = from[2];
+	  to_next[3] = bi3 = from[3];
+	  from += 4;
+	  to_next += 4;
+	  n_left_from -= 4;
+	  n_left_to_next -= 4;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+	  b2 = vlib_get_buffer (vm, bi2);
+	  b3 = vlib_get_buffer (vm, bi3);
+	  /* Per-packet localsid lookup: each packet uses its OWN buffer */
+	  ls0 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  ls1 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+	  ls2 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+	  ls3 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+	  ip0 = vlib_buffer_get_current (b0);
+	  ip1 = vlib_buffer_get_current (b1);
+	  ip2 = vlib_buffer_get_current (b2);
+	  ip3 = vlib_buffer_get_current (b3);
+
+	  ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+	  ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE);
+	  ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE);
+	  ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE);
+
+	  end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0);
+	  end_decaps_srh_processing (node, b1, ip1, sr1, ls1, &next1);
+	  end_decaps_srh_processing (node, b2, ip2, sr2, ls2, &next2);
+	  end_decaps_srh_processing (node, b3, ip3, sr3, ls3, &next3);
+
+	  //TODO: trace.
+
+	  vlib_increment_combined_counter
+	    (((next0 ==
+	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b0));
+
+	  vlib_increment_combined_counter
+	    (((next1 ==
+	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+	      &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b1));
+
+	  vlib_increment_combined_counter
+	    (((next2 ==
+	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+	      &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b2));
+
+	  vlib_increment_combined_counter
+	    (((next3 ==
+	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+	      &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b3));
+
+	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, bi1, bi2, bi3,
+					   next0, next1, next2, next3);
+	}
+
+      /* Single loop for potentially the last three packets */
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0;
+	  vlib_buffer_t *b0;
+	  ip6_header_t *ip0;
+	  ip6_ext_header_t *prev0;
+	  ip6_sr_header_t *sr0;
+	  u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP;
+	  ip6_sr_localsid_t *ls0;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  ip0 = vlib_buffer_get_current (b0);
+
+	  /* Lookup the SR End behavior based on IP DA (adj) */
+	  ls0 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+
+	  /* Find SRH as well as previous header */
+	  ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+
+	  /* SRH processing and End variants */
+	  end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0);
+
+	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      sr_localsid_trace_t *tr =
+		vlib_add_trace (vm, node, b0, sizeof (*tr));
+	      tr->num_segments = 0;
+	      tr->localsid_index = ls0 - sm->localsids;
+
+	      if (ip0 == vlib_buffer_get_current (b0))
+		{
+		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8,
+			       sizeof (tr->out_dst.as_u8));
+		  if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE
+		      && sr0->type == ROUTING_HEADER_TYPE_SR)
+		    {		/* clamp: a long SRH must not overflow tr->sr */
+		      u32 len = clib_min (sr0->length * 8, sizeof (tr->sr));
+		      clib_memcpy (tr->sr, sr0->segments, len);
+		      tr->num_segments = len / sizeof (ip6_address_t);
+		      tr->segments_left = sr0->segments_left;
+		    }
+		}
+	      else
+		tr->num_segments = 0xFF;
+	    }
+
+	  /* Increase the counters */
+	  vlib_increment_combined_counter
+	    (((next0 ==
+	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b0));
+
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, next0);
+	}
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_localsid_d_node) = {
+  .function = sr_localsid_d_fn,	/* per-frame dispatch function */
+  .name = "sr-localsid-d",	/* name listed in sr_loc_d_ip6_nodes */
+  .vector_size = sizeof (u32),	/* frame entries are u32 buffer indices */
+  .format_trace = format_sr_localsid_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = SR_LOCALSID_N_ERROR,
+  .error_strings = sr_localsid_error_strings,
+  .n_next_nodes = SR_LOCALSID_N_NEXT,
+  .next_nodes = {		/* one entry per foreach_sr_localsid_next item */
+#define _(s,n) [SR_LOCALSID_NEXT_##s] = n,
+    foreach_sr_localsid_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief SR LocalSID graph node for the End / End.X behaviors (optional PSP)
+ */
+static uword
+sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+		vlib_frame_t * from_frame)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+  ip6_sr_main_t *sm = &sr_main;
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+  next_index = node->cached_next_index;
+  u32 thread_index = vlib_get_thread_index ();
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Quad - Loop */
+      while (n_left_from >= 8 && n_left_to_next >= 4)
+	{
+	  u32 bi0, bi1, bi2, bi3;
+	  vlib_buffer_t *b0, *b1, *b2, *b3;
+	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
+	  ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+	  ip6_ext_header_t *prev0, *prev1, *prev2, *prev3;
+	  u32 next0, next1, next2, next3;
+	  next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP;
+	  ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3;
+
+	  /* Prefetch next iteration. */
+	  {
+	    vlib_buffer_t *p4, *p5, *p6, *p7;
+
+	    p4 = vlib_get_buffer (vm, from[4]);
+	    p5 = vlib_get_buffer (vm, from[5]);
+	    p6 = vlib_get_buffer (vm, from[6]);
+	    p7 = vlib_get_buffer (vm, from[7]);
+
+	    /* Prefetch the buffer headers and data of the next four packets */
+	    vlib_prefetch_buffer_header (p4, LOAD);
+	    vlib_prefetch_buffer_header (p5, LOAD);
+	    vlib_prefetch_buffer_header (p6, LOAD);
+	    vlib_prefetch_buffer_header (p7, LOAD);
+
+	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+	  }
+
+	  to_next[0] = bi0 = from[0];
+	  to_next[1] = bi1 = from[1];
+	  to_next[2] = bi2 = from[2];
+	  to_next[3] = bi3 = from[3];
+	  from += 4;
+	  to_next += 4;
+	  n_left_from -= 4;
+	  n_left_to_next -= 4;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+	  b2 = vlib_get_buffer (vm, bi2);
+	  b3 = vlib_get_buffer (vm, bi3);
+
+	  ip0 = vlib_buffer_get_current (b0);
+	  ip1 = vlib_buffer_get_current (b1);
+	  ip2 = vlib_buffer_get_current (b2);
+	  ip3 = vlib_buffer_get_current (b3);
+
+	  ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+	  ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE);
+	  ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE);
+	  ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE);
+	  /* Per-packet localsid lookup: each packet uses its OWN buffer */
+	  ls0 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  ls1 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+	  ls2 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+	  ls3 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+	  if (ls0->end_psp)
+	    end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0);
+	  else
+	    end_srh_processing (node, b0, ip0, sr0, ls0, &next0);
+
+	  if (ls1->end_psp)
+	    end_psp_srh_processing (node, b1, ip1, prev1, sr1, ls1, &next1);
+	  else
+	    end_srh_processing (node, b1, ip1, sr1, ls1, &next1);
+
+	  if (ls2->end_psp)
+	    end_psp_srh_processing (node, b2, ip2, prev2, sr2, ls2, &next2);
+	  else
+	    end_srh_processing (node, b2, ip2, sr2, ls2, &next2);
+
+	  if (ls3->end_psp)
+	    end_psp_srh_processing (node, b3, ip3, prev3, sr3, ls3, &next3);
+	  else
+	    end_srh_processing (node, b3, ip3, sr3, ls3, &next3);
+
+	  //TODO: proper trace.
+
+	  vlib_increment_combined_counter
+	    (((next0 ==
+	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b0));
+
+	  vlib_increment_combined_counter
+	    (((next1 ==
+	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+	      &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b1));
+
+	  vlib_increment_combined_counter
+	    (((next2 ==
+	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+	      &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b2));
+
+	  vlib_increment_combined_counter
+	    (((next3 ==
+	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+	      &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b3));
+
+	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, bi1, bi2, bi3,
+					   next0, next1, next2, next3);
+	}
+
+      /* Single loop for potentially the last three packets */
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0;
+	  vlib_buffer_t *b0;
+	  ip6_header_t *ip0 = 0;
+	  ip6_ext_header_t *prev0;
+	  ip6_sr_header_t *sr0;
+	  u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP;
+	  ip6_sr_localsid_t *ls0;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  ip0 = vlib_buffer_get_current (b0);
+	  ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+
+	  /* Lookup the SR End behavior based on IP DA (adj) */
+	  ls0 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+
+	  /* SRH processing */
+	  if (ls0->end_psp)
+	    end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0);
+	  else
+	    end_srh_processing (node, b0, ip0, sr0, ls0, &next0);
+
+	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      sr_localsid_trace_t *tr =
+		vlib_add_trace (vm, node, b0, sizeof (*tr));
+	      tr->num_segments = 0;
+	      tr->localsid_index = ls0 - sm->localsids;
+
+	      if (ip0 == vlib_buffer_get_current (b0))
+		{
+		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8,
+			       sizeof (tr->out_dst.as_u8));
+		  if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE
+		      && sr0->type == ROUTING_HEADER_TYPE_SR)
+		    {		/* clamp: a long SRH must not overflow tr->sr */
+		      u32 len = clib_min (sr0->length * 8, sizeof (tr->sr));
+		      clib_memcpy (tr->sr, sr0->segments, len);
+		      tr->num_segments = len / sizeof (ip6_address_t);
+		      tr->segments_left = sr0->segments_left;
+		    }
+		}
+	      else
+		{
+		  tr->num_segments = 0xFF;
+		}
+	    }
+
+	  vlib_increment_combined_counter
+	    (((next0 ==
+	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b0));
+
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, next0);
+	}
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_localsid_node) = {
+  .function = sr_localsid_fn,	/* per-frame dispatch function */
+  .name = "sr-localsid",	/* name listed in sr_loc_ip6_nodes */
+  .vector_size = sizeof (u32),	/* frame entries are u32 buffer indices */
+  .format_trace = format_sr_localsid_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = SR_LOCALSID_N_ERROR,
+  .error_strings = sr_localsid_error_strings,
+  .n_next_nodes = SR_LOCALSID_N_NEXT,
+  .next_nodes = {		/* one entry per foreach_sr_localsid_next item */
+#define _(s,n) [SR_LOCALSID_NEXT_##s] = n,
+    foreach_sr_localsid_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_sr_dpo (u8 * s, va_list * args)
+{
+  /* DPO format callback: arguments are the DPO index, then an indent */
+  index_t ls_index = va_arg (*args, index_t);
+  CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+  s = format (s, "SR: localsid_index:[%d]", ls_index);
+  return s;
+}
+
+const static dpo_vft_t sr_loc_vft = {	/* callbacks shared by both SR DPO types */
+  .dv_lock = sr_dpo_lock,
+  .dv_unlock = sr_dpo_unlock,
+  .dv_format = format_sr_dpo,
+};
+
+const static char *const sr_loc_ip6_nodes[] = {	/* NULL-terminated list */
+  "sr-localsid",		/* node registered as sr_localsid_node */
+  NULL,
+};
+
+const static char *const *const sr_loc_nodes[DPO_PROTO_NUM] = {
+  [DPO_PROTO_IP6] = sr_loc_ip6_nodes,	/* only the IPv6 slot is populated */
+};
+
+const static char *const sr_loc_d_ip6_nodes[] = {	/* NULL-terminated list */
+  "sr-localsid-d",		/* node registered as sr_localsid_d_node */
+  NULL,
+};
+
+const static char *const *const sr_loc_d_nodes[DPO_PROTO_NUM] = {
+  [DPO_PROTO_IP6] = sr_loc_d_ip6_nodes,	/* only the IPv6 slot is populated */
+};
+
+
+/*************************** SR LocalSID plugins ******************************/
+/**
+ * @brief Register (or update) a plugin SR LocalSID behavior; returns its number
+ */
+int
+sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name,
+			       u8 * keyword_str, u8 * def_str,
+			       u8 * params_str, dpo_type_t * dpo,
+			       format_function_t * ls_format,
+			       unformat_function_t * ls_unformat,
+			       sr_plugin_callback_t * creation_fn,
+			       sr_plugin_callback_t * removal_fn)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  sr_localsid_fn_registration_t *plugin;
+  /* Did this function exist? If so update it */
+  uword *p = hash_get_mem (sm->plugin_functions_by_key, fn_name);
+  if (p)
+    {
+      plugin = pool_elt_at_index (sm->plugin_functions, p[0]);
+      /* Free the old strings: the memset below would otherwise leak them */
+      vec_free (plugin->function_name);
+      vec_free (plugin->keyword_str);
+      vec_free (plugin->def_str);
+      vec_free (plugin->params_str);
+    }
+  /* Else create a new one and set hash key */
+  else
+    {
+      pool_get (sm->plugin_functions, plugin);
+      hash_set_mem (sm->plugin_functions_by_key, fn_name,
+		    plugin - sm->plugin_functions);
+    }
+
+  memset (plugin, 0, sizeof (*plugin));
+  /* Plugin behavior numbers start right after the built-in ones */
+  plugin->sr_localsid_function_number = (plugin - sm->plugin_functions);
+  plugin->sr_localsid_function_number += SR_BEHAVIOR_LAST;
+  plugin->ls_format = ls_format;
+  plugin->ls_unformat = ls_unformat;
+  plugin->creation = creation_fn;
+  plugin->removal = removal_fn;
+  clib_memcpy (&plugin->dpo, dpo, sizeof (dpo_type_t));
+  plugin->function_name = format (0, "%s%c", fn_name, 0);
+  plugin->keyword_str = format (0, "%s%c", keyword_str, 0);
+  plugin->def_str = format (0, "%s%c", def_str, 0);
+  plugin->params_str = format (0, "%s%c", params_str, 0);
+  return plugin->sr_localsid_function_number;
+}
+
+/**
+ * @brief CLI handler listing the built-in and plugin SR LocalSID behaviors
+ */
+static clib_error_t *
+show_sr_localsid_behaviors_command_fn (vlib_main_t * vm,
+				       unformat_input_t * input,
+				       vlib_cli_command_t * cmd)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  sr_localsid_fn_registration_t *plugin;
+  sr_localsid_fn_registration_t **plugins_vec = 0;
+  int i;
+
+  vlib_cli_output (vm,
+		   "SR LocalSIDs behaviors:\n-----------------------\n\n");
+
+  /* *INDENT-OFF* */
+  pool_foreach (plugin, sm->plugin_functions,
+    ({ vec_add1 (plugins_vec, plugin); }));
+  /* *INDENT-ON* */
+  /* Print static behaviors */
+  vlib_cli_output (vm, "Default behaviors:\n"
+		   "\tEnd\t-> Endpoint.\n"
+		   "\tEnd.X\t-> Endpoint with Layer-3 cross-connect.\n"
+		   "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+		   "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n"
+		   "\t\tParameters: '<iface>'\n"
+		   "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n"
+		   "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+		   "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n"
+		   "\t\tParameters: '<iface> <ip4_next_hop>'\n"
+		   "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n"
+		   "\t\tParameters: '<ip6_fib_table>'\n"
+		   "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n"
+		   "\t\tParameters: '<ip4_fib_table>'\n");
+  vlib_cli_output (vm, "Plugin behaviors:\n");
+  for (i = 0; i < vec_len (plugins_vec); i++)
+    {
+      plugin = plugins_vec[i];
+      vlib_cli_output (vm, "\t%s\t-> %s.\n", plugin->keyword_str,
+		       plugin->def_str);
+      vlib_cli_output (vm, "\t\tParameters: '%s'\n", plugin->params_str);
+    }
+  vec_free (plugins_vec);	/* temporary vector, previously leaked */
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_localsid_behaviors_command, static) = {
+  .path = "show sr localsids behaviors",	/* command path typed at the CLI */
+  .short_help = "show sr localsids behaviors",	/* usage string */
+  .function = show_sr_localsid_behaviors_command_fn,	/* lists behaviors */
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief SR LocalSID init: creates the hashes and registers both DPO types
+ */
+clib_error_t *
+sr_localsids_init (vlib_main_t * vm)
+{
+  /* Localsid index hash (presumably keyed by localsid address - confirm) */
+  ip6_sr_main_t *sm = &sr_main;
+  mhash_init (&sm->sr_localsids_index_hash, sizeof (uword),
+	      sizeof (ip6_address_t));
+  /* DPO type whose IPv6 node is "sr-localsid" */
+  sr_localsid_dpo_type = dpo_register_new_type (&sr_loc_vft, sr_loc_nodes);
+  /* DPO type whose IPv6 node is "sr-localsid-d" */
+  sr_localsid_d_dpo_type =
+    dpo_register_new_type (&sr_loc_vft, sr_loc_d_nodes);
+  /* String-keyed hash: plugin function name -> plugin_functions index */
+  sm->plugin_functions_by_key = hash_create_string (0, sizeof (uword));
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (sr_localsids_init);
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/sr_localsid.md b/src/vnet/srv6/sr_localsid.md
new file mode 100644
index 00000000..340af4a3
--- /dev/null
+++ b/src/vnet/srv6/sr_localsid.md
@@ -0,0 +1,58 @@
+# SR LocalSIDs    {#srv6_localsid_doc}
+
+A local SID is associated to a Segment Routing behavior -or function- on the current node.
+
+The most basic behavior is called END. It simply activates the next SID in the current packet, by decrementing the Segments Left value and updating the IPv6 DA.
+
+A local END SID is instantiated using the following CLI:
+
+    sr localsid (del) address XX::YY behavior end
+
+This creates a new entry in the main FIB for IPv6 address XX::YY. All packets whose IPv6 DA matches this FIB entry are redirected to the sr-localsid node, where they are processed as described above.
+
+Other examples of local SIDs are the following:
+
+    sr localsid (del) address XX::YY behavior end
+    sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a
+    sr localsid (del) address XX::YY behavior end.dx6 GE0/1/0 2001::a
+    sr localsid (del) address XX::YY behavior end.dx4 GE0/1/0 10.0.0.1
+    sr localsid (del) address XX::YY behavior end.dx2 GigabitE0/11/0
+    sr localsid (del) address XX::YY behavior end.dt6 5
+    sr localsid (del) address XX::YY behavior end.dt4 5
+
+Note that all of these behaviors match the definitions of the SRv6 architecture (*draft-filsfils-spring-srv6-network-programming*). Please refer to this document for a detailed description of each behavior.
+
+Note also that you can configure the PSP flavor of the End and End.X behaviors by typing:
+    
+    sr localsid (del) address XX::YY behavior end psp
+    sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a psp
+
+Help on the available local SID behaviors and their usage can be obtained with:
+    
+    help sr localsid
+
+Alternatively, they can be obtained using:
+
+    show sr localsids behaviors
+
+The difference between those two commands is that the first one only displays the SR LocalSID behaviors that are built into VPP, while the latter displays those behaviors plus the ones added with the SR LocalSID Development Framework.
+
+
+VPP keeps a 'My LocalSID Table' where it stores all the SR local SIDs instantiated as well as their parameters. Every time a new local SID is instantiated, a new entry is added to this table. In addition, counters for correctly and incorrectly processed traffic are maintained for each local SID. The counters store both the number of packets and bytes.
+
+The contents of the 'My LocalSID Table' are shown with:
+
+    vpp# show sr localsids
+    SRv6 - My LocalSID Table:
+    =========================
+            Address:        c3::1
+            Behavior:       DX6 (Endpoint with decapsulation and IPv6 cross-connect)
+            Iface:          GigabitEthernet0/5/0
+            Next hop:       b:c3::b
+            Good traffic:   [51277 packets : 5332808 bytes]
+            Bad traffic:    [0 packets : 0 bytes]
+    --------------------
+
+The traffic counters can be reset with:
+
+    vpp# clear sr localsid counters
diff --git a/src/vnet/srv6/sr_packet.h b/src/vnet/srv6/sr_packet.h
new file mode 100755
index 00000000..7af4ad4d
--- /dev/null
+++ b/src/vnet/srv6/sr_packet.h
@@ -0,0 +1,159 @@
+#ifndef included_vnet_sr_packet_h
+#define included_vnet_sr_packet_h
+
+#include <vnet/ip/ip.h>
+
+/*
+ * ipv6 segment-routing header format
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ *   The Segment Routing Header (SRH) is defined as follows:
+ *
+ *    0                   1                   2                   3
+ *    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *   | Next Header   |  Hdr Ext Len  | Routing Type  | Segments Left |
+ *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *   | First Segment |     Flags     |           RESERVED            |
+ *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *   |                                                               |
+ *   |            Segment List[0] (128 bits IPv6 address)            |
+ *   |                                                               |
+ *   |                                                               |
+ *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *   |                                                               |
+ *   |                                                               |
+ *                                 ...
+ *   |                                                               |
+ *   |                                                               |
+ *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *   |                                                               |
+ *   |            Segment List[n] (128 bits IPv6 address)            |
+ *   |                                                               |
+ *   |                                                               |
+ *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *   //                                                             //
+ *   //         Optional Type Length Value objects (variable)       //
+ *   //                                                             //
+ *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ *   where:
+ *
+ *   o  Next Header: 8-bit selector.  Identifies the type of header
+ *      immediately following the SRH.
+ *
+ *   o  Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH
+ *      header in 8-octet units, not including the first 8 octets.
+ *
+ *   o  Routing Type: TBD, to be assigned by IANA (suggested value: 4).
+ *
+ *   o  Segments Left.  Defined in [RFC2460], it contains the index, in
+ *      the Segment List, of the next segment to inspect.  Segments Left
+ *      is decremented at each segment.
+ *
+ *   o  First Segment: contains the index, in the Segment List, of the
+ *      first segment of the path which is in fact the last element of the
+ *      Segment List.
+ *
+ *   o  Flags: 8 bits of flags.  Following flags are defined:
+ *
+ *         0 1 2 3 4 5 6 7
+ *        +-+-+-+-+-+-+-+-+
+ *        |U|P|O|A|H|  U  |
+ *        +-+-+-+-+-+-+-+-+
+ *
+ *        U: Unused and for future use.  SHOULD be unset on transmission
+ *        and MUST be ignored on receipt.
+ *
+ *        P-flag: Protected flag.  Set when the packet has been rerouted
+ *        through FRR mechanism by an SR endpoint node.
+ *
+ *        O-flag: OAM flag.  When set, it indicates that this packet is
+ *        an operations and management (OAM) packet.
+ *
+ *        A-flag: Alert flag.  If present, it means important Type Length
+ *        Value (TLV) objects are present.  See Section 3.1 for details
+ *        on TLVs objects.
+ *
+ *        H-flag: HMAC flag.  If set, the HMAC TLV is present and is
+ *        encoded as the last TLV of the SRH.  In other words, the last
+ *        36 octets of the SRH represent the HMAC information.  See
+ *        Section 3.1.5 for details on the HMAC TLV.
+ *
+ *   o  RESERVED: SHOULD be unset on transmission and MUST be ignored on
+ *      receipt.
+ *
+ *   o  Segment List[n]: 128 bit IPv6 addresses representing the nth
+ *      segment in the Segment List.  The Segment List is encoded starting
+ *      from the last segment of the path.  I.e., the first element of the
+ *      segment list (Segment List [0]) contains the last segment of the
+ *      path while the last segment of the Segment List (Segment List[n])
+ *      contains the first segment of the path.  The index contained in
+ *      "Segments Left" identifies the current active segment.
+ *
+ *   o  Type Length Value (TLV) are described in Section 3.1.
+ *
+ */
+
+#ifndef IPPROTO_IPV6_ROUTE
+#define IPPROTO_IPV6_ROUTE        43
+#endif
+
+#define ROUTING_HEADER_TYPE_SR    4
+
+typedef struct
+{
+  /* Protocol number of the header that follows the SRH */
+  u8 protocol;
+  /*
+   * Length of routing header in 8 octet units,
+   * not including the first 8 octets
+   */
+  u8 length;
+
+  /* Type of routing header; ROUTING_HEADER_TYPE_SR (4) = segment routing */
+  u8 type;
+
+  /* Index, in the segment list, of the next segment to inspect */
+  u8 segments_left;
+
+  /* Index, in the segment list, of the first segment of the path */
+  u8 first_segment;
+
+  /* Flag bits. NOTE(review): diagram above puts H at bit 4; confirm 0x80 */
+#define IP6_SR_HEADER_FLAG_PROTECTED  (0x40)
+#define IP6_SR_HEADER_FLAG_OAM        (0x20)
+#define IP6_SR_HEADER_FLAG_ALERT      (0x10)
+#define IP6_SR_HEADER_FLAG_HMAC       (0x80)
+
+  /* values 0x0, 0x4 - 0x7 are reserved */
+  u8 flags;
+  u16 reserved;
+
+  /* Segment list entries; variable length, trailing the fixed header */
+  ip6_address_t segments[0];
+} __attribute__ ((packed)) ip6_sr_header_t;
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
+
+#endif /* included_vnet_sr_packet_h */
diff --git a/src/vnet/srv6/sr_policy.md b/src/vnet/srv6/sr_policy.md
new file mode 100644
index 00000000..521b8461
--- /dev/null
+++ b/src/vnet/srv6/sr_policy.md
@@ -0,0 +1,56 @@
+# Creating a SR Policy    {#srv6_policy_doc}
+
+An SR Policy is defined by a Binding SID and a weighted set of Segment Lists.
+
+A new SR policy is created with a first SID list using:
+
+    sr policy add bsid 2001::1 next A1:: next B1:: next C1:: (weight 5) (fib-table 3)
+
+* The weight parameter is only used if more than one SID list is associated with the policy.
+* The fib-table parameter specifies in which table (VRF) the Binding SID is to be installed.
+
+An SR policy is deleted with:
+
+    sr policy del bsid 2001::1
+    sr policy del index 1
+
+The existing SR policies are listed with:
+
+    show sr policies
+
+## Adding/Removing SID Lists from an SR policy
+
+An additional SID list is associated with an existing SR policy with:
+
+    sr policy mod bsid 2001::1 add sl next A2:: next B2:: next C2:: (weight 3)
+    sr policy mod index 3      add sl next A2:: next B2:: next C2:: (weight 3)
+
+Conversely, a SID list can be removed from an SR policy with:
+
+    sr policy mod bsid 2001::1 del sl index 1
+    sr policy mod index 3      del sl index 1
+
+Note that this cannot be used to remove the last SID list of a policy.
+
+The weight of a SID list can also be modified with:
+
+    sr policy mod bsid 2001::1 mod sl index 1 weight 4
+    sr policy mod index 3      mod sl index 1 weight 4
+
+## SR Policies: Spray policies
+
+Spray policies are a specific type of SR policies where the packet is replicated on all the SID lists, rather than load-balanced among them.
+
+SID list weights are ignored for this type of policy.
+
+A Spray policy is instantiated by appending the keyword **spray** to a regular SR policy command, as in:
+
+    sr policy add bsid 2001::1 next A1:: next B1:: next C1:: spray
+
+Spray policies are used for removing multicast state from a network core domain, and instead send a linear unicast copy to every access node. The last SID in each list accesses the multicast tree within the access node.  
+
+## Encapsulation SR policies
+
+If the user creates an SR policy that encapsulates traffic, an IPv6 source address must be specified for the encapsulated traffic. To do so, use the following command:
+    
+    set sr encaps source addr XXXX::YYYY
diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c
new file mode 100755
index 00000000..7a37a66b
--- /dev/null
+++ b/src/vnet/srv6/sr_policy_rewrite.c
@@ -0,0 +1,3227 @@
+/*
+ * sr_policy_rewrite.c: ipv6 sr policy creation
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief SR policy creation and application
+ *
+ * Create an SR policy.
+ * An SR policy can be either of 'default' type or 'spray' type
+ * An SR policy has attached a list of SID lists.
+ * In case the SR policy is a default one it will load balance among them.
+ * An SR policy has associated a BindingSID.
+ * In case any packet arrives with IPv6 DA == BindingSID then the SR policy
+ * associated to such bindingSID will be applied to such packet.
+ *
+ * SR policies can be applied either by using IPv6 encapsulation or
+ * SRH insertion. Both methods can be found in this file.
+ *
+ * Traffic input usually is IPv6 packets. However it is possible to have
+ * IPv4 packets or L2 frames. (that are encapsulated into IPv6 with SRH)
+ *
+ * This file provides the appropriate VPP graph nodes to implement each of
+ * these methods.
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+/**
+ * @brief SR policy rewrite trace record: one source, one destination address
+ */
+typedef struct
+{
+  ip6_address_t src, dst;
+} sr_policy_rewrite_trace_t;
+
+/* Graph arcs: X-macro list of (next-index symbol, next node name) pairs */
+#define foreach_sr_policy_rewrite_next     \
+_(IP6_LOOKUP, "ip6-lookup")         \
+_(ERROR, "error-drop")
+
+typedef enum
+{
+#define _(s,n) SR_POLICY_REWRITE_NEXT_##s,
+  foreach_sr_policy_rewrite_next
+#undef _
+    SR_POLICY_REWRITE_N_NEXT,
+} sr_policy_rewrite_next_t;
+
+/* SR rewrite errors/counters: X-macro list of (symbol, description) pairs */
+#define foreach_sr_policy_rewrite_error                     \
+_(INTERNAL_ERROR, "Segment Routing undefined error")        \
+_(BSID_ZERO, "BSID with SL = 0")                            \
+_(COUNTER_TOTAL, "SR steered IPv6 packets")                 \
+_(COUNTER_ENCAP, "SR: Encaps packets")                      \
+_(COUNTER_INSERT, "SR: SRH inserted packets")               \
+_(COUNTER_BSID, "SR: BindingSID steered packets")
+
+typedef enum
+{
+#define _(sym,str) SR_POLICY_REWRITE_ERROR_##sym,
+  foreach_sr_policy_rewrite_error
+#undef _
+    SR_POLICY_REWRITE_N_ERROR,
+} sr_policy_rewrite_error_t;
+
+static char *sr_policy_rewrite_error_strings[] = {
+#define _(sym,string) string,
+  foreach_sr_policy_rewrite_error
+#undef _
+};
+
+/**
+ * @brief Dynamically added SR SL DPO types (encaps/insert, plain and BSID)
+ */
+static dpo_type_t sr_pr_encaps_dpo_type;
+static dpo_type_t sr_pr_insert_dpo_type;
+static dpo_type_t sr_pr_bsid_encaps_dpo_type;
+static dpo_type_t sr_pr_bsid_insert_dpo_type;
+
+/**
+ * @brief IPv6 SA for encapsulated packets (see "set sr encaps source")
+ */
+static ip6_address_t sr_pr_encaps_src;
+
+/******************* SR rewrite set encaps IPv6 source addr *******************/
+/* Note:  This is temporary. We don't know whether to follow this path or
+          take the ip address of a loopback interface or even the OIF         */
+
+static clib_error_t *
+set_sr_src_command_fn (vlib_main_t * vm, unformat_input_t * input,
+		       vlib_cli_command_t * cmd)
+{
+  /* Only the first token is examined: either it parses as
+   * "addr <ip6_addr>" (success) or the command is rejected. */
+  if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+    return clib_error_return (0, "No address specified");
+
+  if (!unformat (input, "addr %U", unformat_ip6_address, &sr_pr_encaps_src))
+    return clib_error_return (0, "No address specified");
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_sr_src_command, static) = {
+  .path = "set sr encaps source",
+  .short_help = "set sr encaps source addr <ip6_addr>",
+  .function = set_sr_src_command_fn,
+};
+/* *INDENT-ON* */
+
+/*********************** SR rewrite string computation ************************/
+/**
+ * @brief SR rewrite string computation for IPv6 encapsulation (inline)
+ *
+ * @param sl is a non-empty vector of IPv6 addresses (the Segment List)
+ *
+ * @return outer IPv6 header, followed by an SRH only if sl has >1 segment
+ */
+static inline u8 *
+compute_rewrite_encaps (ip6_address_t * sl)
+{
+  ip6_header_t *iph;
+  ip6_sr_header_t *srh;
+  ip6_address_t *addrp, *this_address;
+  u32 header_length = IPv6_DEFAULT_HEADER_LENGTH;
+  u8 *rs = NULL;
+
+  if (vec_len (sl) > 1)
+    header_length +=
+      sizeof (ip6_sr_header_t) + vec_len (sl) * sizeof (ip6_address_t);
+
+  vec_validate (rs, header_length - 1);
+
+  iph = (ip6_header_t *) rs;
+  iph->ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 (0 | ((6 & 0xF) << 28));
+  iph->src_address.as_u64[0] = sr_pr_encaps_src.as_u64[0];
+  iph->src_address.as_u64[1] = sr_pr_encaps_src.as_u64[1];
+  iph->payload_length = header_length - IPv6_DEFAULT_HEADER_LENGTH;
+  iph->protocol = IP_PROTOCOL_IPV6;
+  iph->hop_limit = IPv6_DEFAULT_HOP_LIMIT;
+  iph->dst_address.as_u64[0] = sl->as_u64[0];
+  iph->dst_address.as_u64[1] = sl->as_u64[1];
+
+  /* Single segment: rs has no room for an SRH, writing one would overflow */
+  if (vec_len (sl) <= 1)
+    return rs;
+
+  srh = (ip6_sr_header_t *) (iph + 1);
+  iph->protocol = IP_PROTOCOL_IPV6_ROUTE;
+  srh->protocol = IP_PROTOCOL_IPV6;
+  srh->type = ROUTING_HEADER_TYPE_SR;
+  srh->segments_left = vec_len (sl) - 1;
+  srh->first_segment = vec_len (sl) - 1;
+  srh->length = ((sizeof (ip6_sr_header_t) +
+		  (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1;
+  srh->flags = 0x00;
+  srh->reserved = 0x00;
+  addrp = srh->segments + vec_len (sl) - 1;
+  vec_foreach (this_address, sl)
+  {
+    clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+    addrp--;
+  }
+  return rs;
+}
+
+/**
+ * @brief Builds the precomputed SRH used for SRH insertion (inline)
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return SRH rewrite string with vec_len (sl) + 1 segment slots
+ */
+static inline u8 *
+compute_rewrite_insert (ip6_address_t * sl)
+{
+  u32 n_sids = vec_len (sl);
+  u32 srh_len;
+  u32 i;
+  ip6_sr_header_t *hdr;
+  u8 *rewrite = NULL;
+
+  /* Fixed SRH plus n_sids + 1 segment slots */
+  srh_len = sizeof (ip6_sr_header_t);
+  srh_len += (n_sids + 1) * sizeof (ip6_address_t);
+  vec_validate (rewrite, srh_len - 1);
+
+  hdr = (ip6_sr_header_t *) rewrite;
+  hdr->type = ROUTING_HEADER_TYPE_SR;
+  hdr->length = (srh_len / 8) - 1;
+  hdr->segments_left = n_sids;
+  hdr->first_segment = n_sids;
+  hdr->flags = 0x00;
+  hdr->reserved = 0x0000;
+
+  /* Reverse order: sl[0] lands in the highest slot, sl[n_sids - 1] in
+   * slot 1; slot 0 is not written here */
+  for (i = 0; i < n_sids; i++)
+    clib_memcpy (hdr->segments[n_sids - i].as_u8, sl[i].as_u8,
+		 sizeof (ip6_address_t));
+
+  return rewrite;
+}
+
+/**
+ * @brief Builds the precomputed SRH for SRH insertion via a BSID (inline)
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return SRH rewrite string with exactly vec_len (sl) segment slots
+ */
+static inline u8 *
+compute_rewrite_bsid (ip6_address_t * sl)
+{
+  u32 n_sids = vec_len (sl);
+  u32 srh_len;
+  u32 i;
+  ip6_sr_header_t *hdr;
+  u8 *rewrite = NULL;
+
+  /* Fixed SRH plus exactly one segment slot per SID */
+  srh_len = sizeof (ip6_sr_header_t);
+  srh_len += n_sids * sizeof (ip6_address_t);
+  vec_validate (rewrite, srh_len - 1);
+
+  hdr = (ip6_sr_header_t *) rewrite;
+  hdr->type = ROUTING_HEADER_TYPE_SR;
+  hdr->length = (srh_len / 8) - 1;
+  hdr->segments_left = n_sids - 1;
+  hdr->first_segment = n_sids - 1;
+  hdr->flags = 0x00;
+  hdr->reserved = 0x0000;
+
+  /* Reverse order: sl[0] goes into the last slot and sl[n_sids - 1]
+   * into slot 0 */
+  for (i = 0; i < n_sids; i++)
+    clib_memcpy (hdr->segments[n_sids - 1 - i].as_u8, sl[i].as_u8,
+		 sizeof (ip6_address_t));
+
+  return rewrite;
+}
+
+/***************************  SR LB helper functions **************************/
+/**
+ * @brief Creates a Segment List and adds it to an SR policy
+ *
+ * Creates a Segment List and adds it to the SR policy. Notice that the SL are
+ * not necessarily unique. Hence there might be two Segment List within the
+ * same SR Policy with exactly the same segments and same weight.
+ *
+ * @param sr_policy is the SR policy where the SL will be added
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ * @param weight is the weight of the SegmentList (for load-balancing purposes)
+ * @param is_encap represents the mode (SRH insertion vs Encapsulation)
+ *
+ * @return pointer to the just created segment list
+ */
+static inline ip6_sr_sl_t *
+create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight,
+	   u8 is_encap)
+{
+  ip6_sr_main_t *srm = &sr_main;
+  ip6_sr_sl_t *new_sl;
+  u32 new_sl_index;
+  dpo_type_t fwd_type, bsid_type;
+
+  /* Take a zeroed SID list from the pool and attach it to the policy */
+  pool_get (srm->sid_lists, new_sl);
+  memset (new_sl, 0, sizeof (*new_sl));
+  new_sl_index = new_sl - srm->sid_lists;
+  vec_add1 (sr_policy->segments_lists, new_sl_index);
+
+  /* (u32) ~0 means "no weight given": fall back to the default weight */
+  if (weight == (u32) ~ 0)
+    new_sl->weight = SR_SEGMENT_LIST_WEIGHT_DEFAULT;
+  else
+    new_sl->weight = weight;
+  new_sl->segments = vec_dup (sl);
+
+  /* Precompute the rewrite strings and pick the matching DPO types */
+  if (is_encap)
+    {
+      /* Encapsulation shares one rewrite string for both entry points */
+      new_sl->rewrite = compute_rewrite_encaps (sl);
+      new_sl->rewrite_bsid = new_sl->rewrite;
+      fwd_type = sr_pr_encaps_dpo_type;
+      bsid_type = sr_pr_bsid_encaps_dpo_type;
+    }
+  else
+    {
+      new_sl->rewrite = compute_rewrite_insert (sl);
+      new_sl->rewrite_bsid = compute_rewrite_bsid (sl);
+      fwd_type = sr_pr_insert_dpo_type;
+      bsid_type = sr_pr_bsid_insert_dpo_type;
+    }
+
+  /* Reset the three DPOs, then point them at this SID list's pool index */
+  dpo_reset (&new_sl->bsid_dpo);
+  dpo_reset (&new_sl->ip6_dpo);
+  dpo_reset (&new_sl->ip4_dpo);
+
+  dpo_set (&new_sl->ip6_dpo, fwd_type, DPO_PROTO_IP6, new_sl_index);
+  /* Only encapsulation policies get an IPv4 DPO */
+  if (is_encap)
+    dpo_set (&new_sl->ip4_dpo, fwd_type, DPO_PROTO_IP4, new_sl_index);
+  dpo_set (&new_sl->bsid_dpo, bsid_type, DPO_PROTO_IP6, new_sl_index);
+
+  return new_sl;
+}
+
+/**
+ * @brief Rebuilds the load-balance DPOs of an SR Policy from its SID lists
+ *
+ * @param sr_policy is the modified SR Policy
+ */
+static inline void
+update_lb (ip6_sr_policy_t * sr_policy)
+{
+  flow_hash_config_t fhc;
+  u32 *sl_index;
+  ip6_sr_sl_t *segment_list;
+  ip6_sr_main_t *sm = &sr_main;
+  load_balance_path_t path;
+  path.path_index = FIB_NODE_INDEX_INVALID;
+  load_balance_path_t *ip4_path_vector = 0;
+  load_balance_path_t *ip6_path_vector = 0;
+  load_balance_path_t *b_path_vector = 0;
+
+  /* bsid_dpo not set yet: create the LB DPOs and install the FIB entries */
+  if (!dpo_id_is_valid (&sr_policy->bsid_dpo))
+    {
+      fib_prefix_t pfx = {
+	.fp_proto = FIB_PROTOCOL_IP6,
+	.fp_len = 128,
+	.fp_addr = {
+		    .ip6 = sr_policy->bsid,
+		    }
+      };
+
+      /* Flow hash config of the policy's table, reused for every LB below */
+      fhc = fib_table_get_flow_hash_config (sr_policy->fib_table,
+					    dpo_proto_to_fib (DPO_PROTO_IP6));
+
+      dpo_set (&sr_policy->bsid_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6,
+	       load_balance_create (0, DPO_PROTO_IP6, fhc));
+
+      dpo_set (&sr_policy->ip6_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6,
+	       load_balance_create (0, DPO_PROTO_IP6, fhc));
+
+      /* BSID /128 -> LB DPO in the policy's table and in sm->fib_table_ip6 */
+      fib_table_entry_special_dpo_update (fib_table_find (FIB_PROTOCOL_IP6,
+							  sr_policy->fib_table),
+					  &pfx, FIB_SOURCE_SR,
+					  FIB_ENTRY_FLAG_EXCLUSIVE,
+					  &sr_policy->bsid_dpo);
+
+      fib_table_entry_special_dpo_update (sm->fib_table_ip6,
+					  &pfx,
+					  FIB_SOURCE_SR,
+					  FIB_ENTRY_FLAG_EXCLUSIVE,
+					  &sr_policy->ip6_dpo);
+
+      if (sr_policy->is_encap)
+	{
+	  dpo_set (&sr_policy->ip4_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP4,
+		   load_balance_create (0, DPO_PROTO_IP4, fhc));
+
+	  fib_table_entry_special_dpo_update (sm->fib_table_ip4,
+					      &pfx,
+					      FIB_SOURCE_SR,
+					      FIB_ENTRY_FLAG_EXCLUSIVE,
+					      &sr_policy->ip4_dpo);
+	}
+
+    }
+
+  /* Create the LB path vectors: bsid, ip6 and (encap only) ip4 */
+  /* One path per SID list, each carrying that SID list's weight */
+  vec_foreach (sl_index, sr_policy->segments_lists)
+  {
+    segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+    path.path_dpo = segment_list->bsid_dpo;
+    path.path_weight = segment_list->weight;
+    vec_add1 (b_path_vector, path);
+    path.path_dpo = segment_list->ip6_dpo;
+    vec_add1 (ip6_path_vector, path);
+    if (sr_policy->is_encap)
+      {
+	path.path_dpo = segment_list->ip4_dpo;
+	vec_add1 (ip4_path_vector, path);
+      }
+  }
+
+  /* Push the new path sets into the policy's LB DPOs */
+  load_balance_multipath_update (&sr_policy->bsid_dpo, b_path_vector,
+				 LOAD_BALANCE_FLAG_NONE);
+  load_balance_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector,
+				 LOAD_BALANCE_FLAG_NONE);
+  if (sr_policy->is_encap)
+    load_balance_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector,
+				   LOAD_BALANCE_FLAG_NONE);
+
+  /* Free the local path vectors */
+  vec_free (b_path_vector);
+  vec_free (ip6_path_vector);
+  vec_free (ip4_path_vector);
+
+}
+
+/**
+ * @brief Rebuilds the replicate DPOs of a spray SR Policy from its SID lists
+ *
+ * @param sr_policy is the modified SR Policy (type spray)
+ */
+static inline void
+update_replicate (ip6_sr_policy_t * sr_policy)
+{
+  u32 *sl_index;
+  ip6_sr_sl_t *segment_list;
+  ip6_sr_main_t *sm = &sr_main;
+  load_balance_path_t path;
+  path.path_index = FIB_NODE_INDEX_INVALID;
+  load_balance_path_t *b_path_vector = 0;
+  load_balance_path_t *ip6_path_vector = 0;
+  load_balance_path_t *ip4_path_vector = 0;
+
+  /* bsid_dpo not set yet: create replicate DPOs and install FIB entries */
+  if (!dpo_id_is_valid (&sr_policy->bsid_dpo))
+    {
+      dpo_set (&sr_policy->bsid_dpo, DPO_REPLICATE,
+	       DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6));
+
+      dpo_set (&sr_policy->ip6_dpo, DPO_REPLICATE,
+	       DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6));
+
+      /* BSID /128 -> replicate DPO in the policy's table and fib_table_ip6 */
+      fib_prefix_t pfx = {
+	.fp_proto = FIB_PROTOCOL_IP6,
+	.fp_len = 128,
+	.fp_addr = {
+		    .ip6 = sr_policy->bsid,
+		    }
+      };
+      fib_table_entry_special_dpo_update (fib_table_find (FIB_PROTOCOL_IP6,
+							  sr_policy->fib_table),
+					  &pfx, FIB_SOURCE_SR,
+					  FIB_ENTRY_FLAG_EXCLUSIVE,
+					  &sr_policy->bsid_dpo);
+
+      fib_table_entry_special_dpo_update (sm->fib_table_ip6,
+					  &pfx,
+					  FIB_SOURCE_SR,
+					  FIB_ENTRY_FLAG_EXCLUSIVE,
+					  &sr_policy->ip6_dpo);
+
+      if (sr_policy->is_encap)
+	{
+	  dpo_set (&sr_policy->ip4_dpo, DPO_REPLICATE, DPO_PROTO_IP4,
+		   replicate_create (0, DPO_PROTO_IP4));
+
+	  fib_table_entry_special_dpo_update (sm->fib_table_ip4,
+					      &pfx,
+					      FIB_SOURCE_SR,
+					      FIB_ENTRY_FLAG_EXCLUSIVE,
+					      &sr_policy->ip4_dpo);
+	}
+
+    }
+
+  /* One path per SID list; SID list weights are ignored (always 1) */
+  path.path_weight = 1;
+  vec_foreach (sl_index, sr_policy->segments_lists)
+  {
+    segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+    path.path_dpo = segment_list->bsid_dpo;
+    vec_add1 (b_path_vector, path);
+    path.path_dpo = segment_list->ip6_dpo;
+    vec_add1 (ip6_path_vector, path);
+    if (sr_policy->is_encap)
+      {
+	path.path_dpo = segment_list->ip4_dpo;
+	vec_add1 (ip4_path_vector, path);
+      }
+  }
+
+  /* Update replicate multipath; NOTE(review): path vectors not freed here */
+  replicate_multipath_update (&sr_policy->bsid_dpo, b_path_vector);
+  replicate_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector);
+  if (sr_policy->is_encap)
+    replicate_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector);
+}
+
+/******************************* SR rewrite API *******************************/
+/* Three functions for handling sr policies:
+ *   -> sr_policy_add
+ *   -> sr_policy_del
+ *   -> sr_policy_mod
+ * All of them are API. CLI function on sr_policy_command_fn                  */
+
+/**
+ * @brief Create a new SR policy with a first SID list
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param segments is a vector of IPv6 address composing the segment list
+ * @param weight is the weight of the sid list; (u32) ~0 selects the default
+ * @param behavior is the behavior of the SR policy. (default//spray)
+ * @param fib_table is the VRF for the BSID FIB entry; (u32) ~0 selects 0
+ * @param is_encap (bool) whether SR policy should behave as Encap/SRH Insertion
+ *
+ * @return 0 if correct, -12 if the BSID is already in use, -13 if no such FIB
+ */
+int
+sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
+	       u32 weight, u8 behavior, u32 fib_table, u8 is_encap)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  ip6_sr_policy_t *sr_policy = 0;
+  uword *p;
+
+  /* Search for existing keys (BSID) */
+  p = mhash_get (&sm->sr_policies_index_hash, bsid);
+  if (p)
+    {
+      /* Add SR policy that already exists; complain */
+      return -12;
+    }
+
+  /* Search collision in FIB entries */
+  /* Explanation: It might be possible that some other entity has already
+   * created a route for the BSID. This in theory is impossible, but in
+   * practise we could see it. Assert it and scream if needed */
+  fib_prefix_t pfx = {
+    .fp_proto = FIB_PROTOCOL_IP6,
+    .fp_len = 128,
+    .fp_addr = {
+		.ip6 = *bsid,
+		}
+  };
+
+  /* Lookup the FIB index associated to the table selected */
+  u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6,
+				  (fib_table != (u32) ~ 0 ? fib_table : 0));
+  if (fib_index == ~0)
+    return -13;
+
+  /* Lookup whether there exists an entry for the BSID */
+  fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx);
+  if (FIB_NODE_INDEX_INVALID != fei)
+    return -12;			//An exact-match FIB entry already exists
+
+  /* Add an SR policy object */
+  pool_get (sm->sr_policies, sr_policy);
+  memset (sr_policy, 0, sizeof (*sr_policy));
+  clib_memcpy (&sr_policy->bsid, bsid, sizeof (ip6_address_t));
+  sr_policy->type = behavior;
+  sr_policy->fib_table = (fib_table != (u32) ~ 0 ? fib_table : 0);	//(u32) ~0 falls back to table 0
+  sr_policy->is_encap = is_encap;
+
+  /* Index the new policy by its BSID */
+  mhash_set (&sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies,
+	     NULL);
+
+  /* Create a segment list and add the index to the SR policy */
+  create_sl (sr_policy, segments, weight, is_encap);
+
+  /* Create both steering FIBs on first use (both use FIB_PROTOCOL_IP6) */
+  if (sm->fib_table_ip6 == (u32) ~ 0)
+    {
+      sm->fib_table_ip6 = fib_table_create_and_lock (FIB_PROTOCOL_IP6,
+						     "SRv6 steering of IP6 prefixes through BSIDs");
+      sm->fib_table_ip4 = fib_table_create_and_lock (FIB_PROTOCOL_IP6,
+						     "SRv6 steering of IP4 prefixes through BSIDs");
+    }
+
+  /* Build the LB or replicate DPOs and install the BSID FIB entries */
+  if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+    update_lb (sr_policy);
+  else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
+    update_replicate (sr_policy);
+  return 0;
+}
+
+/**
+ * @brief Delete a SR policy, its FIB entries and all of its SID lists
+ *
+ * @param bsid is the bindingSID of the SR Policy (NULL to select by index)
+ * @param index is the index of the SR policy (only used when bsid is NULL)
+ *
+ * @return 0 if correct, -1 if no such SR policy exists
+ */
+int
+sr_policy_del (ip6_address_t * bsid, u32 index)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  ip6_sr_policy_t *sr_policy = 0;
+  ip6_sr_sl_t *segment_list;
+  u32 *sl_index;
+  uword *p;
+
+  /* Resolve the policy index, either from the BSID or as given */
+  if (bsid)
+    {
+      p = mhash_get (&sm->sr_policies_index_hash, bsid);
+      if (!p)
+	return -1;
+      index = p[0];
+    }
+  else if (pool_is_free_index (sm->sr_policies, index))
+    return -1;			/* pool_elt_at_index never returns NULL */
+
+  sr_policy = pool_elt_at_index (sm->sr_policies, index);
+
+  /* Remove BindingSID FIB entry */
+  fib_prefix_t pfx = {
+    .fp_proto = FIB_PROTOCOL_IP6,
+    .fp_len = 128,
+    .fp_addr = {
+		.ip6 = sr_policy->bsid,
+		},
+  };
+
+  fib_table_entry_special_remove (fib_table_find (FIB_PROTOCOL_IP6,
+						  sr_policy->fib_table),
+				  &pfx, FIB_SOURCE_SR);
+
+  fib_table_entry_special_remove (sm->fib_table_ip6, &pfx, FIB_SOURCE_SR);
+
+  if (sr_policy->is_encap)
+    fib_table_entry_special_remove (sm->fib_table_ip4, &pfx, FIB_SOURCE_SR);
+
+  if (dpo_id_is_valid (&sr_policy->bsid_dpo))
+    {
+      dpo_reset (&sr_policy->bsid_dpo);
+      dpo_reset (&sr_policy->ip4_dpo);
+      dpo_reset (&sr_policy->ip6_dpo);
+    }
+
+  /* Clean SID Lists */
+  vec_foreach (sl_index, sr_policy->segments_lists)
+  {
+    segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+    vec_free (segment_list->segments);
+    vec_free (segment_list->rewrite);
+    /* In encap mode rewrite_bsid aliases rewrite: do not free it twice */
+    if (!sr_policy->is_encap)
+      vec_free (segment_list->rewrite_bsid);
+    pool_put_index (sm->sid_lists, *sl_index);
+  }
+  vec_free (sr_policy->segments_lists);
+
+  /* Remove SR policy entry */
+  mhash_unset (&sm->sr_policies_index_hash, &sr_policy->bsid, NULL);
+  pool_put (sm->sr_policies, sr_policy);
+
+  /* If FIB empty unlock it */
+  if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies))
+    {
+      fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6);
+      fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6);
+      sm->fib_table_ip6 = (u32) ~ 0;
+      sm->fib_table_ip4 = (u32) ~ 0;
+    }
+
+  return 0;
+}
+
+/**
+ * @brief Modify an existing SR policy
+ *
+ * The possible modifications are adding a new Segment List, deleting a
+ * Segment List, or changing the weight of an existing Segment List.
+ *
+ * @param bsid is the bindingSID of the SR Policy (NULL to select by index)
+ * @param index is the index of the SR policy (only used when bsid is NULL)
+ * @param fib_table is currently unused by this function
+ * @param operation 1 = add a SID list, 2 = delete one, 3 = modify a weight
+ * @param segments is a vector of IPv6 address composing the segment list
+ * @param sl_index is the index of the Segment List to modify/delete
+ * @param weight is the weight of the sid list. optional.
+ *
+ * @return 0 if correct; -1 no such policy or bad operation; -21 cannot
+ *         delete the last SID list; -22/-32 SID list not in this policy
+ */
+int
+sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
+	       u8 operation, ip6_address_t * segments, u32 sl_index,
+	       u32 weight)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  ip6_sr_policy_t *sr_policy = 0;
+  ip6_sr_sl_t *segment_list;
+  u32 *sl_index_iterate;
+  uword *p;
+
+  /* Resolve the policy index, either from the BSID or as given */
+  if (bsid)
+    {
+      p = mhash_get (&sm->sr_policies_index_hash, bsid);
+      if (!p)
+	return -1;
+      index = p[0];
+    }
+  else if (pool_is_free_index (sm->sr_policies, index))
+    return -1;			/* pool_elt_at_index never returns NULL */
+
+  sr_policy = pool_elt_at_index (sm->sr_policies, index);
+
+  if (operation == 1)		/* Add SR List to an existing SR policy */
+    {
+      /* Create the new SL */
+      create_sl (sr_policy, segments, weight, sr_policy->is_encap);
+
+      /* Create a new LB DPO */
+      if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+	update_lb (sr_policy);
+      else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
+	update_replicate (sr_policy);
+    }
+  else if (operation == 2)	/* Delete SR List from an existing SR policy */
+    {
+      /* Check that currently there are more than one SID list */
+      if (vec_len (sr_policy->segments_lists) == 1)
+	return -21;
+
+      /* Check that the SR list does exist and is assigned to the sr policy */
+      vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+	if (*sl_index_iterate == sl_index)
+	break;
+
+      /* Not found: the iterator is one past the end, do not dereference */
+      if (sl_index_iterate == vec_end (sr_policy->segments_lists))
+	return -22;
+
+      /* Remove the lucky SR list that is being kicked out */
+      segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+      vec_free (segment_list->segments);
+      vec_free (segment_list->rewrite);
+      /* In encap mode rewrite_bsid aliases rewrite: do not free it twice */
+      if (!sr_policy->is_encap)
+	vec_free (segment_list->rewrite_bsid);
+      pool_put_index (sm->sid_lists, sl_index);
+      vec_del1 (sr_policy->segments_lists,
+		sl_index_iterate - sr_policy->segments_lists);
+
+      /* Create a new LB DPO */
+      if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+	update_lb (sr_policy);
+      else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
+	update_replicate (sr_policy);
+    }
+  else if (operation == 3)	/* Modify the weight of an existing SR List */
+    {
+      /* Find the corresponding SL */
+      vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+	if (*sl_index_iterate == sl_index)
+	break;
+
+      /* Not found: the iterator is one past the end, do not dereference */
+      if (sl_index_iterate == vec_end (sr_policy->segments_lists))
+	return -32;
+
+      /* Change the weight */
+      segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+      segment_list->weight = weight;
+
+      /* Update LB */
+      if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+	update_lb (sr_policy);
+    }
+  else				/* Incorrect op. */
+    return -1;
+
+  return 0;
+}
+
+/**
+ * @brief CLI for 'sr policies' command family
+ *
+ * Parses "sr policy add|del|mod ..." and dispatches to sr_policy_add(),
+ * sr_policy_del() or sr_policy_mod(), then translates their return code
+ * into a CLI error message.
+ *
+ * @param vm vlib main (not used by this function)
+ * @param input unparsed CLI arguments
+ * @param cmd CLI command registration (not used by this function)
+ *
+ * @return 0 on success, otherwise a clib error describing the failure
+ */
+static clib_error_t *
+sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+		      vlib_cli_command_t * cmd)
+{
+  int rv = -1;
+  char is_del = 0, is_add = 0, is_mod = 0;
+  char policy_set = 0;
+  ip6_address_t bsid, next_address;
+  u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0;
+  u32 weight = (u32) ~ 0, fib_table = (u32) ~ 0;
+  ip6_address_t *segments = 0, *this_seg;
+  u8 operation = 0;
+  char is_encap = 1;
+  char is_spray = 0;
+
+  /* Tokens are accepted in any order. Only the first of add/del/mod is
+   * taken as the verb and the policy is identified once, either by bsid
+   * or by index. */
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!is_add && !is_mod && !is_del && unformat (input, "add"))
+	is_add = 1;
+      else if (!is_add && !is_mod && !is_del && unformat (input, "del"))
+	is_del = 1;
+      else if (!is_add && !is_mod && !is_del && unformat (input, "mod"))
+	is_mod = 1;
+      else if (!policy_set
+	       && unformat (input, "bsid %U", unformat_ip6_address, &bsid))
+	policy_set = 1;
+      else if (!is_add && !policy_set
+	       && unformat (input, "index %d", &sr_policy_index))
+	policy_set = 1;
+      else if (unformat (input, "weight %d", &weight));
+      else
+	if (unformat (input, "next %U", unformat_ip6_address, &next_address))
+	{
+	  /* Append the SID to the segment list being built */
+	  vec_add2 (segments, this_seg, 1);
+	  clib_memcpy (this_seg->as_u8, next_address.as_u8,
+		       sizeof (*this_seg));
+	}
+      else if (unformat (input, "add sl"))
+	operation = 1;
+      else if (unformat (input, "del sl index %d", &sl_index))
+	operation = 2;
+      else if (unformat (input, "mod sl index %d", &sl_index))
+	operation = 3;
+      else if (fib_table == (u32) ~ 0
+	       && unformat (input, "fib-table %d", &fib_table));
+      else if (unformat (input, "encap"))
+	is_encap = 1;
+      else if (unformat (input, "insert"))
+	is_encap = 0;
+      else if (unformat (input, "spray"))
+	is_spray = 1;
+      else
+	break;
+    }
+
+  /* NOTE(review): the segments vector built above is never vec_free'd in
+   * this function; confirm whether sr_policy_add()/sr_policy_mod() take
+   * ownership of it or whether it is leaked. */
+
+  if (!is_add && !is_mod && !is_del)
+    return clib_error_return (0, "Incorrect CLI");
+
+  if (!policy_set)
+    return clib_error_return (0, "No SR policy BSID or index specified");
+
+  /* The "index" token is only refused once "add" has been seen, so
+   * "index N add ..." would otherwise reach sr_policy_add() with an
+   * uninitialised bsid. A new policy can only be named by its BSID. */
+  if (is_add && sr_policy_index != (u32) ~ 0)
+    return clib_error_return (0, "No SR policy BSID specified");
+
+  if (is_add)
+    {
+      if (vec_len (segments) == 0)
+	return clib_error_return (0, "No Segment List specified");
+      rv = sr_policy_add (&bsid, segments, weight,
+			  (is_spray ? SR_POLICY_TYPE_SPRAY :
+			   SR_POLICY_TYPE_DEFAULT), fib_table, is_encap);
+    }
+  else if (is_del)
+    /* A parsed index takes precedence: bsid is passed as NULL in that case */
+    rv = sr_policy_del ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid),
+			sr_policy_index);
+  else if (is_mod)
+    {
+      if (!operation)
+	return clib_error_return (0, "No SL modification specified");
+      if (operation != 1 && sl_index == (u32) ~ 0)
+	return clib_error_return (0, "No Segment List index specified");
+      if (operation == 1 && vec_len (segments) == 0)
+	return clib_error_return (0, "No Segment List specified");
+      if (operation == 3 && weight == (u32) ~ 0)
+	return clib_error_return (0, "No new weight for the SL specified");
+      rv = sr_policy_mod ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid),
+			  sr_policy_index, fib_table, operation, segments,
+			  sl_index, weight);
+    }
+
+  /* Translate the API return code into a CLI message */
+  switch (rv)
+    {
+    case 0:
+      break;
+    case 1:
+      return 0;
+    case -12:
+      return clib_error_return (0,
+				"There is already a FIB entry for the BindingSID address.\n"
+				"The SR policy could not be created.");
+    case -13:
+      return clib_error_return (0, "The specified FIB table does not exist.");
+    case -21:
+      return clib_error_return (0,
+				"The selected SR policy only contains ONE segment list. "
+				"Please remove the SR policy instead");
+    case -22:
+      return clib_error_return (0,
+				"Could not delete the segment list. "
+				"It is not associated with that SR policy.");
+    case -32:
+      return clib_error_return (0,
+				"Could not modify the segment list. "
+				"The given SL is not associated with such SR policy.");
+    default:
+      return clib_error_return (0, "BUG: sr policy returns %d", rv);
+    }
+  return 0;
+}
+
+/*
+ * CLI registration: binds the "sr policy" path to sr_policy_command_fn.
+ * NOTE(review): short_help does not list the "spray" keyword nor the
+ * "add sl" / "del sl index" / "mod sl index" sub-commands that the parser
+ * accepts.
+ */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sr_policy_command, static) = {
+  .path = "sr policy",
+  .short_help = "sr policy [add||del||mod] [bsid 2001::1||index 5] "
+    "next A:: next B:: next C:: (weight 1) (fib-table 2) (encap|insert)",
+  .long_help =
+    "Manipulation of SR policies.\n"
+    "A Segment Routing policy may contain several SID lists. Each SID list has\n"
+    "an associated weight (default 1), which will result in wECMP (uECMP).\n"
+    "Segment Routing policies might be of type encapsulation or srh insertion\n"
+    "Each SR policy will be associated with a unique BindingSID.\n"
+    "A BindingSID is a locally allocated SegmentID. For every packet that arrives\n"
+    "with IPv6_DA:BSID such traffic will be steered into the SR policy.\n"
+    "The add command will create a SR policy with its first segment list (sl)\n"
+    "The mod command allows you to add, remove, or modify the existing segment lists\n"
+    "within an SR policy.\n"
+    "The del command allows you to delete a SR policy along with all its associated\n"
+    "SID lists.\n",
+  .function = sr_policy_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI to display onscreen all the SR policies
+ *
+ * For every policy in sm->sr_policies prints its index, BSID, behavior,
+ * type, FIB table and each attached Segment List with its weight.
+ *
+ * @param vm vlib main, used for CLI output
+ * @param input CLI arguments (not used by this function)
+ * @param cmd CLI command registration (not used by this function)
+ *
+ * @return always 0
+ */
+static clib_error_t *
+show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
+			     vlib_cli_command_t * cmd)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  u32 *sl_index;
+  ip6_sr_sl_t *segment_list = 0;
+  ip6_sr_policy_t *sr_policy = 0;
+  ip6_sr_policy_t **vec_policies = 0;
+  ip6_address_t *addr;
+  u8 *s;
+  int i = 0;
+
+  vlib_cli_output (vm, "SR policies:");
+
+  /* Snapshot the policy pointers into a temporary vector before printing */
+  /* *INDENT-OFF* */
+  pool_foreach  (sr_policy, sm->sr_policies,
+                {vec_add1 (vec_policies, sr_policy); } );
+  /* *INDENT-ON* */
+
+  vec_foreach_index (i, vec_policies)
+  {
+    sr_policy = vec_policies[i];
+    vlib_cli_output (vm, "[%u].-\tBSID: %U",
+		     (u32) (sr_policy - sm->sr_policies),
+		     format_ip6_address, &sr_policy->bsid);
+    vlib_cli_output (vm, "\tBehavior: %s",
+		     (sr_policy->is_encap ? "Encapsulation" :
+		      "SRH insertion"));
+    vlib_cli_output (vm, "\tType: %s",
+		     (sr_policy->type ==
+		      SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray"));
+    vlib_cli_output (vm, "\tFIB table: %u",
+		     (sr_policy->fib_table !=
+		      (u32) ~ 0 ? sr_policy->fib_table : 0));
+    vlib_cli_output (vm, "\tSegment Lists:");
+    vec_foreach (sl_index, sr_policy->segments_lists)
+    {
+      s = NULL;
+      s = format (s, "\t[%u].- ", *sl_index);
+      segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+      s = format (s, "< ");
+      vec_foreach (addr, segment_list->segments)
+      {
+	s = format (s, "%U, ", format_ip6_address, addr);
+      }
+      s = format (s, "\b\b > ");
+      s = format (s, "weight: %u", segment_list->weight);
+      /* s is a vppinfra vector without a trailing NUL: print it with %v,
+       * not %s, and release it once it has been emitted */
+      vlib_cli_output (vm, "  %v", s);
+      vec_free (s);
+    }
+    vlib_cli_output (vm, "-----------");
+  }
+
+  /* The snapshot vector is only needed for the duration of this command */
+  vec_free (vec_policies);
+  return 0;
+}
+
+/* CLI registration: binds "show sr policies" to show_sr_policies_command_fn. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_policies_command, static) = {
+  .path = "show sr policies",
+  .short_help = "show sr policies",
+  .function = show_sr_policies_command_fn,
+};
+/* *INDENT-ON* */
+
+/*************************** SR rewrite graph node ****************************/
+/**
+ * @brief Trace formatter for the SR Policy Rewrite graph nodes
+ *
+ * Consumes (vlib_main_t *, vlib_node_t *, sr_policy_rewrite_trace_t *) from
+ * the argument list and appends the traced source and destination IPv6
+ * addresses to s.
+ *
+ * @param s vector the text is appended to
+ * @param args variadic argument list as described above
+ *
+ * @return the (possibly reallocated) vector s
+ */
+static u8 *
+format_sr_policy_rewrite_trace (u8 * s, va_list * args)
+{
+  /* TODO: only the addresses are reported so far */
+  /* The first two arguments are not needed but must still be consumed */
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  sr_policy_rewrite_trace_t *trace =
+    va_arg (*args, sr_policy_rewrite_trace_t *);
+
+  return format (s, "SR-policy-rewrite: src %U dst %U",
+		 format_ip6_address, &trace->src,
+		 format_ip6_address, &trace->dst);
+}
+
+/**
+ * @brief IPv6 encapsulation processing as per RFC2473
+ *
+ * Decrements the inner hop limit, then derives the outer payload length
+ * and copies the inner version/traffic-class/flow-label word to the outer
+ * header.
+ *
+ * @param node runtime of the calling node (not used here)
+ * @param b0 buffer being processed (not used here)
+ * @param ip0 outer IPv6 header (start of the prepended rewrite)
+ * @param ip0_encap inner (original) IPv6 header
+ */
+static_always_inline void
+encaps_processing_v6 (vlib_node_runtime_t * node,
+		      vlib_buffer_t * b0,
+		      ip6_header_t * ip0, ip6_header_t * ip0_encap)
+{
+  u32 outer_len;
+
+  ip0_encap->hop_limit -= 1;
+
+  /* NOTE(review): ip0->payload_length is added without a byte-order
+   * conversion, i.e. the rewrite is presumably stored with this field in
+   * host order -- confirm against the code that builds the rewrite. */
+  outer_len = ip0->payload_length;
+  outer_len += sizeof (ip6_header_t);
+  outer_len += clib_net_to_host_u16 (ip0_encap->payload_length);
+  ip0->payload_length = clib_host_to_net_u16 (outer_len);
+
+  /* Outer header inherits version, traffic class and flow label */
+  ip0->ip_version_traffic_class_and_flow_label =
+    ip0_encap->ip_version_traffic_class_and_flow_label;
+}
+
+/**
+ * @brief Graph node for applying a SR policy into an IPv6 packet. Encapsulation
+ *
+ * For every buffer the Segment List is fetched from sm->sid_lists using
+ * vnet_buffer (b)->ip.adj_index[VLIB_TX]; the rewrite stored in that
+ * Segment List is copied right in front of the current packet data, the
+ * buffer is moved back to cover it and encaps_processing_v6() fixes up the
+ * outer and inner IPv6 headers. Every packet is enqueued to
+ * SR_POLICY_REWRITE_NEXT_IP6_LOOKUP.
+ *
+ * @param vm vlib main
+ * @param node runtime of this node
+ * @param from_frame frame holding the buffer indices to process
+ *
+ * @return number of buffers in from_frame
+ */
+static uword
+sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
+			  vlib_frame_t * from_frame)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  u32 n_left_from, next_index, *from, *to_next;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  /* bsid_pkts is never incremented in this function, so the BSID counter
+   * below is always bumped by zero */
+  int encap_pkts = 0, bsid_pkts = 0;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Quad - Loop */
+      /* Needs 8 buffers left: 4 are processed, from[4..7] are prefetched */
+      while (n_left_from >= 8 && n_left_to_next >= 4)
+	{
+	  u32 bi0, bi1, bi2, bi3;
+	  vlib_buffer_t *b0, *b1, *b2, *b3;
+	  u32 next0, next1, next2, next3;
+	  next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
+	  ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap;
+	  ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+
+	  /* Prefetch next iteration. */
+	  {
+	    vlib_buffer_t *p4, *p5, *p6, *p7;
+
+	    p4 = vlib_get_buffer (vm, from[4]);
+	    p5 = vlib_get_buffer (vm, from[5]);
+	    p6 = vlib_get_buffer (vm, from[6]);
+	    p7 = vlib_get_buffer (vm, from[7]);
+
+	    /* Prefetch the buffer header and packet for the N+2 loop iteration */
+	    vlib_prefetch_buffer_header (p4, LOAD);
+	    vlib_prefetch_buffer_header (p5, LOAD);
+	    vlib_prefetch_buffer_header (p6, LOAD);
+	    vlib_prefetch_buffer_header (p7, LOAD);
+
+	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+	  }
+
+	  /* Speculatively enqueue the four buffers to the current next frame */
+	  to_next[0] = bi0 = from[0];
+	  to_next[1] = bi1 = from[1];
+	  to_next[2] = bi2 = from[2];
+	  to_next[3] = bi3 = from[3];
+	  from += 4;
+	  to_next += 4;
+	  n_left_from -= 4;
+	  n_left_to_next -= 4;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+	  b2 = vlib_get_buffer (vm, bi2);
+	  b3 = vlib_get_buffer (vm, bi3);
+
+	  /* adj_index[VLIB_TX] is used as the index into the SID list pool */
+	  sl0 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  sl1 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+	  sl2 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+	  sl3 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+	  /* There must be enough room in front of the packet for the rewrite */
+	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl0->rewrite));
+	  ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl1->rewrite));
+	  ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl2->rewrite));
+	  ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl3->rewrite));
+
+	  /* Current data still points at the original (inner) IPv6 header */
+	  ip0_encap = vlib_buffer_get_current (b0);
+	  ip1_encap = vlib_buffer_get_current (b1);
+	  ip2_encap = vlib_buffer_get_current (b2);
+	  ip3_encap = vlib_buffer_get_current (b3);
+
+	  /* Copy the rewrite immediately in front of the inner header */
+	  clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+		       sl0->rewrite, vec_len (sl0->rewrite));
+	  clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite),
+		       sl1->rewrite, vec_len (sl1->rewrite));
+	  clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite),
+		       sl2->rewrite, vec_len (sl2->rewrite));
+	  clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite),
+		       sl3->rewrite, vec_len (sl3->rewrite));
+
+	  /* Move current data back so that it starts at the rewrite */
+	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+	  vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+	  vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+	  vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+	  /* Current data now points at the outer IPv6 header */
+	  ip0 = vlib_buffer_get_current (b0);
+	  ip1 = vlib_buffer_get_current (b1);
+	  ip2 = vlib_buffer_get_current (b2);
+	  ip3 = vlib_buffer_get_current (b3);
+
+	  encaps_processing_v6 (node, b0, ip0, ip0_encap);
+	  encaps_processing_v6 (node, b1, ip1, ip1_encap);
+	  encaps_processing_v6 (node, b2, ip2, ip2_encap);
+	  encaps_processing_v6 (node, b3, ip3, ip3_encap);
+
+	  /* Record outer src/dst for every traced buffer */
+	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+	    {
+	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b0, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b1, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b2, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b3, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+	    }
+
+	  encap_pkts += 4;
+	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, bi1, bi2, bi3,
+					   next0, next1, next2, next3);
+	}
+
+      /* Single loop for potentially the last three packets */
+      /* Also handles whatever the quad loop could not take (fewer than 8
+       * buffers left, or fewer than 4 slots in the next frame) */
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0;
+	  vlib_buffer_t *b0;
+	  ip6_header_t *ip0 = 0, *ip0_encap = 0;
+	  ip6_sr_sl_t *sl0;
+	  u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+	  b0 = vlib_get_buffer (vm, bi0);
+
+	  sl0 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl0->rewrite));
+
+	  ip0_encap = vlib_buffer_get_current (b0);
+
+	  /* Prepend the rewrite and make it the start of the packet */
+	  clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+		       sl0->rewrite, vec_len (sl0->rewrite));
+	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+	  ip0 = vlib_buffer_get_current (b0);
+
+	  encaps_processing_v6 (node, b0, ip0, ip0_encap);
+
+	  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+	      PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      sr_policy_rewrite_trace_t *tr =
+		vlib_add_trace (vm, node, b0, sizeof (*tr));
+	      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			   sizeof (tr->src.as_u8));
+	      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+			   sizeof (tr->dst.as_u8));
+	    }
+
+	  encap_pkts++;
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Update counters */
+  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+			       SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+			       encap_pkts);
+  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+			       SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+			       bsid_pkts);
+
+  return from_frame->n_vectors;
+}
+
+/*
+ * Node registration for "sr-pl-rewrite-encaps": internal node running
+ * sr_policy_rewrite_encaps; next nodes are expanded from
+ * foreach_sr_policy_rewrite_next.
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = {
+  .function = sr_policy_rewrite_encaps,
+  .name = "sr-pl-rewrite-encaps",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sr_policy_rewrite_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = SR_POLICY_REWRITE_N_ERROR,
+  .error_strings = sr_policy_rewrite_error_strings,
+  .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+  .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+    foreach_sr_policy_rewrite_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief IPv4 encapsulation processing as per RFC2473
+ *
+ * Decrements the inner IPv4 TTL (with an incremental checksum update),
+ * then fixes the outer IPv6 payload length, version/traffic-class word and
+ * the next-header value announcing the inner IPv4 packet.
+ *
+ * @param node runtime of the calling node (not used here)
+ * @param b0 buffer being processed (not used here)
+ * @param ip0 outer IPv6 header (start of the prepended rewrite)
+ * @param ip0_encap inner (original) IPv4 header
+ */
+static_always_inline void
+encaps_processing_v4 (vlib_node_runtime_t * node,
+		      vlib_buffer_t * b0,
+		      ip6_header_t * ip0, ip4_header_t * ip0_encap)
+{
+  u32 new_l0;
+  ip6_sr_header_t *sr0;
+
+  u32 checksum0;
+
+  /* Inner IPv4: Decrement TTL & update checksum */
+  ip0_encap->ttl -= 1;
+  checksum0 = ip0_encap->checksum + clib_host_to_net_u16 (0x0100);
+  checksum0 += checksum0 >= 0xffff;
+  ip0_encap->checksum = checksum0;
+
+  /* Outer IPv6: Update length, FL, proto */
+  /* NOTE(review): ip0->payload_length is added without a byte-order
+   * conversion, i.e. the rewrite is presumably stored with this field in
+   * host order -- confirm against the code that builds the rewrite. */
+  new_l0 = ip0->payload_length + clib_net_to_host_u16 (ip0_encap->length);
+  ip0->payload_length = clib_host_to_net_u16 (new_l0);
+  ip0->ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 (0 | ((6 & 0xF) << 28) |
+			  ((ip0_encap->tos & 0xFF) << 20));
+
+  /* Only touch the SRH when the outer header really announces one.
+   * If the rewrite carries no routing header, the byte right after the
+   * IPv6 header is the first byte of the inner IPv4 packet and must not be
+   * overwritten; the IPv6 next-header field is updated instead. */
+  if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE)
+    {
+      sr0 = (void *) (ip0 + 1);
+      sr0->protocol = IP_PROTOCOL_IP_IN_IP;
+    }
+  else
+    ip0->protocol = IP_PROTOCOL_IP_IN_IP;
+}
+
+/**
+ * @brief Graph node for applying a SR policy into an IPv4 packet. Encapsulation
+ *
+ * For every buffer the Segment List is fetched from sm->sid_lists using
+ * vnet_buffer (b)->ip.adj_index[VLIB_TX]; the rewrite stored in that
+ * Segment List is copied right in front of the current packet data, the
+ * buffer is moved back to cover it and encaps_processing_v4() fixes up the
+ * outer IPv6 and inner IPv4 headers. Every packet is enqueued to
+ * SR_POLICY_REWRITE_NEXT_IP6_LOOKUP.
+ *
+ * @param vm vlib main
+ * @param node runtime of this node
+ * @param from_frame frame holding the buffer indices to process
+ *
+ * @return number of buffers in from_frame
+ */
+static uword
+sr_policy_rewrite_encaps_v4 (vlib_main_t * vm, vlib_node_runtime_t * node,
+			     vlib_frame_t * from_frame)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  u32 n_left_from, next_index, *from, *to_next;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  /* bsid_pkts is never incremented in this function, so the BSID counter
+   * below is always bumped by zero */
+  int encap_pkts = 0, bsid_pkts = 0;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Quad - Loop */
+      /* Needs 8 buffers left: 4 are processed, from[4..7] are prefetched */
+      while (n_left_from >= 8 && n_left_to_next >= 4)
+	{
+	  u32 bi0, bi1, bi2, bi3;
+	  vlib_buffer_t *b0, *b1, *b2, *b3;
+	  u32 next0, next1, next2, next3;
+	  next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
+	  ip4_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap;
+	  ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+
+	  /* Prefetch next iteration. */
+	  {
+	    vlib_buffer_t *p4, *p5, *p6, *p7;
+
+	    p4 = vlib_get_buffer (vm, from[4]);
+	    p5 = vlib_get_buffer (vm, from[5]);
+	    p6 = vlib_get_buffer (vm, from[6]);
+	    p7 = vlib_get_buffer (vm, from[7]);
+
+	    /* Prefetch the buffer header and packet data of the next four
+	     * buffers */
+	    vlib_prefetch_buffer_header (p4, LOAD);
+	    vlib_prefetch_buffer_header (p5, LOAD);
+	    vlib_prefetch_buffer_header (p6, LOAD);
+	    vlib_prefetch_buffer_header (p7, LOAD);
+
+	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+	  }
+
+	  /* Speculatively enqueue the four buffers to the current next frame */
+	  to_next[0] = bi0 = from[0];
+	  to_next[1] = bi1 = from[1];
+	  to_next[2] = bi2 = from[2];
+	  to_next[3] = bi3 = from[3];
+	  from += 4;
+	  to_next += 4;
+	  n_left_from -= 4;
+	  n_left_to_next -= 4;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+	  b2 = vlib_get_buffer (vm, bi2);
+	  b3 = vlib_get_buffer (vm, bi3);
+
+	  /* adj_index[VLIB_TX] is used as the index into the SID list pool */
+	  sl0 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  sl1 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+	  sl2 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+	  sl3 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+	  /* There must be enough room in front of the packet for the rewrite */
+	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl0->rewrite));
+	  ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl1->rewrite));
+	  ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl2->rewrite));
+	  ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl3->rewrite));
+
+	  /* Current data still points at the original (inner) IPv4 header */
+	  ip0_encap = vlib_buffer_get_current (b0);
+	  ip1_encap = vlib_buffer_get_current (b1);
+	  ip2_encap = vlib_buffer_get_current (b2);
+	  ip3_encap = vlib_buffer_get_current (b3);
+
+	  /* Copy the rewrite immediately in front of the inner header */
+	  clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+		       sl0->rewrite, vec_len (sl0->rewrite));
+	  clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite),
+		       sl1->rewrite, vec_len (sl1->rewrite));
+	  clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite),
+		       sl2->rewrite, vec_len (sl2->rewrite));
+	  clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite),
+		       sl3->rewrite, vec_len (sl3->rewrite));
+
+	  /* Move current data back so that it starts at the rewrite */
+	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+	  vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+	  vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+	  vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+	  /* Current data now points at the outer IPv6 header */
+	  ip0 = vlib_buffer_get_current (b0);
+	  ip1 = vlib_buffer_get_current (b1);
+	  ip2 = vlib_buffer_get_current (b2);
+	  ip3 = vlib_buffer_get_current (b3);
+
+	  encaps_processing_v4 (node, b0, ip0, ip0_encap);
+	  encaps_processing_v4 (node, b1, ip1, ip1_encap);
+	  encaps_processing_v4 (node, b2, ip2, ip2_encap);
+	  encaps_processing_v4 (node, b3, ip3, ip3_encap);
+
+	  /* Record outer src/dst for every traced buffer */
+	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+	    {
+	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b0, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b1, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b2, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b3, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+	    }
+
+	  encap_pkts += 4;
+	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, bi1, bi2, bi3,
+					   next0, next1, next2, next3);
+	}
+
+      /* Single loop for potentially the last three packets */
+      /* Also handles whatever the quad loop could not take (fewer than 8
+       * buffers left, or fewer than 4 slots in the next frame) */
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0;
+	  vlib_buffer_t *b0;
+	  ip6_header_t *ip0 = 0;
+	  ip4_header_t *ip0_encap = 0;
+	  ip6_sr_sl_t *sl0;
+	  u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+	  b0 = vlib_get_buffer (vm, bi0);
+
+	  sl0 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl0->rewrite));
+
+	  ip0_encap = vlib_buffer_get_current (b0);
+
+	  /* Prepend the rewrite and make it the start of the packet */
+	  clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+		       sl0->rewrite, vec_len (sl0->rewrite));
+	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+	  ip0 = vlib_buffer_get_current (b0);
+
+	  encaps_processing_v4 (node, b0, ip0, ip0_encap);
+
+	  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+	      PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      sr_policy_rewrite_trace_t *tr =
+		vlib_add_trace (vm, node, b0, sizeof (*tr));
+	      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			   sizeof (tr->src.as_u8));
+	      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+			   sizeof (tr->dst.as_u8));
+	    }
+
+	  encap_pkts++;
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Update counters. They are charged to the node that is actually running
+   * (node->node_index) rather than to the IPv6 encaps node, so that IPv4
+   * traffic shows up under this node's own counters. */
+  vlib_node_increment_counter (vm, node->node_index,
+			       SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+			       encap_pkts);
+  vlib_node_increment_counter (vm, node->node_index,
+			       SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+			       bsid_pkts);
+
+  return from_frame->n_vectors;
+}
+
+/*
+ * Node registration for "sr-pl-rewrite-encaps-v4": internal node running
+ * sr_policy_rewrite_encaps_v4; it shares the trace formatter, error strings
+ * and next-node table with the other SR policy rewrite nodes.
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = {
+  .function = sr_policy_rewrite_encaps_v4,
+  .name = "sr-pl-rewrite-encaps-v4",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sr_policy_rewrite_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = SR_POLICY_REWRITE_N_ERROR,
+  .error_strings = sr_policy_rewrite_error_strings,
+  .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+  .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+    foreach_sr_policy_rewrite_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Flow hash of an IP header of either version
+ *
+ * The high nibble of the first byte selects the IPv4 hash (value 4);
+ * anything else is hashed as IPv6.
+ *
+ * @param data pointer to the start of the IP header
+ *
+ * @return flow hash computed with IP_FLOW_HASH_DEFAULT
+ */
+always_inline u32
+ip_flow_hash (void *data)
+{
+  ip4_header_t *ip4 = data;
+  u8 version_nibble = ip4->ip_version_and_header_length & 0xF0;
+
+  if (version_nibble != 0x40)
+    return ip6_compute_flow_hash ((ip6_header_t *) data,
+				  IP_FLOW_HASH_DEFAULT);
+
+  return ip4_compute_flow_hash (ip4, IP_FLOW_HASH_DEFAULT);
+}
+
+/**
+ * @brief Pack a 6-byte MAC address into a u64
+ *
+ * Only the six bytes of the address are read. The previous form loaded
+ * eight bytes through a casted pointer, which touched two bytes beyond the
+ * address, was not alignment/aliasing safe and masked the wrong bytes on
+ * big-endian hosts. On little-endian hosts the returned value is unchanged.
+ *
+ * @param m pointer to the first byte of the MAC address
+ *
+ * @return the six address bytes copied into the first six bytes (in memory
+ *         order) of a zeroed u64
+ */
+always_inline u64
+mac_to_u64 (u8 * m)
+{
+  u64 mac = 0;
+
+  clib_memcpy (&mac, m, 6);
+
+  return mac;
+}
+
+/**
+ * @brief Flow hash of an L2 frame
+ *
+ * Mixes three inputs with hash_mix64: the IP flow hash of the payload when
+ * the ethertype is IPv4 or IPv6 (the raw ethertype field otherwise), the
+ * destination MAC and the source MAC.
+ *
+ * @param b0 buffer whose current data points at the ethernet header
+ *
+ * @return low 32 bits of the mixed hash
+ */
+always_inline u32
+l2_flow_hash (vlib_buffer_t * b0)
+{
+  ethernet_header_t *eth = vlib_buffer_get_current (b0);
+  u16 ethertype = clib_net_to_host_u16 (eth->type);
+  uword l2_len = ethernet_buffer_header_size (b0);
+  u64 seed, dst_mac, src_mac;
+
+  /* since we have 2 cache lines, use them */
+  switch (ethertype)
+    {
+    case ETHERNET_TYPE_IP4:
+    case ETHERNET_TYPE_IP6:
+      /* IP payload starts right after the L2 header */
+      seed = ip_flow_hash ((u8 *) eth + l2_len);
+      break;
+    default:
+      seed = eth->type;
+      break;
+    }
+
+  dst_mac = mac_to_u64 ((u8 *) eth->dst_address);
+  src_mac = mac_to_u64 ((u8 *) eth->src_address);
+  hash_mix64 (seed, dst_mac, src_mac);
+
+  /* The result is taken from the third mixed operand */
+  return (u32) src_mac;
+}
+
+/**
+ * @brief Graph node for applying a SR policy into a L2 frame
+ */
+static uword
+sr_policy_rewrite_encaps_l2 (vlib_main_t * vm, vlib_node_runtime_t * node,
+			     vlib_frame_t * from_frame)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  u32 n_left_from, next_index, *from, *to_next;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  int encap_pkts = 0, bsid_pkts = 0;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Quad - Loop */
+      while (n_left_from >= 8 && n_left_to_next >= 4)
+	{
+	  u32 bi0, bi1, bi2, bi3;
+	  vlib_buffer_t *b0, *b1, *b2, *b3;
+	  u32 next0, next1, next2, next3;
+	  next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+	  ethernet_header_t *en0, *en1, *en2, *en3;
+	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
+	  ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+	  ip6_sr_policy_t *sp0, *sp1, *sp2, *sp3;
+	  ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+
+	  /* Prefetch next iteration. */
+	  {
+	    vlib_buffer_t *p4, *p5, *p6, *p7;
+
+	    p4 = vlib_get_buffer (vm, from[4]);
+	    p5 = vlib_get_buffer (vm, from[5]);
+	    p6 = vlib_get_buffer (vm, from[6]);
+	    p7 = vlib_get_buffer (vm, from[7]);
+
+	    /* Prefetch the buffer header and packet for the N+2 loop iteration */
+	    vlib_prefetch_buffer_header (p4, LOAD);
+	    vlib_prefetch_buffer_header (p5, LOAD);
+	    vlib_prefetch_buffer_header (p6, LOAD);
+	    vlib_prefetch_buffer_header (p7, LOAD);
+
+	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+	  }
+
+	  to_next[0] = bi0 = from[0];
+	  to_next[1] = bi1 = from[1];
+	  to_next[2] = bi2 = from[2];
+	  to_next[3] = bi3 = from[3];
+	  from += 4;
+	  to_next += 4;
+	  n_left_from -= 4;
+	  n_left_to_next -= 4;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+	  b2 = vlib_get_buffer (vm, bi2);
+	  b3 = vlib_get_buffer (vm, bi3);
+
+	  sp0 = pool_elt_at_index (sm->sr_policies,
+				   sm->sw_iface_sr_policies[vnet_buffer
+							    (b0)->sw_if_index
+							    [VLIB_RX]]);
+
+	  sp1 = pool_elt_at_index (sm->sr_policies,
+				   sm->sw_iface_sr_policies[vnet_buffer
+							    (b1)->sw_if_index
+							    [VLIB_RX]]);
+
+	  sp2 = pool_elt_at_index (sm->sr_policies,
+				   sm->sw_iface_sr_policies[vnet_buffer
+							    (b2)->sw_if_index
+							    [VLIB_RX]]);
+
+	  sp3 = pool_elt_at_index (sm->sr_policies,
+				   sm->sw_iface_sr_policies[vnet_buffer
+							    (b3)->sw_if_index
+							    [VLIB_RX]]);
+
+	  if (vec_len (sp0->segments_lists) == 1)
+	    vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0];
+	  else
+	    {
+	      vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0);
+	      vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+		sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash &
+				     (vec_len (sp0->segments_lists) - 1))];
+	    }
+
+	  if (vec_len (sp1->segments_lists) == 1)
+	    vnet_buffer (b1)->ip.adj_index[VLIB_TX] = sp1->segments_lists[1];
+	  else
+	    {
+	      vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1);
+	      vnet_buffer (b1)->ip.adj_index[VLIB_TX] =
+		sp1->segments_lists[(vnet_buffer (b1)->ip.flow_hash &
+				     (vec_len (sp1->segments_lists) - 1))];
+	    }
+
+	  if (vec_len (sp2->segments_lists) == 1)
+	    vnet_buffer (b2)->ip.adj_index[VLIB_TX] = sp2->segments_lists[2];
+	  else
+	    {
+	      vnet_buffer (b2)->ip.flow_hash = l2_flow_hash (b2);
+	      vnet_buffer (b2)->ip.adj_index[VLIB_TX] =
+		sp2->segments_lists[(vnet_buffer (b2)->ip.flow_hash &
+				     (vec_len (sp2->segments_lists) - 1))];
+	    }
+
+	  if (vec_len (sp3->segments_lists) == 1)
+	    vnet_buffer (b3)->ip.adj_index[VLIB_TX] = sp3->segments_lists[3];
+	  else
+	    {
+	      vnet_buffer (b3)->ip.flow_hash = l2_flow_hash (b3);
+	      vnet_buffer (b3)->ip.adj_index[VLIB_TX] =
+		sp3->segments_lists[(vnet_buffer (b3)->ip.flow_hash &
+				     (vec_len (sp3->segments_lists) - 1))];
+	    }
+
+	  sl0 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  sl1 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+	  sl2 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+	  sl3 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl0->rewrite));
+	  ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl1->rewrite));
+	  ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl2->rewrite));
+	  ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl3->rewrite));
+
+	  en0 = vlib_buffer_get_current (b0);
+	  en1 = vlib_buffer_get_current (b1);
+	  en2 = vlib_buffer_get_current (b2);
+	  en3 = vlib_buffer_get_current (b3);
+
+	  clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite,
+		       vec_len (sl0->rewrite));
+	  clib_memcpy (((u8 *) en1) - vec_len (sl1->rewrite), sl1->rewrite,
+		       vec_len (sl1->rewrite));
+	  clib_memcpy (((u8 *) en2) - vec_len (sl2->rewrite), sl2->rewrite,
+		       vec_len (sl2->rewrite));
+	  clib_memcpy (((u8 *) en3) - vec_len (sl3->rewrite), sl3->rewrite,
+		       vec_len (sl3->rewrite));
+
+	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+	  vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+	  vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+	  vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+	  ip0 = vlib_buffer_get_current (b0);
+	  ip1 = vlib_buffer_get_current (b1);
+	  ip2 = vlib_buffer_get_current (b2);
+	  ip3 = vlib_buffer_get_current (b3);
+
+	  ip0->payload_length =
+	    clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));
+	  ip1->payload_length =
+	    clib_host_to_net_u16 (b1->current_length - sizeof (ip6_header_t));
+	  ip2->payload_length =
+	    clib_host_to_net_u16 (b2->current_length - sizeof (ip6_header_t));
+	  ip3->payload_length =
+	    clib_host_to_net_u16 (b3->current_length - sizeof (ip6_header_t));
+
+	  sr0 = (void *) (ip0 + 1);
+	  sr1 = (void *) (ip1 + 1);
+	  sr2 = (void *) (ip2 + 1);
+	  sr3 = (void *) (ip3 + 1);
+
+	  sr0->protocol = sr1->protocol = sr2->protocol = sr3->protocol =
+	    IP_PROTOCOL_IP6_NONXT;
+
+	  /* Which Traffic class and flow label do I set ? */
+	  //ip0->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(0|((6&0xF)<<28)|((ip0_encap->tos&0xFF)<<20));
+
+	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+	    {
+	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b0, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b1, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b2, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b3, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+	    }
+
+	  encap_pkts += 4;
+	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, bi1, bi2, bi3,
+					   next0, next1, next2, next3);
+	}
+
+      /* Single loop for potentially the last three packets */
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0;
+	  vlib_buffer_t *b0;
+	  ip6_header_t *ip0 = 0;
+	  ip6_sr_header_t *sr0;
+	  ethernet_header_t *en0;
+	  ip6_sr_policy_t *sp0;
+	  ip6_sr_sl_t *sl0;
+	  u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+	  b0 = vlib_get_buffer (vm, bi0);
+
+	  /* Find the SR policy */
+	  sp0 = pool_elt_at_index (sm->sr_policies,
+				   sm->sw_iface_sr_policies[vnet_buffer
+							    (b0)->sw_if_index
+							    [VLIB_RX]]);
+
+	  /* In case there is more than one SL, LB among them */
+	  if (vec_len (sp0->segments_lists) == 1)
+	    vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0];
+	  else
+	    {
+	      vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0);
+	      vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+		sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash &
+				     (vec_len (sp0->segments_lists) - 1))];
+	    }
+	  sl0 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl0->rewrite));
+
+	  en0 = vlib_buffer_get_current (b0);
+
+	  clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite,
+		       vec_len (sl0->rewrite));
+
+	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+	  ip0 = vlib_buffer_get_current (b0);
+
+	  ip0->payload_length =
+	    clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));
+
+	  sr0 = (void *) (ip0 + 1);
+	  sr0->protocol = IP_PROTOCOL_IP6_NONXT;
+
+	  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+	      PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      sr_policy_rewrite_trace_t *tr =
+		vlib_add_trace (vm, node, b0, sizeof (*tr));
+	      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			   sizeof (tr->src.as_u8));
+	      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+			   sizeof (tr->dst.as_u8));
+	    }
+
+	  encap_pkts++;
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Update counters */
+  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+			       SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+			       encap_pkts);
+  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+			       SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+			       bsid_pkts);
+
+  return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+/*
+ * Registers the internal graph node "sr-pl-rewrite-encaps-l2"; frames are
+ * dispatched to sr_policy_rewrite_encaps_l2 ().
+ */
+VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = {
+  /* identity and dispatch */
+  .name = "sr-pl-rewrite-encaps-l2",
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .function = sr_policy_rewrite_encaps_l2,
+  .vector_size = sizeof (u32),
+  /* tracing and error counters */
+  .format_trace = format_sr_policy_rewrite_trace,
+  .error_strings = sr_policy_rewrite_error_strings,
+  .n_errors = SR_POLICY_REWRITE_N_ERROR,
+  /* next nodes: one slot per entry of foreach_sr_policy_rewrite_next */
+  .next_nodes = {
+#define _(sym,node_name) [SR_POLICY_REWRITE_NEXT_##sym] = node_name,
+    foreach_sr_policy_rewrite_next
+#undef _
+  },
+  .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Graph node for applying an SR policy to a packet. SRH insertion.
+ *
+ * For every buffer in the frame the SID list is fetched from the pool
+ * sr_main.sid_lists at index vnet_buffer (b)->ip.adj_index[VLIB_TX].
+ * The IPv6 header (plus the header that follows it when ip->protocol is
+ * IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) is moved back by
+ * vec_len (sl->rewrite) bytes and sl->rewrite is copied into the gap.
+ * The node then:
+ *  - decrements the hop limit and adds the rewrite length to the IPv6
+ *    payload length,
+ *  - stores the original destination address in the first entry of the
+ *    inserted header's segment array,
+ *  - loads the new destination address from segments[segments_left],
+ *  - links the inserted header into the next-header chain: it inherits the
+ *    next-header value of the header in front of it, and that header is
+ *    set to IP_PROTOCOL_IPV6_ROUTE.
+ *
+ * Every packet is enqueued to SR_POLICY_REWRITE_NEXT_IP6_LOOKUP.
+ *
+ * @param vm          vlib main, passed on to the buffer/trace/counter calls
+ * @param node        runtime of this node (cached next index, trace flag)
+ * @param from_frame  frame holding the buffer indices to process
+ *
+ * @return from_frame->n_vectors
+ */
+static uword
+sr_policy_rewrite_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
+			  vlib_frame_t * from_frame)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  u32 n_left_from, next_index, *from, *to_next;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  /* bsid_pkts is never incremented in this node, so it is reported as 0 */
+  int insert_pkts = 0, bsid_pkts = 0;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /*
+       * Quad loop: 4 packets per iteration.  It needs 8 packets left so
+       * that from[4..7] can be prefetched for the following iteration.
+       */
+      while (n_left_from >= 8 && n_left_to_next >= 4)
+	{
+	  u32 bi0, bi1, bi2, bi3;
+	  vlib_buffer_t *b0, *b1, *b2, *b3;
+	  u32 next0, next1, next2, next3;
+	  next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
+	  ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+	  ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+	  u16 new_l0, new_l1, new_l2, new_l3;
+
+	  /* Prefetch next iteration. */
+	  {
+	    vlib_buffer_t *p4, *p5, *p6, *p7;
+
+	    p4 = vlib_get_buffer (vm, from[4]);
+	    p5 = vlib_get_buffer (vm, from[5]);
+	    p6 = vlib_get_buffer (vm, from[6]);
+	    p7 = vlib_get_buffer (vm, from[7]);
+
+	    /* Buffer headers and the first data cache line of the next four */
+	    vlib_prefetch_buffer_header (p4, LOAD);
+	    vlib_prefetch_buffer_header (p5, LOAD);
+	    vlib_prefetch_buffer_header (p6, LOAD);
+	    vlib_prefetch_buffer_header (p7, LOAD);
+
+	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+	  }
+
+	  /* Speculatively enqueue the four buffers to the current next frame */
+	  to_next[0] = bi0 = from[0];
+	  to_next[1] = bi1 = from[1];
+	  to_next[2] = bi2 = from[2];
+	  to_next[3] = bi3 = from[3];
+	  from += 4;
+	  to_next += 4;
+	  n_left_from -= 4;
+	  n_left_to_next -= 4;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+	  b2 = vlib_get_buffer (vm, bi2);
+	  b3 = vlib_get_buffer (vm, bi3);
+
+	  /* SID list selected for each packet (index carried in the buffer) */
+	  sl0 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  sl1 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+	  sl2 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+	  sl3 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+	  /* There must be room in front of the packet for the rewrite */
+	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl0->rewrite));
+	  ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl1->rewrite));
+	  ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl2->rewrite));
+	  ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl3->rewrite));
+
+	  ip0 = vlib_buffer_get_current (b0);
+	  ip1 = vlib_buffer_get_current (b1);
+	  ip2 = vlib_buffer_get_current (b2);
+	  ip3 = vlib_buffer_get_current (b3);
+
+	  /*
+	   * srN is the insertion point: right after the IPv6 header, or after
+	   * the hop-by-hop header when the packet carries one.
+	   */
+	  if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+	    sr0 =
+	      (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
+				   ip6_ext_header_len (ip0 + 1));
+	  else
+	    sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+	  if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+	    sr1 =
+	      (ip6_sr_header_t *) (((void *) (ip1 + 1)) +
+				   ip6_ext_header_len (ip1 + 1));
+	  else
+	    sr1 = (ip6_sr_header_t *) (ip1 + 1);
+
+	  if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+	    sr2 =
+	      (ip6_sr_header_t *) (((void *) (ip2 + 1)) +
+				   ip6_ext_header_len (ip2 + 1));
+	  else
+	    sr2 = (ip6_sr_header_t *) (ip2 + 1);
+
+	  if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+	    sr3 =
+	      (ip6_sr_header_t *) (((void *) (ip3 + 1)) +
+				   ip6_ext_header_len (ip3 + 1));
+	  else
+	    sr3 = (ip6_sr_header_t *) (ip3 + 1);
+
+	  /*
+	   * Slide the headers that stay in front of the insertion point back
+	   * by the rewrite length to open a gap.
+	   * NOTE(review): source and destination overlap whenever the rewrite
+	   * is shorter than the span being moved -- confirm clib_memcpy is
+	   * safe for that.
+	   */
+	  clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0,
+		       (void *) sr0 - (void *) ip0);
+	  clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite), (u8 *) ip1,
+		       (void *) sr1 - (void *) ip1);
+	  clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite), (u8 *) ip2,
+		       (void *) sr2 - (void *) ip2);
+	  clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite), (u8 *) ip3,
+		       (void *) sr3 - (void *) ip3);
+
+	  /* Drop the precomputed rewrite into the gap */
+	  clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite,
+		       vec_len (sl0->rewrite));
+	  clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite)), sl1->rewrite,
+		       vec_len (sl1->rewrite));
+	  clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite)), sl2->rewrite,
+		       vec_len (sl2->rewrite));
+	  clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite)), sl3->rewrite,
+		       vec_len (sl3->rewrite));
+
+	  /* The packet now starts vec_len (rewrite) bytes earlier */
+	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+	  vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+	  vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+	  vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+	  /* Re-point ipN at the relocated IPv6 header */
+	  ip0 = ((void *) ip0) - vec_len (sl0->rewrite);
+	  ip1 = ((void *) ip1) - vec_len (sl1->rewrite);
+	  ip2 = ((void *) ip2) - vec_len (sl2->rewrite);
+	  ip3 = ((void *) ip3) - vec_len (sl3->rewrite);
+
+	  ip0->hop_limit -= 1;
+	  ip1->hop_limit -= 1;
+	  ip2->hop_limit -= 1;
+	  ip3->hop_limit -= 1;
+
+	  /* Payload grows by the size of the inserted header */
+	  new_l0 =
+	    clib_net_to_host_u16 (ip0->payload_length) +
+	    vec_len (sl0->rewrite);
+	  new_l1 =
+	    clib_net_to_host_u16 (ip1->payload_length) +
+	    vec_len (sl1->rewrite);
+	  new_l2 =
+	    clib_net_to_host_u16 (ip2->payload_length) +
+	    vec_len (sl2->rewrite);
+	  new_l3 =
+	    clib_net_to_host_u16 (ip3->payload_length) +
+	    vec_len (sl3->rewrite);
+
+	  ip0->payload_length = clib_host_to_net_u16 (new_l0);
+	  ip1->payload_length = clib_host_to_net_u16 (new_l1);
+	  ip2->payload_length = clib_host_to_net_u16 (new_l2);
+	  ip3->payload_length = clib_host_to_net_u16 (new_l3);
+
+	  /* Re-point srN at the copy of the rewrite inside the packet */
+	  sr0 = ((void *) sr0) - vec_len (sl0->rewrite);
+	  sr1 = ((void *) sr1) - vec_len (sl1->rewrite);
+	  sr2 = ((void *) sr2) - vec_len (sl2->rewrite);
+	  sr3 = ((void *) sr3) - vec_len (sl3->rewrite);
+
+	  /* Keep the original destination address in the first segment slot */
+	  sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0];
+	  sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1];
+	  sr1->segments->as_u64[0] = ip1->dst_address.as_u64[0];
+	  sr1->segments->as_u64[1] = ip1->dst_address.as_u64[1];
+	  sr2->segments->as_u64[0] = ip2->dst_address.as_u64[0];
+	  sr2->segments->as_u64[1] = ip2->dst_address.as_u64[1];
+	  sr3->segments->as_u64[0] = ip3->dst_address.as_u64[0];
+	  sr3->segments->as_u64[1] = ip3->dst_address.as_u64[1];
+
+	  /* New destination address is the segment at index segments_left */
+	  ip0->dst_address.as_u64[0] =
+	    (sr0->segments + sr0->segments_left)->as_u64[0];
+	  ip0->dst_address.as_u64[1] =
+	    (sr0->segments + sr0->segments_left)->as_u64[1];
+	  ip1->dst_address.as_u64[0] =
+	    (sr1->segments + sr1->segments_left)->as_u64[0];
+	  ip1->dst_address.as_u64[1] =
+	    (sr1->segments + sr1->segments_left)->as_u64[1];
+	  ip2->dst_address.as_u64[0] =
+	    (sr2->segments + sr2->segments_left)->as_u64[0];
+	  ip2->dst_address.as_u64[1] =
+	    (sr2->segments + sr2->segments_left)->as_u64[1];
+	  ip3->dst_address.as_u64[0] =
+	    (sr3->segments + sr3->segments_left)->as_u64[0];
+	  ip3->dst_address.as_u64[1] =
+	    (sr3->segments + sr3->segments_left)->as_u64[1];
+
+	  /*
+	   * Link the inserted header into the next-header chain.  When it
+	   * directly follows the IPv6 header the IPv6 protocol field is
+	   * rewritten; otherwise the extension header in between is.
+	   */
+	  ip6_ext_header_t *ip_ext;
+	  if (ip0 + 1 == (void *) sr0)
+	    {
+	      sr0->protocol = ip0->protocol;
+	      ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+	  else
+	    {
+	      ip_ext = (void *) (ip0 + 1);
+	      sr0->protocol = ip_ext->next_hdr;
+	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+
+	  if (ip1 + 1 == (void *) sr1)
+	    {
+	      sr1->protocol = ip1->protocol;
+	      ip1->protocol = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+	  else
+	    {
+	      /* must operate on packet 1's own headers (ip1 / sr1) */
+	      ip_ext = (void *) (ip1 + 1);
+	      sr1->protocol = ip_ext->next_hdr;
+	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+
+	  if (ip2 + 1 == (void *) sr2)
+	    {
+	      sr2->protocol = ip2->protocol;
+	      ip2->protocol = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+	  else
+	    {
+	      ip_ext = (void *) (ip2 + 1);
+	      sr2->protocol = ip_ext->next_hdr;
+	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+
+	  if (ip3 + 1 == (void *) sr3)
+	    {
+	      sr3->protocol = ip3->protocol;
+	      ip3->protocol = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+	  else
+	    {
+	      ip_ext = (void *) (ip3 + 1);
+	      sr3->protocol = ip_ext->next_hdr;
+	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+
+	  insert_pkts += 4;
+
+	  /* Record source/destination of the rewritten packet when traced */
+	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+	    {
+	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b0, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b1, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b2, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b3, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+	    }
+
+	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, bi1, bi2, bi3,
+					   next0, next1, next2, next3);
+	}
+
+      /*
+       * Single loop: whatever the quad loop left behind (fewer than 8
+       * packets remaining, or fewer than 4 free slots in the next frame).
+       */
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0;
+	  vlib_buffer_t *b0;
+	  ip6_header_t *ip0 = 0;
+	  ip6_sr_header_t *sr0 = 0;
+	  ip6_sr_sl_t *sl0;
+	  u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+	  u16 new_l0 = 0;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  sl0 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl0->rewrite));
+
+	  ip0 = vlib_buffer_get_current (b0);
+
+	  /* Insertion point: after IPv6, or after hop-by-hop if present */
+	  if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+	    sr0 =
+	      (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
+				   ip6_ext_header_len (ip0 + 1));
+	  else
+	    sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+	  /* Open the gap, then fill it with the rewrite */
+	  clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0,
+		       (void *) sr0 - (void *) ip0);
+	  clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite,
+		       vec_len (sl0->rewrite));
+
+	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+	  ip0 = ((void *) ip0) - vec_len (sl0->rewrite);
+	  ip0->hop_limit -= 1;
+	  new_l0 =
+	    clib_net_to_host_u16 (ip0->payload_length) +
+	    vec_len (sl0->rewrite);
+	  ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+	  /* Original DA goes to the first segment slot */
+	  sr0 = ((void *) sr0) - vec_len (sl0->rewrite);
+	  sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0];
+	  sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1];
+
+	  /* New DA is the segment at index segments_left */
+	  ip0->dst_address.as_u64[0] =
+	    (sr0->segments + sr0->segments_left)->as_u64[0];
+	  ip0->dst_address.as_u64[1] =
+	    (sr0->segments + sr0->segments_left)->as_u64[1];
+
+	  /* Link the inserted header into the next-header chain */
+	  if (ip0 + 1 == (void *) sr0)
+	    {
+	      sr0->protocol = ip0->protocol;
+	      ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+	  else
+	    {
+	      ip6_ext_header_t *ip_ext = (void *) (ip0 + 1);
+	      sr0->protocol = ip_ext->next_hdr;
+	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+
+	  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+	      PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      sr_policy_rewrite_trace_t *tr =
+		vlib_add_trace (vm, node, b0, sizeof (*tr));
+	      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			   sizeof (tr->src.as_u8));
+	      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+			   sizeof (tr->dst.as_u8));
+	    }
+
+	  insert_pkts++;
+
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Update counters */
+  vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
+			       SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+			       insert_pkts);
+  vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
+			       SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+			       bsid_pkts);
+  return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+/*
+ * Registers the internal graph node "sr-pl-rewrite-insert"; frames are
+ * dispatched to sr_policy_rewrite_insert ().
+ */
+VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = {
+  /* identity and dispatch */
+  .name = "sr-pl-rewrite-insert",
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .function = sr_policy_rewrite_insert,
+  .vector_size = sizeof (u32),
+  /* tracing and error counters */
+  .format_trace = format_sr_policy_rewrite_trace,
+  .error_strings = sr_policy_rewrite_error_strings,
+  .n_errors = SR_POLICY_REWRITE_N_ERROR,
+  /* next nodes: one slot per entry of foreach_sr_policy_rewrite_next */
+  .next_nodes = {
+#define _(sym,node_name) [SR_POLICY_REWRITE_NEXT_##sym] = node_name,
+    foreach_sr_policy_rewrite_next
+#undef _
+  },
+  .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Graph node for applying an SR policy to a packet. BSID - SRH insertion.
+ */
+static uword
+sr_policy_rewrite_b_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
+			    vlib_frame_t * from_frame)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  u32 n_left_from, next_index, *from, *to_next;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  int insert_pkts = 0, bsid_pkts = 0;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Quad - Loop */
+      while (n_left_from >= 8 && n_left_to_next >= 4)
+	{
+	  u32 bi0, bi1, bi2, bi3;
+	  vlib_buffer_t *b0, *b1, *b2, *b3;
+	  u32 next0, next1, next2, next3;
+	  next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
+	  ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+	  ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+	  u16 new_l0, new_l1, new_l2, new_l3;
+
+	  /* Prefetch next iteration. */
+	  {
+	    vlib_buffer_t *p4, *p5, *p6, *p7;
+
+	    p4 = vlib_get_buffer (vm, from[4]);
+	    p5 = vlib_get_buffer (vm, from[5]);
+	    p6 = vlib_get_buffer (vm, from[6]);
+	    p7 = vlib_get_buffer (vm, from[7]);
+
+	    /* Prefetch the buffer header and packet for the N+2 loop iteration */
+	    vlib_prefetch_buffer_header (p4, LOAD);
+	    vlib_prefetch_buffer_header (p5, LOAD);
+	    vlib_prefetch_buffer_header (p6, LOAD);
+	    vlib_prefetch_buffer_header (p7, LOAD);
+
+	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+	  }
+
+	  to_next[0] = bi0 = from[0];
+	  to_next[1] = bi1 = from[1];
+	  to_next[2] = bi2 = from[2];
+	  to_next[3] = bi3 = from[3];
+	  from += 4;
+	  to_next += 4;
+	  n_left_from -= 4;
+	  n_left_to_next -= 4;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+	  b2 = vlib_get_buffer (vm, bi2);
+	  b3 = vlib_get_buffer (vm, bi3);
+
+	  sl0 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  sl1 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+	  sl2 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+	  sl3 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl0->rewrite_bsid));
+	  ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl1->rewrite_bsid));
+	  ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl2->rewrite_bsid));
+	  ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl3->rewrite_bsid));
+
+	  ip0 = vlib_buffer_get_current (b0);
+	  ip1 = vlib_buffer_get_current (b1);
+	  ip2 = vlib_buffer_get_current (b2);
+	  ip3 = vlib_buffer_get_current (b3);
+
+	  if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+	    sr0 =
+	      (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
+				   ip6_ext_header_len (ip0 + 1));
+	  else
+	    sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+	  if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+	    sr1 =
+	      (ip6_sr_header_t *) (((void *) (ip1 + 1)) +
+				   ip6_ext_header_len (ip1 + 1));
+	  else
+	    sr1 = (ip6_sr_header_t *) (ip1 + 1);
+
+	  if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+	    sr2 =
+	      (ip6_sr_header_t *) (((void *) (ip2 + 1)) +
+				   ip6_ext_header_len (ip2 + 1));
+	  else
+	    sr2 = (ip6_sr_header_t *) (ip2 + 1);
+
+	  if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+	    sr3 =
+	      (ip6_sr_header_t *) (((void *) (ip3 + 1)) +
+				   ip6_ext_header_len (ip3 + 1));
+	  else
+	    sr3 = (ip6_sr_header_t *) (ip3 + 1);
+
+	  clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0,
+		       (void *) sr0 - (void *) ip0);
+	  clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite_bsid), (u8 *) ip1,
+		       (void *) sr1 - (void *) ip1);
+	  clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite_bsid), (u8 *) ip2,
+		       (void *) sr2 - (void *) ip2);
+	  clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite_bsid), (u8 *) ip3,
+		       (void *) sr3 - (void *) ip3);
+
+	  clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)),
+		       sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid));
+	  clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite_bsid)),
+		       sl1->rewrite_bsid, vec_len (sl1->rewrite_bsid));
+	  clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite_bsid)),
+		       sl2->rewrite_bsid, vec_len (sl2->rewrite_bsid));
+	  clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite_bsid)),
+		       sl3->rewrite_bsid, vec_len (sl3->rewrite_bsid));
+
+	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid));
+	  vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite_bsid));
+	  vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite_bsid));
+	  vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite_bsid));
+
+	  ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid);
+	  ip1 = ((void *) ip1) - vec_len (sl1->rewrite_bsid);
+	  ip2 = ((void *) ip2) - vec_len (sl2->rewrite_bsid);
+	  ip3 = ((void *) ip3) - vec_len (sl3->rewrite_bsid);
+
+	  ip0->hop_limit -= 1;
+	  ip1->hop_limit -= 1;
+	  ip2->hop_limit -= 1;
+	  ip3->hop_limit -= 1;
+
+	  new_l0 =
+	    clib_net_to_host_u16 (ip0->payload_length) +
+	    vec_len (sl0->rewrite_bsid);
+	  new_l1 =
+	    clib_net_to_host_u16 (ip1->payload_length) +
+	    vec_len (sl1->rewrite_bsid);
+	  new_l2 =
+	    clib_net_to_host_u16 (ip2->payload_length) +
+	    vec_len (sl2->rewrite_bsid);
+	  new_l3 =
+	    clib_net_to_host_u16 (ip3->payload_length) +
+	    vec_len (sl3->rewrite_bsid);
+
+	  ip0->payload_length = clib_host_to_net_u16 (new_l0);
+	  ip1->payload_length = clib_host_to_net_u16 (new_l1);
+	  ip2->payload_length = clib_host_to_net_u16 (new_l2);
+	  ip3->payload_length = clib_host_to_net_u16 (new_l3);
+
+	  sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid);
+	  sr1 = ((void *) sr1) - vec_len (sl1->rewrite_bsid);
+	  sr2 = ((void *) sr2) - vec_len (sl2->rewrite_bsid);
+	  sr3 = ((void *) sr3) - vec_len (sl3->rewrite_bsid);
+
+	  ip0->dst_address.as_u64[0] =
+	    (sr0->segments + sr0->segments_left)->as_u64[0];
+	  ip0->dst_address.as_u64[1] =
+	    (sr0->segments + sr0->segments_left)->as_u64[1];
+	  ip1->dst_address.as_u64[0] =
+	    (sr1->segments + sr1->segments_left)->as_u64[0];
+	  ip1->dst_address.as_u64[1] =
+	    (sr1->segments + sr1->segments_left)->as_u64[1];
+	  ip2->dst_address.as_u64[0] =
+	    (sr2->segments + sr2->segments_left)->as_u64[0];
+	  ip2->dst_address.as_u64[1] =
+	    (sr2->segments + sr2->segments_left)->as_u64[1];
+	  ip3->dst_address.as_u64[0] =
+	    (sr3->segments + sr3->segments_left)->as_u64[0];
+	  ip3->dst_address.as_u64[1] =
+	    (sr3->segments + sr3->segments_left)->as_u64[1];
+
+	  ip6_ext_header_t *ip_ext;
+	  if (ip0 + 1 == (void *) sr0)
+	    {
+	      sr0->protocol = ip0->protocol;
+	      ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+	  else
+	    {
+	      ip_ext = (void *) (ip0 + 1);
+	      sr0->protocol = ip_ext->next_hdr;
+	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+
+	  if (ip1 + 1 == (void *) sr1)
+	    {
+	      sr1->protocol = ip1->protocol;
+	      ip1->protocol = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+	  else
+	    {
+	      ip_ext = (void *) (ip2 + 1);
+	      sr2->protocol = ip_ext->next_hdr;
+	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+
+	  if (ip2 + 1 == (void *) sr2)
+	    {
+	      sr2->protocol = ip2->protocol;
+	      ip2->protocol = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+	  else
+	    {
+	      ip_ext = (void *) (ip2 + 1);
+	      sr2->protocol = ip_ext->next_hdr;
+	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+
+	  if (ip3 + 1 == (void *) sr3)
+	    {
+	      sr3->protocol = ip3->protocol;
+	      ip3->protocol = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+	  else
+	    {
+	      ip_ext = (void *) (ip3 + 1);
+	      sr3->protocol = ip_ext->next_hdr;
+	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+
+	  insert_pkts += 4;
+
+	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+	    {
+	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b0, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b1, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b2, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b3, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+	    }
+
+	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, bi1, bi2, bi3,
+					   next0, next1, next2, next3);
+	}
+
+      /* Single loop for potentially the last three packets */
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0;
+	  vlib_buffer_t *b0;
+	  ip6_header_t *ip0 = 0;
+	  ip6_sr_header_t *sr0 = 0;
+	  ip6_sr_sl_t *sl0;
+	  u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+	  u16 new_l0 = 0;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  sl0 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl0->rewrite_bsid));
+
+	  ip0 = vlib_buffer_get_current (b0);
+
+	  if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+	    sr0 =
+	      (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
+				   ip6_ext_header_len (ip0 + 1));
+	  else
+	    sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+	  clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0,
+		       (void *) sr0 - (void *) ip0);
+	  clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)),
+		       sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid));
+
+	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid));
+
+	  ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid);
+	  ip0->hop_limit -= 1;
+	  new_l0 =
+	    clib_net_to_host_u16 (ip0->payload_length) +
+	    vec_len (sl0->rewrite_bsid);
+	  ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+	  sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid);
+
+	  ip0->dst_address.as_u64[0] =
+	    (sr0->segments + sr0->segments_left)->as_u64[0];
+	  ip0->dst_address.as_u64[1] =
+	    (sr0->segments + sr0->segments_left)->as_u64[1];
+
+	  if (ip0 + 1 == (void *) sr0)
+	    {
+	      sr0->protocol = ip0->protocol;
+	      ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+	  else
+	    {
+	      ip6_ext_header_t *ip_ext = (void *) (ip0 + 1);
+	      sr0->protocol = ip_ext->next_hdr;
+	      ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+	    }
+
+	  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+	      PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      sr_policy_rewrite_trace_t *tr =
+		vlib_add_trace (vm, node, b0, sizeof (*tr));
+	      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			   sizeof (tr->src.as_u8));
+	      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+			   sizeof (tr->dst.as_u8));
+	    }
+
+	  insert_pkts++;
+
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Update counters */
+  vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
+			       SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+			       insert_pkts);
+  vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
+			       SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+			       bsid_pkts);
+  return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = {
+  .function = sr_policy_rewrite_b_insert,
+  .name = "sr-pl-rewrite-b-insert",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sr_policy_rewrite_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = SR_POLICY_REWRITE_N_ERROR,
+  .error_strings = sr_policy_rewrite_error_strings,
+  .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+  .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+    foreach_sr_policy_rewrite_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief SRH step before BSID encapsulation: move to the next segment or flag an error
+ */
+static_always_inline void
+end_bsid_encaps_srh_processing (vlib_node_runtime_t * node,
+				vlib_buffer_t * b0,
+				ip6_header_t * ip0,
+				ip6_sr_header_t * sr0, u32 * next0)
+{
+  ip6_address_t *new_dst0;
+  /* A null sr0 means the caller handed in no SR header: error path */
+  if (PREDICT_FALSE (!sr0))
+    goto error_bsid_encaps;
+  /* Only an SR-type routing header with segments left can be advanced */
+  if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR))
+    {
+      if (PREDICT_TRUE (sr0->segments_left != 0))
+	{
+	  sr0->segments_left -= 1;
+	  new_dst0 = (ip6_address_t *) (sr0->segments);
+	  new_dst0 += sr0->segments_left;
+	  ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
+	  ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
+	  return;
+	}
+    }
+  /* Reached for a null SRH, a non-SR routing header or segments_left == 0 */
+error_bsid_encaps:
+  *next0 = SR_POLICY_REWRITE_NEXT_ERROR;
+  b0->error = node->errors[SR_POLICY_REWRITE_ERROR_BSID_ZERO];
+}
+
+/**
+ * @brief Graph node for applying a SR policy BSID - Encapsulation
+ */
+static uword
+sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
+			    vlib_frame_t * from_frame)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  u32 n_left_from, next_index, *from, *to_next;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  int encap_pkts = 0, bsid_pkts = 0;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Quad - Loop */
+      while (n_left_from >= 8 && n_left_to_next >= 4)
+	{
+	  u32 bi0, bi1, bi2, bi3;
+	  vlib_buffer_t *b0, *b1, *b2, *b3;
+	  u32 next0, next1, next2, next3;
+	  next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+	  ip6_header_t *ip0, *ip1, *ip2, *ip3;
+	  ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap;
+	  ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+	  ip6_ext_header_t *prev0, *prev1, *prev2, *prev3;
+	  ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+
+	  /* Prefetch next iteration. */
+	  {
+	    vlib_buffer_t *p4, *p5, *p6, *p7;
+
+	    p4 = vlib_get_buffer (vm, from[4]);
+	    p5 = vlib_get_buffer (vm, from[5]);
+	    p6 = vlib_get_buffer (vm, from[6]);
+	    p7 = vlib_get_buffer (vm, from[7]);
+
+	    /* Prefetch the buffer header and packet for the N+2 loop iteration */
+	    vlib_prefetch_buffer_header (p4, LOAD);
+	    vlib_prefetch_buffer_header (p5, LOAD);
+	    vlib_prefetch_buffer_header (p6, LOAD);
+	    vlib_prefetch_buffer_header (p7, LOAD);
+
+	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+	  }
+
+	  to_next[0] = bi0 = from[0];
+	  to_next[1] = bi1 = from[1];
+	  to_next[2] = bi2 = from[2];
+	  to_next[3] = bi3 = from[3];
+	  from += 4;
+	  to_next += 4;
+	  n_left_from -= 4;
+	  n_left_to_next -= 4;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+	  b2 = vlib_get_buffer (vm, bi2);
+	  b3 = vlib_get_buffer (vm, bi3);
+
+	  sl0 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  sl1 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+	  sl2 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+	  sl3 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl0->rewrite));
+	  ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl1->rewrite));
+	  ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl2->rewrite));
+	  ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl3->rewrite));
+
+	  ip0_encap = vlib_buffer_get_current (b0);
+	  ip1_encap = vlib_buffer_get_current (b1);
+	  ip2_encap = vlib_buffer_get_current (b2);
+	  ip3_encap = vlib_buffer_get_current (b3);
+
+	  ip6_ext_header_find_t (ip0_encap, prev0, sr0,
+				 IP_PROTOCOL_IPV6_ROUTE);
+	  ip6_ext_header_find_t (ip1_encap, prev1, sr1,
+				 IP_PROTOCOL_IPV6_ROUTE);
+	  ip6_ext_header_find_t (ip2_encap, prev2, sr2,
+				 IP_PROTOCOL_IPV6_ROUTE);
+	  ip6_ext_header_find_t (ip3_encap, prev3, sr3,
+				 IP_PROTOCOL_IPV6_ROUTE);
+
+	  end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0);
+	  end_bsid_encaps_srh_processing (node, b1, ip1_encap, sr1, &next1);
+	  end_bsid_encaps_srh_processing (node, b2, ip2_encap, sr2, &next2);
+	  end_bsid_encaps_srh_processing (node, b3, ip3_encap, sr3, &next3);
+
+	  clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+		       sl0->rewrite, vec_len (sl0->rewrite));
+	  clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite),
+		       sl1->rewrite, vec_len (sl1->rewrite));
+	  clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite),
+		       sl2->rewrite, vec_len (sl2->rewrite));
+	  clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite),
+		       sl3->rewrite, vec_len (sl3->rewrite));
+
+	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+	  vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+	  vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+	  vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+	  ip0 = vlib_buffer_get_current (b0);
+	  ip1 = vlib_buffer_get_current (b1);
+	  ip2 = vlib_buffer_get_current (b2);
+	  ip3 = vlib_buffer_get_current (b3);
+
+	  encaps_processing_v6 (node, b0, ip0, ip0_encap);
+	  encaps_processing_v6 (node, b1, ip1, ip1_encap);
+	  encaps_processing_v6 (node, b2, ip2, ip2_encap);
+	  encaps_processing_v6 (node, b3, ip3, ip3_encap);
+
+	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+	    {
+	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b0, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b1, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b2, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+
+	      if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  sr_policy_rewrite_trace_t *tr =
+		    vlib_add_trace (vm, node, b3, sizeof (*tr));
+		  clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+			       sizeof (tr->src.as_u8));
+		  clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+			       sizeof (tr->dst.as_u8));
+		}
+	    }
+
+	  encap_pkts += 4;
+	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, bi1, bi2, bi3,
+					   next0, next1, next2, next3);
+	}
+
+      /* Single loop for potentially the last three packets */
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0;
+	  vlib_buffer_t *b0;
+	  ip6_header_t *ip0 = 0, *ip0_encap = 0;
+	  ip6_ext_header_t *prev0;
+	  ip6_sr_header_t *sr0;
+	  ip6_sr_sl_t *sl0;
+	  u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+	  b0 = vlib_get_buffer (vm, bi0);
+
+	  sl0 =
+	    pool_elt_at_index (sm->sid_lists,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+		  vec_len (sl0->rewrite));
+
+	  ip0_encap = vlib_buffer_get_current (b0);
+	  ip6_ext_header_find_t (ip0_encap, prev0, sr0,
+				 IP_PROTOCOL_IPV6_ROUTE);
+	  end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0);
+
+	  clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+		       sl0->rewrite, vec_len (sl0->rewrite));
+	  vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+	  ip0 = vlib_buffer_get_current (b0);
+
+	  encaps_processing_v6 (node, b0, ip0, ip0_encap);
+
+	  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+	      PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      sr_policy_rewrite_trace_t *tr =
+		vlib_add_trace (vm, node, b0, sizeof (*tr));
+	      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+			   sizeof (tr->src.as_u8));
+	      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+			   sizeof (tr->dst.as_u8));
+	    }
+
+	  encap_pkts++;
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Update counters */
+  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+			       SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+			       encap_pkts);
+  vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+			       SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+			       bsid_pkts);
+
+  return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = {
+  .function = sr_policy_rewrite_b_encaps,
+  .name = "sr-pl-rewrite-b-encaps",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sr_policy_rewrite_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = SR_POLICY_REWRITE_N_ERROR,
+  .error_strings = sr_policy_rewrite_error_strings,
+  .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+  .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+    foreach_sr_policy_rewrite_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
+
+/*************************** SR Segment Lists DPOs ****************************/
+static u8 *
+format_sr_segment_list_dpo (u8 * s, va_list * args)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  ip6_address_t *addr;
+  ip6_sr_sl_t *sl;
+  /* Variadic arguments: the segment list index, then an indent that is unused */
+  index_t index = va_arg (*args, index_t);
+  CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+  s = format (s, "SR: Segment List index:[%d]", index);
+  s = format (s, "\n\tSegments:");
+  /* The index selects an element of the sm->sid_lists pool */
+  sl = pool_elt_at_index (sm->sid_lists, index);
+  /* Appends "< seg, seg, ... > - Weight: w" to s */
+  s = format (s, "< ");
+  vec_foreach (addr, sl->segments)
+  {
+    s = format (s, "%U, ", format_ip6_address, addr);
+  }
+  s = format (s, "\b\b > - ");	/* two backspaces follow the last ", " written above */
+  s = format (s, "Weight: %u", sl->weight);
+
+  return s;
+}
+
+const static dpo_vft_t sr_policy_rewrite_vft = {
+  .dv_lock = sr_dpo_lock,
+  .dv_unlock = sr_dpo_unlock,
+  .dv_format = format_sr_segment_list_dpo,
+};
+
+const static char *const sr_pr_encaps_ip6_nodes[] = {
+  "sr-pl-rewrite-encaps",
+  NULL,
+};
+
+const static char *const sr_pr_encaps_ip4_nodes[] = {
+  "sr-pl-rewrite-encaps-v4",
+  NULL,
+};
+
+const static char *const *const sr_pr_encaps_nodes[DPO_PROTO_NUM] = {
+  [DPO_PROTO_IP6] = sr_pr_encaps_ip6_nodes,
+  [DPO_PROTO_IP4] = sr_pr_encaps_ip4_nodes,
+};
+
+const static char *const sr_pr_insert_ip6_nodes[] = {
+  "sr-pl-rewrite-insert",
+  NULL,
+};
+
+const static char *const *const sr_pr_insert_nodes[DPO_PROTO_NUM] = {
+  [DPO_PROTO_IP6] = sr_pr_insert_ip6_nodes,
+};
+
+const static char *const sr_pr_bsid_insert_ip6_nodes[] = {
+  "sr-pl-rewrite-b-insert",
+  NULL,
+};
+
+const static char *const *const sr_pr_bsid_insert_nodes[DPO_PROTO_NUM] = {
+  [DPO_PROTO_IP6] = sr_pr_bsid_insert_ip6_nodes,
+};
+
+const static char *const sr_pr_bsid_encaps_ip6_nodes[] = {
+  "sr-pl-rewrite-b-encaps",
+  NULL,
+};
+
+const static char *const *const sr_pr_bsid_encaps_nodes[DPO_PROTO_NUM] = {
+  [DPO_PROTO_IP6] = sr_pr_bsid_encaps_ip6_nodes,
+};
+
+/********************* SR Policy Rewrite initialization ***********************/
+/**
+ * @brief SR Policy Rewrite initialization: BSID hash, DPO types, FIB table indices
+ */
+clib_error_t *
+sr_policy_rewrite_init (vlib_main_t * vm)
+{
+  ip6_sr_main_t *sm = &sr_main;
+
+  /* Init the hash used to look up an SR policy index from its BSID (ip6_address_t) */
+  mhash_init (&sm->sr_policies_index_hash, sizeof (uword),
+	      sizeof (ip6_address_t));
+
+  /* Register four DPO types (encaps, insert, BSID encaps, BSID insert) sharing one vft */
+  sr_pr_encaps_dpo_type =
+    dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_encaps_nodes);
+
+  sr_pr_insert_dpo_type =
+    dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_insert_nodes);
+
+  sr_pr_bsid_encaps_dpo_type =
+    dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_encaps_nodes);
+
+  sr_pr_bsid_insert_dpo_type =
+    dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_insert_nodes);
+
+  /* Register the L2 encaps node used in HW redirect */
+  sm->l2_sr_policy_rewrite_index = sr_policy_rewrite_encaps_node.index;
+  /* Both SR FIB table indices start at the ~0 sentinel */
+  sm->fib_table_ip6 = (u32) ~ 0;
+  sm->fib_table_ip4 = (u32) ~ 0;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (sr_policy_rewrite_init);
+
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c
new file mode 100755
index 00000000..a7903751
--- /dev/null
+++ b/src/vnet/srv6/sr_steering.c
@@ -0,0 +1,573 @@
+/*
+ * sr_steering.c: ipv6 segment routing steering into SR policy
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Packet steering into SR Policies
+ *
+ * This file is in charge of handling the FIB appropriately to steer packets
+ * through SR Policies as defined in 'sr_policy_rewrite.c'. Notice that here
+ * we are only doing steering. SR policy application is done in
+ * sr_policy_rewrite.c
+ *
+ * Supports:
+ *  - Steering of IPv6 traffic Destination Address based
+ *  - Steering of IPv4 traffic Destination Address based
+ *  - Steering of L2 frames, interface based (sw interface)
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+/**
+ * @brief Steer L2 and L3 traffic through a given SR policy
+ *
+ * @param is_del is non-zero to delete the steering policy instead of adding it
+ * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index)
+ * @param sr_policy_index is the index of the SR Policy (alt to bsid)
+ * @param table_id is the FIB table of the steered prefix (~0 selects table 0)
+ * @param prefix is the IPv4/v6 address for L3 traffic type
+ * @param mask_width is the mask for L3 traffic type
+ * @param sw_if_index is the incoming interface for L2 traffic
+ * @param traffic_type describes the type of traffic
+ *
+ * @return 0 on add/update, 1 on delete, negative value on error
+ */
+int
+sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index,
+		    u32 table_id, ip46_address_t * prefix, u32 mask_width,
+		    u32 sw_if_index, u8 traffic_type)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  sr_steering_key_t key;
+  ip6_sr_steering_policy_t *steer_pl;
+  fib_prefix_t pfx = { 0 };
+
+  ip6_sr_policy_t *sr_policy = 0;
+  uword *p = 0;
+
+  memset (&key, 0, sizeof (sr_steering_key_t));
+
+  /* Compute the steer policy key */
+  if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6)
+    {
+      key.l3.prefix.as_u64[0] = prefix->as_u64[0];
+      key.l3.prefix.as_u64[1] = prefix->as_u64[1];
+      key.l3.mask_width = mask_width;
+      key.l3.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
+    }
+  else if (traffic_type == SR_STEER_L2)
+    {
+      key.l2.sw_if_index = sw_if_index;
+
+      /* Sanitise the SW_IF_INDEX */
+      if (pool_is_free_index (sm->vnet_main->interface_main.sw_interfaces,
+			      sw_if_index))
+	return -3;
+
+      vnet_sw_interface_t *sw =
+	vnet_get_sw_interface (sm->vnet_main, sw_if_index);
+      if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+	return -3;
+    }
+  else
+    return -1;
+
+  key.traffic_type = traffic_type;
+
+  /* Search for the item */
+  p = mhash_get (&sm->sr_steer_policies_hash, &key);
+
+  if (p)
+    {
+      /* Retrieve the existing steering policy */
+      steer_pl = pool_elt_at_index (sm->steer_policies, p[0]);
+
+      if (is_del)
+	{
+	  if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+	    {
+	      /* Remove FIB entry */
+	      pfx.fp_proto = FIB_PROTOCOL_IP6;
+	      pfx.fp_len = steer_pl->classify.l3.mask_width;
+	      pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6;
+
+	      fib_table_entry_delete (fib_table_find
+				      (FIB_PROTOCOL_IP6,
+				       steer_pl->classify.l3.fib_table),
+				      &pfx, FIB_SOURCE_SR);
+	    }
+	  else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+	    {
+	      /* Remove FIB entry */
+	      pfx.fp_proto = FIB_PROTOCOL_IP4;
+	      pfx.fp_len = steer_pl->classify.l3.mask_width;
+	      pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4;
+
+	      fib_table_entry_delete (fib_table_find
+				      (FIB_PROTOCOL_IP4,
+				       steer_pl->classify.l3.fib_table), &pfx,
+				      FIB_SOURCE_SR);
+	    }
+	  else if (steer_pl->classify.traffic_type == SR_STEER_L2)
+	    {
+	      /* Remove HW redirection (same feature name as used to enable it) */
+	      vnet_feature_enable_disable ("device-input",
+					   "sr-pl-rewrite-encaps-l2",
+					   sw_if_index, 0, 0, 0);
+	      sm->sw_iface_sr_policies[sw_if_index] = ~(u32) 0;
+
+	      /* Remove promiscuous mode from interface */
+	      vnet_main_t *vnm = vnet_get_main ();
+	      ethernet_main_t *em = &ethernet_main;
+	      ethernet_interface_t *eif =
+		ethernet_get_interface (em, sw_if_index);
+
+	      if (!eif)
+		goto cleanup_error_redirection;
+
+	      ethernet_set_flags (vnm, sw_if_index, 0);
+	    }
+
+	  /* Delete SR steering policy entry */
+	  pool_put (sm->steer_policies, steer_pl);
+	  mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+
+	  /* Both SR FIB tables are released as IP6 tables once nothing uses them */
+	  if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies))
+	    {
+	      fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6);
+	      fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6);
+	      sm->fib_table_ip6 = (u32) ~ 0;
+	      sm->fib_table_ip4 = (u32) ~ 0;
+	    }
+
+	  return 1;
+	}
+      else			/* It means user requested to update an existing SR steering policy */
+	{
+	  /* Retrieve the SR policy; an unknown BSID or a free index yields -2 */
+	  if (bsid)
+	    {
+	      p = mhash_get (&sm->sr_policies_index_hash, bsid);
+	      if (p)
+		sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+	      else
+		return -2;
+	    }
+	  else if (!pool_is_free_index (sm->sr_policies, sr_policy_index))
+	    sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
+
+	  if (!sr_policy)
+	    return -2;
+
+	  steer_pl->sr_policy = sr_policy - sm->sr_policies;
+
+	  /* Remove old FIB/hw redirection and create a new one */
+	  if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+	    {
+	      /* Remove FIB entry */
+	      pfx.fp_proto = FIB_PROTOCOL_IP6;
+	      pfx.fp_len = steer_pl->classify.l3.mask_width;
+	      pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6;
+
+	      fib_table_entry_delete (fib_table_find
+				      (FIB_PROTOCOL_IP6,
+				       steer_pl->classify.l3.fib_table),
+				      &pfx, FIB_SOURCE_SR);
+
+	      /* Create a new one */
+	      goto update_fib;
+	    }
+	  else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+	    {
+	      /* Remove FIB entry */
+	      pfx.fp_proto = FIB_PROTOCOL_IP4;
+	      pfx.fp_len = steer_pl->classify.l3.mask_width;
+	      pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4;
+
+	      fib_table_entry_delete (fib_table_find
+				      (FIB_PROTOCOL_IP4,
+				       steer_pl->classify.l3.fib_table),
+				      &pfx, FIB_SOURCE_SR);
+
+	      /* Create a new one */
+	      goto update_fib;
+	    }
+	  else if (steer_pl->classify.traffic_type == SR_STEER_L2)
+	    {
+	      /* Update L2-HW redirection */
+	      goto update_fib;
+	    }
+	}
+    }
+  else
+    /* delete; steering policy does not exist; complain */
+  if (is_del)
+    return -4;
+
+  /* Retrieve SR policy; validate a caller-supplied index before using it */
+  if (bsid)
+    {
+      p = mhash_get (&sm->sr_policies_index_hash, bsid);
+      if (!p)
+	return -2;
+      sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+    }
+  else if (pool_is_free_index (sm->sr_policies, sr_policy_index))
+    return -2;
+  else
+    sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
+
+  /* Create a new steering policy */
+  pool_get (sm->steer_policies, steer_pl);
+  memset (steer_pl, 0, sizeof (*steer_pl));
+
+  if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6)
+    {
+      clib_memcpy (&steer_pl->classify.l3.prefix, prefix,
+		   sizeof (ip46_address_t));
+      steer_pl->classify.l3.mask_width = mask_width;
+      steer_pl->classify.l3.fib_table =
+	(table_id != (u32) ~ 0 ? table_id : 0);
+      steer_pl->classify.traffic_type = traffic_type;
+    }
+  else if (traffic_type == SR_STEER_L2)
+    {
+      steer_pl->classify.l2.sw_if_index = sw_if_index;
+      steer_pl->classify.traffic_type = traffic_type;
+    }
+  else
+    {
+      /* Incorrect API usage. Should never get here (key is not hashed yet) */
+      pool_put (sm->steer_policies, steer_pl);
+      return -1;
+    }
+  steer_pl->sr_policy = sr_policy - sm->sr_policies;
+
+  /* Create and store key */
+  mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies,
+	     NULL);
+
+  if (traffic_type == SR_STEER_L2)
+    {
+      if (!sr_policy->is_encap)
+	goto cleanup_error_encap;
+
+      if (vnet_feature_enable_disable
+	  ("device-input", "sr-pl-rewrite-encaps-l2", sw_if_index, 1, 0, 0))
+	goto cleanup_error_redirection;
+
+      /* Set promiscuous mode on interface */
+      vnet_main_t *vnm = vnet_get_main ();
+      ethernet_main_t *em = &ethernet_main;
+      ethernet_interface_t *eif = ethernet_get_interface (em, sw_if_index);
+
+      if (!eif)
+	goto cleanup_error_redirection;
+
+      ethernet_set_flags (vnm, sw_if_index,
+			  ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
+    }
+  else if (traffic_type == SR_STEER_IPV4)
+    if (!sr_policy->is_encap)
+      goto cleanup_error_encap;
+
+update_fib:
+  /* FIB API calls - Recursive route through the BindingSID */
+  if (traffic_type == SR_STEER_IPV6)
+    {
+      pfx.fp_proto = FIB_PROTOCOL_IP6;
+      pfx.fp_len = steer_pl->classify.l3.mask_width;
+      pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6;
+
+      fib_table_entry_path_add (fib_table_find (FIB_PROTOCOL_IP6,
+						(table_id !=
+						 (u32) ~ 0 ?
+						 table_id : 0)),
+				&pfx, FIB_SOURCE_SR,
+				FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
+				FIB_PROTOCOL_IP6,
+				(ip46_address_t *) & sr_policy->bsid, ~0,
+				sm->fib_table_ip6, 1, NULL,
+				FIB_ROUTE_PATH_FLAG_NONE);
+    }
+  else if (traffic_type == SR_STEER_IPV4)
+    {
+      pfx.fp_proto = FIB_PROTOCOL_IP4;
+      pfx.fp_len = steer_pl->classify.l3.mask_width;
+      pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4;
+
+      fib_table_entry_path_add (fib_table_find (FIB_PROTOCOL_IP4,
+						(table_id !=
+						 (u32) ~ 0 ?
+						 table_id : 0)),
+				&pfx, FIB_SOURCE_SR,
+				FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
+				FIB_PROTOCOL_IP6,
+				(ip46_address_t *) & sr_policy->bsid, ~0,
+				sm->fib_table_ip4, 1, NULL,
+				FIB_ROUTE_PATH_FLAG_NONE);
+    }
+  else if (traffic_type == SR_STEER_L2)
+    {
+      if (sw_if_index < vec_len (sm->sw_iface_sr_policies))
+	sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy;
+      else
+	{
+	  vec_resize (sm->sw_iface_sr_policies,
+		      (pool_len (sm->vnet_main->interface_main.sw_interfaces)
+		       - vec_len (sm->sw_iface_sr_policies)));
+	  sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy;
+	}
+    }
+
+  return 0;
+
+cleanup_error_encap:
+  pool_put (sm->steer_policies, steer_pl);
+  mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+  return -5;
+
+cleanup_error_redirection:
+  pool_put (sm->steer_policies, steer_pl);
+  mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+  return -3;
+}
+
+/**
+ * @brief CLI handler for "sr steer": parses the arguments, calls sr_steering_policy()
+ */
+static clib_error_t *
+sr_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+			    vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  int is_del = 0;
+  ip46_address_t prefix;
+  u32 dst_mask_width = 0;
+  u32 sw_if_index = (u32) ~ 0;
+  u8 traffic_type = 0;
+  u32 fib_table = (u32) ~ 0;
+
+  ip6_address_t bsid;
+  u32 sr_policy_index = (u32) ~ 0;
+  u8 sr_policy_set = 0;
+
+  memset (&prefix, 0, sizeof (ip46_address_t));
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "del"))
+	is_del = 1;
+      else if (!traffic_type
+	       && unformat (input, "l3 %U/%d", unformat_ip6_address,
+			    &prefix.ip6, &dst_mask_width))
+	traffic_type = SR_STEER_IPV6;
+      else if (!traffic_type
+	       && unformat (input, "l3 %U/%d", unformat_ip4_address,
+			    &prefix.ip4, &dst_mask_width))
+	traffic_type = SR_STEER_IPV4;
+      else if (!traffic_type
+	       && unformat (input, "l2 %U", unformat_vnet_sw_interface, vnm,
+			    &sw_if_index))
+	traffic_type = SR_STEER_L2;
+      else if (!sr_policy_set
+	       && unformat (input, "via sr policy index %d",
+			    &sr_policy_index))
+	sr_policy_set = 1;
+      else if (!sr_policy_set
+	       && unformat (input, "via sr policy bsid %U",
+			    unformat_ip6_address, &bsid))
+	sr_policy_set = 1;
+      else if (fib_table == (u32) ~ 0
+	       && unformat (input, "fib-table %d", &fib_table));
+      else
+	break;
+    }
+
+  if (!traffic_type)
+    return clib_error_return (0, "No L2/L3 traffic specified");
+  if (!sr_policy_set)
+    return clib_error_return (0, "No SR policy specified");
+
+  /* Clear the host bits; ip4 as_u32 is in network order, so convert the mask */
+  if (traffic_type == SR_STEER_IPV4)
+    {
+      if (dst_mask_width > 32)
+	return clib_error_return (0, "Invalid IPv4 prefix length");
+      u32 mask = (dst_mask_width ? (~0u << (32 - dst_mask_width)) : 0);
+      prefix.ip4.as_u32 &= clib_host_to_net_u32 (mask);
+    }
+  else if (traffic_type == SR_STEER_IPV6)
+    {
+      ip6_address_t mask;
+      ip6_address_mask_from_width (&mask, dst_mask_width);
+      ip6_address_mask (&prefix.ip6, &mask);
+    }
+
+  int rv =
+    sr_steering_policy (is_del, (sr_policy_index == ~(u32) 0 ? &bsid : NULL),
+			sr_policy_index, fib_table, &prefix, dst_mask_width,
+			sw_if_index, traffic_type);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+    case 1:
+      return 0;
+    case -1:
+      return clib_error_return (0, "Incorrect API usage.");
+    case -2:
+      return clib_error_return (0,
+				"The requested SR policy could not be located. Review the BSID/index.");
+    case -3:
+      return clib_error_return (0,
+				"Unable to do SW redirect. Incorrect interface.");
+    case -4:
+      return clib_error_return (0,
+				"The requested SR steering policy could not be deleted.");
+    case -5:
+      return clib_error_return (0,
+				"The SR policy is not an encapsulation one.");
+    default:
+      return clib_error_return (0, "BUG: sr steer policy returns %d", rv);
+    }
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sr_steer_policy_command, static) = {
+  .path = "sr steer",
+  .short_help = "sr steer (del) [l3 <ip_addr/mask>|l2 <sf_if>] "
+    "via sr policy [index <sr_policy_index>|bsid <bsid_ip6_addr>] "
+    "(fib-table <fib_table_index>)",
+  .long_help =
+    "\tSteer a L2 or L3 traffic through an existing SR policy.\n"
+    "\tExamples:\n"
+    "\t\tsr steer l3 2001::/64 via sr policy index 5\n"
+    "\t\tsr steer l3 2001::/64 via sr policy bsid 2010::9999:1\n"
+    "\t\tsr steer l2 GigabitEthernet0/5/0 via sr policy index 5\n"
+    "\t\tsr steer del l3 2001::/64 via sr policy index 5\n",
+  .function = sr_steer_policy_command_fn,
+};
+/* *INDENT-ON* */
+
+/** @brief CLI handler for "show sr steering policies": one line per policy */
+static clib_error_t *
+show_sr_steering_policies_command_fn (vlib_main_t * vm,
+				      unformat_input_t * input,
+				      vlib_cli_command_t * cmd)
+{
+  ip6_sr_main_t *sm = &sr_main;
+  ip6_sr_steering_policy_t **steer_policies = 0;
+  ip6_sr_steering_policy_t *steer_pl;
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_sr_policy_t *pl = 0;
+  int i;
+
+  vlib_cli_output (vm, "SR steering policies:");
+  /* *INDENT-OFF* */
+  pool_foreach (steer_pl, sm->steer_policies, ({vec_add1(steer_policies, steer_pl);}));
+  /* *INDENT-ON* */
+  vlib_cli_output (vm, "Traffic\t\tSR policy BSID");
+  for (i = 0; i < vec_len (steer_policies); i++)
+    {
+      steer_pl = steer_policies[i];
+      pl = pool_elt_at_index (sm->sr_policies, steer_pl->sr_policy);
+      if (steer_pl->classify.traffic_type == SR_STEER_L2)
+	{
+	  vlib_cli_output (vm, "L2 %U\t%U",
+			   format_vnet_sw_if_index_name, vnm,
+			   steer_pl->classify.l2.sw_if_index,
+			   format_ip6_address, &pl->bsid);
+	}
+      else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+	{
+	  vlib_cli_output (vm, "L3 %U/%d\t%U",
+			   format_ip4_address,
+			   &steer_pl->classify.l3.prefix.ip4,
+			   steer_pl->classify.l3.mask_width,
+			   format_ip6_address, &pl->bsid);
+	}
+      else if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+	{
+	  vlib_cli_output (vm, "L3 %U/%d\t%U",
+			   format_ip6_address,
+			   &steer_pl->classify.l3.prefix.ip6,
+			   steer_pl->classify.l3.mask_width,
+			   format_ip6_address, &pl->bsid);
+	}
+    }
+  vec_free (steer_policies);	/* release the temporary pointer vector */
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_steering_policies_command, static) = {
+  .path = "show sr steering policies",
+  .short_help = "show sr steering policies",
+  .function = show_sr_steering_policies_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+sr_steering_init (vlib_main_t * vm)
+{
+  ip6_sr_main_t *sm = &sr_main;
+
+  /* Init the hash of steering policies, keyed by sr_steering_key_t */
+  mhash_init (&sm->sr_steer_policies_hash, sizeof (uword),
+	      sizeof (sr_steering_key_t));
+  /* Starts empty; sr_steering_policy() sizes it when an L2 policy is added */
+  sm->sw_iface_sr_policies = 0;
+
+  sm->vnet_main = vnet_get_main ();
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_INIT_FUNCTION (sr_steering_init);
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VNET_FEATURE_INIT (sr_pl_rewrite_encaps_l2, static) =
+{
+  .arc_name = "device-input",
+  .node_name = "sr-pl-rewrite-encaps-l2",
+  .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+/* *INDENT-ON* */
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/sr_steering.md b/src/vnet/srv6/sr_steering.md
new file mode 100644
index 00000000..cf446f81
--- /dev/null
+++ b/src/vnet/srv6/sr_steering.md
@@ -0,0 +1,11 @@
+# Steering packets into a SR Policy     {#srv6_steering_doc}
+
+To steer packets in Transit into an SR policy (T.Insert, T.Encaps and T.Encaps.L2 behaviors), the user needs to create an 'sr steering policy'.
+
+    sr steer l3 2001::/64 via sr policy index 1
+    sr steer l3 2001::/64 via sr policy bsid cafe::1
+    sr steer l3 2001::/64 via sr policy bsid cafe::1 fib-table 3
+    sr steer l3 10.0.0.0/16 via sr policy bsid cafe::1
+    sr steer l2 TenGE0/1/0 via sr policy bsid cafe::1
+
+Disclaimer: The T.Encaps.L2 behavior steers L2 frames into an SR Policy. Note that creating an SR steering policy for L2 frames automatically *puts the interface into promiscuous mode*.
diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h
index 9d3abae5..566e22ec 100644
--- a/src/vnet/vnet_all_api_h.h
+++ b/src/vnet/vnet_all_api_h.h
@@ -50,7 +50,7 @@
 #include <vnet/lisp-cp/one.api.h>
 #include <vnet/session/session.api.h>
 #include <vnet/mpls/mpls.api.h>
-#include <vnet/sr/sr.api.h>
+#include <vnet/srv6/sr.api.h>
 #include <vnet/classify/classify.api.h>
 #include <vnet/flow/flow.api.h>
 #include <vnet/dhcp/dhcp.api.h>
diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c
index baf45d5c..16d51225 100644
--- a/src/vpp/api/api.c
+++ b/src/vpp/api/api.c
@@ -53,7 +53,7 @@
 #include <vnet/ip/ip6.h>
 #include <vnet/ip/ip6_neighbor.h>
 #if WITH_LIBSSL > 0
-#include <vnet/sr/sr.h>
+#include <vnet/srv6/sr.h>
 #endif
 #include <vlib/vlib.h>
 #include <vlib/unix/unix.h>
diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c
index 000fe0d4..107e83f3 100644
--- a/src/vpp/api/custom_dump.c
+++ b/src/vpp/api/custom_dump.c
@@ -24,7 +24,7 @@
 #include <vnet/dhcp/dhcp_proxy.h>
 #include <vnet/l2tp/l2tp.h>
 #include <vnet/l2/l2_input.h>
-#include <vnet/sr/sr.h>
+#include <vnet/srv6/sr.h>
 #include <vnet/vxlan-gpe/vxlan_gpe.h>
 #include <vnet/classify/policer_classify.h>
 #include <vnet/policer/xlate.h>
diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api
index 7c07c822..99ae4784 100644
--- a/src/vpp/api/vpe.api
+++ b/src/vpp/api/vpe.api
@@ -40,7 +40,7 @@
  * LISP-GPE APIs: see .../src/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c}
  * SESSION APIs: .../vnet/session/{session.api session_api.c}
  * MPLS APIs: see .../src/vnet/mpls/{mpls.api, mpls_api.c}
- * SR APIs: see .../src/vnet/sr/{sr.api, sr_api.c}
+ * SR APIs: see .../src/vnet/srv6/{sr.api, sr_api.c}
  * CLASSIFY APIs: see ... /src/vnet/classify/{classify.api, classify_api.c}
  * FLOW APIs: see ... /src/vnet/flow/{flow.api, flow_api.c}
  * DHCP APIs: see ... /src/vnet/dhcp/{dhcpk.api, dhcp_api.c}
-- 
cgit 1.2.3-korg


From f55f9b851f59264d737d92c6277a87588c565d24 Mon Sep 17 00:00:00 2001
From: Damjan Marion <damarion@cisco.com>
Date: Wed, 10 May 2017 21:06:28 +0200
Subject: completelly deprecate os_get_cpu_number, replace new occurences

Change-Id: I82c663bc0866c6c68ba354104b0bb059387f4b9d
Signed-off-by: Damjan Marion <damarion@cisco.com>
---
 src/plugins/flowperpkt/l2_node.c       | 20 ++++++++++----------
 src/plugins/flowperpkt/node.c          | 20 ++++++++++----------
 src/plugins/snat/in2out.c              |  2 +-
 src/plugins/snat/out2in.c              |  2 +-
 src/vlib/main.h                        |  2 +-
 src/vlib/threads.c                     | 12 ++----------
 src/vlib/threads.h                     |  3 +--
 src/vlib/unix/main.c                   |  2 +-
 src/vlibmemory/memory_vlib.c           |  2 +-
 src/vnet/dpo/interface_dpo.c           |  8 ++++----
 src/vnet/lisp-gpe/lisp_gpe_adjacency.c |  2 +-
 src/vppinfra/bihash_template.c         | 16 ++++++++--------
 src/vppinfra/lock.h                    |  6 +++---
 src/vppinfra/mem.h                     |  6 +++---
 src/vppinfra/mhash.c                   |  2 +-
 src/vppinfra/mhash.h                   |  2 +-
 src/vppinfra/mheap.c                   |  4 ++--
 src/vppinfra/os.h                      | 20 ++++++++++++++++++--
 src/vppinfra/smp.c                     |  2 +-
 src/vppinfra/unix-misc.c               | 19 +++++++------------
 20 files changed, 77 insertions(+), 75 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/plugins/flowperpkt/l2_node.c b/src/plugins/flowperpkt/l2_node.c
index fdaf81d1..db80e990 100644
--- a/src/plugins/flowperpkt/l2_node.c
+++ b/src/plugins/flowperpkt/l2_node.c
@@ -102,7 +102,7 @@ add_to_flow_record_l2 (vlib_main_t * vm,
 		       u8 * src_mac, u8 * dst_mac,
 		       u16 ethertype, u64 timestamp, u16 length, int do_flush)
 {
-  u32 my_cpu_number = vm->thread_index;
+  u32 my_thread_index = vm->thread_index;
   flow_report_main_t *frm = &flow_report_main;
   ip4_header_t *ip;
   udp_header_t *udp;
@@ -116,7 +116,7 @@ add_to_flow_record_l2 (vlib_main_t * vm,
   vlib_buffer_free_list_t *fl;
 
   /* Find or allocate a buffer */
-  b0 = fm->l2_buffers_per_worker[my_cpu_number];
+  b0 = fm->l2_buffers_per_worker[my_thread_index];
 
   /* Need to allocate a buffer? */
   if (PREDICT_FALSE (b0 == 0))
@@ -130,7 +130,7 @@ add_to_flow_record_l2 (vlib_main_t * vm,
 	return;
 
       /* Initialize the buffer */
-      b0 = fm->l2_buffers_per_worker[my_cpu_number] =
+      b0 = fm->l2_buffers_per_worker[my_thread_index] =
 	vlib_get_buffer (vm, bi0);
       fl =
 	vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
@@ -142,16 +142,16 @@ add_to_flow_record_l2 (vlib_main_t * vm,
     {
       /* use the current buffer */
       bi0 = vlib_get_buffer_index (vm, b0);
-      offset = fm->l2_next_record_offset_per_worker[my_cpu_number];
+      offset = fm->l2_next_record_offset_per_worker[my_thread_index];
     }
 
   /* Find or allocate a frame */
-  f = fm->l2_frames_per_worker[my_cpu_number];
+  f = fm->l2_frames_per_worker[my_thread_index];
   if (PREDICT_FALSE (f == 0))
     {
       u32 *to_next;
       f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
-      fm->l2_frames_per_worker[my_cpu_number] = f;
+      fm->l2_frames_per_worker[my_thread_index] = f;
 
       /* Enqueue the buffer */
       to_next = vlib_frame_vector_args (f);
@@ -299,13 +299,13 @@ add_to_flow_record_l2 (vlib_main_t * vm,
 	}
 
       vlib_put_frame_to_node (vm, ip4_lookup_node.index,
-			      fm->l2_frames_per_worker[my_cpu_number]);
-      fm->l2_frames_per_worker[my_cpu_number] = 0;
-      fm->l2_buffers_per_worker[my_cpu_number] = 0;
+			      fm->l2_frames_per_worker[my_thread_index]);
+      fm->l2_frames_per_worker[my_thread_index] = 0;
+      fm->l2_buffers_per_worker[my_thread_index] = 0;
       offset = 0;
     }
 
-  fm->l2_next_record_offset_per_worker[my_cpu_number] = offset;
+  fm->l2_next_record_offset_per_worker[my_thread_index] = offset;
 }
 
 void
diff --git a/src/plugins/flowperpkt/node.c b/src/plugins/flowperpkt/node.c
index 0277682d..9bac4166 100644
--- a/src/plugins/flowperpkt/node.c
+++ b/src/plugins/flowperpkt/node.c
@@ -101,7 +101,7 @@ add_to_flow_record_ipv4 (vlib_main_t * vm,
 			 u32 src_address, u32 dst_address,
 			 u8 tos, u64 timestamp, u16 length, int do_flush)
 {
-  u32 my_cpu_number = vm->thread_index;
+  u32 my_thread_index = vm->thread_index;
   flow_report_main_t *frm = &flow_report_main;
   ip4_header_t *ip;
   udp_header_t *udp;
@@ -115,7 +115,7 @@ add_to_flow_record_ipv4 (vlib_main_t * vm,
   vlib_buffer_free_list_t *fl;
 
   /* Find or allocate a buffer */
-  b0 = fm->ipv4_buffers_per_worker[my_cpu_number];
+  b0 = fm->ipv4_buffers_per_worker[my_thread_index];
 
   /* Need to allocate a buffer? */
   if (PREDICT_FALSE (b0 == 0))
@@ -129,7 +129,7 @@ add_to_flow_record_ipv4 (vlib_main_t * vm,
 	return;
 
       /* Initialize the buffer */
-      b0 = fm->ipv4_buffers_per_worker[my_cpu_number] =
+      b0 = fm->ipv4_buffers_per_worker[my_thread_index] =
 	vlib_get_buffer (vm, bi0);
       fl =
 	vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
@@ -141,16 +141,16 @@ add_to_flow_record_ipv4 (vlib_main_t * vm,
     {
       /* use the current buffer */
       bi0 = vlib_get_buffer_index (vm, b0);
-      offset = fm->ipv4_next_record_offset_per_worker[my_cpu_number];
+      offset = fm->ipv4_next_record_offset_per_worker[my_thread_index];
     }
 
   /* Find or allocate a frame */
-  f = fm->ipv4_frames_per_worker[my_cpu_number];
+  f = fm->ipv4_frames_per_worker[my_thread_index];
   if (PREDICT_FALSE (f == 0))
     {
       u32 *to_next;
       f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
-      fm->ipv4_frames_per_worker[my_cpu_number] = f;
+      fm->ipv4_frames_per_worker[my_thread_index] = f;
 
       /* Enqueue the buffer */
       to_next = vlib_frame_vector_args (f);
@@ -300,13 +300,13 @@ add_to_flow_record_ipv4 (vlib_main_t * vm,
 	}
 
       vlib_put_frame_to_node (vm, ip4_lookup_node.index,
-			      fm->ipv4_frames_per_worker[my_cpu_number]);
-      fm->ipv4_frames_per_worker[my_cpu_number] = 0;
-      fm->ipv4_buffers_per_worker[my_cpu_number] = 0;
+			      fm->ipv4_frames_per_worker[my_thread_index]);
+      fm->ipv4_frames_per_worker[my_thread_index] = 0;
+      fm->ipv4_buffers_per_worker[my_thread_index] = 0;
       offset = 0;
     }
 
-  fm->ipv4_next_record_offset_per_worker[my_cpu_number] = offset;
+  fm->ipv4_next_record_offset_per_worker[my_thread_index] = offset;
 }
 
 void
diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c
index f7d29c69..bc86a7a4 100644
--- a/src/plugins/snat/in2out.c
+++ b/src/plugins/snat/in2out.c
@@ -1514,7 +1514,7 @@ snat_det_in2out_node_fn (vlib_main_t * vm,
   u32 pkts_processed = 0;
   snat_main_t * sm = &snat_main;
   u32 now = (u32) vlib_time_now (vm);
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c
index 3d7b106a..824406ab 100644
--- a/src/plugins/snat/out2in.c
+++ b/src/plugins/snat/out2in.c
@@ -1168,7 +1168,7 @@ snat_det_out2in_node_fn (vlib_main_t * vm,
   snat_out2in_next_t next_index;
   u32 pkts_processed = 0;
   snat_main_t * sm = &snat_main;
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
diff --git a/src/vlib/main.h b/src/vlib/main.h
index 329bf073..0e8026d1 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -320,7 +320,7 @@ always_inline void vlib_set_queue_signal_callback
 /* Main routine. */
 int vlib_main (vlib_main_t * vm, unformat_input_t * input);
 
-/* Thread stacks, for os_get_cpu_number */
+/* Thread stacks, for os_get_thread_index */
 extern u8 **vlib_thread_stacks;
 
 /* Number of thread stacks that the application needs */
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 9ccfd3a2..b7bc9e26 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -35,16 +35,8 @@ vl (void *p)
 vlib_worker_thread_t *vlib_worker_threads;
 vlib_thread_main_t vlib_thread_main;
 
-__thread uword vlib_thread_index = 0;
-
-uword
-os_get_cpu_number (void)
-{
-  return vlib_thread_index;
-}
-
 uword
-os_get_ncpus (void)
+os_get_nthreads (void)
 {
   u32 len;
 
@@ -467,7 +459,7 @@ vlib_worker_thread_bootstrap_fn (void *arg)
   w->lwp = syscall (SYS_gettid);
   w->thread_id = pthread_self ();
 
-  vlib_thread_index = w - vlib_worker_threads;
+  __os_thread_index = w - vlib_worker_threads;
 
   rv = (void *) clib_calljmp
     ((uword (*)(uword)) w->thread_function,
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index 101d3d4a..17d35a24 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -181,11 +181,10 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts);
 void vlib_worker_thread_barrier_sync (vlib_main_t * vm);
 void vlib_worker_thread_barrier_release (vlib_main_t * vm);
 
-extern __thread uword vlib_thread_index;
 static_always_inline uword
 vlib_get_thread_index (void)
 {
-  return vlib_thread_index;
+  return __os_thread_index;
 }
 
 always_inline void
diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c
index db5ddd64..103576db 100644
--- a/src/vlib/unix/main.c
+++ b/src/vlib/unix/main.c
@@ -565,7 +565,7 @@ vlib_unix_main (int argc, char *argv[])
 
   vlib_thread_stack_init (0);
 
-  vlib_thread_index = 0;
+  __os_thread_index = 0;
 
   i = clib_calljmp (thread0, (uword) vm,
 		    (void *) (vlib_thread_stacks[0] +
diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c
index acba8b3f..e5d88732 100644
--- a/src/vlibmemory/memory_vlib.c
+++ b/src/vlibmemory/memory_vlib.c
@@ -1333,7 +1333,7 @@ vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length)
   unix_shared_memory_queue_t *q;
 
   /* Main thread: call the function directly */
-  if (os_get_cpu_number () == 0)
+  if (vlib_get_thread_index () == 0)
     {
       vlib_main_t *vm = vlib_get_main ();
       void (*call_fp) (void *);
diff --git a/src/vnet/dpo/interface_dpo.c b/src/vnet/dpo/interface_dpo.c
index 50ca756f..8d700c23 100644
--- a/src/vnet/dpo/interface_dpo.c
+++ b/src/vnet/dpo/interface_dpo.c
@@ -231,7 +231,7 @@ interface_dpo_inline (vlib_main_t * vm,
                       vlib_frame_t * from_frame)
 {
     u32 n_left_from, next_index, * from, * to_next;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index ();
     vnet_interface_main_t *im;
 
     im = &vnet_get_main ()->interface_main;
@@ -274,13 +274,13 @@ interface_dpo_inline (vlib_main_t * vm,
 
             vlib_increment_combined_counter (im->combined_sw_if_counters
                                              + VNET_INTERFACE_COUNTER_RX,
-                                             cpu_index,
+                                             thread_index,
                                              ido0->ido_sw_if_index,
                                              1,
                                              vlib_buffer_length_in_chain (vm, b0));
             vlib_increment_combined_counter (im->combined_sw_if_counters
                                              + VNET_INTERFACE_COUNTER_RX,
-                                             cpu_index,
+                                             thread_index,
                                              ido1->ido_sw_if_index,
                                              1,
                                              vlib_buffer_length_in_chain (vm, b1));
@@ -331,7 +331,7 @@ interface_dpo_inline (vlib_main_t * vm,
             /* Bump the interface's RX coutners */
             vlib_increment_combined_counter (im->combined_sw_if_counters
                                              + VNET_INTERFACE_COUNTER_RX,
-                                             cpu_index,
+                                             thread_index,
                                              ido0->ido_sw_if_index,
                                              1,
                                              vlib_buffer_length_in_chain (vm, b0));
diff --git a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c
index d5f3a28a..7db1c9bb 100644
--- a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c
+++ b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c
@@ -302,7 +302,7 @@ lisp_gpe_increment_stats_counters (lisp_cp_main_t * lcm, ip_adjacency_t * adj,
 
   /* compute payload length starting after GPE */
   u32 bytes = b->current_length - (lisp_data - b->data - b->current_data);
-  vlib_increment_combined_counter (&lgm->counters, os_get_cpu_number (),
+  vlib_increment_combined_counter (&lgm->counters, vlib_get_thread_index (),
 				   p[0], 1, bytes);
 }
 
diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c
index d8b97b5f..51fadeb8 100644
--- a/src/vppinfra/bihash_template.c
+++ b/src/vppinfra/bihash_template.c
@@ -96,12 +96,12 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b)
   clib_bihash_bucket_t working_bucket __attribute__ ((aligned (8)));
   void *oldheap;
   BVT (clib_bihash_value) * working_copy;
-  u32 cpu_number = os_get_cpu_number ();
+  u32 thread_index = os_get_thread_index ();
 
-  if (cpu_number >= vec_len (h->working_copies))
+  if (thread_index >= vec_len (h->working_copies))
     {
       oldheap = clib_mem_set_heap (h->mheap);
-      vec_validate (h->working_copies, cpu_number);
+      vec_validate (h->working_copies, thread_index);
       clib_mem_set_heap (oldheap);
     }
 
@@ -110,7 +110,7 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b)
    * updates from multiple threads will not result in sporadic, spurious
    * lookup failures.
    */
-  working_copy = h->working_copies[cpu_number];
+  working_copy = h->working_copies[thread_index];
 
   h->saved_bucket.as_u64 = b->as_u64;
   oldheap = clib_mem_set_heap (h->mheap);
@@ -119,7 +119,7 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b)
     {
       vec_validate_aligned (working_copy, (1 << b->log2_pages) - 1,
 			    sizeof (u64));
-      h->working_copies[cpu_number] = working_copy;
+      h->working_copies[thread_index] = working_copy;
     }
 
   _vec_len (working_copy) = 1 << b->log2_pages;
@@ -132,7 +132,7 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b)
   working_bucket.offset = BV (clib_bihash_get_offset) (h, working_copy);
   CLIB_MEMORY_BARRIER ();
   b->as_u64 = working_bucket.as_u64;
-  h->working_copies[cpu_number] = working_copy;
+  h->working_copies[thread_index] = working_copy;
 }
 
 static
@@ -233,7 +233,7 @@ int BV (clib_bihash_add_del)
   int i, limit;
   u64 hash, new_hash;
   u32 new_log2_pages;
-  u32 cpu_number = os_get_cpu_number ();
+  u32 thread_index = os_get_thread_index ();
   int mark_bucket_linear;
   int resplit_once;
 
@@ -323,7 +323,7 @@ int BV (clib_bihash_add_del)
   new_log2_pages = h->saved_bucket.log2_pages + 1;
   mark_bucket_linear = 0;
 
-  working_copy = h->working_copies[cpu_number];
+  working_copy = h->working_copies[thread_index];
   resplit_once = 0;
 
   new_v = BV (split_and_rehash) (h, working_copy, new_log2_pages);
diff --git a/src/vppinfra/lock.h b/src/vppinfra/lock.h
index c60ff414..0cd2b4fe 100644
--- a/src/vppinfra/lock.h
+++ b/src/vppinfra/lock.h
@@ -24,7 +24,7 @@ typedef struct
   u32 lock;
 #if CLIB_DEBUG > 0
   pid_t pid;
-  uword cpu_index;
+  uword thread_index;
   void *frame_address;
 #endif
 } *clib_spinlock_t;
@@ -57,7 +57,7 @@ clib_spinlock_lock (clib_spinlock_t * p)
 #if CLIB_DEBUG > 0
   (*p)->frame_address = __builtin_frame_address (0);
   (*p)->pid = getpid ();
-  (*p)->cpu_index = os_get_cpu_number ();
+  (*p)->thread_index = os_get_thread_index ();
 #endif
 }
 
@@ -75,7 +75,7 @@ clib_spinlock_unlock (clib_spinlock_t * p)
 #if CLIB_DEBUG > 0
   (*p)->frame_address = 0;
   (*p)->pid = 0;
-  (*p)->cpu_index = 0;
+  (*p)->thread_index = 0;
 #endif
 }
 
diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h
index 1260eab2..63c5ac16 100644
--- a/src/vppinfra/mem.h
+++ b/src/vppinfra/mem.h
@@ -54,14 +54,14 @@ extern void *clib_per_cpu_mheaps[CLIB_MAX_MHEAPS];
 always_inline void *
 clib_mem_get_per_cpu_heap (void)
 {
-  int cpu = os_get_cpu_number ();
+  int cpu = os_get_thread_index ();
   return clib_per_cpu_mheaps[cpu];
 }
 
 always_inline void *
 clib_mem_set_per_cpu_heap (u8 * new_heap)
 {
-  int cpu = os_get_cpu_number ();
+  int cpu = os_get_thread_index ();
   void *old = clib_per_cpu_mheaps[cpu];
   clib_per_cpu_mheaps[cpu] = new_heap;
   return old;
@@ -83,7 +83,7 @@ clib_mem_alloc_aligned_at_offset (uword size, uword align, uword align_offset,
 	align_offset = align;
     }
 
-  cpu = os_get_cpu_number ();
+  cpu = os_get_thread_index ();
   heap = clib_per_cpu_mheaps[cpu];
   heap = mheap_get_aligned (heap, size, align, align_offset, &offset);
   clib_per_cpu_mheaps[cpu] = heap;
diff --git a/src/vppinfra/mhash.c b/src/vppinfra/mhash.c
index c917e164..00b67c49 100644
--- a/src/vppinfra/mhash.c
+++ b/src/vppinfra/mhash.c
@@ -226,7 +226,7 @@ static uword
 mhash_set_tmp_key (mhash_t * h, const void *key)
 {
   u8 *key_tmp;
-  int my_cpu = os_get_cpu_number ();
+  int my_cpu = os_get_thread_index ();
 
   vec_validate (h->key_tmps, my_cpu);
   key_tmp = h->key_tmps[my_cpu];
diff --git a/src/vppinfra/mhash.h b/src/vppinfra/mhash.h
index 102adf4e..7eb19183 100644
--- a/src/vppinfra/mhash.h
+++ b/src/vppinfra/mhash.h
@@ -93,7 +93,7 @@ mhash_key_to_mem (mhash_t * h, uword key)
     {
       u8 *key_tmp;
 
-      int my_cpu = os_get_cpu_number ();
+      int my_cpu = os_get_thread_index ();
       vec_validate (h->key_tmps, my_cpu);
       key_tmp = h->key_tmps[my_cpu];
       return key_tmp;
diff --git a/src/vppinfra/mheap.c b/src/vppinfra/mheap.c
index 192732db..d4010ceb 100644
--- a/src/vppinfra/mheap.c
+++ b/src/vppinfra/mheap.c
@@ -56,7 +56,7 @@ mheap_maybe_lock (void *v)
   mheap_t *h = mheap_header (v);
   if (v && (h->flags & MHEAP_FLAG_THREAD_SAFE))
     {
-      u32 my_cpu = os_get_cpu_number ();
+      u32 my_cpu = os_get_thread_index ();
       if (h->owner_cpu == my_cpu)
 	{
 	  h->recursion_count++;
@@ -77,7 +77,7 @@ mheap_maybe_unlock (void *v)
   mheap_t *h = mheap_header (v);
   if (v && h->flags & MHEAP_FLAG_THREAD_SAFE)
     {
-      ASSERT (os_get_cpu_number () == h->owner_cpu);
+      ASSERT (os_get_thread_index () == h->owner_cpu);
       if (--h->recursion_count == 0)
 	{
 	  h->owner_cpu = ~0;
diff --git a/src/vppinfra/os.h b/src/vppinfra/os.h
index a5c74f8c..33300716 100644
--- a/src/vppinfra/os.h
+++ b/src/vppinfra/os.h
@@ -56,8 +56,24 @@ void os_out_of_memory (void);
 /* Estimate, measure or divine CPU timestamp clock frequency. */
 f64 os_cpu_clock_frequency (void);
 
-uword os_get_cpu_number (void);
-uword os_get_ncpus (void);
+extern __thread uword __os_thread_index;
+
+static_always_inline uword
+os_get_thread_index (void)
+{
+  return __os_thread_index;
+}
+
+static_always_inline uword
+os_get_cpu_number (void) __attribute__ ((deprecated));
+
+static_always_inline uword
+os_get_cpu_number (void)
+{
+  return __os_thread_index;
+}
+
+uword os_get_nthreads (void);
 
 #include <vppinfra/smp.h>
 
diff --git a/src/vppinfra/smp.c b/src/vppinfra/smp.c
index 8ac19960..f603283e 100644
--- a/src/vppinfra/smp.c
+++ b/src/vppinfra/smp.c
@@ -53,7 +53,7 @@ allocate_per_cpu_mheap (uword cpu)
   void *heap;
   uword vm_size, stack_size, mheap_flags;
 
-  ASSERT (os_get_cpu_number () == cpu);
+  ASSERT (os_get_thread_index () == cpu);
 
   vm_size = (uword) 1 << m->log2_n_per_cpu_vm_bytes;
   stack_size = (uword) 1 << m->log2_n_per_cpu_stack_bytes;
diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c
index 2928369d..361015b4 100644
--- a/src/vppinfra/unix-misc.c
+++ b/src/vppinfra/unix-misc.c
@@ -45,6 +45,8 @@
 #include <fcntl.h>
 #include <stdio.h>		/* for sprintf */
 
+__thread uword __os_thread_index = 0;
+
 clib_error_t *
 unix_file_n_bytes (char *file, uword * result)
 {
@@ -188,14 +190,14 @@ void os_puts (u8 * string, uword string_length, uword is_error)
 void
 os_puts (u8 * string, uword string_length, uword is_error)
 {
-  int cpu = os_get_cpu_number ();
-  int ncpus = os_get_ncpus ();
+  int cpu = os_get_thread_index ();
+  int nthreads = os_get_nthreads ();
   char buf[64];
   int fd = is_error ? 2 : 1;
   struct iovec iovs[2];
   int n_iovs = 0;
 
-  if (ncpus > 1)
+  if (nthreads > 1)
     {
       snprintf (buf, sizeof (buf), "%d: ", cpu);
 
@@ -219,16 +221,9 @@ os_out_of_memory (void)
   os_panic ();
 }
 
-uword os_get_cpu_number (void) __attribute__ ((weak));
-uword
-os_get_cpu_number (void)
-{
-  return 0;
-}
-
-uword os_get_ncpus (void) __attribute__ ((weak));
+uword os_get_nthreads (void) __attribute__ ((weak));
 uword
-os_get_ncpus (void)
+os_get_nthreads (void)
 {
   return 1;
 }
-- 
cgit 1.2.3-korg


From 8c4611b39162da9753caaf654741faa115eaf612 Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Tue, 23 May 2017 03:43:47 -0700
Subject: Labelled attached paths via an MPLS tunnel

Change-Id: Ic86617c9c3217122043656ce2ea70bb106df5b2d
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/vnet/adj/adj.c          |  1 +
 src/vnet/dpo/dpo.c          |  3 ++
 src/vnet/fib/fib_path.c     | 70 ++++++++++++++++++++++++++++-----------------
 src/vnet/fib/fib_walk.c     | 31 +++++++++++---------
 src/vnet/mpls/mpls_tunnel.c |  4 ++-
 test/test_mpls.py           | 27 +++++++++++++++--
 6 files changed, 94 insertions(+), 42 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
index bf44383f..f8496913 100644
--- a/src/vnet/adj/adj.c
+++ b/src/vnet/adj/adj.c
@@ -64,6 +64,7 @@ adj_alloc (fib_protocol_t proto)
     adj->ia_nh_proto = proto;
     adj->ia_flags = 0;
     adj->rewrite_header.sw_if_index = ~0;
+    adj->rewrite_header.flags = 0;
     adj->lookup_next_index = 0;
     adj->ia_delegates = NULL;
 
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
index dfc2bd92..28aa0c23 100644
--- a/src/vnet/dpo/dpo.c
+++ b/src/vnet/dpo/dpo.c
@@ -189,6 +189,9 @@ dpo_set (dpo_id_t *dpo,
 	    break;
 	case IP_LOOKUP_NEXT_MCAST:
 	    dpo->dpoi_type = DPO_ADJACENCY_MCAST;
+            break;
+	case IP_LOOKUP_NEXT_GLEAN:
+	    dpo->dpoi_type = DPO_ADJACENCY_GLEAN;
 	    break;
 	default:
 	    break;
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 255f0dd1..274b0ef4 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -588,6 +588,30 @@ fib_path_attached_next_hop_set (fib_path_t *path)
     }
 }
 
+/*
+ * Return a locked adjacency for an attached path: a neighbour adj of
+ * the requested link type on point-2-point interfaces (there is nothing
+ * to ARP for), otherwise a glean adj. The caller must release the lock.
+ */
+static adj_index_t
+fib_path_attached_get_adj (fib_path_t *path,
+                           vnet_link_t link)
+{
+    if (vnet_sw_interface_is_p2p(vnet_get_main(),
+                                 path->attached.fp_interface))
+    {
+        return (adj_nbr_add_or_lock(path->fp_nh_proto,
+                                    link,
+                                    &zero_addr,
+                                    path->attached.fp_interface));
+    }
+
+    /* the glean is requested by protocol and interface; 'link' is unused */
+    return (adj_glean_add_or_lock(path->fp_nh_proto,
+                                  path->attached.fp_interface,
+                                  NULL));
+}
+
 /*
  * create of update the paths recursive adj
  */
@@ -1559,31 +1583,12 @@ fib_path_resolve (fib_node_index_t path_index)
 	{
 	    path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
 	}
-	if (vnet_sw_interface_is_p2p(vnet_get_main(),
-				     path->attached.fp_interface))
-	{
-	    /*
-	     * point-2-point interfaces do not require a glean, since
-	     * there is nothing to ARP. Install a rewrite/nbr adj instead
-	     */
-	    dpo_set(&path->fp_dpo,
-		    DPO_ADJACENCY,
-		    fib_proto_to_dpo(path->fp_nh_proto),
-		    adj_nbr_add_or_lock(
-			path->fp_nh_proto,
-			fib_proto_to_link(path->fp_nh_proto),
-			&zero_addr,
-			path->attached.fp_interface));
-	}
-	else
-	{
-	    dpo_set(&path->fp_dpo,
-		    DPO_ADJACENCY_GLEAN,
-		    fib_proto_to_dpo(path->fp_nh_proto),
-		    adj_glean_add_or_lock(path->fp_nh_proto,
-					  path->attached.fp_interface,
-					  NULL));
-	}
+        dpo_set(&path->fp_dpo,
+                DPO_ADJACENCY,
+                fib_proto_to_dpo(path->fp_nh_proto),
+                fib_path_attached_get_adj(path,
+                                          fib_proto_to_link(path->fp_nh_proto)));
+
 	/*
 	 * become a child of the adjacency so we receive updates
 	 * when the interface state changes
@@ -1969,7 +1974,20 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
 	    case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
 	    case FIB_FORW_CHAIN_TYPE_ETHERNET:
 	    case FIB_FORW_CHAIN_TYPE_NSH:
-                break;
+                {
+                    adj_index_t ai;
+
+                    /*
+                     * get an adj of the appropriate link type.
+                     */
+                    ai = fib_path_attached_get_adj(
+                            path,
+                            fib_forw_chain_type_to_link_type(fct));
+                    dpo_set(dpo, DPO_ADJACENCY,
+                            fib_forw_chain_type_to_dpo_proto(fct), ai);
+                    adj_unlock(ai);
+                    break;
+                }
 	    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
 	    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
                 {
diff --git a/src/vnet/fib/fib_walk.c b/src/vnet/fib/fib_walk.c
index c570476d..70180137 100644
--- a/src/vnet/fib/fib_walk.c
+++ b/src/vnet/fib/fib_walk.c
@@ -322,10 +322,10 @@ typedef enum fib_walk_advance_rc_t_
 static fib_walk_advance_rc_t
 fib_walk_advance (fib_node_index_t fwi)
 {
-    fib_node_back_walk_ctx_t *ctx, *old;
     fib_node_back_walk_rc_t wrc;
     fib_node_ptr_t sibling;
     fib_walk_t *fwalk;
+    uint n_ctxs, ii;
     int more_elts;
 
     /*
@@ -339,12 +339,20 @@ fib_walk_advance (fib_node_index_t fwi)
 
     if (more_elts)
     {
-        old = fwalk->fw_ctx;
 
-	vec_foreach(ctx, fwalk->fw_ctx)
-	{
-	    wrc = fib_node_back_walk_one(&sibling, ctx);
+        /*
+         * loop through the backwalk contexts. The vector can grow in length
+         * as walks on the same object meet each other. Order is preserved, so
+         * the most recently started walk is at the back of the vector.
+         */
+        ii = 0;
+        n_ctxs = vec_len(fwalk->fw_ctx);
+
+        while (ii < n_ctxs)
+        {
+	    wrc = fib_node_back_walk_one(&sibling, &fwalk->fw_ctx[ii]);
 
+            ii++;
 	    fwalk = fib_walk_get(fwi);
 	    fwalk->fw_n_visits++;
 
@@ -356,14 +364,11 @@ fib_walk_advance (fib_node_index_t fwi)
 		 */
 		return (FIB_WALK_ADVANCE_MERGE);
 	    }
-            if (old != fwalk->fw_ctx)
-            {
-                /*
-                 * nasty re-entrant addition of a walk has realloc'd the vector
-                 * break out
-                 */
-		return (FIB_WALK_ADVANCE_MERGE);
-	    }
+
+            /*
+             * re-evaluate the number of backwalk contexts we need to process.
+             */
+            n_ctxs = vec_len(fwalk->fw_ctx);
 	}
 	/*
 	 * move foward to the next node to visit
diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c
index d6e85e70..776b23ba 100644
--- a/src/vnet/mpls/mpls_tunnel.c
+++ b/src/vnet/mpls/mpls_tunnel.c
@@ -273,7 +273,9 @@ mpls_tunnel_stack (adj_index_t ai)
 
         mpls_tunnel_mk_lb(mt,
                           adj->ia_link,
-                          FIB_FORW_CHAIN_TYPE_MPLS_EOS,
+                          (VNET_LINK_MPLS == adj_get_link_type(ai) ?
+                           FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+                           FIB_FORW_CHAIN_TYPE_MPLS_EOS),
                           &dpo);
 
         adj_nbr_midchain_stack(ai, &dpo);
diff --git a/test/test_mpls.py b/test/test_mpls.py
index 0ad1ee69..d0c9e249 100644
--- a/test/test_mpls.py
+++ b/test/test_mpls.py
@@ -203,7 +203,10 @@ class TestMPLS(VppTestCase):
         except:
             raise
 
-    def verify_capture_tunneled_ip4(self, src_if, capture, sent, mpls_labels):
+    def verify_capture_tunneled_ip4(self, src_if, capture, sent, mpls_labels,
+                                    ttl=255, top=None):
+        if top is None:
+            top = len(mpls_labels) - 1
         try:
             capture = self.verify_filter(capture, sent)
 
@@ -217,7 +220,7 @@ class TestMPLS(VppTestCase):
 
                 # the MPLS TTL is 255 since it enters a new tunnel
                 self.verify_mpls_stack(
-                    rx, mpls_labels, 255, len(mpls_labels) - 1)
+                    rx, mpls_labels, ttl, top)
 
                 self.assertEqual(rx_ip.src, tx_ip.src)
                 self.assertEqual(rx_ip.dst, tx_ip.dst)
@@ -617,6 +620,26 @@ class TestMPLS(VppTestCase):
         rx = self.pg0.get_capture()
         self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [44, 46])
 
+        #
+        # add a labelled route through the new tunnel
+        #
+        route_10_0_0_4 = VppIpRoute(self, "10.0.0.4", 32,
+                                    [VppRoutePath("0.0.0.0",
+                                                  mpls_tun._sw_if_index,
+                                                  labels=[33])])
+        route_10_0_0_4.add_vpp_config()
+
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip4(self.pg0, "10.0.0.4")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg0.get_capture()
+        self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [44, 46, 33],
+                                         ttl=63, top=2)
+
     def test_v4_exp_null(self):
         """ MPLS V4 Explicit NULL test """
 
-- 
cgit 1.2.3-korg


From 31426c6fee43caf18602a2a7e2ce32c3dda75a29 Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Wed, 24 May 2017 10:32:58 -0700
Subject: Missing VLIB node for IPv6 disposition from mcast MPLS LSP

Change-Id: Ibc0e1910a4926fdfbf74571efb5fd5810bfa09da
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/vnet/dpo/lookup_dpo.c | 24 ++++++++++++++-
 test/test_mpls.py         | 77 ++++++++++++++++++++++++++++++++++++++++++++---
 test/vpp_ip_route.py      |  9 +++---
 3 files changed, 101 insertions(+), 9 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
index e5b00a79..cf489d7e 100644
--- a/src/vnet/dpo/lookup_dpo.c
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -1206,7 +1206,7 @@ lookup_dpo_ip_dst_mcast_inline (vlib_main_t * vm,
 
             vnet_buffer (b0)->ip.adj_index[VLIB_TX] = mfei0;
 
-           vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+            vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
                                             n_left_to_next, bi0, next0);
         }
         vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -1236,6 +1236,28 @@ VLIB_REGISTER_NODE (lookup_ip4_dst_mcast_node) = {
 VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_mcast_node,
                               lookup_ip4_dst_mcast)
 
+always_inline uword
+lookup_ip6_dst_mcast (vlib_main_t * vm,
+                      vlib_node_runtime_t * node,
+                      vlib_frame_t * from_frame)
+{
+    return (lookup_dpo_ip_dst_mcast_inline(vm, node, from_frame, 0));
+}
+
+VLIB_REGISTER_NODE (lookup_ip6_dst_mcast_node) = {
+    .function = lookup_ip6_dst_mcast,
+    .name = "lookup-ip6-dst-mcast",
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_lookup_trace,
+    .n_next_nodes = LOOKUP_IP_DST_MCAST_N_NEXT,
+    .next_nodes = {
+        [LOOKUP_IP_DST_MCAST_NEXT_RPF] = "ip6-mfib-forward-rpf",
+    },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_dst_mcast_node,
+                              lookup_ip6_dst_mcast)
+
 static void
 lookup_dpo_mem_show (void)
 {
diff --git a/test/test_mpls.py b/test/test_mpls.py
index d0c9e249..77cec429 100644
--- a/test/test_mpls.py
+++ b/test/test_mpls.py
@@ -102,7 +102,10 @@ class TestMPLS(VppTestCase):
             pkts.append(p)
         return pkts
 
-    def create_stream_labelled_ip6(self, src_if, mpls_label, mpls_ttl):
+    def create_stream_labelled_ip6(self, src_if, mpls_label, mpls_ttl,
+                                   dst_ip=None):
+        if dst_ip is None:
+            dst_ip = src_if.remote_ip6
         self.reset_packet_infos()
         pkts = []
         for i in range(0, 257):
@@ -110,7 +113,7 @@ class TestMPLS(VppTestCase):
             payload = self.info_to_payload(info)
             p = (Ether(dst=src_if.local_mac, src=src_if.remote_mac) /
                  MPLS(label=mpls_label, ttl=mpls_ttl) /
-                 IPv6(src=src_if.remote_ip6, dst=src_if.remote_ip6) /
+                 IPv6(src=src_if.remote_ip6, dst=dst_ip) /
                  UDP(sport=1234, dport=1234) /
                  Raw(payload))
             info.data = p.copy()
@@ -928,8 +931,8 @@ class TestMPLS(VppTestCase):
         rx = self.pg3.get_capture(257)
         self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [43])
 
-    def test_mcast_tail(self):
-        """ MPLS Multicast Tail """
+    def test_mcast_ip4_tail(self):
+        """ MPLS IPv4 Multicast Tail """
 
         #
         # Add a multicast route that will forward the traffic
@@ -994,6 +997,72 @@ class TestMPLS(VppTestCase):
                                              dst_ip="232.1.1.1")
         self.send_and_assert_no_replies(self.pg0, tx, "RPF-ID drop 56")
 
+    def test_mcast_ip6_tail(self):
+        """ MPLS IPv6 Multicast Tail """
+
+        #
+        # Add a multicast route that will forward the traffic
+        # post-disposition
+        #
+        route_ff = VppIpMRoute(
+            self,
+            "::",
+            "ff01::1", 32,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            table_id=1,
+            paths=[VppMRoutePath(self.pg1.sw_if_index,
+                                 MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)],
+            is_ip6=1)
+        route_ff.add_vpp_config()
+
+        #
+        # An interface receive label that maps traffic to RX on interface
+        # pg1
+        # by injecting the packet in on pg0, which is in table 0
+        # doing an rpf-id  and matching a route in table 1
+        # if the packet egresses, then we must have matched the route in
+        # table 1
+        #
+        route_34_eos = VppMplsRoute(
+            self, 34, 1,
+            [VppRoutePath("::",
+                          self.pg1.sw_if_index,
+                          nh_table_id=1,
+                          rpf_id=55,
+                          is_ip6=1)],
+            is_multicast=1)
+
+        route_34_eos.add_vpp_config()
+
+        #
+        # Drop due to interface lookup miss
+        #
+        tx = self.create_stream_labelled_ip6(self.pg0, [34], 255,
+                                             dst_ip="ff01::1")
+
+        #
+        # set the RPF-ID of the entry to match the input packet's
+        #
+        route_ff.update_rpf_id(55)
+
+        tx = self.create_stream_labelled_ip6(self.pg0, [34], 255,
+                                             dst_ip="ff01::1")
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg1.get_capture(257)
+        self.verify_capture_ip6(self.pg1, rx, tx)
+
+        #
+        # set the RPF-ID of the entry to not match the input packet's
+        #
+        route_ff.update_rpf_id(56)
+        tx = self.create_stream_labelled_ip6(self.pg0, [34], 225,
+                                             dst_ip="ff01::1")
+        self.send_and_assert_no_replies(self.pg0, tx, "RPF-ID drop 56")
+
 
 class TestMPLSDisabled(VppTestCase):
     """ MPLS disabled """
diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py
index b68e2105..badb3102 100644
--- a/test/vpp_ip_route.py
+++ b/test/vpp_ip_route.py
@@ -66,10 +66,11 @@ class VppRoutePath(object):
         self.nh_labels = labels
         self.weight = 1
         self.rpf_id = rpf_id
-        if is_ip6:
-            self.nh_addr = inet_pton(AF_INET6, nh_addr)
-        else:
+        self.is_ip4 = 1 if is_ip6 == 0 else 0
+        if self.is_ip4:
             self.nh_addr = inet_pton(AF_INET, nh_addr)
+        else:
+            self.nh_addr = inet_pton(AF_INET6, nh_addr)
         self.is_resolve_host = is_resolve_host
         self.is_resolve_attached = is_resolve_attached
         self.is_interface_rx = is_interface_rx
@@ -400,7 +401,7 @@ class VppMplsRoute(VppObject):
             self._test.vapi.mpls_route_add_del(
                 self.local_label,
                 self.eos_bit,
-                1,
+                path.is_ip4,
                 path.nh_addr,
                 path.nh_itf,
                 is_multicast=self.is_multicast,
-- 
cgit 1.2.3-korg


From 71275e3d1ed4b7a536b7ec8d13995743beccde6b Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Thu, 25 May 2017 12:38:58 -0700
Subject: MPLS hash function improvements

Change-Id: I28e98f445c01493562b6196a4f5b532a51f178af
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/vnet/dpo/mpls_label_dpo.c          |   2 +-
 src/vnet/mpls/mpls_lookup.c            |  68 +++++++++++++++++--
 src/vnet/mpls/mpls_types.h             |   1 +
 test/patches/scapy-2.3.3/mpls.py.patch |   5 ++
 test/test_ip4.py                       |  61 ++++++++++++-----
 test/test_ip6.py                       | 117 ++++++++++++++++++++++++++-------
 6 files changed, 206 insertions(+), 48 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
index 18479531..1c451a51 100644
--- a/src/vnet/dpo/mpls_label_dpo.c
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -356,7 +356,7 @@ mpls_label_imposition_inline (vlib_main_t * vm,
                 }
                 if (PREDICT_TRUE(vnet_buffer(b2)->mpls.first))
                 {
-                    ASSERT(2 != vnet_buffer (b2)->mpls.ttl);
+                    ASSERT(1 != vnet_buffer (b2)->mpls.ttl);
 
                     ttl2 = vnet_buffer(b2)->mpls.ttl - 1;
                 }
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index 322e0db0..42e5399c 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -65,14 +65,70 @@ mpls_compute_flow_hash (const mpls_unicast_header_t * hdr,
                         flow_hash_config_t flow_hash_config)
 {
     /*
-     * improve this to include:
-     *  - all labels in the stack.
-     *  - recognise entropy labels.
-     *
      * We need to byte swap so we use the numerical value. i.e. an odd label
-     * leads to an odd bucket. ass opposed to a label above and below value X.
+     * leads to an odd bucket. as opposed to a label above and below value X.
      */
-    return (vnet_mpls_uc_get_label(clib_net_to_host_u32(hdr->label_exp_s_ttl)));
+    u8 next_label_is_entropy;
+    mpls_label_t ho_label;
+    u32 hash, value;
+
+    ho_label = clib_net_to_host_u32(hdr->label_exp_s_ttl);
+    hash = vnet_mpls_uc_get_label(ho_label);
+    next_label_is_entropy = 0;
+
+    while (MPLS_EOS != vnet_mpls_uc_get_s(ho_label))
+    {
+        hdr++;
+        ho_label = clib_net_to_host_u32(hdr->label_exp_s_ttl);
+        value = vnet_mpls_uc_get_label(ho_label);
+
+        if (1 == next_label_is_entropy)
+        {
+            /*
+             * The label is an entropy value, use it alone as the hash
+             */
+            return (ho_label);
+        }
+        if (MPLS_IETF_ENTROPY_LABEL == value)
+        {
+            /*
+             * we've met a label in the stack indicating that tha next
+             * label is an entropy value
+             */
+            next_label_is_entropy = 1;
+        }
+        else
+        {
+            /*
+             * XOR the label values in the stack together to
+             * build up the hash value
+             */
+            hash ^= value;
+        }
+    }
+
+    /*
+     * check the top nibble for v4 and v6
+     */
+    hdr++;
+
+    switch (((u8*)hdr)[0] >> 4)
+    {
+    case 4:
+        /* incorporate the v4 flow-hash */
+        hash ^= ip4_compute_flow_hash ((const ip4_header_t *)hdr,
+                                       IP_FLOW_HASH_DEFAULT);
+        break;
+    case 6:
+        /* incorporate the v6 flow-hash */
+        hash ^= ip6_compute_flow_hash ((const ip6_header_t *)hdr,
+                                       IP_FLOW_HASH_DEFAULT);
+        break;
+    default:
+        break;
+    }
+
+    return (hash);
 }
 
 static inline uword
diff --git a/src/vnet/mpls/mpls_types.h b/src/vnet/mpls/mpls_types.h
index b1075cdd..f1c3191e 100644
--- a/src/vnet/mpls/mpls_types.h
+++ b/src/vnet/mpls/mpls_types.h
@@ -30,6 +30,7 @@
 #define MPLS_IETF_IMPLICIT_NULL_LABEL        0x00003
 #define MPLS_IETF_ELI_LABEL                  0x00007
 #define MPLS_IETF_GAL_LABEL                  0x0000D
+#define MPLS_IETF_ENTROPY_LABEL              0x0000E
 
 #define MPLS_IETF_IPV4_EXPLICIT_NULL_STRING          "ip4-explicit-null"
 #define MPLS_IETF_IPV4_EXPLICIT_NULL_BRIEF_STRING    "e-nul"
diff --git a/test/patches/scapy-2.3.3/mpls.py.patch b/test/patches/scapy-2.3.3/mpls.py.patch
index 5c819110..f63a70a3 100644
--- a/test/patches/scapy-2.3.3/mpls.py.patch
+++ b/test/patches/scapy-2.3.3/mpls.py.patch
@@ -11,3 +11,8 @@ index 640a0c5..6af1d4a 100644
             ip_version = (ord(payload[0]) >> 4) & 0xF
             if ip_version == 4:
                 return IP
+@@ -27,3 +29,4 @@ class MPLS(Packet):
+ 
+ bind_layers(Ether, MPLS, type=0x8847)
+ bind_layers(GRE, MPLS, proto=0x8847)
++bind_layers(MPLS, MPLS, s=0)
diff --git a/test/test_ip4.py b/test/test_ip4.py
index 3fe61e26..ddfd2187 100644
--- a/test/test_ip4.py
+++ b/test/test_ip4.py
@@ -6,12 +6,13 @@ import unittest
 from framework import VppTestCase, VppTestRunner
 from vpp_sub_interface import VppSubInterface, VppDot1QSubint, VppDot1ADSubint
 from vpp_ip_route import VppIpRoute, VppRoutePath, VppIpMRoute, \
-    VppMRoutePath, MRouteItfFlags, MRouteEntryFlags
+    VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind
 
 from scapy.packet import Raw
 from scapy.layers.l2 import Ether, Dot1Q, ARP
 from scapy.layers.inet import IP, UDP, ICMP, icmptypes, icmpcodes
 from util import ppp
+from scapy.contrib.mpls import MPLS
 
 
 class TestIPv4(VppTestCase):
@@ -778,10 +779,12 @@ class TestIPLoadBalance(VppTestCase):
             i.admin_up()
             i.config_ip4()
             i.resolve_arp()
+            i.enable_mpls()
 
     def tearDown(self):
         super(TestIPLoadBalance, self).tearDown()
         for i in self.pg_interfaces:
+            i.disable_mpls()
             i.unconfig_ip4()
             i.admin_down()
 
@@ -799,24 +802,37 @@ class TestIPLoadBalance(VppTestCase):
         #
         # An array of packets that differ only in the destination port
         #
-        port_pkts = []
+        port_ip_pkts = []
+        port_mpls_pkts = []
 
         #
         # An array of packets that differ only in the source address
         #
-        src_pkts = []
+        src_ip_pkts = []
+        src_mpls_pkts = []
 
         for ii in range(65):
-            port_pkts.append((Ether(src=self.pg0.remote_mac,
-                                    dst=self.pg0.local_mac) /
-                              IP(dst="10.0.0.1", src="20.0.0.1") /
-                              UDP(sport=1234, dport=1234 + ii) /
-                              Raw('\xa5' * 100)))
-            src_pkts.append((Ether(src=self.pg0.remote_mac,
-                                   dst=self.pg0.local_mac) /
-                             IP(dst="10.0.0.1", src="20.0.0.%d" % ii) /
-                             UDP(sport=1234, dport=1234) /
-                             Raw('\xa5' * 100)))
+            port_ip_hdr = (IP(dst="10.0.0.1", src="20.0.0.1") /
+                           UDP(sport=1234, dport=1234 + ii) /
+                           Raw('\xa5' * 100))
+            port_ip_pkts.append((Ether(src=self.pg0.remote_mac,
+                                       dst=self.pg0.local_mac) /
+                                 port_ip_hdr))
+            port_mpls_pkts.append((Ether(src=self.pg0.remote_mac,
+                                         dst=self.pg0.local_mac) /
+                                   MPLS(label=66, ttl=2) /
+                                   port_ip_hdr))
+
+            src_ip_hdr = (IP(dst="10.0.0.1", src="20.0.0.%d" % ii) /
+                          UDP(sport=1234, dport=1234) /
+                          Raw('\xa5' * 100))
+            src_ip_pkts.append((Ether(src=self.pg0.remote_mac,
+                                      dst=self.pg0.local_mac) /
+                                src_ip_hdr))
+            src_mpls_pkts.append((Ether(src=self.pg0.remote_mac,
+                                        dst=self.pg0.local_mac) /
+                                  MPLS(label=66, ttl=2) /
+                                  src_ip_hdr))
 
         route_10_0_0_1 = VppIpRoute(self, "10.0.0.1", 32,
                                     [VppRoutePath(self.pg1.remote_ip4,
@@ -825,6 +841,9 @@ class TestIPLoadBalance(VppTestCase):
                                                   self.pg2.sw_if_index)])
         route_10_0_0_1.add_vpp_config()
 
+        binding = VppMplsIpBind(self, 66, "10.0.0.1", 32)
+        binding.add_vpp_config()
+
         #
         # inject the packet on pg0 - expect load-balancing across the 2 paths
         #  - since the default hash config is to use IP src,dst and port
@@ -834,9 +853,13 @@ class TestIPLoadBalance(VppTestCase):
         # be guaranteed. But wuth 64 different packets we do expect some
         # balancing. So instead just ensure there is traffic on each link.
         #
-        self.send_and_expect_load_balancing(self.pg0, port_pkts,
+        self.send_and_expect_load_balancing(self.pg0, port_ip_pkts,
                                             [self.pg1, self.pg2])
-        self.send_and_expect_load_balancing(self.pg0, src_pkts,
+        self.send_and_expect_load_balancing(self.pg0, src_ip_pkts,
+                                            [self.pg1, self.pg2])
+        self.send_and_expect_load_balancing(self.pg0, port_mpls_pkts,
+                                            [self.pg1, self.pg2])
+        self.send_and_expect_load_balancing(self.pg0, src_mpls_pkts,
                                             [self.pg1, self.pg2])
 
         #
@@ -846,14 +869,16 @@ class TestIPLoadBalance(VppTestCase):
         #
         self.vapi.set_ip_flow_hash(0, src=1, dst=1, sport=0, dport=0)
 
-        self.send_and_expect_load_balancing(self.pg0, src_pkts,
+        self.send_and_expect_load_balancing(self.pg0, src_ip_pkts,
+                                            [self.pg1, self.pg2])
+        self.send_and_expect_load_balancing(self.pg0, src_mpls_pkts,
                                             [self.pg1, self.pg2])
 
-        self.pg0.add_stream(port_pkts)
+        self.pg0.add_stream(port_ip_pkts)
         self.pg_enable_capture(self.pg_interfaces)
         self.pg_start()
 
-        rx = self.pg2.get_capture(len(port_pkts))
+        rx = self.pg2.get_capture(len(port_ip_pkts))
 
         #
         # change the flow hash config back to defaults
diff --git a/test/test_ip6.py b/test/test_ip6.py
index ebeffe20..700b3344 100644
--- a/test/test_ip6.py
+++ b/test/test_ip6.py
@@ -7,7 +7,8 @@ from framework import VppTestCase, VppTestRunner
 from vpp_sub_interface import VppSubInterface, VppDot1QSubint
 from vpp_pg_interface import is_ipv6_misc
 from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, VppIpMRoute, \
-    VppMRoutePath, MRouteItfFlags, MRouteEntryFlags
+    VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind, \
+    VppMplsRoute
 from vpp_neighbor import find_nbr, VppNeighbor
 
 from scapy.packet import Raw
@@ -21,6 +22,7 @@ from util import ppp
 from scapy.utils6 import in6_getnsma, in6_getnsmac, in6_ptop, in6_islladdr, \
     in6_mactoifaceid, in6_ismaddr
 from scapy.utils import inet_pton, inet_ntop
+from scapy.contrib.mpls import MPLS
 
 
 def mk_ll_addr(mac):
@@ -1145,12 +1147,14 @@ class TestIP6LoadBalance(VppTestCase):
             i.admin_up()
             i.config_ip6()
             i.resolve_ndp()
+            i.enable_mpls()
 
     def tearDown(self):
         super(TestIP6LoadBalance, self).tearDown()
         for i in self.pg_interfaces:
             i.unconfig_ip6()
             i.admin_down()
+            i.disable_mpls()
 
     def send_and_expect_load_balancing(self, input, pkts, outputs):
         input.add_stream(pkts)
@@ -1160,31 +1164,69 @@ class TestIP6LoadBalance(VppTestCase):
             rx = oo._get_capture(1)
             self.assertNotEqual(0, len(rx))
 
+    def send_and_expect_one_itf(self, input, pkts, itf):
+        input.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        rx = itf.get_capture(len(pkts))
+
     def test_ip6_load_balance(self):
         """ IPv6 Load-Balancing """
 
         #
         # An array of packets that differ only in the destination port
+        #  - IP only
+        #  - MPLS EOS
+        #  - MPLS non-EOS
+        #  - MPLS non-EOS with an entropy label
         #
-        port_pkts = []
+        port_ip_pkts = []
+        port_mpls_pkts = []
+        port_mpls_neos_pkts = []
+        port_ent_pkts = []
 
         #
         # An array of packets that differ only in the source address
         #
-        src_pkts = []
+        src_ip_pkts = []
+        src_mpls_pkts = []
 
         for ii in range(65):
-            port_pkts.append((Ether(src=self.pg0.remote_mac,
-                                    dst=self.pg0.local_mac) /
-                              IPv6(dst="3000::1", src="3000:1::1") /
-                              UDP(sport=1234, dport=1234 + ii) /
-                              Raw('\xa5' * 100)))
-            src_pkts.append((Ether(src=self.pg0.remote_mac,
-                                   dst=self.pg0.local_mac) /
-                             IPv6(dst="3000::1", src="3000:1::%d" % ii) /
-                             UDP(sport=1234, dport=1234) /
-                             Raw('\xa5' * 100)))
-
+            port_ip_hdr = (IPv6(dst="3000::1", src="3000:1::1") /
+                           UDP(sport=1234, dport=1234 + ii) /
+                           Raw('\xa5' * 100))
+            port_ip_pkts.append((Ether(src=self.pg0.remote_mac,
+                                       dst=self.pg0.local_mac) /
+                                 port_ip_hdr))
+            port_mpls_pkts.append((Ether(src=self.pg0.remote_mac,
+                                         dst=self.pg0.local_mac) /
+                                   MPLS(label=66, ttl=2) /
+                                   port_ip_hdr))
+            port_mpls_neos_pkts.append((Ether(src=self.pg0.remote_mac,
+                                              dst=self.pg0.local_mac) /
+                                        MPLS(label=67, ttl=2) /
+                                        MPLS(label=77, ttl=2) /
+                                        port_ip_hdr))
+            port_ent_pkts.append((Ether(src=self.pg0.remote_mac,
+                                        dst=self.pg0.local_mac) /
+                                  MPLS(label=67, ttl=2) /
+                                  MPLS(label=14, ttl=2) /
+                                  MPLS(label=999, ttl=2) /
+                                  port_ip_hdr))
+            src_ip_hdr = (IPv6(dst="3000::1", src="3000:1::%d" % ii) /
+                          UDP(sport=1234, dport=1234) /
+                          Raw('\xa5' * 100))
+            src_ip_pkts.append((Ether(src=self.pg0.remote_mac,
+                                      dst=self.pg0.local_mac) /
+                                src_ip_hdr))
+            src_mpls_pkts.append((Ether(src=self.pg0.remote_mac,
+                                        dst=self.pg0.local_mac) /
+                                  MPLS(label=66, ttl=2) /
+                                  src_ip_hdr))
+
+        #
+        # A route for the IP packets
+        #
         route_3000_1 = VppIpRoute(self, "3000::1", 128,
                                   [VppRoutePath(self.pg1.remote_ip6,
                                                 self.pg1.sw_if_index,
@@ -1195,6 +1237,26 @@ class TestIP6LoadBalance(VppTestCase):
                                   is_ip6=1)
         route_3000_1.add_vpp_config()
 
+        #
+        # a local-label for the EOS packets
+        #
+        binding = VppMplsIpBind(self, 66, "3000::1", 128, is_ip6=1)
+        binding.add_vpp_config()
+
+        #
+        # An MPLS route for the non-EOS packets
+        #
+        route_67 = VppMplsRoute(self, 67, 0,
+                                [VppRoutePath(self.pg1.remote_ip6,
+                                              self.pg1.sw_if_index,
+                                              labels=[67],
+                                              is_ip6=1),
+                                 VppRoutePath(self.pg2.remote_ip6,
+                                              self.pg2.sw_if_index,
+                                              labels=[67],
+                                              is_ip6=1)])
+        route_67.add_vpp_config()
+
         #
         # inject the packet on pg0 - expect load-balancing across the 2 paths
         #  - since the default hash config is to use IP src,dst and port
@@ -1204,11 +1266,23 @@ class TestIP6LoadBalance(VppTestCase):
         # be guaranteed. But wuth 64 different packets we do expect some
         # balancing. So instead just ensure there is traffic on each link.
         #
-        self.send_and_expect_load_balancing(self.pg0, port_pkts,
+        self.send_and_expect_load_balancing(self.pg0, port_ip_pkts,
                                             [self.pg1, self.pg2])
-        self.send_and_expect_load_balancing(self.pg0, src_pkts,
+        self.send_and_expect_load_balancing(self.pg0, src_ip_pkts,
+                                            [self.pg1, self.pg2])
+        self.send_and_expect_load_balancing(self.pg0, port_mpls_pkts,
+                                            [self.pg1, self.pg2])
+        self.send_and_expect_load_balancing(self.pg0, src_mpls_pkts,
+                                            [self.pg1, self.pg2])
+        self.send_and_expect_load_balancing(self.pg0, port_mpls_neos_pkts,
                                             [self.pg1, self.pg2])
 
+        #
+        # The packets with an Entropy label in should not load-balance,
+        # since the Entropy value is fixed.
+        #
+        self.send_and_expect_one_itf(self.pg0, port_ent_pkts, self.pg1)
+
         #
         # change the flow hash config so it's only IP src,dst
         #  - now only the stream with differing source address will
@@ -1216,14 +1290,11 @@ class TestIP6LoadBalance(VppTestCase):
         #
         self.vapi.set_ip_flow_hash(0, is_ip6=1, src=1, dst=1, sport=0, dport=0)
 
-        self.send_and_expect_load_balancing(self.pg0, src_pkts,
+        self.send_and_expect_load_balancing(self.pg0, src_ip_pkts,
                                             [self.pg1, self.pg2])
-
-        self.pg0.add_stream(port_pkts)
-        self.pg_enable_capture(self.pg_interfaces)
-        self.pg_start()
-
-        rx = self.pg2.get_capture(len(port_pkts))
+        self.send_and_expect_load_balancing(self.pg0, src_mpls_pkts,
+                                            [self.pg1, self.pg2])
+        self.send_and_expect_one_itf(self.pg0, port_ip_pkts, self.pg2)
 
         #
         # change the flow hash config back to defaults
-- 
cgit 1.2.3-korg


From 6af1c04f925f0d74fc02789cf8227706ed6a8c2a Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Fri, 26 May 2017 03:48:53 -0700
Subject: MPLS lookup DPO does not pop the label (nor does it handle replicate)

Change-Id: I7de6b96631d1645d0eadd38525860d84d78e316d
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/vnet/dpo/lookup_dpo.c   |  47 +++++++++++++++++---
 src/vnet/ip/ip4_forward.c   |   1 -
 src/vnet/mpls/mpls_lookup.c |  79 +---------------------------------
 src/vnet/mpls/mpls_lookup.h | 102 ++++++++++++++++++++++++++++++++++++++++++++
 test/test_mpls.py           |  20 +++++++++
 5 files changed, 165 insertions(+), 84 deletions(-)
 create mode 100644 src/vnet/mpls/mpls_lookup.h

(limited to 'src/vnet/dpo')

diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
index cf489d7e..26363a2f 100644
--- a/src/vnet/dpo/lookup_dpo.c
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -15,8 +15,8 @@
 
 #include <vnet/ip/ip.h>
 #include <vnet/dpo/lookup_dpo.h>
-#include <vnet/dpo/load_balance.h>
-#include <vnet/mpls/mpls.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/mpls/mpls_lookup.h>
 #include <vnet/fib/fib_table.h>
 #include <vnet/fib/ip4_fib.h>
 #include <vnet/fib/ip6_fib.h>
@@ -999,7 +999,7 @@ lookup_dpo_mpls_inline (vlib_main_t * vm,
 
         while (n_left_from > 0 && n_left_to_next > 0)
         {
-            u32 bi0, lkdi0, lbi0, fib_index0,  next0;
+            u32 bi0, lkdi0, lbi0, fib_index0, next0, hash0;
             const mpls_unicast_header_t * hdr0;
             const load_balance_t *lb0;
             const lookup_dpo_t * lkd0;
@@ -1043,9 +1043,44 @@ lookup_dpo_mpls_inline (vlib_main_t * vm,
             next0 = dpo0->dpoi_next_node;
             vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
-            vlib_increment_combined_counter
-                (cm, thread_index, lbi0, 1,
-                 vlib_buffer_length_in_chain (vm, b0));
+
+            if (MPLS_IS_REPLICATE & lbi0)
+            {
+                next0 = mpls_lookup_to_replicate_edge;
+                vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+                    (lbi0 & ~MPLS_IS_REPLICATE);
+            }
+            else
+            {
+                lb0 = load_balance_get(lbi0);
+                ASSERT (lb0->lb_n_buckets > 0);
+                ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+                if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
+                {
+                    hash0 = vnet_buffer (b0)->ip.flow_hash =
+                        mpls_compute_flow_hash(hdr0, lb0->lb_hash_config);
+                    dpo0 = load_balance_get_fwd_bucket
+                        (lb0,
+                         (hash0 & (lb0->lb_n_buckets_minus_1)));
+                }
+                else
+                {
+                    dpo0 = load_balance_get_bucket_i (lb0, 0);
+                }
+                next0 = dpo0->dpoi_next_node;
+
+                vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+                vlib_increment_combined_counter
+                    (cm, thread_index, lbi0, 1,
+                     vlib_buffer_length_in_chain (vm, b0));
+            }
+
+          vnet_buffer (b0)->mpls.ttl = ((char*)hdr0)[3];
+            vnet_buffer (b0)->mpls.exp = (((char*)hdr0)[2] & 0xe) >> 1;
+            vnet_buffer (b0)->mpls.first = 1;
+            vlib_buffer_advance(b0, sizeof(*hdr0));
 
 	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) 
             {
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 37467182..d86f22ca 100755
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -2473,7 +2473,6 @@ ip4_rewrite_inline (vlib_main_t * vm,
 		}
 
 	      /* Verify checksum. */
-	      ASSERT (ip0->checksum == ip4_header_checksum (ip0));
 	      ASSERT (ip1->checksum == ip4_header_checksum (ip1));
 	    }
 	  else
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index 42e5399c..7cedc384 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -17,7 +17,7 @@
 
 #include <vlib/vlib.h>
 #include <vnet/pg/pg.h>
-#include <vnet/mpls/mpls.h>
+#include <vnet/mpls/mpls_lookup.h>
 #include <vnet/fib/mpls_fib.h>
 #include <vnet/dpo/load_balance_map.h>
 #include <vnet/dpo/replicate_dpo.h>
@@ -30,7 +30,7 @@ static vlib_node_registration_t mpls_lookup_node;
 /**
  * The arc/edge from the MPLS lookup node to the MPLS replicate node
  */
-static u32 mpls_lookup_to_replicate_edge;
+u32 mpls_lookup_to_replicate_edge;
 
 typedef struct {
   u32 next_index;
@@ -56,81 +56,6 @@ format_mpls_lookup_trace (u8 * s, va_list * args)
   return s;
 }
 
-/*
- * Compute flow hash. 
- * We'll use it to select which adjacency to use for this flow.  And other things.
- */
-always_inline u32
-mpls_compute_flow_hash (const mpls_unicast_header_t * hdr,
-                        flow_hash_config_t flow_hash_config)
-{
-    /*
-     * We need to byte swap so we use the numerical value. i.e. an odd label
-     * leads to an odd bucket. as opposed to a label above and below value X.
-     */
-    u8 next_label_is_entropy;
-    mpls_label_t ho_label;
-    u32 hash, value;
-
-    ho_label = clib_net_to_host_u32(hdr->label_exp_s_ttl);
-    hash = vnet_mpls_uc_get_label(ho_label);
-    next_label_is_entropy = 0;
-
-    while (MPLS_EOS != vnet_mpls_uc_get_s(ho_label))
-    {
-        hdr++;
-        ho_label = clib_net_to_host_u32(hdr->label_exp_s_ttl);
-        value = vnet_mpls_uc_get_label(ho_label);
-
-        if (1 == next_label_is_entropy)
-        {
-            /*
-             * The label is an entropy value, use it alone as the hash
-             */
-            return (ho_label);
-        }
-        if (MPLS_IETF_ENTROPY_LABEL == value)
-        {
-            /*
-             * we've met a label in the stack indicating that tha next
-             * label is an entropy value
-             */
-            next_label_is_entropy = 1;
-        }
-        else
-        {
-            /*
-             * XOR the label values in the stack together to
-             * build up the hash value
-             */
-            hash ^= value;
-        }
-    }
-
-    /*
-     * check the top nibble for v4 and v6
-     */
-    hdr++;
-
-    switch (((u8*)hdr)[0] >> 4)
-    {
-    case 4:
-        /* incorporate the v4 flow-hash */
-        hash ^= ip4_compute_flow_hash ((const ip4_header_t *)hdr,
-                                       IP_FLOW_HASH_DEFAULT);
-        break;
-    case 6:
-        /* incorporate the v6 flow-hash */
-        hash ^= ip6_compute_flow_hash ((const ip6_header_t *)hdr,
-                                       IP_FLOW_HASH_DEFAULT);
-        break;
-    default:
-        break;
-    }
-
-    return (hash);
-}
-
 static inline uword
 mpls_lookup (vlib_main_t * vm,
              vlib_node_runtime_t * node,
diff --git a/src/vnet/mpls/mpls_lookup.h b/src/vnet/mpls/mpls_lookup.h
new file mode 100644
index 00000000..28c9124f
--- /dev/null
+++ b/src/vnet/mpls/mpls_lookup.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_LOOKUP_H__
+#define __MPLS_LOOKUP_H__
+
+#include <vnet/mpls/mpls.h>
+#include <vnet/ip/ip.h>
+
+/**
+ * Edge from the MPLS lookup node to the MPLS replicate node (defined in mpls_lookup.c)
+ */
+extern u32 mpls_lookup_to_replicate_edge;
+
+/*
+ * Compute the MPLS flow hash, used to select which adjacency/bucket a flow uses.
+ * NOTE: flow_hash_config is not read here; the IP hash uses IP_FLOW_HASH_DEFAULT.
+ */
+always_inline u32
+mpls_compute_flow_hash (const mpls_unicast_header_t * hdr,
+                        flow_hash_config_t flow_hash_config)
+{
+    /*
+     * We need to byte swap so we use the numerical value, i.e. an odd label
+     * leads to an odd bucket, as opposed to a label above and below value X.
+     */
+    u8 next_label_is_entropy;
+    mpls_label_t ho_label;
+    u32 hash, value;
+
+    ho_label = clib_net_to_host_u32(hdr->label_exp_s_ttl);
+    hash = vnet_mpls_uc_get_label(ho_label);
+    next_label_is_entropy = 0;
+
+    while (MPLS_EOS != vnet_mpls_uc_get_s(ho_label))
+    {
+        hdr++;
+        ho_label = clib_net_to_host_u32(hdr->label_exp_s_ttl);
+        value = vnet_mpls_uc_get_label(ho_label);
+
+        if (1 == next_label_is_entropy)
+        {
+            /*
+             * Entropy label: the whole host-order word is returned as the hash
+             */
+            return (ho_label);
+        }
+        if (MPLS_IETF_ENTROPY_LABEL == value)
+        {
+            /*
+             * we've met a label in the stack indicating that the next
+             * label is an entropy value
+             */
+            next_label_is_entropy = 1;
+        }
+        else
+        {
+            /*
+             * XOR the label values in the stack together to
+             * build up the hash value
+             */
+            hash ^= value;
+        }
+    }
+
+    /*
+     * step past the last label; the payload's top nibble tells v4 from v6
+     */
+    hdr++;
+
+    switch (((u8*)hdr)[0] >> 4)
+    {
+    case 4:
+        /* incorporate the v4 flow-hash */
+        hash ^= ip4_compute_flow_hash ((const ip4_header_t *)hdr,
+                                       IP_FLOW_HASH_DEFAULT);
+        break;
+    case 6:
+        /* incorporate the v6 flow-hash */
+        hash ^= ip6_compute_flow_hash ((const ip6_header_t *)hdr,
+                                       IP_FLOW_HASH_DEFAULT);
+        break;
+    default:
+        break;
+    }
+
+    return (hash);
+}
+
+#endif /* __MPLS_LOOKUP_H__ */
diff --git a/test/test_mpls.py b/test/test_mpls.py
index 77cec429..e3d013af 100644
--- a/test/test_mpls.py
+++ b/test/test_mpls.py
@@ -771,6 +771,26 @@ class TestMPLS(VppTestCase):
         rx = self.pg1.get_capture(packet_count)
         self.verify_capture_ip4(self.pg1, rx, tx, ping_resp=1)
 
+        #
+        # Double pop
+        #
+        route_36_neos = VppMplsRoute(self, 36, 0,
+                                     [VppRoutePath("0.0.0.0",
+                                                   0xffffffff)])
+        route_36_neos.add_vpp_config()
+
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_labelled_ip4(self.pg0, [36, 35],
+                                             ping=1, ip_itf=self.pg1)
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg1.get_capture(len(tx))
+        self.verify_capture_ip4(self.pg1, rx, tx, ping_resp=1)
+
+        route_36_neos.remove_vpp_config()
         route_35_eos.remove_vpp_config()
         route_34_eos.remove_vpp_config()
 
-- 
cgit 1.2.3-korg


From bb620d74b247f419eb485886c55148099b0213bb Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Thu, 29 Jun 2017 00:19:08 -0700
Subject: VPP debug image with worker threads hit assert on adding IP route
 with traffic (VPP-892)

When stacking DPOs the VLIB graph is also updated to add the edge between the nodes, if this edge does not yet exist. This addition should be done with the workers stopped.

Change-Id: I327e4d7d26f0b23eb280f17e4619ff2093ff7940
Signed-off-by: Neale Ranns <nranns@cisco.com>
(cherry picked from commit c02bd03ddf5eec9e9c79811360685f13e4ba8ee1)
---
 src/vlib/node.c       | 20 ++++++++++++++++++++
 src/vlib/node_funcs.h |  3 +++
 src/vnet/dpo/dpo.c    | 17 ++++++++++++++++-
 3 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'src/vnet/dpo')

diff --git a/src/vlib/node.c b/src/vlib/node.c
index eecad274..2cda0f06 100644
--- a/src/vlib/node.c
+++ b/src/vlib/node.c
@@ -151,6 +151,26 @@ vlib_node_runtime_update (vlib_main_t * vm, u32 node_index, u32 next_index)
   vlib_worker_thread_barrier_release (vm);
 }
 
+uword
+vlib_node_get_next (vlib_main_t * vm, uword node_index, uword next_node_index)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *node;
+  uword *p;
+
+  node = vec_elt (nm->nodes, node_index);
+
+  /* Runtime has to be initialized. */
+  ASSERT (nm->flags & VLIB_NODE_MAIN_RUNTIME_STARTED);
+
+  if ((p = hash_get (node->next_slot_by_node, next_node_index)))
+    {
+      return p[0];
+    }
+
+  return (~0);
+}
+
 /* Add next node to given node in given slot. */
 uword
 vlib_node_add_next_with_slot (vlib_main_t * vm,
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
index d6588a74..c0389b2f 100644
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -1070,6 +1070,9 @@ vlib_node_vectors_per_main_loop_as_integer (vlib_main_t * vm, u32 node_index)
 void
 vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f);
 
+/* Return the edge index if present, ~0 otherwise */
+uword vlib_node_get_next (vlib_main_t * vm, uword node, uword next_node);
+
 /* Add next node to given node in given slot. */
 uword
 vlib_node_add_next_with_slot (vlib_main_t * vm,
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
index 28aa0c23..389f995b 100644
--- a/src/vnet/dpo/dpo.c
+++ b/src/vnet/dpo/dpo.c
@@ -327,6 +327,8 @@ dpo_get_next_node (dpo_type_t child_type,
 
         vm = vlib_get_main();
 
+        vlib_worker_thread_barrier_sync(vm);
+
         ASSERT(NULL != dpo_nodes[child_type]);
         ASSERT(NULL != dpo_nodes[child_type][child_proto]);
         ASSERT(NULL != dpo_nodes[parent_type]);
@@ -368,6 +370,8 @@ dpo_get_next_node (dpo_type_t child_type,
             }
             cc++;
         }
+
+        vlib_worker_thread_barrier_release(vm);
     }
 
     return (dpo_edges[child_type][child_proto][parent_type][parent_proto]);
@@ -445,10 +449,21 @@ dpo_stack_from_node (u32 child_node_index,
     parent_node =
         vlib_get_node_by_name(vm, (u8*) dpo_nodes[parent_type][parent_proto][0]);
 
-    edge = vlib_node_add_next(vm,
+    edge = vlib_node_get_next(vm,
                               child_node_index,
                               parent_node->index);
 
+    if (~0 == edge)
+    {
+        vlib_worker_thread_barrier_sync(vm);
+
+        edge = vlib_node_add_next(vm,
+                                  child_node_index,
+                                  parent_node->index);
+
+        vlib_worker_thread_barrier_release(vm);
+    }
+
     dpo_stack_i(edge, dpo, parent);
 }
 
-- 
cgit 1.2.3-korg


From da78f957e46c686434149d332a477d7ea055d76a Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Wed, 24 May 2017 09:15:43 -0700
Subject: L2 over MPLS

[support for VPWS/VPLS]
- switch to using dpo_proto_t rather than fib_protocol_t in fib_paths so that we can describe L2 paths
- VLIB nodes to handle pop/push of MPLS labels to L2

Change-Id: Id050d06a11fd2c9c1c81ce5a0654e6c5ae6afa6e
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/plugins/gtpu/gtpu.c                    |   2 +-
 src/plugins/snat/snat.c                    |   2 +-
 src/vat/api_format.c                       |  17 +-
 src/vnet/dhcp/client.c                     |   6 +-
 src/vnet/dhcp/dhcp6_proxy_node.c           |   2 +-
 src/vnet/dpo/dpo.c                         |  19 ++
 src/vnet/dpo/dpo.h                         |  11 +-
 src/vnet/dpo/interface_dpo.c               |  30 +++
 src/vnet/dpo/mpls_label_dpo.c              |  45 +++-
 src/vnet/ethernet/arp.c                    |   4 +-
 src/vnet/fib/fib_api.h                     |   4 +-
 src/vnet/fib/fib_entry.c                   |   8 +-
 src/vnet/fib/fib_entry_src.c               |  16 +-
 src/vnet/fib/fib_entry_src.h               |   4 +-
 src/vnet/fib/fib_entry_src_api.c           |   2 +-
 src/vnet/fib/fib_entry_src_default_route.c |   2 +-
 src/vnet/fib/fib_entry_src_interface.c     |   2 +-
 src/vnet/fib/fib_entry_src_lisp.c          |   8 +-
 src/vnet/fib/fib_entry_src_mpls.c          |   4 +-
 src/vnet/fib/fib_entry_src_rr.c            |  15 +-
 src/vnet/fib/fib_entry_src_special.c       |   2 +-
 src/vnet/fib/fib_path.c                    |  79 +++----
 src/vnet/fib/fib_path.h                    |  11 +-
 src/vnet/fib/fib_path_ext.c                |   3 +
 src/vnet/fib/fib_path_list.c               |   4 +-
 src/vnet/fib/fib_path_list.h               |   4 +-
 src/vnet/fib/fib_table.c                   |   6 +-
 src/vnet/fib/fib_table.h                   |   6 +-
 src/vnet/fib/fib_test.c                    | 338 ++++++++++++++---------------
 src/vnet/fib/fib_types.h                   |   8 +-
 src/vnet/interface_format.c                |  12 +-
 src/vnet/ip/ip4_forward.c                  |   6 +-
 src/vnet/ip/ip6_forward.c                  |   4 +-
 src/vnet/ip/ip6_neighbor.c                 |  10 +-
 src/vnet/ip/ip_api.c                       |  38 ++--
 src/vnet/ip/lookup.c                       |  18 +-
 src/vnet/lisp-gpe/lisp_gpe.c               |  13 +-
 src/vnet/lisp-gpe/lisp_gpe_api.c           |  10 +-
 src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c     |   5 +-
 src/vnet/mfib/ip6_mfib.c                   |   6 +-
 src/vnet/mfib/mfib_entry.c                 |  10 +-
 src/vnet/mfib/mfib_test.c                  |  14 +-
 src/vnet/mpls/mpls.api                     |   4 +-
 src/vnet/mpls/mpls.c                       |  36 +--
 src/vnet/mpls/mpls_api.c                   |  21 +-
 src/vnet/mpls/mpls_tunnel.c                |  75 ++++---
 src/vnet/mpls/mpls_tunnel.h                |  38 ++--
 src/vnet/srmpls/sr_mpls_policy.c           |   6 +-
 src/vnet/srmpls/sr_mpls_steering.c         |   2 +-
 src/vnet/srv6/sr_steering.c                |   4 +-
 src/vnet/vxlan-gpe/vxlan_gpe.c             |   2 +-
 src/vnet/vxlan/vxlan.c                     |   2 +-
 src/vpp/app/vpe_cli.c                      |   2 +-
 test/test_bfd.py                           |   6 +-
 test/test_gre.py                           |  24 +-
 test/test_ip6.py                           |  26 +--
 test/test_map.py                           |  19 +-
 test/test_mpls.py                          | 318 ++++++++++++++++++++++-----
 test/test_p2p_ethernet.py                  |  16 +-
 test/vpp_ip_route.py                       |  24 +-
 test/vpp_mpls_tunnel_interface.py          |   6 +-
 test/vpp_papi_provider.py                  |   4 +-
 62 files changed, 889 insertions(+), 556 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/plugins/gtpu/gtpu.c b/src/plugins/gtpu/gtpu.c
index 84745bd8..3dfb4210 100755
--- a/src/plugins/gtpu/gtpu.c
+++ b/src/plugins/gtpu/gtpu.c
@@ -534,7 +534,7 @@ int vnet_gtpu_add_del_tunnel
 	      fib_node_index_t mfei;
 	      adj_index_t ai;
 	      fib_route_path_t path = {
-		.frp_proto = fp,
+		.frp_proto = fib_proto_to_dpo (fp),
 		.frp_addr = zero_addr,
 		.frp_sw_if_index = 0xffffffff,
 		.frp_fib_index = ~0,
diff --git a/src/plugins/snat/snat.c b/src/plugins/snat/snat.c
index 9fbc1e54..f196b5c2 100644
--- a/src/plugins/snat/snat.c
+++ b/src/plugins/snat/snat.c
@@ -135,7 +135,7 @@ snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
                                     (FIB_ENTRY_FLAG_CONNECTED |
                                      FIB_ENTRY_FLAG_LOCAL |
                                      FIB_ENTRY_FLAG_EXCLUSIVE),
-                                    FIB_PROTOCOL_IP4,
+                                    DPO_PROTO_IP4,
                                     NULL,
                                     sw_if_index,
                                     ~0,
diff --git a/src/vat/api_format.c b/src/vat/api_format.c
index f97cdeef..009cf173 100644
--- a/src/vat/api_format.c
+++ b/src/vat/api_format.c
@@ -7498,7 +7498,7 @@ api_mpls_route_add_del (vat_main_t * vam)
   mpls_label_t *next_hop_out_label_stack = NULL;
   mpls_label_t local_label = MPLS_LABEL_INVALID;
   u8 is_eos = 0;
-  u8 next_hop_proto_is_ip4 = 1;
+  dpo_proto_t next_hop_proto = DPO_PROTO_IP4;
 
   /* Parse args required to build the message */
   while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
@@ -7517,13 +7517,13 @@ api_mpls_route_add_del (vat_main_t * vam)
 			 &v4_next_hop_address))
 	{
 	  next_hop_set = 1;
-	  next_hop_proto_is_ip4 = 1;
+	  next_hop_proto = DPO_PROTO_IP4;
 	}
       else if (unformat (i, "via %U", unformat_ip6_address,
 			 &v6_next_hop_address))
 	{
 	  next_hop_set = 1;
-	  next_hop_proto_is_ip4 = 0;
+	  next_hop_proto = DPO_PROTO_IP6;
 	}
       else if (unformat (i, "weight %d", &next_hop_weight))
 	;
@@ -7548,12 +7548,12 @@ api_mpls_route_add_del (vat_main_t * vam)
       else if (unformat (i, "lookup-in-ip4-table %d", &next_hop_table_id))
 	{
 	  next_hop_set = 1;
-	  next_hop_proto_is_ip4 = 1;
+	  next_hop_proto = DPO_PROTO_IP4;
 	}
       else if (unformat (i, "lookup-in-ip6-table %d", &next_hop_table_id))
 	{
 	  next_hop_set = 1;
-	  next_hop_proto_is_ip4 = 0;
+	  next_hop_proto = DPO_PROTO_IP6;
 	}
       else if (unformat (i, "next-hop-table %d", &next_hop_table_id))
 	;
@@ -7599,7 +7599,7 @@ api_mpls_route_add_del (vat_main_t * vam)
       mp->mr_create_table_if_needed = create_table_if_needed;
 
       mp->mr_is_add = is_add;
-      mp->mr_next_hop_proto_is_ip4 = next_hop_proto_is_ip4;
+      mp->mr_next_hop_proto = next_hop_proto;
       mp->mr_is_classify = is_classify;
       mp->mr_is_multipath = is_multipath;
       mp->mr_is_resolve_host = resolve_host;
@@ -7622,13 +7622,14 @@ api_mpls_route_add_del (vat_main_t * vam)
 
       if (next_hop_set)
 	{
-	  if (next_hop_proto_is_ip4)
+	  if (DPO_PROTO_IP4 == next_hop_proto)
 	    {
 	      clib_memcpy (mp->mr_next_hop,
 			   &v4_next_hop_address,
 			   sizeof (v4_next_hop_address));
 	    }
-	  else
+	  else if (DPO_PROTO_IP6 == next_hop_proto)
+
 	    {
 	      clib_memcpy (mp->mr_next_hop,
 			   &v6_next_hop_address,
diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c
index cfe62a6f..dd5e99f2 100644
--- a/src/vnet/dhcp/client.c
+++ b/src/vnet/dhcp/client.c
@@ -296,7 +296,7 @@ int dhcp_client_for_us (u32 bi, vlib_buffer_t * b,
 					&all_0s,
 					FIB_SOURCE_DHCP,
 					FIB_ENTRY_FLAG_NONE,
-					FIB_PROTOCOL_IP4,
+					DPO_PROTO_IP4,
 					&nh,
 					c->sw_if_index,
 					~0,
@@ -605,7 +605,7 @@ dhcp_bound_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now)
 					  c->sw_if_index),
 				      &all_0s,
 				      FIB_SOURCE_DHCP,
-				      FIB_PROTOCOL_IP4,
+				      DPO_PROTO_IP4,
 				      &nh,
 				      c->sw_if_index,
 				      ~0,
@@ -900,7 +900,7 @@ int dhcp_client_add_del (dhcp_client_add_del_args_t * a)
 					  c->sw_if_index),
 				      &all_0s,
 				      FIB_SOURCE_DHCP,
-				      FIB_PROTOCOL_IP4,
+				      DPO_PROTO_IP4,
 				      &nh,
 				      c->sw_if_index,
 				      ~0,
diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c
index e109cc4c..9c2f5220 100644
--- a/src/vnet/dhcp/dhcp6_proxy_node.c
+++ b/src/vnet/dhcp/dhcp6_proxy_node.c
@@ -857,7 +857,7 @@ dhcp6_proxy_set_server (ip46_address_t *addr,
   else
     {
      const fib_route_path_t path_for_us = {
-          .frp_proto = FIB_PROTOCOL_IP6,
+          .frp_proto = DPO_PROTO_IP6,
           .frp_addr = zero_addr,
           .frp_sw_if_index = 0xffffffff,
           .frp_fib_index = ~0,
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
index 389f995b..aa770838 100644
--- a/src/vnet/dpo/dpo.c
+++ b/src/vnet/dpo/dpo.c
@@ -109,6 +109,25 @@ vnet_link_to_dpo_proto (vnet_link_t linkt)
     return (0);
 }
 
+vnet_link_t
+dpo_proto_to_link (dpo_proto_t dp)
+{
+    switch (dp)
+    {
+    case DPO_PROTO_IP6:
+        return (VNET_LINK_IP6);
+    case DPO_PROTO_IP4:
+        return (VNET_LINK_IP4);
+    case DPO_PROTO_MPLS:
+        return (VNET_LINK_MPLS);
+    case DPO_PROTO_ETHERNET:
+        return (VNET_LINK_ETHERNET);
+    case DPO_PROTO_NSH:
+        return (VNET_LINK_NSH);
+    }
+    return (~0);
+}
+
 u8 *
 format_dpo_type (u8 * s, va_list * args)
 {
diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h
index 5aa4e2d2..42fc51d4 100644
--- a/src/vnet/dpo/dpo.h
+++ b/src/vnet/dpo/dpo.h
@@ -59,14 +59,10 @@ typedef u32 index_t;
  */
 typedef enum dpo_proto_t_
 {
-#if CLIB_DEBUG > 0
-    DPO_PROTO_IP4 = 1,
-#else
     DPO_PROTO_IP4 = 0,
-#endif
     DPO_PROTO_IP6,
-    DPO_PROTO_ETHERNET,
     DPO_PROTO_MPLS,
+    DPO_PROTO_ETHERNET,
     DPO_PROTO_NSH,
 } __attribute__((packed)) dpo_proto_t;
 
@@ -272,6 +268,11 @@ extern u8 *format_dpo_type(u8 * s, va_list * args);
  */
 extern u8 *format_dpo_proto(u8 * s, va_list * args);
 
+/**
+ * @brief Convert a DPO protocol to the corresponding VNET link type
+ */
+extern vnet_link_t dpo_proto_to_link(dpo_proto_t dp);
+
 /**
  * @brief
  *  Set and stack a DPO.
diff --git a/src/vnet/dpo/interface_dpo.c b/src/vnet/dpo/interface_dpo.c
index 8d700c23..780bfa2a 100644
--- a/src/vnet/dpo/interface_dpo.c
+++ b/src/vnet/dpo/interface_dpo.c
@@ -195,11 +195,17 @@ const static char* const interface_dpo_ip6_nodes[] =
     "interface-dpo-ip4",
     NULL,
 };
+const static char* const interface_dpo_l2_nodes[] =
+{
+    "interface-dpo-l2",
+    NULL,
+};
 
 const static char* const * const interface_dpo_nodes[DPO_PROTO_NUM] =
 {
     [DPO_PROTO_IP4]  = interface_dpo_ip4_nodes,
     [DPO_PROTO_IP6]  = interface_dpo_ip6_nodes,
+    [DPO_PROTO_ETHERNET]  = interface_dpo_l2_nodes,
     [DPO_PROTO_MPLS] = NULL,
 };
 
@@ -382,6 +388,14 @@ interface_dpo_ip6 (vlib_main_t * vm,
     return (interface_dpo_inline(vm, node, from_frame));
 }
 
+static uword
+interface_dpo_l2 (vlib_main_t * vm,
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * from_frame)
+{
+    return (interface_dpo_inline(vm, node, from_frame));
+}
+
 VLIB_REGISTER_NODE (interface_dpo_ip4_node) = {
     .function = interface_dpo_ip4,
     .name = "interface-dpo-ip4",
@@ -414,3 +428,19 @@ VLIB_REGISTER_NODE (interface_dpo_ip6_node) = {
 VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_ip6_node,
                               interface_dpo_ip6)
 
+VLIB_REGISTER_NODE (interface_dpo_l2_node) = {
+    .function = interface_dpo_l2,
+    .name = "interface-dpo-l2",
+    .vector_size = sizeof (u32),
+    .format_trace = format_interface_dpo_trace,
+
+    .n_next_nodes = 2,
+    .next_nodes = {
+        [INTERFACE_DPO_DROP] = "error-drop",
+        [INTERFACE_DPO_INPUT] = "l2-input",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_l2_node,
+                              interface_dpo_l2)
+
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
index 1c451a51..b178a902 100644
--- a/src/vnet/dpo/mpls_label_dpo.c
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -192,7 +192,8 @@ mpls_label_imposition_inline (vlib_main_t * vm,
                               vlib_node_runtime_t * node,
                               vlib_frame_t * from_frame,
                               u8 payload_is_ip4,
-                              u8 payload_is_ip6)
+                              u8 payload_is_ip6,
+                              u8 payload_is_ethernet)
 {
     u32 n_left_from, next_index, * from, * to_next;
 
@@ -320,6 +321,13 @@ mpls_label_imposition_inline (vlib_main_t * vm,
                 ttl2 = ip2->hop_limit;
                 ttl3 = ip3->hop_limit;
             }
+            else if (payload_is_ethernet)
+            {
+                /*
+                 * nothing to change in the ethernet header
+                 */
+                ttl0 = ttl1 = ttl2 = ttl3 = 255;
+            }
             else
             {
                 /*
@@ -551,7 +559,7 @@ mpls_label_imposition (vlib_main_t * vm,
                        vlib_node_runtime_t * node,
                        vlib_frame_t * frame)
 {
-    return (mpls_label_imposition_inline(vm, node, frame, 0, 0));
+    return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 0));
 }
 
 VLIB_REGISTER_NODE (mpls_label_imposition_node) = {
@@ -573,7 +581,7 @@ ip4_mpls_label_imposition (vlib_main_t * vm,
                            vlib_node_runtime_t * node,
                            vlib_frame_t * frame)
 {
-    return (mpls_label_imposition_inline(vm, node, frame, 1, 0));
+    return (mpls_label_imposition_inline(vm, node, frame, 1, 0, 0));
 }
 
 VLIB_REGISTER_NODE (ip4_mpls_label_imposition_node) = {
@@ -595,7 +603,7 @@ ip6_mpls_label_imposition (vlib_main_t * vm,
                            vlib_node_runtime_t * node,
                            vlib_frame_t * frame)
 {
-    return (mpls_label_imposition_inline(vm, node, frame, 0, 1));
+    return (mpls_label_imposition_inline(vm, node, frame, 0, 1, 0));
 }
 
 VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = {
@@ -612,6 +620,28 @@ VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = {
 VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_imposition_node,
                               ip6_mpls_label_imposition)
 
+static uword
+ethernet_mpls_label_imposition (vlib_main_t * vm,
+                                vlib_node_runtime_t * node,
+                                vlib_frame_t * frame)
+{
+    return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 1));
+}
+
+VLIB_REGISTER_NODE (ethernet_mpls_label_imposition_node) = {
+    .function = ethernet_mpls_label_imposition,
+    .name = "ethernet-mpls-label-imposition",
+    .vector_size = sizeof (u32),
+
+    .format_trace = format_mpls_label_imposition_trace,
+    .n_next_nodes = 1,
+    .next_nodes = {
+        [0] = "error-drop",
+    }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ethernet_mpls_label_imposition_node,
+                              ethernet_mpls_label_imposition)
+
 static void
 mpls_label_dpo_mem_show (void)
 {
@@ -643,11 +673,18 @@ const static char* const mpls_label_imp_mpls_nodes[] =
     "mpls-label-imposition",
     NULL,
 };
+const static char* const mpls_label_imp_ethernet_nodes[] =
+{
+    "ethernet-mpls-label-imposition",
+    NULL,
+};
+
 const static char* const * const mpls_label_imp_nodes[DPO_PROTO_NUM] =
 {
     [DPO_PROTO_IP4]  = mpls_label_imp_ip4_nodes,
     [DPO_PROTO_IP6]  = mpls_label_imp_ip6_nodes,
     [DPO_PROTO_MPLS] = mpls_label_imp_mpls_nodes,
+    [DPO_PROTO_ETHERNET] = mpls_label_imp_ethernet_nodes,
 };
 
 
diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c
index 4d9edaf5..8a394006 100644
--- a/src/vnet/ethernet/arp.c
+++ b/src/vnet/ethernet/arp.c
@@ -588,7 +588,7 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
 	  e->fib_entry_index =
 	    fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
 				      FIB_ENTRY_FLAG_ATTACHED,
-				      FIB_PROTOCOL_IP4, &pfx.fp_addr,
+				      DPO_PROTO_IP4, &pfx.fp_addr,
 				      e->sw_if_index, ~0, 1, NULL,
 				      FIB_ROUTE_PATH_FLAG_NONE);
 	}
@@ -1621,7 +1621,7 @@ arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
 
       fib_table_entry_path_remove (fib_index, &pfx,
 				   FIB_SOURCE_ADJ,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &pfx.fp_addr,
 				   e->sw_if_index, ~0, 1,
 				   FIB_ROUTE_PATH_FLAG_NONE);
diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h
index 73d76a42..d07d6cae 100644
--- a/src/vnet/fib/fib_api.h
+++ b/src/vnet/fib/fib_api.h
@@ -21,7 +21,7 @@ int
 add_del_route_check (fib_protocol_t table_proto,
 		     u32 table_id,
 		     u32 next_hop_sw_if_index,
-		     fib_protocol_t next_hop_table_proto,
+		     dpo_proto_t next_hop_table_proto,
 		     u32 next_hop_table_id,
 		     u8 create_missing_tables,
                      u8 is_rpf_id,
@@ -43,7 +43,7 @@ add_del_route_t_handler (u8 is_multipath,
                          u8 is_rpf_id,
 			 u32 fib_index,
 			 const fib_prefix_t * prefix,
-			 u8 next_hop_proto_is_ip4,
+			 dpo_proto_t next_hop_proto,
 			 const ip46_address_t * next_hop,
 			 u32 next_hop_sw_if_index,
 			 u8 next_hop_fib_index,
diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c
index d7ff1c8c..2027f2be 100644
--- a/src/vnet/fib/fib_entry.c
+++ b/src/vnet/fib/fib_entry.c
@@ -58,12 +58,18 @@ fib_entry_get_index (const fib_entry_t * fib_entry)
     return (fib_entry - fib_entry_pool);
 }
 
-static fib_protocol_t
+fib_protocol_t
 fib_entry_get_proto (const fib_entry_t * fib_entry)
 {
     return (fib_entry->fe_prefix.fp_proto);
 }
 
+dpo_proto_t
+fib_entry_get_dpo_proto (const fib_entry_t * fib_entry)
+{
+    return (fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto));
+}
+
 fib_forward_chain_type_t
 fib_entry_get_default_chain_type (const fib_entry_t *fib_entry)
 {
diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c
index ff73cbf9..173df74f 100644
--- a/src/vnet/fib/fib_entry_src.c
+++ b/src/vnet/fib/fib_entry_src.c
@@ -29,12 +29,6 @@
  */
 static fib_entry_src_vft_t fib_entry_src_vft[FIB_SOURCE_MAX];
 
-static fib_protocol_t
-fib_entry_get_proto (const fib_entry_t * fib_entry)
-{
-    return (fib_entry->fe_prefix.fp_proto);
-}
-
 void
 fib_entry_src_register (fib_source_t source,
 			const fib_entry_src_vft_t *vft)
@@ -861,7 +855,7 @@ fib_entry_src_action_add (fib_entry_t *fib_entry,
 	fib_entry_src_vft[source].fesv_add(esrc,
 					   fib_entry,
 					   flags,
-					   fib_entry_get_proto(fib_entry),
+                                           fib_entry_get_dpo_proto(fib_entry),
 					   dpo);
     }
 
@@ -914,7 +908,7 @@ fib_entry_src_action_update (fib_entry_t *fib_entry,
 	fib_entry_src_vft[source].fesv_add(esrc,
 					   fib_entry,
 					   flags,
-					   fib_entry_get_proto(fib_entry),
+					   fib_entry_get_dpo_proto(fib_entry),
 					   dpo);
     }
 
@@ -1106,8 +1100,7 @@ fib_entry_src_action_path_add (fib_entry_t *fib_entry,
                                      source,
                                      flags,
                                      drop_dpo_get(
-                                         fib_proto_to_dpo(
-                                             fib_entry_get_proto(fib_entry))));
+                                         fib_entry_get_dpo_proto(fib_entry)));
 	esrc = fib_entry_src_find(fib_entry, source, NULL);
     }
 
@@ -1166,8 +1159,7 @@ fib_entry_src_action_path_swap (fib_entry_t *fib_entry,
 					     source,
 					     flags,
                                              drop_dpo_get(
-                                                 fib_proto_to_dpo(
-                                                     fib_entry_get_proto(fib_entry))));
+                                                 fib_entry_get_dpo_proto(fib_entry)));
 	esrc = fib_entry_src_find(fib_entry, source, NULL);
     }
 
diff --git a/src/vnet/fib/fib_entry_src.h b/src/vnet/fib/fib_entry_src.h
index 640c174d..35c43936 100644
--- a/src/vnet/fib/fib_entry_src.h
+++ b/src/vnet/fib/fib_entry_src.h
@@ -73,7 +73,7 @@ typedef void (*fib_entry_src_deactivate_t)(fib_entry_src_t *src,
 typedef void (*fib_entry_src_add_t)(fib_entry_src_t *src,
 				    const fib_entry_t *entry,
 				    fib_entry_flag_t flags,
-				    fib_protocol_t proto,
+				    dpo_proto_t proto,
 				    const dpo_id_t *dpo);
 
 /**
@@ -277,6 +277,8 @@ extern void fib_entry_src_mk_lb (fib_entry_t *fib_entry,
 				 fib_forward_chain_type_t fct,
 				 dpo_id_t *dpo_lb);
 
+extern fib_protocol_t fib_entry_get_proto(const fib_entry_t * fib_entry);
+extern dpo_proto_t fib_entry_get_dpo_proto(const fib_entry_t * fib_entry);
 
 /*
  * Per-source registration. declared here so we save a separate .h file for each
diff --git a/src/vnet/fib/fib_entry_src_api.c b/src/vnet/fib/fib_entry_src_api.c
index f895886b..1cdcfbde 100644
--- a/src/vnet/fib/fib_entry_src_api.c
+++ b/src/vnet/fib/fib_entry_src_api.c
@@ -131,7 +131,7 @@ static void
 fib_entry_src_api_add (fib_entry_src_t *src,
 		       const fib_entry_t *entry,
 		       fib_entry_flag_t flags,
-		       fib_protocol_t proto,
+		       dpo_proto_t proto,
 		       const dpo_id_t *dpo)
 {
     if (FIB_ENTRY_FLAG_NONE != flags)
diff --git a/src/vnet/fib/fib_entry_src_default_route.c b/src/vnet/fib/fib_entry_src_default_route.c
index 9f4e7c36..431abb66 100644
--- a/src/vnet/fib/fib_entry_src_default_route.c
+++ b/src/vnet/fib/fib_entry_src_default_route.c
@@ -35,7 +35,7 @@ static void
 fib_entry_src_default_route_add (fib_entry_src_t *src,
 				 const fib_entry_t *entry,
 				 fib_entry_flag_t flags,
-				 fib_protocol_t proto,
+				 dpo_proto_t proto,
 				 const dpo_id_t *dpo)
 {
     src->fes_pl = fib_path_list_create_special(proto,
diff --git a/src/vnet/fib/fib_entry_src_interface.c b/src/vnet/fib/fib_entry_src_interface.c
index bb87818f..6c087f34 100644
--- a/src/vnet/fib/fib_entry_src_interface.c
+++ b/src/vnet/fib/fib_entry_src_interface.c
@@ -35,7 +35,7 @@ static void
 fib_entry_src_interface_add (fib_entry_src_t *src,
                              const fib_entry_t *entry,
                              fib_entry_flag_t flags,
-                             fib_protocol_t proto,
+                             dpo_proto_t proto,
                              const dpo_id_t *dpo)
 {
     src->fes_pl = fib_path_list_create_special(
diff --git a/src/vnet/fib/fib_entry_src_lisp.c b/src/vnet/fib/fib_entry_src_lisp.c
index 7f8b91bb..e72dce63 100644
--- a/src/vnet/fib/fib_entry_src_lisp.c
+++ b/src/vnet/fib/fib_entry_src_lisp.c
@@ -79,10 +79,10 @@ fib_entry_src_lisp_path_remove (fib_entry_src_t *src,
 
 static void
 fib_entry_src_lisp_add (fib_entry_src_t *src,
-		       const fib_entry_t *entry,
-		       fib_entry_flag_t flags,
-		       fib_protocol_t proto,
-		       const dpo_id_t *dpo)
+                        const fib_entry_t *entry,
+                        fib_entry_flag_t flags,
+                        dpo_proto_t proto,
+                        const dpo_id_t *dpo)
 {
     if (FIB_ENTRY_FLAG_NONE != flags)
     {
diff --git a/src/vnet/fib/fib_entry_src_mpls.c b/src/vnet/fib/fib_entry_src_mpls.c
index 14c7310f..a616458f 100644
--- a/src/vnet/fib/fib_entry_src_mpls.c
+++ b/src/vnet/fib/fib_entry_src_mpls.c
@@ -57,13 +57,13 @@ static void
 fib_entry_src_mpls_add (fib_entry_src_t *src,
                         const fib_entry_t *entry,
                         fib_entry_flag_t flags,
-                        fib_protocol_t proto,
+                        dpo_proto_t proto,
                         const dpo_id_t *dpo)
 {
     src->fes_pl =
 	fib_path_list_create_special(proto,
 				     FIB_PATH_LIST_FLAG_DROP,
-				     drop_dpo_get(fib_proto_to_dpo(proto)));
+				     drop_dpo_get(proto));
 }
 
 static void
diff --git a/src/vnet/fib/fib_entry_src_rr.c b/src/vnet/fib/fib_entry_src_rr.c
index d66ef7b1..1153f3f1 100644
--- a/src/vnet/fib/fib_entry_src_rr.c
+++ b/src/vnet/fib/fib_entry_src_rr.c
@@ -35,7 +35,7 @@ fib_entry_src_rr_resolve_via_connected (fib_entry_src_t *src,
 					const fib_entry_t *cover)
 {
     const fib_route_path_t path = {
-	.frp_proto = fib_entry->fe_prefix.fp_proto,
+	.frp_proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto),
 	.frp_addr = fib_entry->fe_prefix.fp_addr,
 	.frp_sw_if_index = fib_entry_get_resolving_interface(
 	                       fib_entry_get_index(cover)),
@@ -90,18 +90,17 @@ fib_entry_src_rr_use_covers_pl (fib_entry_src_t *src,
                                 const fib_entry_t *cover)
 {
     fib_node_index_t *entries = NULL;
-    fib_protocol_t proto;
+    dpo_proto_t proto;
 
-    proto = fib_entry->fe_prefix.fp_proto;
+    proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto);
     vec_add1(entries, fib_entry_get_index(fib_entry));
 
     if (fib_path_list_recursive_loop_detect(cover->fe_parent,
                                             &entries))
     {
-        src->fes_pl = fib_path_list_create_special(
-            proto,
-            FIB_PATH_LIST_FLAG_DROP,
-            drop_dpo_get(fib_proto_to_dpo(proto)));
+        src->fes_pl = fib_path_list_create_special(proto,
+                                                   FIB_PATH_LIST_FLAG_DROP,
+                                                   drop_dpo_get(proto));
     }
     else
     {
@@ -126,7 +125,7 @@ fib_entry_src_rr_activate (fib_entry_src_t *src,
      */
     if (FIB_PROTOCOL_MPLS == fib_entry->fe_prefix.fp_proto)
     {
-	src->fes_pl = fib_path_list_create_special(FIB_PROTOCOL_MPLS,
+	src->fes_pl = fib_path_list_create_special(DPO_PROTO_MPLS,
 						   FIB_PATH_LIST_FLAG_DROP,
 						   NULL);
 	fib_path_list_lock(src->fes_pl);
diff --git a/src/vnet/fib/fib_entry_src_special.c b/src/vnet/fib/fib_entry_src_special.c
index 75605d7f..e979e18f 100644
--- a/src/vnet/fib/fib_entry_src_special.c
+++ b/src/vnet/fib/fib_entry_src_special.c
@@ -43,7 +43,7 @@ static void
 fib_entry_src_special_add (fib_entry_src_t *src,
 			   const fib_entry_t *entry,
 			   fib_entry_flag_t flags,
-			   fib_protocol_t proto,
+			   dpo_proto_t proto,
 			   const dpo_id_t *dpo)
 {
     src->fes_pl =
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 3a67a544..58050ccb 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -193,7 +193,7 @@ typedef struct fib_path_t_ {
      * next-hop's address. We can't derive this from the address itself
      * since the address can be all zeros
      */
-    fib_protocol_t fp_nh_proto;
+    dpo_proto_t fp_nh_proto;
 
     /**
      * UCMP [unnormalised] weigth
@@ -381,7 +381,7 @@ format_fib_path (u8 * s, va_list * args)
 
     s = format (s, "      index:%d ", fib_path_get_index(path));
     s = format (s, "pl-index:%d ", path->fp_pl_index);
-    s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto);
+    s = format (s, "%U ", format_dpo_proto, path->fp_nh_proto);
     s = format (s, "weight=%d ", path->fp_weight);
     s = format (s, "pref=%d ", path->fp_preference);
     s = format (s, "%s: ", fib_path_type_names[path->fp_type]);
@@ -454,7 +454,7 @@ format_fib_path (u8 * s, va_list * args)
 	}
 	break;
     case FIB_PATH_TYPE_RECURSIVE:
-	if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
+	if (DPO_PROTO_MPLS == path->fp_nh_proto)
 	{
 	    s = format (s, "via %U %U",
 			format_mpls_unicast_label,
@@ -552,14 +552,14 @@ fib_path_attached_next_hop_get_adj (fib_path_t *path,
 	 * the subnet address (the attached route) links to the
 	 * auto-adj (see below), we want that adj here too.
 	 */
-	return (adj_nbr_add_or_lock(path->fp_nh_proto,
+	return (adj_nbr_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto),
 				    link,
 				    &zero_addr,
 				    path->attached_next_hop.fp_interface));
     }
     else
     {
-	return (adj_nbr_add_or_lock(path->fp_nh_proto,
+	return (adj_nbr_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto),
 				    link,
 				    &path->attached_next_hop.fp_nh,
 				    path->attached_next_hop.fp_interface));
@@ -575,10 +575,10 @@ fib_path_attached_next_hop_set (fib_path_t *path)
      */
     dpo_set(&path->fp_dpo,
 	    DPO_ADJACENCY,
-	    fib_proto_to_dpo(path->fp_nh_proto),
+	    path->fp_nh_proto,
 	    fib_path_attached_next_hop_get_adj(
 		 path,
-		 fib_proto_to_link(path->fp_nh_proto)));
+		 dpo_proto_to_link(path->fp_nh_proto)));
 
     /*
      * become a child of the adjacency so we receive updates
@@ -607,14 +607,14 @@ fib_path_attached_get_adj (fib_path_t *path,
          * point-2-point interfaces do not require a glean, since
          * there is nothing to ARP. Install a rewrite/nbr adj instead
          */
-        return (adj_nbr_add_or_lock(path->fp_nh_proto,
+        return (adj_nbr_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto),
                                     link,
                                     &zero_addr,
                                     path->attached.fp_interface));
     }
     else
     {
-        return (adj_glean_add_or_lock(path->fp_nh_proto,
+        return (adj_glean_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto),
                                       path->attached.fp_interface,
                                       NULL));
     }
@@ -650,7 +650,7 @@ fib_path_recursive_adj_update (fib_path_t *path,
     if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP)
     {
 	path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
-	dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+	dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto));
     }
     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST)
     {
@@ -668,7 +668,7 @@ fib_path_recursive_adj_update (fib_path_t *path,
 	if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR)
 	{
 	    path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
-            dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+            dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto));
 
             /*
              * PIC edge trigger. let the load-balance maps know
@@ -685,7 +685,7 @@ fib_path_recursive_adj_update (fib_path_t *path,
 	if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib)))
 	{
 	    path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
-            dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+            dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto));
 
             /*
              * PIC edge trigger. let the load-balance maps know
@@ -699,7 +699,7 @@ fib_path_recursive_adj_update (fib_path_t *path,
     if (!fib_entry_is_resolved(path->fp_via_fib))
     {
         path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
-        dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+        dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto));
 
         /*
          * PIC edge trigger. let the load-balance maps know
@@ -720,9 +720,7 @@ fib_path_recursive_adj_update (fib_path_t *path,
      */
     dpo_copy(dpo, &via_dpo);
 
-    FIB_PATH_DBG(path, "recursive update: %U",
-		 fib_get_lookup_main(path->fp_nh_proto),
-		 &path->fp_dpo, 2);
+    FIB_PATH_DBG(path, "recursive update:");
 
     dpo_reset(&via_dpo);
 }
@@ -804,13 +802,8 @@ fib_path_unresolve (fib_path_t *path)
 static fib_forward_chain_type_t
 fib_path_to_chain_type (const fib_path_t *path)
 {
-    switch (path->fp_nh_proto)
+    if (DPO_PROTO_MPLS == path->fp_nh_proto)
     {
-    case FIB_PROTOCOL_IP4:
-	return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
-    case FIB_PROTOCOL_IP6:
-	return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
-    case FIB_PROTOCOL_MPLS:
         if (FIB_PATH_TYPE_RECURSIVE == path->fp_type &&
             MPLS_EOS == path->recursive.fp_nh.fp_eos)
         {
@@ -821,7 +814,10 @@ fib_path_to_chain_type (const fib_path_t *path)
             return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
         }
     }
-    return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
+    else
+    {
+        return (fib_forw_chain_type_from_dpo_proto(path->fp_nh_proto));
+    }
 }
 
 /*
@@ -927,7 +923,7 @@ FIXME comment
 
             ai = fib_path_attached_next_hop_get_adj(
                      path,
-                     fib_proto_to_link(path->fp_nh_proto));
+                     dpo_proto_to_link(path->fp_nh_proto));
 
             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
             if (if_is_up && adj_is_up(ai))
@@ -935,9 +931,7 @@ FIXME comment
                 path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
             }
 
-            dpo_set(&path->fp_dpo, DPO_ADJACENCY,
-                    fib_proto_to_dpo(path->fp_nh_proto),
-                    ai);
+            dpo_set(&path->fp_dpo, DPO_ADJACENCY, path->fp_nh_proto, ai);
             adj_unlock(ai);
 
             if (!if_is_up)
@@ -1141,7 +1135,7 @@ fib_path_create (fib_node_index_t pl_index,
 	else
 	{
 	    path->fp_type = FIB_PATH_TYPE_RECURSIVE;
-	    if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
+	    if (DPO_PROTO_MPLS == path->fp_nh_proto)
 	    {
 		path->recursive.fp_nh.fp_local_label = rpath->frp_local_label;
                 path->recursive.fp_nh.fp_eos = rpath->frp_eos;
@@ -1167,7 +1161,7 @@ fib_path_create (fib_node_index_t pl_index,
  */
 fib_node_index_t
 fib_path_create_special (fib_node_index_t pl_index,
-			 fib_protocol_t nh_proto,
+			 dpo_proto_t nh_proto,
 			 fib_path_cfg_flags_t flags,
 			 const dpo_id_t *dpo)
 {
@@ -1433,7 +1427,7 @@ fib_path_cmp_w_route_path (fib_node_index_t path_index,
 	    res = (path->attached.fp_interface - rpath->frp_sw_if_index);
 	    break;
 	case FIB_PATH_TYPE_RECURSIVE:
-            if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
+            if (DPO_PROTO_MPLS == path->fp_nh_proto)
             {
                 res = path->recursive.fp_nh.fp_local_label - rpath->frp_local_label;
 
@@ -1535,8 +1529,7 @@ fib_path_recursive_loop_detect (fib_node_index_t path_index,
 	    FIB_PATH_DBG(path, "recursive loop formed");
 	    path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
 
-	    dpo_copy(&path->fp_dpo,
-                    drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+	    dpo_copy(&path->fp_dpo, drop_dpo_get(path->fp_nh_proto));
 	}
 	else
 	{
@@ -1590,8 +1583,7 @@ fib_path_resolve (fib_node_index_t path_index)
      */
     if (fib_path_is_permanent_drop(path))
     {
-	dpo_copy(&path->fp_dpo,
-                 drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+	dpo_copy(&path->fp_dpo, drop_dpo_get(path->fp_nh_proto));
 	path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
 	return (fib_path_is_resolved(path_index));
     }
@@ -1612,9 +1604,9 @@ fib_path_resolve (fib_node_index_t path_index)
 	}
         dpo_set(&path->fp_dpo,
                 DPO_ADJACENCY,
-                fib_proto_to_dpo(path->fp_nh_proto),
+                path->fp_nh_proto,
                 fib_path_attached_get_adj(path,
-                                          fib_proto_to_link(path->fp_nh_proto)));
+                                          dpo_proto_to_link(path->fp_nh_proto)));
 
 	/*
 	 * become a child of the adjacency so we receive updates
@@ -1639,7 +1631,7 @@ fib_path_resolve (fib_node_index_t path_index)
 
 	ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib);
 
-	if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
+	if (DPO_PROTO_MPLS == path->fp_nh_proto)
 	{
 	    fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label,
                                        path->recursive.fp_nh.fp_eos,
@@ -1680,8 +1672,7 @@ fib_path_resolve (fib_node_index_t path_index)
 	/*
 	 * Resolve via the drop
 	 */
-	dpo_copy(&path->fp_dpo,
-                 drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
+	dpo_copy(&path->fp_dpo, drop_dpo_get(path->fp_nh_proto));
 	break;
     case FIB_PATH_TYPE_DEAG:
     {
@@ -1696,7 +1687,7 @@ fib_path_resolve (fib_node_index_t path_index)
                 LOOKUP_UNICAST);
 
         lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id,
-                                           fib_proto_to_dpo(path->fp_nh_proto),
+                                           path->fp_nh_proto,
                                            cast,
                                            LOOKUP_INPUT_DST_ADDR,
                                            LOOKUP_TABLE_FROM_CONFIG,
@@ -1707,7 +1698,7 @@ fib_path_resolve (fib_node_index_t path_index)
 	/*
 	 * Resolve via a receive DPO.
 	 */
-	receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto),
+	receive_dpo_add_or_lock(path->fp_nh_proto,
                                 path->receive.fp_interface,
                                 &path->receive.fp_addr,
                                 &path->fp_dpo);
@@ -1716,7 +1707,7 @@ fib_path_resolve (fib_node_index_t path_index)
 	/*
 	 * Resolve via a receive DPO.
 	 */
-	interface_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto),
+	interface_dpo_add_or_lock(path->fp_nh_proto,
                                   path->intf_rx.fp_interface,
                                   &path->fp_dpo);
 	break;
@@ -2035,7 +2026,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
                     /*
                      * Create the adj needed for sending IP multicast traffic
                      */
-                    ai = adj_mcast_add_or_lock(path->fp_nh_proto,
+                    ai = adj_mcast_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto),
                                                fib_forw_chain_type_to_link_type(fct),
                                                path->attached.fp_interface);
                     dpo_set(dpo, DPO_ADJACENCY,
@@ -2187,7 +2178,7 @@ fib_path_encode (fib_node_index_t path_list_index,
     return (FIB_PATH_LIST_WALK_CONTINUE);
 }
 
-fib_protocol_t
+dpo_proto_t
 fib_path_get_proto (fib_node_index_t path_index)
 {
     fib_path_t *path;
diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h
index a34cb43f..f986e437 100644
--- a/src/vnet/fib/fib_path.h
+++ b/src/vnet/fib/fib_path.h
@@ -78,6 +78,11 @@ typedef enum fib_path_cfg_attribute_t_ {
      * The path is an interface recieve
      */
     FIB_PATH_CFG_ATTRIBUTE_LOCAL,
+    /**
+     * The path is L2. i.e. the parameters therein are to be interpreted as
+     * pertaining to L2 config.
+     */
+    FIB_PATH_CFG_ATTRIBUTE_L2,
     /**
      * Marker. Add new types before this one, then update it.
      */
@@ -98,6 +103,7 @@ typedef enum fib_path_cfg_attribute_t_ {
     [FIB_PATH_CFG_ATTRIBUTE_ATTACHED] = "attached",	\
     [FIB_PATH_CFG_ATTRIBUTE_INTF_RX] = "interface-rx",	\
     [FIB_PATH_CFG_ATTRIBUTE_RPF_ID] = "rpf-id",         \
+    [FIB_PATH_CFG_ATTRIBUTE_L2] = "l2",         \
 }
 
 #define FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(_item) \
@@ -118,6 +124,7 @@ typedef enum fib_path_cfg_flags_t_ {
     FIB_PATH_CFG_FLAG_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_ATTACHED),
     FIB_PATH_CFG_FLAG_INTF_RX = (1 << FIB_PATH_CFG_ATTRIBUTE_INTF_RX),
     FIB_PATH_CFG_FLAG_RPF_ID = (1 << FIB_PATH_CFG_ATTRIBUTE_RPF_ID),
+    FIB_PATH_CFG_FLAG_L2 = (1 << FIB_PATH_CFG_ATTRIBUTE_L2),
 } __attribute__ ((packed)) fib_path_cfg_flags_t;
 
 
@@ -131,7 +138,7 @@ extern u8 * format_fib_path(u8 * s, va_list * args);
 extern fib_node_index_t fib_path_create(fib_node_index_t pl_index,
 					const fib_route_path_t *path);
 extern fib_node_index_t fib_path_create_special(fib_node_index_t pl_index,
-						fib_protocol_t nh_proto,
+						dpo_proto_t nh_proto,
 						fib_path_cfg_flags_t flags,
 						const dpo_id_t *dpo);
 
@@ -148,7 +155,7 @@ extern int fib_path_is_recursive_constrained(fib_node_index_t path_index);
 extern int fib_path_is_exclusive(fib_node_index_t path_index);
 extern int fib_path_is_deag(fib_node_index_t path_index);
 extern int fib_path_is_looped(fib_node_index_t path_index);
-extern fib_protocol_t fib_path_get_proto(fib_node_index_t path_index);
+extern dpo_proto_t fib_path_get_proto(fib_node_index_t path_index);
 extern void fib_path_destroy(fib_node_index_t path_index);
 extern uword fib_path_hash(fib_node_index_t path_index);
 extern load_balance_path_t * fib_path_append_nh_for_multipath_hash(
diff --git a/src/vnet/fib/fib_path_ext.c b/src/vnet/fib/fib_path_ext.c
index 26f2b9b6..4438671b 100644
--- a/src/vnet/fib/fib_path_ext.c
+++ b/src/vnet/fib/fib_path_ext.c
@@ -191,6 +191,9 @@ fib_path_ext_stack (fib_path_ext_t *path_ext,
     case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
         parent_fct = child_fct;
 	break;
+    case FIB_FORW_CHAIN_TYPE_ETHERNET:
+        parent_fct = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS;
+	break;
     default:
         return (nhs);
 	break;
diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c
index 7a9c328c..f30fd7ea 100644
--- a/src/vnet/fib/fib_path_list.c
+++ b/src/vnet/fib/fib_path_list.c
@@ -611,7 +611,7 @@ fib_path_list_get_resolving_interface (fib_node_index_t path_list_index)
     return (sw_if_index);
 }
 
-fib_protocol_t
+dpo_proto_t
 fib_path_list_get_proto (fib_node_index_t path_list_index)
 {
     fib_path_list_t *path_list;
@@ -753,7 +753,7 @@ fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf)
 }
 
 fib_node_index_t
-fib_path_list_create_special (fib_protocol_t nh_proto,
+fib_path_list_create_special (dpo_proto_t nh_proto,
 			      fib_path_list_flags_t flags,
 			      const dpo_id_t *dpo)
 {
diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h
index b4b6985b..a54b79e2 100644
--- a/src/vnet/fib/fib_path_list.h
+++ b/src/vnet/fib/fib_path_list.h
@@ -106,7 +106,7 @@ typedef enum fib_path_list_flags_t_ {
 
 extern fib_node_index_t fib_path_list_create(fib_path_list_flags_t flags,
 					     const fib_route_path_t *paths);
-extern fib_node_index_t fib_path_list_create_special(fib_protocol_t nh_proto,
+extern fib_node_index_t fib_path_list_create_special(dpo_proto_t nh_proto,
 						     fib_path_list_flags_t flags,
 						     const dpo_id_t *dpo);
 
@@ -150,7 +150,7 @@ extern int fib_path_list_recursive_loop_detect(fib_node_index_t path_list_index,
 extern u32 fib_path_list_get_resolving_interface(fib_node_index_t path_list_index);
 extern int fib_path_list_is_looped(fib_node_index_t path_list_index);
 extern int fib_path_list_is_popular(fib_node_index_t path_list_index);
-extern fib_protocol_t fib_path_list_get_proto(fib_node_index_t path_list_index);
+extern dpo_proto_t fib_path_list_get_proto(fib_node_index_t path_list_index);
 extern u8 * fib_path_list_format(fib_node_index_t pl_index,
 				 u8 * s);
 extern index_t fib_path_list_lb_map_add_or_lock(fib_node_index_t pl_index,
diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c
index 5aa02dd0..6b6cc5cb 100644
--- a/src/vnet/fib/fib_table.c
+++ b/src/vnet/fib/fib_table.c
@@ -505,7 +505,7 @@ fib_table_entry_path_add (u32 fib_index,
 			  const fib_prefix_t *prefix,
 			  fib_source_t source,
 			  fib_entry_flag_t flags,
-			  fib_protocol_t next_hop_proto,
+			  dpo_proto_t next_hop_proto,
 			  const ip46_address_t *next_hop,
 			  u32 next_hop_sw_if_index,
 			  u32 next_hop_fib_index,
@@ -664,7 +664,7 @@ void
 fib_table_entry_path_remove (u32 fib_index,
 			     const fib_prefix_t *prefix,
 			     fib_source_t source,
-			     fib_protocol_t next_hop_proto,
+			     dpo_proto_t next_hop_proto,
 			     const ip46_address_t *next_hop,
 			     u32 next_hop_sw_if_index,
 			     u32 next_hop_fib_index,
@@ -755,7 +755,7 @@ fib_table_entry_update_one_path (u32 fib_index,
 				 const fib_prefix_t *prefix,
 				 fib_source_t source,
 				 fib_entry_flag_t flags,
-				 fib_protocol_t next_hop_proto,
+				 dpo_proto_t next_hop_proto,
 				 const ip46_address_t *next_hop,
 				 u32 next_hop_sw_if_index,
 				 u32 next_hop_fib_index,
diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h
index a65fea74..579740e9 100644
--- a/src/vnet/fib/fib_table.h
+++ b/src/vnet/fib/fib_table.h
@@ -288,7 +288,7 @@ extern fib_node_index_t fib_table_entry_path_add(u32 fib_index,
 						 const fib_prefix_t *prefix,
 						 fib_source_t source,
 						 fib_entry_flag_t flags,
-						 fib_protocol_t next_hop_proto,
+						 dpo_proto_t next_hop_proto,
 						 const ip46_address_t *next_hop,
 						 u32 next_hop_sw_if_index,
 						 u32 next_hop_fib_index,
@@ -364,7 +364,7 @@ extern fib_node_index_t fib_table_entry_path_add2(u32 fib_index,
 extern void fib_table_entry_path_remove(u32 fib_index,
 					const fib_prefix_t *prefix,
 					fib_source_t source,
-					fib_protocol_t next_hop_proto,
+					dpo_proto_t next_hop_proto,
 					const ip46_address_t *next_hop,
 					u32 next_hop_sw_if_index,
 					u32 next_hop_fib_index,
@@ -471,7 +471,7 @@ extern fib_node_index_t fib_table_entry_update_one_path(u32 fib_index,
 							const fib_prefix_t *prefix,
 							fib_source_t source,
 							fib_entry_flag_t flags,
-							fib_protocol_t next_hop_proto,
+							dpo_proto_t next_hop_proto,
 							const ip46_address_t *next_hop,
 							u32 next_hop_sw_if_index,
 							u32 next_hop_fib_index,
diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c
index 4c891667..59d5da2a 100644
--- a/src/vnet/fib/fib_test.c
+++ b/src/vnet/fib/fib_test.c
@@ -833,7 +833,7 @@ fib_test_v4 (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_ATTACHED),
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -861,7 +861,7 @@ fib_test_v4 (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_LOCAL),
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -911,7 +911,7 @@ fib_test_v4 (void)
     fib_table_entry_path_add(fib_index, &pfx,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_1,
 			     tm->hw[0]->sw_if_index,
 			     ~0, // invalid fib index
@@ -963,7 +963,7 @@ fib_test_v4 (void)
     pfx.fp_len = 0;
     fib_table_entry_path_remove(fib_index, &pfx,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_1,
 				tm->hw[0]->sw_if_index,
 				~0, // non-recursive path, so no FIB index
@@ -1029,7 +1029,7 @@ fib_test_v4 (void)
                                           &pfx_11_11_11_11_s_32,
                                           FIB_SOURCE_API,
                                           FIB_ENTRY_FLAG_ATTACHED,
-					  FIB_PROTOCOL_IP4,
+					  DPO_PROTO_IP4,
                                           &pfx_10_10_10_1_s_32.fp_addr,
                                           tm->hw[0]->sw_if_index,
                                           ~0, // invalid fib index
@@ -1095,7 +1095,7 @@ fib_test_v4 (void)
                                    &pfx_10_10_10_1_s_32,
                                    FIB_SOURCE_ADJ,
                                    FIB_ENTRY_FLAG_ATTACHED,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &pfx_10_10_10_1_s_32.fp_addr,
                                    tm->hw[0]->sw_if_index,
                                    ~0, // invalid fib index
@@ -1110,7 +1110,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
                                 &pfx_11_11_11_11_s_32,
                                 FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
                                 &pfx_10_10_10_1_s_32.fp_addr,
                                 tm->hw[0]->sw_if_index,
                                 ~0, // invalid fib index
@@ -1144,7 +1144,7 @@ fib_test_v4 (void)
                              &pfx_10_10_10_2_s_32,
                              FIB_SOURCE_ADJ,
                              FIB_ENTRY_FLAG_ATTACHED,
-                             FIB_PROTOCOL_IP4,
+                             DPO_PROTO_IP4,
                              &pfx_10_10_10_2_s_32.fp_addr,
                              tm->hw[0]->sw_if_index,
                              ~0, // invalid fib index
@@ -1181,7 +1181,7 @@ fib_test_v4 (void)
 			     &pfx_1_1_1_1_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_1,
 			     tm->hw[0]->sw_if_index,
 			     ~0, // invalid fib index
@@ -1214,7 +1214,7 @@ fib_test_v4 (void)
 			     &pfx_1_1_2_0_s_24,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_1,
 			     tm->hw[0]->sw_if_index,
 			     ~0, // invalid fib index
@@ -1241,7 +1241,7 @@ fib_test_v4 (void)
 			     &pfx_1_1_2_0_s_24,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_2,
 			     tm->hw[0]->sw_if_index,
 			     ~0, // invalid fib index
@@ -1280,7 +1280,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_1_1_2_0_s_24,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_2,
 				tm->hw[0]->sw_if_index,
 				~0,
@@ -1327,7 +1327,7 @@ fib_test_v4 (void)
 				   &bgp_100_pfx,
 				   FIB_SOURCE_API,
 				   FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &nh_1_1_1_1,
 				   ~0, // no index provided.
 				   fib_index, // nexthop in same fib as route
@@ -1363,7 +1363,7 @@ fib_test_v4 (void)
 			     &bgp_101_pfx,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_1_1_1_1,
 			     ~0, // no index provided.
 			     fib_index, // nexthop in same fib as route
@@ -1487,7 +1487,7 @@ fib_test_v4 (void)
                                    &bgp_200_pfx,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &pfx_1_1_1_2_s_32.fp_addr,
                                    ~0, // no index provided.
                                    fib_index, // nexthop in same fib as route
@@ -1534,7 +1534,7 @@ fib_test_v4 (void)
 			     &pfx_1_2_3_4_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
                              &nh_10_10_10_1,
                              tm->hw[0]->sw_if_index,
                              ~0,
@@ -1545,7 +1545,7 @@ fib_test_v4 (void)
                                    &pfx_1_2_3_4_s_32,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
                                    &nh_12_12_12_12,
                                    tm->hw[1]->sw_if_index,
                                    ~0,
@@ -1586,7 +1586,7 @@ fib_test_v4 (void)
 			     &pfx_1_2_3_5_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
                              &nh_12_12_12_12,
                              tm->hw[1]->sw_if_index,
                              ~0,
@@ -1597,7 +1597,7 @@ fib_test_v4 (void)
                                    &pfx_1_2_3_5_s_32,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
                                    &nh_10_10_10_1,
                                    tm->hw[0]->sw_if_index,
                                    ~0,
@@ -1669,7 +1669,7 @@ fib_test_v4 (void)
 				    &pfx_6_6_6_6_s_32,
 				    FIB_SOURCE_API,
 				    FIB_ENTRY_FLAG_NONE,
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    &nh_10_10_10_1,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -1688,7 +1688,7 @@ fib_test_v4 (void)
                              &pfx_6_6_6_6_s_32,
                              FIB_SOURCE_API,
                              FIB_ENTRY_FLAG_NONE,
-                             FIB_PROTOCOL_IP4,
+                             DPO_PROTO_IP4,
                              &nh_10_10_10_2,
                              tm->hw[0]->sw_if_index,
                              ~0, // invalid fib index
@@ -1770,7 +1770,7 @@ fib_test_v4 (void)
                              &pfx_6_6_6_6_s_32,
                              FIB_SOURCE_API,
                              FIB_ENTRY_FLAG_NONE,
-                             FIB_PROTOCOL_IP4,
+                             DPO_PROTO_IP4,
                              &nh_12_12_12_12,
                              tm->hw[1]->sw_if_index,
                              ~0, // invalid fib index
@@ -1915,7 +1915,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
                                 &pfx_6_6_6_6_s_32,
                                 FIB_SOURCE_API,
-                                FIB_PROTOCOL_IP4,
+                                DPO_PROTO_IP4,
                                 &nh_12_12_12_12,
                                 tm->hw[1]->sw_if_index,
                                 ~0, // invalid fib index
@@ -1995,7 +1995,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
                                 &pfx_6_6_6_6_s_32,
                                 FIB_SOURCE_API,
-                                FIB_PROTOCOL_IP4,
+                                DPO_PROTO_IP4,
                                 &nh_10_10_10_2,
                                 tm->hw[0]->sw_if_index,
                                 ~0, // invalid fib index
@@ -2026,7 +2026,7 @@ fib_test_v4 (void)
                                    &bgp_44_s_32,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &pfx_1_2_3_4_s_32.fp_addr,
                                    ~0,
                                    fib_index,
@@ -2037,7 +2037,7 @@ fib_test_v4 (void)
                                    &bgp_44_s_32,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &pfx_1_2_3_5_s_32.fp_addr,
                                    ~0,
                                    fib_index,
@@ -2107,7 +2107,7 @@ fib_test_v4 (void)
                                    &bgp_201_pfx,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &pfx_1_1_1_200_s_32.fp_addr,
                                    ~0, // no index provided.
                                    fib_index, // nexthop in same fib as route
@@ -2151,7 +2151,7 @@ fib_test_v4 (void)
 			     &pfx_1_1_1_0_s_24,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_1,
 			     tm->hw[0]->sw_if_index,
 			     ~0, // invalid fib index
@@ -2209,7 +2209,7 @@ fib_test_v4 (void)
 			     &pfx_1_1_1_0_s_28,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_2,
 			     tm->hw[0]->sw_if_index,
 			     ~0, // invalid fib index
@@ -2244,7 +2244,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_1_1_1_0_s_28,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_2,
 				tm->hw[0]->sw_if_index,
 				~0,
@@ -2275,7 +2275,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_1_1_1_0_s_24,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_1,
 				tm->hw[0]->sw_if_index,
 				~0,
@@ -2316,7 +2316,7 @@ fib_test_v4 (void)
 	                           &pfx_1_1_1_2_s_32,
 				   FIB_SOURCE_API,
 				   FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &nh_10_10_10_1,
 				   tm->hw[0]->sw_if_index,
 				   ~0, // invalid fib index
@@ -2351,7 +2351,7 @@ fib_test_v4 (void)
                                    &bgp_201_pfx,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &pfx_1_1_1_2_s_32.fp_addr,
                                    ~0,
                                    fib_index,
@@ -2362,7 +2362,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
                                 &bgp_201_pfx,
                                 FIB_SOURCE_API,
-                                FIB_PROTOCOL_IP4,
+                                DPO_PROTO_IP4,
                                 &pfx_1_1_1_2_s_32.fp_addr,
                                 ~0,
                                 fib_index,
@@ -2375,7 +2375,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&bgp_201_pfx,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&pfx_1_1_1_200_s_32.fp_addr,
 				~0, // no index provided.
 				fib_index,
@@ -2405,7 +2405,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&bgp_200_pfx,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&pfx_1_1_1_2_s_32.fp_addr,
 				~0, // no index provided.
 				fib_index,
@@ -2446,7 +2446,7 @@ fib_test_v4 (void)
 			     &bgp_102,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &pfx_1_1_1_1_s_32.fp_addr,
 			     ~0, // no index provided.
 			     fib_index, // same as route
@@ -2457,7 +2457,7 @@ fib_test_v4 (void)
 			     &bgp_102,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &pfx_1_1_1_2_s_32.fp_addr,
 			     ~0, // no index provided.
 			     fib_index, // same as route's FIB
@@ -2483,7 +2483,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&bgp_102,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&pfx_1_1_1_1_s_32.fp_addr,
 				~0, // no index provided.
 				fib_index, // same as route's FIB
@@ -2492,7 +2492,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&bgp_102,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&pfx_1_1_1_2_s_32.fp_addr,
 				~0, // no index provided.
 				fib_index, // same as route's FIB
@@ -2507,7 +2507,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&bgp_100_pfx,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&pfx_1_1_1_1_s_32.fp_addr,
 				~0, // no index provided.
 				fib_index, // same as route's FIB
@@ -2516,7 +2516,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&bgp_101_pfx,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&pfx_1_1_1_1_s_32.fp_addr,
 				~0, // no index provided.
 				fib_index, // same as route's FIB
@@ -2546,7 +2546,7 @@ fib_test_v4 (void)
 			     &bgp_200_pfx,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_1,
 			     ~0, // no index provided.
 			     fib_index, // Same as route's FIB
@@ -2593,7 +2593,7 @@ fib_test_v4 (void)
 			     &bgp_201_pfx,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_3,
 			     ~0, // no index provided.
 			     fib_index,
@@ -2639,7 +2639,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
     				&bgp_200_pfx,
     				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
     				&nh_10_10_10_1,
     				~0, // no index provided.
 				fib_index, // same as route's FIB
@@ -2648,7 +2648,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
     				&bgp_201_pfx,
     				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
     				&nh_10_10_10_3,
     				~0, // no index provided.
 				fib_index, // same as route's FIB
@@ -2707,7 +2707,7 @@ fib_test_v4 (void)
 			     &pfx_5_5_5_5_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &pfx_5_5_5_6_s_32.fp_addr,
 			     ~0, // no index provided.
 			     fib_index,
@@ -2718,7 +2718,7 @@ fib_test_v4 (void)
 			     &pfx_5_5_5_6_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &pfx_5_5_5_7_s_32.fp_addr,
 			     ~0, // no index provided.
 			     fib_index,
@@ -2729,7 +2729,7 @@ fib_test_v4 (void)
 			     &pfx_5_5_5_7_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &pfx_5_5_5_5_s_32.fp_addr,
 			     ~0, // no index provided.
 			     fib_index,
@@ -2768,7 +2768,7 @@ fib_test_v4 (void)
 			     &pfx_5_5_5_6_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_1,
 			     tm->hw[0]->sw_if_index,
 			     ~0,
@@ -2801,7 +2801,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_5_5_5_6_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_1,
 				tm->hw[0]->sw_if_index,
 				~0,
@@ -2826,7 +2826,7 @@ fib_test_v4 (void)
 				    &pfx_5_5_5_5_s_32,
 				    FIB_SOURCE_API,
 				    FIB_ENTRY_FLAG_NONE,
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    &nh_10_10_10_1,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -2868,7 +2868,7 @@ fib_test_v4 (void)
 				    &pfx_5_5_5_5_s_32,
 				    FIB_SOURCE_API,
 				    FIB_ENTRY_FLAG_NONE,
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    &pfx_5_5_5_6_s_32.fp_addr,
 				    ~0, // no index provided.
 				    fib_index,
@@ -2892,7 +2892,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_5_5_5_5_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&pfx_5_5_5_6_s_32.fp_addr,
 				~0, // no index provided.
 				fib_index, // same as route's FIB
@@ -2901,7 +2901,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_5_5_5_6_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&pfx_5_5_5_7_s_32.fp_addr,
 				~0, // no index provided.
 				fib_index, // same as route's FIB
@@ -2910,7 +2910,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_5_5_5_7_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&pfx_5_5_5_5_s_32.fp_addr,
 				~0, // no index provided.
 				fib_index, // same as route's FIB
@@ -2919,7 +2919,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_5_5_5_6_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_2,
 				~0, // no index provided.
 				fib_index, // same as route's FIB
@@ -2943,7 +2943,7 @@ fib_test_v4 (void)
 			     &pfx_5_5_5_6_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &pfx_5_5_5_6_s_32.fp_addr,
 			     ~0, // no index provided.
 			     fib_index,
@@ -2957,7 +2957,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_5_5_5_6_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&pfx_5_5_5_6_s_32.fp_addr,
 				~0, // no index provided.
 				fib_index, // same as route's FIB
@@ -2991,7 +2991,7 @@ fib_test_v4 (void)
 				   &pfx_23_23_23_0_s_24,
 				   FIB_SOURCE_API,
 				   FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &pfx_23_23_23_23_s_32.fp_addr,
 				   ~0, // recursive
 				   fib_index,
@@ -3021,7 +3021,7 @@ fib_test_v4 (void)
 				   &pfx_0_0_0_0_s_0,
 				   FIB_SOURCE_API,
 				   FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &pfx_23_23_23_23_s_32.fp_addr,
 				   ~0, // recursive
 				   fib_index,
@@ -3051,7 +3051,7 @@ fib_test_v4 (void)
 			     &bgp_200_pfx,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_1_1_1_1,
 			     ~0,
 			     fib_index,
@@ -3081,7 +3081,7 @@ fib_test_v4 (void)
 			     &pfx_1_1_1_0_s_28,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_1,
 			     tm->hw[0]->sw_if_index,
 			     ~0, // invalid fib index
@@ -3099,7 +3099,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_1_1_1_1_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_1,
 				tm->hw[0]->sw_if_index,
 				~0, // invalid fib index
@@ -3116,7 +3116,7 @@ fib_test_v4 (void)
 			     &pfx_1_1_1_1_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_1,
 			     tm->hw[0]->sw_if_index,
 			     ~0, // invalid fib index
@@ -3140,7 +3140,7 @@ fib_test_v4 (void)
 			     &pfx_1_1_1_3_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_2,
 			     tm->hw[0]->sw_if_index,
 			     ~0, // invalid fib index
@@ -3152,7 +3152,7 @@ fib_test_v4 (void)
 			     &bgp_200_pfx,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &pfx_1_1_1_3_s_32.fp_addr,
 			     ~0,
 			     fib_index,
@@ -3177,7 +3177,7 @@ fib_test_v4 (void)
                                  &bgp_78s[ii],
                                  FIB_SOURCE_API,
                                  FIB_ENTRY_FLAG_NONE,
-                                 FIB_PROTOCOL_IP4,
+                                 DPO_PROTO_IP4,
                                  &pfx_1_1_1_3_s_32.fp_addr,
                                  ~0,
                                  fib_index,
@@ -3188,7 +3188,7 @@ fib_test_v4 (void)
                                  &bgp_78s[ii],
                                  FIB_SOURCE_API,
                                  FIB_ENTRY_FLAG_NONE,
-                                 FIB_PROTOCOL_IP4,
+                                 DPO_PROTO_IP4,
                                  &nh_1_1_1_1,
                                  ~0,
                                  fib_index,
@@ -3238,7 +3238,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_1_1_1_1_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_1,
 				tm->hw[0]->sw_if_index,
 				~0, // invalid fib index
@@ -3277,7 +3277,7 @@ fib_test_v4 (void)
 			     &pfx_1_1_1_1_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_1,
 			     tm->hw[0]->sw_if_index,
 			     ~0, // invalid fib index
@@ -3307,7 +3307,7 @@ fib_test_v4 (void)
 			     &bgp_200_pfx,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &pfx_1_1_1_2_s_32.fp_addr,
 			     ~0,
 			     fib_index,
@@ -3320,7 +3320,7 @@ fib_test_v4 (void)
                                  &bgp_78s[ii],
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &pfx_1_1_1_2_s_32.fp_addr,
 			     ~0,
 			     fib_index,
@@ -3354,7 +3354,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_1_1_1_1_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_1,
 				tm->hw[0]->sw_if_index,
 				~0,
@@ -3391,7 +3391,7 @@ fib_test_v4 (void)
                              &pfx_1_1_1_1_s_32,
                              FIB_SOURCE_API,
                              FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
                              &nh_10_10_10_1,
                              tm->hw[0]->sw_if_index,
                              ~0,
@@ -3412,7 +3412,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
                                 &bgp_200_pfx,
                                 FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
                                 &pfx_1_1_1_2_s_32.fp_addr,
                                 ~0,
                                 fib_index,
@@ -3421,7 +3421,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&bgp_200_pfx,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_1_1_1_1,
 				~0,
 				fib_index,
@@ -3430,7 +3430,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&bgp_200_pfx,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&pfx_1_1_1_3_s_32.fp_addr,
 				~0,
 				fib_index,
@@ -3481,7 +3481,7 @@ fib_test_v4 (void)
 			     &pfx_4_4_4_4_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_1,
 			     tm->hw[0]->sw_if_index,
 			     ~0,
@@ -3492,7 +3492,7 @@ fib_test_v4 (void)
 			     &pfx_4_4_4_4_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_2,
 			     tm->hw[0]->sw_if_index,
 			     ~0,
@@ -3503,7 +3503,7 @@ fib_test_v4 (void)
 			     &pfx_4_4_4_4_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_3,
 			     tm->hw[0]->sw_if_index,
 			     ~0,
@@ -3539,7 +3539,7 @@ fib_test_v4 (void)
     for (ii = 0; ii < 4; ii++)
     {
 	fib_route_path_t r_path = {
-	    .frp_proto = FIB_PROTOCOL_IP4,
+	    .frp_proto = DPO_PROTO_IP4,
 	    .frp_addr = {
 		.ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + ii),
 	    },
@@ -3588,7 +3588,7 @@ fib_test_v4 (void)
 			     &pfx_4_4_4_4_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &zero_addr,
 			     ~0,
 			     fib_index,
@@ -3648,7 +3648,7 @@ fib_test_v4 (void)
                                    &pfx_34_34_1_1_s_32,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &nh_10_10_10_1,
                                    tm->hw[0]->sw_if_index,
                                    0,
@@ -3659,7 +3659,7 @@ fib_test_v4 (void)
                                    &pfx_34_1_1_1_s_32,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &pfx_34_34_1_1_s_32.fp_addr,
                                    ~0,
                                    fib_index,
@@ -3670,7 +3670,7 @@ fib_test_v4 (void)
                                    &pfx_34_1_1_1_s_32,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &pfx_34_34_1_1_s_32.fp_addr,
                                    ~0,
                                    fib_index,
@@ -3691,7 +3691,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_1_1_1_2_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_1,
 				tm->hw[0]->sw_if_index,
 				~0,
@@ -3700,7 +3700,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_1_1_1_1_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_1,
 				tm->hw[0]->sw_if_index,
 				~0,
@@ -3709,7 +3709,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_1_1_2_0_s_24,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_1,
 				tm->hw[0]->sw_if_index,
 				~0,
@@ -3751,7 +3751,7 @@ fib_test_v4 (void)
 			     &pfx_4_1_1_1_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &zero_addr,
 			     tm->hw[0]->sw_if_index,
 			     fib_index,
@@ -3805,7 +3805,7 @@ fib_test_v4 (void)
 				   &pfx_2001_s_64,
 				   FIB_SOURCE_API,
 				   FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &nh_10_10_10_1,
 				   tm->hw[0]->sw_if_index,
 				   fib_index,
@@ -3863,7 +3863,7 @@ fib_test_v4 (void)
                              &pfx_12_10_10_2_s_32,
                              FIB_SOURCE_ADJ,
                              FIB_ENTRY_FLAG_ATTACHED,
-                             FIB_PROTOCOL_IP4,
+                             DPO_PROTO_IP4,
                              &pfx_12_10_10_2_s_32.fp_addr,
                              tm->hw[0]->sw_if_index,
                              ~0, // invalid fib index
@@ -3897,7 +3897,7 @@ fib_test_v4 (void)
                              &pfx_10_10_10_127_s_32,
                              FIB_SOURCE_ADJ,
                              FIB_ENTRY_FLAG_ATTACHED,
-                             FIB_PROTOCOL_IP4,
+                             DPO_PROTO_IP4,
                              &pfx_10_10_10_127_s_32.fp_addr,
                              tm->hw[1]->sw_if_index,
                              ~0, // invalid fib index
@@ -3945,7 +3945,7 @@ fib_test_v4 (void)
                                    &pfx_10_10_10_3_s_32,
                                    FIB_SOURCE_ADJ,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &nh_10_10_10_3,
                                    tm->hw[0]->sw_if_index,
                                    fib_index,
@@ -3956,7 +3956,7 @@ fib_test_v4 (void)
                                    &pfx_10_10_10_3_s_32,
                                    FIB_SOURCE_ADJ,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &nh_12_12_12_12,
                                    tm->hw[1]->sw_if_index,
                                    fib_index,
@@ -3975,7 +3975,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
                                 &pfx_10_10_10_3_s_32,
                                 FIB_SOURCE_ADJ,
-                                FIB_PROTOCOL_IP4,
+                                DPO_PROTO_IP4,
                                 &nh_10_10_10_3,
                                 tm->hw[0]->sw_if_index,
                                 fib_index,
@@ -3992,7 +3992,7 @@ fib_test_v4 (void)
                                    &pfx_10_10_10_3_s_32,
                                    FIB_SOURCE_ADJ,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &nh_10_10_10_3,
                                    tm->hw[0]->sw_if_index,
                                    fib_index,
@@ -4011,7 +4011,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
                                 &pfx_10_10_10_3_s_32,
                                 FIB_SOURCE_ADJ,
-                                FIB_PROTOCOL_IP4,
+                                DPO_PROTO_IP4,
                                 &nh_12_12_12_12,
                                 tm->hw[1]->sw_if_index,
                                 fib_index,
@@ -4030,7 +4030,7 @@ fib_test_v4 (void)
     fib_table_entry_path_remove(fib_index,
                                 &pfx_10_10_10_3_s_32,
                                 FIB_SOURCE_ADJ,
-                                FIB_PROTOCOL_IP4,
+                                DPO_PROTO_IP4,
                                 &nh_10_10_10_3,
                                 tm->hw[0]->sw_if_index,
                                 fib_index,
@@ -4269,7 +4269,7 @@ fib_test_v6 (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_ATTACHED),
-				    FIB_PROTOCOL_IP6,
+				    DPO_PROTO_IP6,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0,
@@ -4300,7 +4300,7 @@ fib_test_v6 (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_LOCAL),
-				    FIB_PROTOCOL_IP6,
+				    DPO_PROTO_IP6,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -4345,7 +4345,7 @@ fib_test_v6 (void)
     fib_table_entry_path_add(fib_index, &pfx_0_0,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP6,
+			     DPO_PROTO_IP6,
 			     &nh_2001_2,
 			     tm->hw[0]->sw_if_index,
 			     ~0,
@@ -4389,7 +4389,7 @@ fib_test_v6 (void)
      */
     fib_table_entry_path_remove(fib_index, &pfx_0_0,
 				FIB_SOURCE_API,	
-				FIB_PROTOCOL_IP6,
+				DPO_PROTO_IP6,
 				&nh_2001_2,
 				tm->hw[0]->sw_if_index,
 				~0,
@@ -4466,7 +4466,7 @@ fib_test_v6 (void)
                              &pfx_2001_1_2_s_128,
                              FIB_SOURCE_ADJ,
                              FIB_ENTRY_FLAG_ATTACHED,
-                             FIB_PROTOCOL_IP6,
+                             DPO_PROTO_IP6,
                              &pfx_2001_1_2_s_128.fp_addr,
                              tm->hw[0]->sw_if_index,
                              ~0,
@@ -4505,7 +4505,7 @@ fib_test_v6 (void)
                              &pfx_2001_1_3_s_128,
                              FIB_SOURCE_ADJ,
                              FIB_ENTRY_FLAG_ATTACHED,
-                             FIB_PROTOCOL_IP6,
+                             DPO_PROTO_IP6,
                              &pfx_2001_1_3_s_128.fp_addr,
                              tm->hw[0]->sw_if_index,
                              ~0,
@@ -4559,7 +4559,7 @@ fib_test_v6 (void)
 			     &pfx_2001_a_s_64,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP6,
+			     DPO_PROTO_IP6,
 			     &nh_2001_2,
 			     tm->hw[0]->sw_if_index,
 			     ~0,
@@ -4573,7 +4573,7 @@ fib_test_v6 (void)
 			     &pfx_2001_b_s_64,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP6,
+			     DPO_PROTO_IP6,
 			     &nh_2001_2,
 			     tm->hw[0]->sw_if_index,
 			     ~0,
@@ -4608,7 +4608,7 @@ fib_test_v6 (void)
 				   &pfx_1_1_1_1_s_32,
 				   FIB_SOURCE_API,
 				   FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP6,
+				   DPO_PROTO_IP6,
 				   &nh_2001_2,
 				   tm->hw[0]->sw_if_index,
 				   ~0,
@@ -4646,7 +4646,7 @@ fib_test_v6 (void)
 			     &pfx_2001_c_s_64,
 			     FIB_SOURCE_CLI,
 			     FIB_ENTRY_FLAG_ATTACHED,
-			     FIB_PROTOCOL_IP6,
+			     DPO_PROTO_IP6,
 			     NULL,
 			     tm->hw[0]->sw_if_index,
 			     ~0,
@@ -4663,7 +4663,7 @@ fib_test_v6 (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_2001_c_s_64,
 				FIB_SOURCE_CLI,
-				FIB_PROTOCOL_IP6,
+				DPO_PROTO_IP6,
 				NULL,
 				tm->hw[0]->sw_if_index,
 				~0,
@@ -4748,7 +4748,7 @@ fib_test_v6 (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_ATTACHED),
-				    FIB_PROTOCOL_IP6,
+				    DPO_PROTO_IP6,
 				    NULL,
 				    tm->hw[1]->sw_if_index,
 				    ~0,
@@ -4767,7 +4767,7 @@ fib_test_v6 (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_LOCAL),
-				    FIB_PROTOCOL_IP6,
+				    DPO_PROTO_IP6,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -5095,7 +5095,7 @@ fib_test_ae (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_ATTACHED),
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0,
@@ -5111,7 +5111,7 @@ fib_test_ae (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_LOCAL),
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -5140,7 +5140,7 @@ fib_test_ae (void)
                              &pfx_10_10_10_1_s_32,
                              FIB_SOURCE_ADJ,
                              FIB_ENTRY_FLAG_ATTACHED,
-                             FIB_PROTOCOL_IP4,
+                             DPO_PROTO_IP4,
                              &pfx_10_10_10_1_s_32.fp_addr,
                              tm->hw[0]->sw_if_index,
                              ~0, // invalid fib index
@@ -5167,7 +5167,7 @@ fib_test_ae (void)
 				    &local_pfx,
 				    FIB_SOURCE_API,
 				    FIB_ENTRY_FLAG_NONE,
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -5209,7 +5209,7 @@ fib_test_ae (void)
                              &pfx_10_10_10_2_s_32,
                              FIB_SOURCE_ADJ,
                              FIB_ENTRY_FLAG_ATTACHED,
-                             FIB_PROTOCOL_IP4,
+                             DPO_PROTO_IP4,
                              &pfx_10_10_10_2_s_32.fp_addr,
                              tm->hw[0]->sw_if_index,
                              ~0, // invalid fib index
@@ -5243,7 +5243,7 @@ fib_test_ae (void)
 				    &local_pfx,
 				    FIB_SOURCE_API,
 				    FIB_ENTRY_FLAG_NONE,
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -5280,7 +5280,7 @@ fib_test_ae (void)
                              &pfx_10_10_10_3_s_32,
                              FIB_SOURCE_ADJ,
                              FIB_ENTRY_FLAG_ATTACHED,
-                             FIB_PROTOCOL_IP4,
+                             DPO_PROTO_IP4,
                              &pfx_10_10_10_3_s_32.fp_addr,
                              tm->hw[0]->sw_if_index,
                              ~0, // invalid fib index
@@ -5352,7 +5352,7 @@ fib_test_ae (void)
 				    &local_pfx,
 				    FIB_SOURCE_API,
 				    FIB_ENTRY_FLAG_NONE,
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    &pfx_10_10_10_2_s_32.fp_addr,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -5375,7 +5375,7 @@ fib_test_ae (void)
 				    &local_pfx,
 				    FIB_SOURCE_API,
 				    FIB_ENTRY_FLAG_NONE,
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -5407,7 +5407,7 @@ fib_test_ae (void)
                                           &pfx_10_0_0_0_s_8,
                                           FIB_SOURCE_API,
                                           FIB_ENTRY_FLAG_NONE,
-					  FIB_PROTOCOL_IP4,
+					  DPO_PROTO_IP4,
                                           &pfx_10_10_10_3_s_32.fp_addr,
                                           tm->hw[0]->sw_if_index,
                                           ~0, // invalid fib index
@@ -5463,7 +5463,7 @@ fib_test_ae (void)
                                           &local_pfx,
                                           FIB_SOURCE_API,
                                           FIB_ENTRY_FLAG_NONE,
-					  FIB_PROTOCOL_IP4,
+					  DPO_PROTO_IP4,
                                           &pfx_10_10_10_1_s_32.fp_addr,
                                           tm->hw[0]->sw_if_index,
                                           ~0, // invalid fib index
@@ -5499,7 +5499,7 @@ fib_test_ae (void)
 				    &local_pfx,
 				    FIB_SOURCE_API,
 				    FIB_ENTRY_FLAG_NONE,
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -5538,7 +5538,7 @@ fib_test_ae (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_ATTACHED),
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0,
@@ -5626,7 +5626,7 @@ fib_test_pref (void)
      * 2 high, 2 medium and 2 low preference non-recursive paths
      */
     fib_route_path_t nr_path_hi_1 = {
-        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_proto = DPO_PROTO_IP4,
         .frp_sw_if_index = tm->hw[0]->sw_if_index,
         .frp_fib_index = ~0,
         .frp_weight = 1,
@@ -5637,7 +5637,7 @@ fib_test_pref (void)
         },
     };
     fib_route_path_t nr_path_hi_2 = {
-        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_proto = DPO_PROTO_IP4,
         .frp_sw_if_index = tm->hw[0]->sw_if_index,
         .frp_fib_index = ~0,
         .frp_weight = 1,
@@ -5648,7 +5648,7 @@ fib_test_pref (void)
         },
     };
     fib_route_path_t nr_path_med_1 = {
-        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_proto = DPO_PROTO_IP4,
         .frp_sw_if_index = tm->hw[1]->sw_if_index,
         .frp_fib_index = ~0,
         .frp_weight = 1,
@@ -5659,7 +5659,7 @@ fib_test_pref (void)
         },
     };
     fib_route_path_t nr_path_med_2 = {
-        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_proto = DPO_PROTO_IP4,
         .frp_sw_if_index = tm->hw[1]->sw_if_index,
         .frp_fib_index = ~0,
         .frp_weight = 1,
@@ -5670,7 +5670,7 @@ fib_test_pref (void)
         },
     };
     fib_route_path_t nr_path_low_1 = {
-        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_proto = DPO_PROTO_IP4,
         .frp_sw_if_index = tm->hw[2]->sw_if_index,
         .frp_fib_index = ~0,
         .frp_weight = 1,
@@ -5681,7 +5681,7 @@ fib_test_pref (void)
         },
     };
     fib_route_path_t nr_path_low_2 = {
-        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_proto = DPO_PROTO_IP4,
         .frp_sw_if_index = tm->hw[2]->sw_if_index,
         .frp_fib_index = ~0,
         .frp_weight = 1,
@@ -5897,7 +5897,7 @@ fib_test_pref (void)
         },
     };
     fib_route_path_t r_path_hi = {
-        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_proto = DPO_PROTO_IP4,
         .frp_sw_if_index = ~0,
         .frp_fib_index = 0,
         .frp_weight = 1,
@@ -5906,7 +5906,7 @@ fib_test_pref (void)
         .frp_addr = pfx_1_1_1_1_s_32.fp_addr,
     };
     fib_route_path_t r_path_med = {
-        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_proto = DPO_PROTO_IP4,
         .frp_sw_if_index = ~0,
         .frp_fib_index = 0,
         .frp_weight = 1,
@@ -5915,7 +5915,7 @@ fib_test_pref (void)
         .frp_addr = pfx_1_1_1_2_s_32.fp_addr,
     };
     fib_route_path_t r_path_low = {
-        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_proto = DPO_PROTO_IP4,
         .frp_sw_if_index = ~0,
         .frp_fib_index = 0,
         .frp_weight = 1,
@@ -6099,7 +6099,7 @@ fib_test_label (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_ATTACHED),
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0,
@@ -6115,7 +6115,7 @@ fib_test_label (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_LOCAL),
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -6145,7 +6145,7 @@ fib_test_label (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_ATTACHED),
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[1]->sw_if_index,
 				    ~0,
@@ -6161,7 +6161,7 @@ fib_test_label (void)
 				    FIB_SOURCE_INTERFACE,
 				    (FIB_ENTRY_FLAG_CONNECTED |
 				     FIB_ENTRY_FLAG_LOCAL),
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    NULL,
 				    tm->hw[1]->sw_if_index,
 				    ~0, // invalid fib index
@@ -6243,7 +6243,7 @@ fib_test_label (void)
 				    &pfx_1_1_1_1_s_32,
 				    FIB_SOURCE_API,
 				    FIB_ENTRY_FLAG_NONE,
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    &nh_10_10_10_1,
 				    tm->hw[0]->sw_if_index,
 				    ~0, // invalid fib index
@@ -6282,7 +6282,7 @@ fib_test_label (void)
 				   &pfx_1_1_1_1_s_32,
 				   FIB_SOURCE_API,
 				   FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &nh_10_10_11_1,
 				   tm->hw[1]->sw_if_index,
 				   ~0, // invalid fib index
@@ -6360,7 +6360,7 @@ fib_test_label (void)
 				   &pfx_1_1_1_1_s_32,
 				   FIB_SOURCE_API,
 				   FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &nh_10_10_11_2,
 				   tm->hw[1]->sw_if_index,
 				   ~0, // invalid fib index
@@ -6440,7 +6440,7 @@ fib_test_label (void)
 				    &pfx_2_2_2_2_s_32,
 				    FIB_SOURCE_API,
 				    FIB_ENTRY_FLAG_NONE,
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    &pfx_1_1_1_1_s_32.fp_addr,
 				    ~0,
 				    fib_index,
@@ -6612,7 +6612,7 @@ fib_test_label (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_1_1_1_1_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_10_1,
 				tm->hw[0]->sw_if_index,
 				~0, // invalid fib index
@@ -6669,7 +6669,7 @@ fib_test_label (void)
     fib_table_entry_path_remove(fib_index,
 				&pfx_1_1_1_1_s_32,
 				FIB_SOURCE_API,
-				FIB_PROTOCOL_IP4,
+				DPO_PROTO_IP4,
 				&nh_10_10_11_1,
 				tm->hw[1]->sw_if_index,
 				~0, // invalid fib index
@@ -6711,7 +6711,7 @@ fib_test_label (void)
 			     &pfx_1_1_1_1_s_32,
 			     FIB_SOURCE_API,
 			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
+			     DPO_PROTO_IP4,
 			     &nh_10_10_10_1,
 			     tm->hw[0]->sw_if_index,
 			     ~0, // invalid fib index
@@ -6842,7 +6842,7 @@ fib_test_label (void)
 					  &pfx_1_1_1_2_s_32,
 					  FIB_SOURCE_API,
 					  FIB_ENTRY_FLAG_NONE,
-					  FIB_PROTOCOL_IP4,
+					  DPO_PROTO_IP4,
 					  &nh_10_10_10_1,
 					  tm->hw[0]->sw_if_index,
 					  ~0, // invalid fib index
@@ -6884,7 +6884,7 @@ fib_test_label (void)
 				   &pfx_2_2_2_2_s_32,
 				   FIB_SOURCE_API,
 				   FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &pfx_1_1_1_2_s_32.fp_addr,
 				   ~0,
 				   fib_index,
@@ -6912,7 +6912,7 @@ fib_test_label (void)
 					  &pfx_1_1_1_2_s_32,
 					  FIB_SOURCE_API,
 					  FIB_ENTRY_FLAG_NONE,
-					  FIB_PROTOCOL_IP4,
+					  DPO_PROTO_IP4,
 					  &nh_10_10_11_1,
 					  tm->hw[1]->sw_if_index,
 					  ~0, // invalid fib index
@@ -6945,7 +6945,7 @@ fib_test_label (void)
 					  &pfx_1_1_1_2_s_32,
 					  FIB_SOURCE_API,
 					  FIB_ENTRY_FLAG_NONE,
-					  FIB_PROTOCOL_IP4,
+					  DPO_PROTO_IP4,
 					  &nh_10_10_11_1,
 					  tm->hw[1]->sw_if_index,
 					  ~0, // invalid fib index
@@ -6987,7 +6987,7 @@ fib_test_label (void)
 				    &pfx_2_2_2_3_s_32,
 				    FIB_SOURCE_API,
 				    FIB_ENTRY_FLAG_NONE,
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    &pfx_1_1_1_1_s_32.fp_addr,
 				    ~0,
 				    fib_index,
@@ -7031,7 +7031,7 @@ fib_test_label (void)
 				    &pfx_2_2_2_4_s_32,
 				    FIB_SOURCE_API,
 				    FIB_ENTRY_FLAG_NONE,
-				    FIB_PROTOCOL_IP4,
+				    DPO_PROTO_IP4,
 				    &pfx_1_1_1_1_s_32.fp_addr,
 				    ~0,
 				    fib_index,
@@ -7081,7 +7081,7 @@ fib_test_label (void)
 					  &pfx_2_2_5_5_s_32,
 					  FIB_SOURCE_API,
 					  FIB_ENTRY_FLAG_NONE,
-					  FIB_PROTOCOL_IP4,
+					  DPO_PROTO_IP4,
 					  &nh_10_10_11_1,
 					  tm->hw[1]->sw_if_index,
 					  ~0, // invalid fib index
@@ -7689,7 +7689,7 @@ fib_test_bfd (void)
                                     FIB_SOURCE_INTERFACE,
                                     (FIB_ENTRY_FLAG_CONNECTED |
                                      FIB_ENTRY_FLAG_ATTACHED),
-                                    FIB_PROTOCOL_IP4,
+                                    DPO_PROTO_IP4,
                                     NULL,
                                     tm->hw[0]->sw_if_index,
                                     ~0, // invalid fib index
@@ -7706,7 +7706,7 @@ fib_test_bfd (void)
                                     FIB_SOURCE_INTERFACE,
                                     (FIB_ENTRY_FLAG_CONNECTED |
                                      FIB_ENTRY_FLAG_LOCAL),
-                                    FIB_PROTOCOL_IP4,
+                                    DPO_PROTO_IP4,
                                     NULL,
                                     tm->hw[0]->sw_if_index,
                                     ~0, // invalid fib index
@@ -7780,7 +7780,7 @@ fib_test_bfd (void)
                                    &pfx_10_10_10_1_s_32,
                                    FIB_SOURCE_ADJ,
                                    FIB_ENTRY_FLAG_ATTACHED,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &nh_10_10_10_1,
                                    tm->hw[0]->sw_if_index,
                                    ~0, // invalid fib index
@@ -7819,7 +7819,7 @@ fib_test_bfd (void)
                              &pfx_10_10_10_2_s_32,
                              FIB_SOURCE_ADJ,
                              FIB_ENTRY_FLAG_ATTACHED,
-                             FIB_PROTOCOL_IP4,
+                             DPO_PROTO_IP4,
                              &nh_10_10_10_2,
                              tm->hw[0]->sw_if_index,
                              ~0, // invalid fib index
@@ -7851,7 +7851,7 @@ fib_test_bfd (void)
                              &pfx_10_10_10_2_s_32,
                              FIB_SOURCE_ADJ,
                              FIB_ENTRY_FLAG_ATTACHED,
-                             FIB_PROTOCOL_IP4,
+                             DPO_PROTO_IP4,
                              &nh_10_10_10_2,
                              tm->hw[0]->sw_if_index,
                              ~0, // invalid fib index
@@ -7907,7 +7907,7 @@ fib_test_bfd (void)
                                    &pfx_200_0_0_0_s_24,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &nh_10_10_10_2,
                                    ~0, // recursive
                                    0, // default fib index
@@ -7926,7 +7926,7 @@ fib_test_bfd (void)
                                    &pfx_200_0_0_0_s_24,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &nh_10_10_10_1,
                                    ~0, // recursive
                                    0, // default fib index
@@ -8065,7 +8065,7 @@ fib_test_bfd (void)
                                    &pfx_5_5_5_5_s_32,
                                    FIB_SOURCE_CLI,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &nh_10_10_10_1,
                                    tm->hw[0]->sw_if_index,
                                    ~0, // invalid fib index
@@ -8096,7 +8096,7 @@ fib_test_bfd (void)
                                    &pfx_5_5_5_5_s_32,
                                    FIB_SOURCE_CLI,
                                    FIB_ENTRY_FLAG_NONE,
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &nh_10_10_10_2,
                                    tm->hw[0]->sw_if_index,
                                    ~0, // invalid fib index
@@ -8234,7 +8234,7 @@ lfib_test (void)
 				   &pfx,
 				   FIB_SOURCE_CLI,
 				   FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &zero_addr,
 				   ~0,
 				   fib_index,
@@ -8285,7 +8285,7 @@ lfib_test (void)
 				   &pfx,
 				   FIB_SOURCE_CLI,
 				   FIB_ENTRY_FLAG_NONE,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
 				   &zero_addr,
 				   ~0,
 				   lfib_index,
@@ -8363,7 +8363,7 @@ lfib_test (void)
 					  &pfx_1200,
 					  FIB_SOURCE_API,
 					  FIB_ENTRY_FLAG_NONE,
-					  FIB_PROTOCOL_IP4,
+					  DPO_PROTO_IP4,
 					  &nh_10_10_10_1,
 					  tm->hw[0]->sw_if_index,
 					  ~0, // invalid fib index
@@ -8389,7 +8389,7 @@ lfib_test (void)
     	},
     };
     fib_route_path_t *rpaths = NULL, rpath = {
-    	.frp_proto = FIB_PROTOCOL_MPLS,
+        .frp_proto = DPO_PROTO_MPLS,
     	.frp_local_label = 1200,
         .frp_eos = MPLS_NON_EOS,
     	.frp_sw_if_index = ~0, // recurive
@@ -8545,7 +8545,7 @@ lfib_test (void)
 					  &pfx_2500,
 					  FIB_SOURCE_API,
 					  FIB_ENTRY_FLAG_NONE,
-					  FIB_PROTOCOL_IP4,
+					  DPO_PROTO_IP4,
 					  NULL,
 					  tm->hw[0]->sw_if_index,
 					  ~0, // invalid fib index
@@ -8590,7 +8590,7 @@ lfib_test (void)
 					  &pfx_3500,
 					  FIB_SOURCE_API,
 					  FIB_ENTRY_FLAG_MULTICAST,
-					  FIB_PROTOCOL_IP4,
+					  DPO_PROTO_IP4,
 					  &nh_10_10_10_1,
 					  tm->hw[0]->sw_if_index,
 					  ~0, // invalid fib index
@@ -8610,7 +8610,7 @@ lfib_test (void)
 				   &pfx_3500,
 				   FIB_SOURCE_API,
 				   FIB_ENTRY_FLAG_MULTICAST,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
                                    NULL,
                                    tm->hw[0]->sw_if_index,
                                    ~0, // invalid fib index
@@ -8637,7 +8637,7 @@ lfib_test (void)
 				   &pfx_3500,
 				   FIB_SOURCE_API,
 				   FIB_ENTRY_FLAG_MULTICAST,
-				   FIB_PROTOCOL_IP4,
+				   DPO_PROTO_IP4,
                                    NULL,
                                    5, // rpf-id
                                    0, // default table
diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h
index a209ff3c..f11a55da 100644
--- a/src/vnet/fib/fib_types.h
+++ b/src/vnet/fib/fib_types.h
@@ -32,9 +32,9 @@ typedef u32 fib_node_index_t;
  * Protocol Type. packed so it consumes a u8 only
  */
 typedef enum fib_protocol_t_ {
-    FIB_PROTOCOL_IP4 = 0,
-    FIB_PROTOCOL_IP6,
-    FIB_PROTOCOL_MPLS,
+    FIB_PROTOCOL_IP4 = DPO_PROTO_IP4,
+    FIB_PROTOCOL_IP6 = DPO_PROTO_IP6,
+    FIB_PROTOCOL_MPLS = DPO_PROTO_MPLS,
 }  __attribute__ ((packed)) fib_protocol_t;
 
 #define FIB_PROTOCOLS {			\
@@ -338,7 +338,7 @@ typedef struct fib_route_path_t_ {
      * The protocol of the address below. We need this since the all
      * zeros address is ambiguous.
      */
-    fib_protocol_t frp_proto;
+    dpo_proto_t frp_proto;
 
     union {
 	/**
diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c
index df7e9388..5694bb2f 100644
--- a/src/vnet/interface_format.c
+++ b/src/vnet/interface_format.c
@@ -165,9 +165,15 @@ format_vnet_sw_if_index_name (u8 * s, va_list * args)
 {
   vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
   u32 sw_if_index = va_arg (*args, u32);
-  return format (s, "%U",
-		 format_vnet_sw_interface_name, vnm,
-		 vnet_get_sw_interface (vnm, sw_if_index));
+  vnet_sw_interface_t *si;
+
+  si = vnet_get_sw_interface_safe (vnm, sw_if_index);
+
+  if (NULL == si)
+    {
+      return format (s, "DELETED");
+    }
+  return format (s, "%U", format_vnet_sw_interface_name, vnm, si);
 }
 
 u8 *
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index ee17ea88..7a8d7a0c 100755
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -724,7 +724,7 @@ ip4_add_interface_routes (u32 sw_if_index,
                                        FIB_SOURCE_INTERFACE,
                                        (FIB_ENTRY_FLAG_CONNECTED |
                                         FIB_ENTRY_FLAG_ATTACHED),
-                                       FIB_PROTOCOL_IP4,
+                                       DPO_PROTO_IP4,
                                        /* No next-hop address */
                                        NULL,
                                        sw_if_index,
@@ -767,7 +767,7 @@ ip4_add_interface_routes (u32 sw_if_index,
       fib_table_entry_update_one_path (fib_index, &net_pfx,
                                        FIB_SOURCE_INTERFACE,
                                        (FIB_ENTRY_FLAG_ATTACHED),
-                                       FIB_PROTOCOL_IP4,
+                                       DPO_PROTO_IP4,
                                        &net_pfx.fp_addr,
                                        sw_if_index,
                                        // invalid FIB index
@@ -803,7 +803,7 @@ ip4_add_interface_routes (u32 sw_if_index,
                                    FIB_SOURCE_INTERFACE,
                                    (FIB_ENTRY_FLAG_CONNECTED |
                                     FIB_ENTRY_FLAG_LOCAL),
-                                   FIB_PROTOCOL_IP4,
+                                   DPO_PROTO_IP4,
                                    &pfx.fp_addr,
                                    sw_if_index,
                                    // invalid FIB index
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index bc66416e..8ae08a01 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -355,7 +355,7 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
 				       FIB_SOURCE_INTERFACE,
 				       (FIB_ENTRY_FLAG_CONNECTED |
 					FIB_ENTRY_FLAG_ATTACHED),
-				       FIB_PROTOCOL_IP6,
+				       DPO_PROTO_IP6,
 				       /* No next-hop address */
 				       NULL, sw_if_index,
 				       /* invalid FIB index */
@@ -390,7 +390,7 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
 				   FIB_SOURCE_INTERFACE,
 				   (FIB_ENTRY_FLAG_CONNECTED |
 				    FIB_ENTRY_FLAG_LOCAL),
-				   FIB_PROTOCOL_IP6,
+				   DPO_PROTO_IP6,
 				   &pfx.fp_addr,
 				   sw_if_index, ~0,
 				   1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
index e8eebd4e..6a9139ab 100644
--- a/src/vnet/ip/ip6_neighbor.c
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -284,7 +284,7 @@ ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm,
 		(ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index),
 		 &pfx,
 		 FIB_SOURCE_ADJ,
-		 FIB_PROTOCOL_IP6,
+		 DPO_PROTO_IP6,
 		 &pfx.fp_addr,
 		 n->key.sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
 	      pool_put (nm->neighbor_pool, n);
@@ -645,7 +645,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
 	  n->fib_entry_index =
 	    fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
 				      FIB_ENTRY_FLAG_ATTACHED,
-				      FIB_PROTOCOL_IP6, &pfx.fp_addr,
+				      DPO_PROTO_IP6, &pfx.fp_addr,
 				      n->key.sw_if_index, ~0, 1, NULL,
 				      FIB_ROUTE_PATH_FLAG_NONE);
 	}
@@ -776,7 +776,7 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
 	(ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index),
 	 &pfx,
 	 FIB_SOURCE_ADJ,
-	 FIB_PROTOCOL_IP6,
+	 DPO_PROTO_IP6,
 	 &pfx.fp_addr, n->key.sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
     }
   pool_put (nm->neighbor_pool, n);
@@ -4110,7 +4110,7 @@ ip6_neighbor_proxy_add_del (u32 sw_if_index, ip6_address_t * addr, u8 is_del)
       fib_table_entry_path_remove (fib_index,
 				   &pfx,
 				   FIB_SOURCE_IP6_ND_PROXY,
-				   FIB_PROTOCOL_IP6,
+				   DPO_PROTO_IP6,
 				   &nh,
 				   sw_if_index,
 				   ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
@@ -4124,7 +4124,7 @@ ip6_neighbor_proxy_add_del (u32 sw_if_index, ip6_address_t * addr, u8 is_del)
 				&pfx,
 				FIB_SOURCE_IP6_ND_PROXY,
 				FIB_ENTRY_FLAG_NONE,
-				FIB_PROTOCOL_IP6,
+				DPO_PROTO_IP6,
 				&nh,
 				sw_if_index,
 				~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index 4cbf75a3..0676a387 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -156,9 +156,9 @@ copy_fib_next_hop (fib_route_path_encode_t * api_rpath, void *fp_arg)
   int is_ip4;
   vl_api_fib_path_t *fp = (vl_api_fib_path_t *) fp_arg;
 
-  if (api_rpath->rpath.frp_proto == FIB_PROTOCOL_IP4)
+  if (api_rpath->rpath.frp_proto == DPO_PROTO_IP4)
     fp->afi = IP46_TYPE_IP4;
-  else if (api_rpath->rpath.frp_proto == FIB_PROTOCOL_IP6)
+  else if (api_rpath->rpath.frp_proto == DPO_PROTO_IP6)
     fp->afi = IP46_TYPE_IP6;
   else
     {
@@ -714,7 +714,7 @@ add_del_route_t_handler (u8 is_multipath,
 			 u8 is_rpf_id,
 			 u32 fib_index,
 			 const fib_prefix_t * prefix,
-			 u8 next_hop_proto_is_ip4,
+			 dpo_proto_t next_hop_proto,
 			 const ip46_address_t * next_hop,
 			 u32 next_hop_sw_if_index,
 			 u8 next_hop_fib_index,
@@ -726,8 +726,7 @@ add_del_route_t_handler (u8 is_multipath,
   vnet_classify_main_t *cm = &vnet_classify_main;
   fib_route_path_flags_t path_flags = FIB_ROUTE_PATH_FLAG_NONE;
   fib_route_path_t path = {
-    .frp_proto = (next_hop_proto_is_ip4 ?
-		  FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6),
+    .frp_proto = next_hop_proto,
     .frp_addr = (NULL == next_hop ? zero_addr : *next_hop),
     .frp_sw_if_index = next_hop_sw_if_index,
     .frp_fib_index = next_hop_fib_index,
@@ -740,7 +739,7 @@ add_del_route_t_handler (u8 is_multipath,
 
   if (MPLS_LABEL_INVALID != next_hop_via_label)
     {
-      path.frp_proto = FIB_PROTOCOL_MPLS;
+      path.frp_proto = DPO_PROTO_MPLS;
       path.frp_local_label = next_hop_via_label;
       path.frp_eos = MPLS_NON_EOS;
     }
@@ -855,7 +854,7 @@ int
 add_del_route_check (fib_protocol_t table_proto,
 		     u32 table_id,
 		     u32 next_hop_sw_if_index,
-		     fib_protocol_t next_hop_table_proto,
+		     dpo_proto_t next_hop_table_proto,
 		     u32 next_hop_table_id,
 		     u8 create_missing_tables,
 		     u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index)
@@ -887,11 +886,18 @@ add_del_route_check (fib_protocol_t table_proto,
     }
   else
     {
+      fib_protocol_t fib_nh_proto;
+
+      if (next_hop_table_proto > DPO_PROTO_MPLS)
+	return (0);
+
+      fib_nh_proto = dpo_proto_to_fib (next_hop_table_proto);
+
       if (is_rpf_id)
-	*next_hop_fib_index = mfib_table_find (next_hop_table_proto,
+	*next_hop_fib_index = mfib_table_find (fib_nh_proto,
 					       ntohl (next_hop_table_id));
       else
-	*next_hop_fib_index = fib_table_find (next_hop_table_proto,
+	*next_hop_fib_index = fib_table_find (fib_nh_proto,
 					      ntohl (next_hop_table_id));
 
       if (~0 == *next_hop_fib_index)
@@ -900,12 +906,12 @@ add_del_route_check (fib_protocol_t table_proto,
 	    {
 	      if (is_rpf_id)
 		*next_hop_fib_index =
-		  mfib_table_find_or_create_and_lock (next_hop_table_proto,
+		  mfib_table_find_or_create_and_lock (fib_nh_proto,
 						      ntohl
 						      (next_hop_table_id));
 	      else
 		*next_hop_fib_index =
-		  fib_table_find_or_create_and_lock (next_hop_table_proto,
+		  fib_table_find_or_create_and_lock (fib_nh_proto,
 						     ntohl
 						     (next_hop_table_id));
 	    }
@@ -930,7 +936,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
   rv = add_del_route_check (FIB_PROTOCOL_IP4,
 			    mp->table_id,
 			    mp->next_hop_sw_if_index,
-			    FIB_PROTOCOL_IP4,
+			    DPO_PROTO_IP4,
 			    mp->next_hop_table_id,
 			    mp->create_vrf_if_needed, 0,
 			    &fib_index, &next_hop_fib_index);
@@ -970,7 +976,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
 				   mp->classify_table_index,
 				   mp->is_resolve_host,
 				   mp->is_resolve_attached, 0, 0,
-				   fib_index, &pfx, 1,
+				   fib_index, &pfx, DPO_PROTO_IP4,
 				   &nh,
 				   ntohl (mp->next_hop_sw_if_index),
 				   next_hop_fib_index,
@@ -990,7 +996,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
   rv = add_del_route_check (FIB_PROTOCOL_IP6,
 			    mp->table_id,
 			    mp->next_hop_sw_if_index,
-			    FIB_PROTOCOL_IP6,
+			    DPO_PROTO_IP6,
 			    mp->next_hop_table_id,
 			    mp->create_vrf_if_needed, 0,
 			    &fib_index, &next_hop_fib_index);
@@ -1030,7 +1036,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
 				   mp->classify_table_index,
 				   mp->is_resolve_host,
 				   mp->is_resolve_attached, 0, 0,
-				   fib_index, &pfx, 0,
+				   fib_index, &pfx, DPO_PROTO_IP6,
 				   &nh, ntohl (mp->next_hop_sw_if_index),
 				   next_hop_fib_index,
 				   mp->next_hop_weight,
@@ -1106,7 +1112,7 @@ mroute_add_del_handler (u8 is_add,
 
   fib_route_path_t path = {
     .frp_sw_if_index = next_hop_sw_if_index,
-    .frp_proto = prefix->fp_proto,
+    .frp_proto = fib_proto_to_dpo (prefix->fp_proto),
   };
 
   if (is_local)
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
index 533d010a..41e46070 100755
--- a/src/vnet/ip/lookup.c
+++ b/src/vnet/ip/lookup.c
@@ -423,7 +423,7 @@ vnet_ip_route_cmd (vlib_main_t * vm,
 	{
 	  rpath.frp_weight = 1;
 	  rpath.frp_eos = MPLS_NON_EOS;
-	  rpath.frp_proto = FIB_PROTOCOL_MPLS;
+	  rpath.frp_proto = DPO_PROTO_MPLS;
 	  rpath.frp_sw_if_index = ~0;
 	  vec_add1 (rpaths, rpath);
 	}
@@ -449,7 +449,7 @@ vnet_ip_route_cmd (vlib_main_t * vm,
 			 &rpath.frp_sw_if_index))
 	{
 	  rpath.frp_weight = 1;
-	  rpath.frp_proto = FIB_PROTOCOL_IP4;
+	  rpath.frp_proto = DPO_PROTO_IP4;
 	  vec_add1 (rpaths, rpath);
 	}
 
@@ -460,7 +460,7 @@ vnet_ip_route_cmd (vlib_main_t * vm,
 			 &rpath.frp_sw_if_index))
 	{
 	  rpath.frp_weight = 1;
-	  rpath.frp_proto = FIB_PROTOCOL_IP6;
+	  rpath.frp_proto = DPO_PROTO_IP6;
 	  vec_add1 (rpaths, rpath);
 	}
       else if (unformat (line_input, "weight %u", &weight))
@@ -479,7 +479,7 @@ vnet_ip_route_cmd (vlib_main_t * vm,
 	{
 	  rpath.frp_weight = 1;
 	  rpath.frp_sw_if_index = ~0;
-	  rpath.frp_proto = FIB_PROTOCOL_IP4;
+	  rpath.frp_proto = DPO_PROTO_IP4;
 	  vec_add1 (rpaths, rpath);
 	}
       else if (unformat (line_input, "via %U next-hop-table %d",
@@ -488,7 +488,7 @@ vnet_ip_route_cmd (vlib_main_t * vm,
 	{
 	  rpath.frp_weight = 1;
 	  rpath.frp_sw_if_index = ~0;
-	  rpath.frp_proto = FIB_PROTOCOL_IP6;
+	  rpath.frp_proto = DPO_PROTO_IP6;
 	  vec_add1 (rpaths, rpath);
 	}
       else if (unformat (line_input, "via %U",
@@ -501,7 +501,7 @@ vnet_ip_route_cmd (vlib_main_t * vm,
 	  rpath.frp_fib_index = table_id;
 	  rpath.frp_weight = 1;
 	  rpath.frp_sw_if_index = ~0;
-	  rpath.frp_proto = FIB_PROTOCOL_IP4;
+	  rpath.frp_proto = DPO_PROTO_IP4;
 	  vec_add1 (rpaths, rpath);
 	}
       else if (unformat (line_input, "via %U",
@@ -510,13 +510,13 @@ vnet_ip_route_cmd (vlib_main_t * vm,
 	  rpath.frp_fib_index = table_id;
 	  rpath.frp_weight = 1;
 	  rpath.frp_sw_if_index = ~0;
-	  rpath.frp_proto = FIB_PROTOCOL_IP6;
+	  rpath.frp_proto = DPO_PROTO_IP6;
 	  vec_add1 (rpaths, rpath);
 	}
       else if (unformat (line_input,
 			 "lookup in table %d", &rpath.frp_fib_index))
 	{
-	  rpath.frp_proto = pfx.fp_proto;
+	  rpath.frp_proto = fib_proto_to_dpo (pfx.fp_proto);
 	  rpath.frp_sw_if_index = ~0;
 	  vec_add1 (rpaths, rpath);
 	}
@@ -526,7 +526,7 @@ vnet_ip_route_cmd (vlib_main_t * vm,
 			 &rpath.frp_sw_if_index))
 	{
 	  rpath.frp_weight = 1;
-	  rpath.frp_proto = prefixs[0].fp_proto;
+	  rpath.frp_proto = fib_proto_to_dpo (prefixs[0].fp_proto);
 	  vec_add1 (rpaths, rpath);
 	}
       else if (vec_len (prefixs) > 0 &&
diff --git a/src/vnet/lisp-gpe/lisp_gpe.c b/src/vnet/lisp-gpe/lisp_gpe.c
index 0acc7349..018895ad 100644
--- a/src/vnet/lisp-gpe/lisp_gpe.c
+++ b/src/vnet/lisp-gpe/lisp_gpe.c
@@ -454,7 +454,7 @@ vnet_gpe_add_del_native_fwd_rpath (vnet_gpe_native_fwd_rpath_args_t * a)
   fib_route_path_t *rpath;
   u8 ip_version;
 
-  ip_version = a->rpath.frp_proto == FIB_PROTOCOL_IP4 ? IP4 : IP6;
+  ip_version = a->rpath.frp_proto == DPO_PROTO_IP4 ? IP4 : IP6;
 
   if (a->is_add)
     {
@@ -511,7 +511,7 @@ gpe_native_forward_command_fn (vlib_main_t * vm, unformat_input_t * input,
 			 &rpath.frp_sw_if_index))
 	{
 	  rpath.frp_weight = 1;
-	  rpath.frp_proto = FIB_PROTOCOL_IP4;
+	  rpath.frp_proto = DPO_PROTO_IP4;
 	}
       else if (unformat (line_input, "via %U %U",
 			 unformat_ip6_address,
@@ -520,21 +520,21 @@ gpe_native_forward_command_fn (vlib_main_t * vm, unformat_input_t * input,
 			 &rpath.frp_sw_if_index))
 	{
 	  rpath.frp_weight = 1;
-	  rpath.frp_proto = FIB_PROTOCOL_IP6;
+	  rpath.frp_proto = DPO_PROTO_IP6;
 	}
       else if (unformat (line_input, "via %U",
 			 unformat_ip4_address, &rpath.frp_addr.ip4))
 	{
 	  rpath.frp_weight = 1;
 	  rpath.frp_sw_if_index = ~0;
-	  rpath.frp_proto = FIB_PROTOCOL_IP4;
+	  rpath.frp_proto = DPO_PROTO_IP4;
 	}
       else if (unformat (line_input, "via %U",
 			 unformat_ip6_address, &rpath.frp_addr.ip6))
 	{
 	  rpath.frp_weight = 1;
 	  rpath.frp_sw_if_index = ~0;
-	  rpath.frp_proto = FIB_PROTOCOL_IP6;
+	  rpath.frp_proto = DPO_PROTO_IP6;
 	}
       else
 	{
@@ -549,7 +549,8 @@ gpe_native_forward_command_fn (vlib_main_t * vm, unformat_input_t * input,
     }
   else
     {
-      rpath.frp_fib_index = fib_table_find (rpath.frp_proto, table_id);
+      rpath.frp_fib_index =
+	fib_table_find (dpo_proto_to_fib (rpath.frp_proto), table_id);
       if ((u32) ~ 0 == rpath.frp_fib_index)
 	{
 	  error = clib_error_return (0, "Nonexistent table id %d", table_id);
diff --git a/src/vnet/lisp-gpe/lisp_gpe_api.c b/src/vnet/lisp-gpe/lisp_gpe_api.c
index f1663699..4367a719 100644
--- a/src/vnet/lisp-gpe/lisp_gpe_api.c
+++ b/src/vnet/lisp-gpe/lisp_gpe_api.c
@@ -455,10 +455,10 @@ static void
     clib_memcpy (&a->rpath.frp_addr.ip6, mp->nh_addr, sizeof (ip6_address_t));
 
   a->is_add = mp->is_add;
-  a->rpath.frp_proto = mp->is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
-  a->rpath.frp_fib_index = fib_table_find (a->rpath.frp_proto,
-					   clib_net_to_host_u32
-					   (mp->table_id));
+  a->rpath.frp_proto = mp->is_ip4 ? DPO_PROTO_IP4 : DPO_PROTO_IP6;
+  a->rpath.frp_fib_index =
+    fib_table_find (dpo_proto_to_fib (a->rpath.frp_proto),
+		    clib_net_to_host_u32 (mp->table_id));
   if (~0 == a->rpath.frp_fib_index)
     {
       rv = VNET_API_ERROR_INVALID_VALUE;
@@ -484,7 +484,7 @@ gpe_native_fwd_rpaths_copy (vl_api_gpe_native_fwd_rpath_t * dst,
   vec_foreach (e, src)
   {
     memset (&dst[i], 0, sizeof (*dst));
-    table = fib_table_get (e->frp_fib_index, e->frp_proto);
+    table = fib_table_get (e->frp_fib_index, dpo_proto_to_fib (e->frp_proto));
     dst[i].fib_index = table->ft_table_id;
     dst[i].nh_sw_if_index = e->frp_sw_if_index;
     dst[i].is_ip4 = is_ip4;
diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
index 395b493a..ac048149 100644
--- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
+++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
@@ -225,6 +225,7 @@ lisp_gpe_mk_fib_paths (const lisp_fwd_path_t * paths)
 {
   const lisp_gpe_adjacency_t *ladj;
   fib_route_path_t *rpaths = NULL;
+  fib_protocol_t fp;
   u8 best_priority;
   u32 ii;
 
@@ -239,9 +240,9 @@ lisp_gpe_mk_fib_paths (const lisp_fwd_path_t * paths)
 
     ladj = lisp_gpe_adjacency_get (paths[ii].lisp_adj);
 
-    ip_address_to_46 (&ladj->remote_rloc,
-		      &rpaths[ii].frp_addr, &rpaths[ii].frp_proto);
+    ip_address_to_46 (&ladj->remote_rloc, &rpaths[ii].frp_addr, &fp);
 
+    rpaths[ii].frp_proto = fib_proto_to_dpo (fp);
     rpaths[ii].frp_sw_if_index = ladj->sw_if_index;
     rpaths[ii].frp_weight = (paths[ii].weight ? paths[ii].weight : 1);
   }
diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c
index 5c6f8126..5e48e919 100644
--- a/src/vnet/mfib/ip6_mfib.c
+++ b/src/vnet/mfib/ip6_mfib.c
@@ -158,7 +158,7 @@ ip6_create_mfib_with_table_id (u32 table_id)
         .fp_proto = FIB_PROTOCOL_IP6,
     };
     const fib_route_path_t path_for_us = {
-        .frp_proto = FIB_PROTOCOL_IP6,
+        .frp_proto = DPO_PROTO_IP6,
         .frp_addr = zero_addr,
         .frp_sw_if_index = 0xffffffff,
         .frp_fib_index = ~0,
@@ -222,7 +222,7 @@ ip6_mfib_table_destroy (ip6_mfib_t *mfib)
         .fp_proto = FIB_PROTOCOL_IP6,
     };
     const fib_route_path_t path_for_us = {
-        .frp_proto = FIB_PROTOCOL_IP6,
+        .frp_proto = DPO_PROTO_IP6,
         .frp_addr = zero_addr,
         .frp_sw_if_index = 0xffffffff,
         .frp_fib_index = ~0,
@@ -259,7 +259,7 @@ void
 ip6_mfib_interface_enable_disable (u32 sw_if_index, int is_enable)
 {
     const fib_route_path_t path = {
-        .frp_proto = FIB_PROTOCOL_IP6,
+        .frp_proto = DPO_PROTO_IP6,
         .frp_addr = zero_addr,
         .frp_sw_if_index = sw_if_index,
         .frp_fib_index = ~0,
diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c
index cf25b67a..b37f8825 100644
--- a/src/vnet/mfib/mfib_entry.c
+++ b/src/vnet/mfib/mfib_entry.c
@@ -764,18 +764,16 @@ mfib_entry_update (fib_node_index_t mfib_entry_index,
          * entry
          */
         fib_node_index_t old_pl_index;
-        fib_protocol_t fp;
+        dpo_proto_t dp;
         dpo_id_t dpo = DPO_INVALID;
 
-        fp = mfib_entry_get_proto(mfib_entry);
+        dp = fib_proto_to_dpo(mfib_entry_get_proto(mfib_entry));
         old_pl_index = msrc->mfes_pl;
 
-        dpo_set(&dpo, DPO_REPLICATE,
-                fib_proto_to_dpo(fp),
-                repi);
+        dpo_set(&dpo, DPO_REPLICATE, dp, repi);
 
         msrc->mfes_pl =
-            fib_path_list_create_special(fp,
+            fib_path_list_create_special(dp,
                                          FIB_PATH_LIST_FLAG_EXCLUSIVE,
                                          &dpo);
 
diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c
index 7c92ae99..57787eca 100644
--- a/src/vnet/mfib/mfib_test.c
+++ b/src/vnet/mfib/mfib_test.c
@@ -387,7 +387,7 @@ mfib_test_i (fib_protocol_t PROTO,
 
 
     fib_route_path_t path_via_if0 = {
-        .frp_proto = PROTO,
+        .frp_proto = fib_proto_to_dpo(PROTO),
         .frp_addr = zero_addr,
         .frp_sw_if_index = tm->hw[0]->sw_if_index,
         .frp_fib_index = ~0,
@@ -411,7 +411,7 @@ mfib_test_i (fib_protocol_t PROTO,
                                      MFIB_ITF_FLAG_ACCEPT));
 
     fib_route_path_t path_via_if1 = {
-        .frp_proto = PROTO,
+        .frp_proto = fib_proto_to_dpo(PROTO),
         .frp_addr = zero_addr,
         .frp_sw_if_index = tm->hw[1]->sw_if_index,
         .frp_fib_index = ~0,
@@ -419,7 +419,7 @@ mfib_test_i (fib_protocol_t PROTO,
         .frp_flags = 0,
     };
     fib_route_path_t path_via_if2 = {
-        .frp_proto = PROTO,
+        .frp_proto = fib_proto_to_dpo(PROTO),
         .frp_addr = zero_addr,
         .frp_sw_if_index = tm->hw[2]->sw_if_index,
         .frp_fib_index = ~0,
@@ -427,7 +427,7 @@ mfib_test_i (fib_protocol_t PROTO,
         .frp_flags = 0,
     };
     fib_route_path_t path_via_if3 = {
-        .frp_proto = PROTO,
+        .frp_proto = fib_proto_to_dpo(PROTO),
         .frp_addr = zero_addr,
         .frp_sw_if_index = tm->hw[3]->sw_if_index,
         .frp_fib_index = ~0,
@@ -435,7 +435,7 @@ mfib_test_i (fib_protocol_t PROTO,
         .frp_flags = 0,
     };
     fib_route_path_t path_for_us = {
-        .frp_proto = PROTO,
+        .frp_proto = fib_proto_to_dpo(PROTO),
         .frp_addr = zero_addr,
         .frp_sw_if_index = 0xffffffff,
         .frp_fib_index = ~0,
@@ -1121,7 +1121,7 @@ mfib_test_i (fib_protocol_t PROTO,
                                            &pfx_3500,
                                            FIB_SOURCE_API,
                                            FIB_ENTRY_FLAG_MULTICAST,
-                                           FIB_PROTOCOL_IP4,
+                                           DPO_PROTO_IP4,
                                            &nh_10_10_10_1,
                                            tm->hw[0]->sw_if_index,
                                            ~0, // invalid fib index
@@ -1138,7 +1138,7 @@ mfib_test_i (fib_protocol_t PROTO,
      * An (S,G) that resolves via the mLDP head-end
      */
     fib_route_path_t path_via_mldp = {
-        .frp_proto = FIB_PROTOCOL_MPLS,
+        .frp_proto = DPO_PROTO_MPLS,
         .frp_local_label = pfx_3500.fp_label,
         .frp_eos = MPLS_EOS,
         .frp_sw_if_index = 0xffffffff,
diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api
index 67f1045d..5973a0a6 100644
--- a/src/vnet/mpls/mpls.api
+++ b/src/vnet/mpls/mpls.api
@@ -156,7 +156,7 @@ manual_endian manual_print define mpls_tunnel_details
     @param mr_is_interface_rx - Interface Receive path
     @param mr_is_interface_rx - RPF-ID Receive path. The next-hop interface
                                 is used as the RPF-ID
-    @param mr_next_hop_proto_is_ip4 - The next-hop is IPV4
+    @param mr_next_hop_proto - The next-hop protocol, of type dpo_proto_t
     @param mr_next_hop_weight - The weight, for UCMP
     @param mr_next_hop[16] - the nextop address
     @param mr_next_hop_sw_if_index - the next-hop SW interface
@@ -182,7 +182,7 @@ autoreply define mpls_route_add_del
   u8 mr_is_resolve_attached;
   u8 mr_is_interface_rx;
   u8 mr_is_rpf_id;
-  u8 mr_next_hop_proto_is_ip4;
+  u8 mr_next_hop_proto;
   u8 mr_next_hop_weight;
   u8 mr_next_hop_preference;
   u8 mr_next_hop[16];
diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c
index 068d31f4..266ba42c 100644
--- a/src/vnet/mpls/mpls.c
+++ b/src/vnet/mpls/mpls.c
@@ -261,7 +261,7 @@ vnet_mpls_local_label (vlib_main_t * vm,
 			 &rpath.frp_sw_if_index,
 			 &rpath.frp_weight))
       {
-	  rpath.frp_proto = FIB_PROTOCOL_IP4;
+	  rpath.frp_proto = DPO_PROTO_IP4;
 	  vec_add1(rpaths, rpath);
       }
 
@@ -272,7 +272,7 @@ vnet_mpls_local_label (vlib_main_t * vm,
 			 &rpath.frp_sw_if_index,
 			 &rpath.frp_weight))
       {
-	  rpath.frp_proto = FIB_PROTOCOL_IP6;
+	  rpath.frp_proto = DPO_PROTO_IP6;
 	  vec_add1(rpaths, rpath);
       }
 
@@ -283,7 +283,7 @@ vnet_mpls_local_label (vlib_main_t * vm,
 			 &rpath.frp_sw_if_index))
       {
 	  rpath.frp_weight = 1;
-	  rpath.frp_proto = FIB_PROTOCOL_IP4;
+	  rpath.frp_proto = DPO_PROTO_IP4;
 	  vec_add1(rpaths, rpath);
       }
       else if (unformat (line_input, "rx-ip4 %U",
@@ -291,7 +291,7 @@ vnet_mpls_local_label (vlib_main_t * vm,
 			 &rpath.frp_sw_if_index))
       {
 	  rpath.frp_weight = 1;
-	  rpath.frp_proto = FIB_PROTOCOL_IP4;
+	  rpath.frp_proto = DPO_PROTO_IP4;
           rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX;
 	  vec_add1(rpaths, rpath);
       }
@@ -302,7 +302,7 @@ vnet_mpls_local_label (vlib_main_t * vm,
 			 &rpath.frp_sw_if_index))
       {
 	  rpath.frp_weight = 1;
-	  rpath.frp_proto = FIB_PROTOCOL_IP6;
+	  rpath.frp_proto = DPO_PROTO_IP6;
 	  vec_add1(rpaths, rpath);
       }
       else if (unformat (line_input, "via %U next-hop-table %d",
@@ -312,7 +312,7 @@ vnet_mpls_local_label (vlib_main_t * vm,
       {
 	  rpath.frp_weight = 1;
 	  rpath.frp_sw_if_index = ~0;
-	  rpath.frp_proto = FIB_PROTOCOL_IP4;
+	  rpath.frp_proto = DPO_PROTO_IP4;
 	  vec_add1(rpaths, rpath);
       }
       else if (unformat (line_input, "via %U next-hop-table %d",
@@ -322,7 +322,7 @@ vnet_mpls_local_label (vlib_main_t * vm,
       {
 	  rpath.frp_weight = 1;
 	  rpath.frp_sw_if_index = ~0;
-	  rpath.frp_proto = FIB_PROTOCOL_IP6;
+	  rpath.frp_proto = DPO_PROTO_IP6;
 	  vec_add1(rpaths, rpath);
       }
       else if (unformat (line_input, "via %U",
@@ -336,7 +336,7 @@ vnet_mpls_local_label (vlib_main_t * vm,
 	  rpath.frp_fib_index = table_id;
 	  rpath.frp_weight = 1;
 	  rpath.frp_sw_if_index = ~0;
-	  rpath.frp_proto = FIB_PROTOCOL_IP4;
+	  rpath.frp_proto = DPO_PROTO_IP4;
 	  vec_add1(rpaths, rpath);
       }
       else if (unformat (line_input, "via %U",
@@ -346,7 +346,7 @@ vnet_mpls_local_label (vlib_main_t * vm,
 	  rpath.frp_fib_index = table_id;
 	  rpath.frp_weight = 1;
 	  rpath.frp_sw_if_index = ~0;
-	  rpath.frp_proto = FIB_PROTOCOL_IP6;
+	  rpath.frp_proto = DPO_PROTO_IP6;
 	  vec_add1(rpaths, rpath);
       }
       else if (unformat (line_input, "%d", &local_label))
@@ -355,7 +355,7 @@ vnet_mpls_local_label (vlib_main_t * vm,
 			 "ip4-lookup-in-table %d",
 			 &rpath.frp_fib_index))
       {
-          rpath.frp_proto = FIB_PROTOCOL_IP4;
+          rpath.frp_proto = DPO_PROTO_IP4;
           rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
 	  pfx.fp_payload_proto = DPO_PROTO_IP4;
 	  vec_add1(rpaths, rpath);
@@ -364,7 +364,7 @@ vnet_mpls_local_label (vlib_main_t * vm,
 			 "ip6-lookup-in-table %d",
 			 &rpath.frp_fib_index))
       {
-          rpath.frp_proto = FIB_PROTOCOL_IP6;
+          rpath.frp_proto = DPO_PROTO_IP6;
           rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
 	  vec_add1(rpaths, rpath);
 	  pfx.fp_payload_proto = DPO_PROTO_IP6;
@@ -373,11 +373,21 @@ vnet_mpls_local_label (vlib_main_t * vm,
 			 "mpls-lookup-in-table %d",
 			 &rpath.frp_fib_index))
       {
-          rpath.frp_proto = FIB_PROTOCOL_MPLS;
+          rpath.frp_proto = DPO_PROTO_MPLS;
           rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
 	  pfx.fp_payload_proto = DPO_PROTO_MPLS;
 	  vec_add1(rpaths, rpath);
       }
+      else if (unformat (line_input,
+			 "l2-input-on %U",
+			 unformat_vnet_sw_interface, vnm,
+			 &rpath.frp_sw_if_index))
+      {
+          rpath.frp_proto = DPO_PROTO_ETHERNET;
+	  pfx.fp_payload_proto = DPO_PROTO_ETHERNET;
+          rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX;
+	  vec_add1(rpaths, rpath);
+      }
       else if (unformat (line_input, "out-label %U",
                          unformat_mpls_unicast_label,
 			 &out_label))
@@ -440,7 +450,7 @@ vnet_mpls_local_label (vlib_main_t * vm,
       pfx.fp_proto = FIB_PROTOCOL_MPLS;
       pfx.fp_len = 21;
       pfx.fp_label = local_label;
-      pfx.fp_payload_proto = fib_proto_to_dpo(rpaths[0].frp_proto);
+      pfx.fp_payload_proto = rpaths[0].frp_proto;
 
       /*
        * the CLI parsing stored table Ids, swap to FIB indicies
diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c
index 92fb24a6..737299e6 100644
--- a/src/vnet/mpls/mpls_api.c
+++ b/src/vnet/mpls/mpls_api.c
@@ -144,14 +144,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm,
   };
   if (pfx.fp_eos)
     {
-      if (mp->mr_next_hop_proto_is_ip4)
-	{
-	  pfx.fp_payload_proto = DPO_PROTO_IP4;
-	}
-      else
-	{
-	  pfx.fp_payload_proto = DPO_PROTO_IP6;
-	}
+      pfx.fp_payload_proto = mp->mr_next_hop_proto;
     }
   else
     {
@@ -161,7 +154,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm,
   rv = add_del_route_check (FIB_PROTOCOL_MPLS,
 			    mp->mr_table_id,
 			    mp->mr_next_hop_sw_if_index,
-			    dpo_proto_to_fib (pfx.fp_payload_proto),
+			    pfx.fp_payload_proto,
 			    mp->mr_next_hop_table_id,
 			    mp->mr_create_table_if_needed,
 			    mp->mr_is_rpf_id,
@@ -173,9 +166,9 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm,
   ip46_address_t nh;
   memset (&nh, 0, sizeof (nh));
 
-  if (mp->mr_next_hop_proto_is_ip4)
+  if (DPO_PROTO_IP4 == mp->mr_next_hop_proto)
     memcpy (&nh.ip4, mp->mr_next_hop, sizeof (nh.ip4));
-  else
+  else if (DPO_PROTO_IP6 == mp->mr_next_hop_proto)
     memcpy (&nh.ip6, mp->mr_next_hop, sizeof (nh.ip6));
 
   n_labels = mp->mr_next_hop_n_out_labels;
@@ -202,7 +195,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm,
 				   mp->mr_is_interface_rx,
 				   mp->mr_is_rpf_id,
 				   fib_index, &pfx,
-				   mp->mr_next_hop_proto_is_ip4,
+				   mp->mr_next_hop_proto,
 				   &nh, ntohl (mp->mr_next_hop_sw_if_index),
 				   next_hop_fib_index,
 				   mp->mr_next_hop_weight,
@@ -243,13 +236,13 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp)
 
   if (mp->mt_next_hop_proto_is_ip4)
     {
-      rpath.frp_proto = FIB_PROTOCOL_IP4;
+      rpath.frp_proto = DPO_PROTO_IP4;
       clib_memcpy (&rpath.frp_addr.ip4,
 		   mp->mt_next_hop, sizeof (rpath.frp_addr.ip4));
     }
   else
     {
-      rpath.frp_proto = FIB_PROTOCOL_IP6;
+      rpath.frp_proto = DPO_PROTO_IP6;
       clib_memcpy (&rpath.frp_addr.ip6,
 		   mp->mt_next_hop, sizeof (rpath.frp_addr.ip6));
     }
diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c
index c025cc58..6452a60b 100644
--- a/src/vnet/mpls/mpls_tunnel.c
+++ b/src/vnet/mpls/mpls_tunnel.c
@@ -171,7 +171,7 @@ mpls_tunnel_mk_lb (mpls_tunnel_t *mt,
     vec_validate(ctx.next_hops, fib_path_list_get_n_paths(mt->mt_path_list));
     vec_reset_length(ctx.next_hops);
 
-    lb_proto = vnet_link_to_dpo_proto(linkt);
+    lb_proto = fib_forw_chain_type_to_dpo_proto(fct);
 
     fib_path_list_walk(mt->mt_path_list,
                        mpls_tunnel_collect_forwarding,
@@ -313,12 +313,34 @@ mpls_tunnel_restack (mpls_tunnel_t *mt)
     /*
      * walk all the adjacencies on the MPLS interface and restack them
      */
-    FOR_EACH_FIB_PROTOCOL(proto)
+    if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2)
     {
-        adj_nbr_walk(mt->mt_sw_if_index,
-                     proto,
-                     mpls_adj_walk_cb,
-                     NULL);
+        /*
+         * Stack a load-balance that drops, whilst we have no paths
+         */
+        vnet_hw_interface_t * hi;
+        dpo_id_t dpo = DPO_INVALID;
+
+        mpls_tunnel_mk_lb(mt,
+                          VNET_LINK_MPLS,
+                          FIB_FORW_CHAIN_TYPE_ETHERNET,
+                          &dpo);
+
+        hi = vnet_get_hw_interface(vnet_get_main(), mt->mt_hw_if_index);
+        dpo_stack_from_node(hi->tx_node_index,
+                            &mt->mt_l2_lb,
+                            &dpo);
+        dpo_reset(&dpo);
+    }
+    else
+    {
+        FOR_EACH_FIB_PROTOCOL(proto)
+        {
+            adj_nbr_walk(mt->mt_sw_if_index,
+                         proto,
+                         mpls_adj_walk_cb,
+                         NULL);
+        }
     }
 }
 
@@ -495,7 +517,7 @@ mpls_tunnel_tx (vlib_main_t * vm,
 
           b0 = vlib_get_buffer(vm, bi0);
 
-          vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_adj;
+          vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_lb.dpoi_index;
 
           if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
             {
@@ -506,7 +528,7 @@ mpls_tunnel_tx (vlib_main_t * vm,
 
           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                            to_next, n_left_to_next,
-                                           bi0, mt->mt_l2_tx_arc);
+                                           bi0, mt->mt_l2_lb.dpoi_next_node);
         }
 
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -565,8 +587,7 @@ vnet_mpls_tunnel_del (u32 sw_if_index)
     if (FIB_NODE_INDEX_INVALID != mt->mt_path_list)
         fib_path_list_child_remove(mt->mt_path_list,
                                    mt->mt_sibling_index);
-    if (ADJ_INDEX_INVALID != mt->mt_l2_adj)
-        adj_unlock(mt->mt_l2_adj);
+    dpo_reset(&mt->mt_l2_lb);
 
     vec_add1 (mpls_tunnel_free_hw_if_indices, mt->mt_hw_if_index);
     pool_put(mpls_tunnel_pool, mt);
@@ -587,12 +608,13 @@ vnet_mpls_tunnel_create (u8 l2_only,
     memset (mt, 0, sizeof (*mt));
     mti = mt - mpls_tunnel_pool;
     fib_node_init(&mt->mt_node, FIB_NODE_TYPE_MPLS_TUNNEL);
-    mt->mt_l2_adj = ADJ_INDEX_INVALID;
     mt->mt_path_list = FIB_NODE_INDEX_INVALID;
     mt->mt_sibling_index = FIB_NODE_INDEX_INVALID;
 
     if (is_multicast)
         mt->mt_flags |= MPLS_TUNNEL_FLAG_MCAST;
+    if (l2_only)
+        mt->mt_flags |= MPLS_TUNNEL_FLAG_L2;
 
     /*
      * Create a new, or re=use and old, tunnel HW interface
@@ -614,7 +636,7 @@ vnet_mpls_tunnel_create (u8 l2_only,
                                  mti,
                                  mpls_tunnel_hw_interface_class.index,
                                  mti);
-        hi = vnet_get_hw_interface(vnm, mt->mt_hw_if_index);
+        hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index);
     }
 
     /*
@@ -624,19 +646,6 @@ vnet_mpls_tunnel_create (u8 l2_only,
     vec_validate_init_empty(mpls_tunnel_db, mt->mt_sw_if_index, ~0);
     mpls_tunnel_db[mt->mt_sw_if_index] = mti;
 
-    if (l2_only)
-    {
-        mt->mt_l2_adj =
-            adj_nbr_add_or_lock(fib_path_list_get_proto(mt->mt_path_list),
-                                VNET_LINK_ETHERNET,
-                                &zero_addr,
-                                mt->mt_sw_if_index);
-
-        mt->mt_l2_tx_arc = vlib_node_add_named_next(vlib_get_main(),
-                                                    hi->tx_node_index,
-                                                    "adj-l2-midchain");
-    }
-
     return (mt->mt_sw_if_index);
 }
 
@@ -803,7 +812,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm,
                            &rpath.frp_sw_if_index))
         {
             rpath.frp_weight = 1;
-            rpath.frp_proto = FIB_PROTOCOL_IP4;
+            rpath.frp_proto = DPO_PROTO_IP4;
         }
 
         else if (unformat (line_input, "via %U %U",
@@ -813,7 +822,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm,
                            &rpath.frp_sw_if_index))
         {
             rpath.frp_weight = 1;
-            rpath.frp_proto = FIB_PROTOCOL_IP6;
+            rpath.frp_proto = DPO_PROTO_IP6;
         }
         else if (unformat (line_input, "via %U",
                            unformat_ip6_address,
@@ -822,7 +831,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm,
             rpath.frp_fib_index = 0;
             rpath.frp_weight = 1;
             rpath.frp_sw_if_index = ~0;
-            rpath.frp_proto = FIB_PROTOCOL_IP6;
+            rpath.frp_proto = DPO_PROTO_IP6;
         }
         else if (unformat (line_input, "via %U",
                            unformat_ip4_address,
@@ -831,7 +840,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm,
             rpath.frp_fib_index = 0;
             rpath.frp_weight = 1;
             rpath.frp_sw_if_index = ~0;
-            rpath.frp_proto = FIB_PROTOCOL_IP4;
+            rpath.frp_proto = DPO_PROTO_IP4;
         }
         else if (unformat (line_input, "l2-only"))
             l2_only = 1;
@@ -915,6 +924,14 @@ format_mpls_tunnel (u8 * s, va_list * args)
     s = format(s, "%U", format_fib_path_ext_list, &mt->mt_path_exts);
     s = format(s, "\n");
 
+    if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2)
+    {
+        s = format(s, " forwarding: %U\n",
+                   format_fib_forw_chain_type,
+                   FIB_FORW_CHAIN_TYPE_ETHERNET);
+        s = format(s, " %U\n", format_dpo_id, &mt->mt_l2_lb, 2);
+    }
+
     return (s);
 }
 
diff --git a/src/vnet/mpls/mpls_tunnel.h b/src/vnet/mpls/mpls_tunnel.h
index 4cb0a860..285817c3 100644
--- a/src/vnet/mpls/mpls_tunnel.h
+++ b/src/vnet/mpls/mpls_tunnel.h
@@ -22,15 +22,20 @@
 typedef enum mpls_tunnel_attribute_t_
 {
     MPLS_TUNNEL_ATTRIBUTE_FIRST = 0,
+    /**
+     * @brief The tunnel is L2 only
+     */
+    MPLS_TUNNEL_ATTRIBUTE_L2 = MPLS_TUNNEL_ATTRIBUTE_FIRST,
     /**
      * @brief The tunnel has an underlying multicast LSP
      */
-    MPLS_TUNNEL_ATTRIBUTE_MCAST = MPLS_TUNNEL_ATTRIBUTE_FIRST,
+    MPLS_TUNNEL_ATTRIBUTE_MCAST,
     MPLS_TUNNEL_ATTRIBUTE_LAST = MPLS_TUNNEL_ATTRIBUTE_MCAST,
 } mpls_tunnel_attribute_t;
 
 #define MPLS_TUNNEL_ATTRIBUTES {		  \
     [MPLS_TUNNEL_ATTRIBUTE_MCAST]  = "multicast", \
+    [MPLS_TUNNEL_ATTRIBUTE_L2]     = "L2",   \
 }
 #define FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(_item)		\
     for (_item = MPLS_TUNNEL_ATTRIBUTE_FIRST;		\
@@ -39,6 +44,7 @@ typedef enum mpls_tunnel_attribute_t_
 
 typedef enum mpls_tunnel_flag_t_ {
     MPLS_TUNNEL_FLAG_NONE   = 0,
+    MPLS_TUNNEL_FLAG_L2     = (1 << MPLS_TUNNEL_ATTRIBUTE_L2),
     MPLS_TUNNEL_FLAG_MCAST  = (1 << MPLS_TUNNEL_ATTRIBUTE_MCAST),
 } __attribute__ ((packed)) mpls_tunnel_flags_t;
 
@@ -60,14 +66,19 @@ typedef struct mpls_tunnel_t_
 
     /**
      * @brief If the tunnel is an L2 tunnel, this is the link type ETHERNET
-     * adjacency
+     * load-balance
+     */
+    dpo_id_t mt_l2_lb;
+
+    /**
+     * @brief The HW interface index of the tunnel interfaces
      */
-    adj_index_t mt_l2_adj;
+    u32 mt_hw_if_index;
 
     /**
-     * @brief on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain
+     * @brief The SW interface index of the tunnel interfaces
      */
-    u32 mt_l2_tx_arc;
+    u32 mt_sw_if_index;
 
     /**
      * @brief The path-list over which the tunnel's destination is reachable
@@ -83,23 +94,6 @@ typedef struct mpls_tunnel_t_
      * A vector of path extensions o hold the label stack for each path
      */
     fib_path_ext_list_t mt_path_exts;
-
-    /**
-     * @brief Flag to indicate the tunnel is only for L2 traffic, that is
-     * this tunnel belongs in a bridge domain.
-     */
-    u8 mt_l2_only;
-
-    /**
-     * @brief The HW interface index of the tunnel interfaces
-     */
-    u32 mt_hw_if_index;
-
-    /**
-     * @brief The SW interface index of the tunnel interfaces
-     */
-    u32 mt_sw_if_index;
-
 } mpls_tunnel_t;
 
 /**
diff --git a/src/vnet/srmpls/sr_mpls_policy.c b/src/vnet/srmpls/sr_mpls_policy.c
index 5ebbc60d..db4ad2a7 100755
--- a/src/vnet/srmpls/sr_mpls_policy.c
+++ b/src/vnet/srmpls/sr_mpls_policy.c
@@ -75,7 +75,7 @@ create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight)
   segment_list->segments = vec_dup (sl);
 
   fib_route_path_t path = {
-    .frp_proto = FIB_PROTOCOL_MPLS,
+    .frp_proto = DPO_PROTO_MPLS,
     .frp_sw_if_index = ~0,
     .frp_fib_index = 0,
     .frp_weight = segment_list->weight,
@@ -203,7 +203,7 @@ sr_mpls_policy_del (mpls_label_t bsid, u32 index)
     segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
 
     fib_route_path_t path = {
-      .frp_proto = FIB_PROTOCOL_MPLS,
+      .frp_proto = DPO_PROTO_MPLS,
       .frp_sw_if_index = ~0,
       .frp_fib_index = 0,
       .frp_weight = segment_list->weight,
@@ -308,7 +308,7 @@ sr_mpls_policy_mod (mpls_label_t bsid, u32 index, u8 operation,
 
       mpls_eos_bit_t eos;
       fib_route_path_t path = {
-	.frp_proto = FIB_PROTOCOL_MPLS,
+	.frp_proto = DPO_PROTO_MPLS,
 	.frp_sw_if_index = ~0,
 	.frp_fib_index = 0,
 	.frp_weight = segment_list->weight,
diff --git a/src/vnet/srmpls/sr_mpls_steering.c b/src/vnet/srmpls/sr_mpls_steering.c
index 37707049..3a9aea2d 100755
--- a/src/vnet/srmpls/sr_mpls_steering.c
+++ b/src/vnet/srmpls/sr_mpls_steering.c
@@ -218,7 +218,7 @@ sr_mpls_steering_policy (int is_del, mpls_label_t bsid, u32 sr_policy_index,
 update_fib:;
 
   fib_route_path_t path = {
-    .frp_proto = FIB_PROTOCOL_MPLS,
+    .frp_proto = DPO_PROTO_MPLS,
     .frp_local_label = sr_policy->bsid,
     .frp_eos = MPLS_EOS,
     .frp_sw_if_index = ~0,
diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c
index a7903751..704adaa7 100755
--- a/src/vnet/srv6/sr_steering.c
+++ b/src/vnet/srv6/sr_steering.c
@@ -310,7 +310,7 @@ update_fib:
 						 table_id : 0)),
 				&pfx, FIB_SOURCE_SR,
 				FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
-				FIB_PROTOCOL_IP6,
+				DPO_PROTO_IP6,
 				(ip46_address_t *) & sr_policy->bsid, ~0,
 				sm->fib_table_ip6, 1, NULL,
 				FIB_ROUTE_PATH_FLAG_NONE);
@@ -327,7 +327,7 @@ update_fib:
 						 table_id : 0)),
 				&pfx, FIB_SOURCE_SR,
 				FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
-				FIB_PROTOCOL_IP6,
+				DPO_PROTO_IP6,
 				(ip46_address_t *) & sr_policy->bsid, ~0,
 				sm->fib_table_ip4, 1, NULL,
 				FIB_ROUTE_PATH_FLAG_NONE);
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c
index 97bb1b15..462c79a0 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe.c
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.c
@@ -638,7 +638,7 @@ int vnet_vxlan_gpe_add_del_tunnel
               fib_node_index_t mfei;
               adj_index_t ai;
               fib_route_path_t path = {
-                  .frp_proto = fp,
+                  .frp_proto = fib_proto_to_dpo(fp),
                   .frp_addr = zero_addr,
                   .frp_sw_if_index = 0xffffffff,
                   .frp_fib_index = ~0,
diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c
index 1b3df2a8..dc973372 100644
--- a/src/vnet/vxlan/vxlan.c
+++ b/src/vnet/vxlan/vxlan.c
@@ -505,7 +505,7 @@ int vnet_vxlan_add_del_tunnel
               fib_node_index_t mfei;
               adj_index_t ai;
               fib_route_path_t path = {
-                  .frp_proto = fp,
+                  .frp_proto = fib_proto_to_dpo(fp),
                   .frp_addr = zero_addr,
                   .frp_sw_if_index = 0xffffffff,
                   .frp_fib_index = ~0,
diff --git a/src/vpp/app/vpe_cli.c b/src/vpp/app/vpe_cli.c
index 94bdc84c..fcc496ad 100644
--- a/src/vpp/app/vpe_cli.c
+++ b/src/vpp/app/vpe_cli.c
@@ -98,7 +98,7 @@ virtual_ip_cmd_fn_command_fn (vlib_main_t * vm,
 
       vec_add2 (rpaths, rpath, 1);
 
-      rpath->frp_proto = FIB_PROTOCOL_IP4;
+      rpath->frp_proto = DPO_PROTO_IP4;
       rpath->frp_addr = next_hops[i];
       rpath->frp_sw_if_index = sw_if_index;
       rpath->frp_fib_index = ~0;
diff --git a/test/test_bfd.py b/test/test_bfd.py
index be42cdad..4cb6d379 100644
--- a/test/test_bfd.py
+++ b/test/test_bfd.py
@@ -20,7 +20,7 @@ from vpp_pg_interface import CaptureTimeoutError, is_ipv6_misc
 from vpp_lo_interface import VppLoInterface
 from util import ppp
 from vpp_papi_provider import UnexpectedApiReturnValueError
-from vpp_ip_route import VppIpRoute, VppRoutePath
+from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto
 
 USEC_IN_SEC = 1000000
 
@@ -1678,12 +1678,12 @@ class BFDFIBTestCase(VppTestCase):
         ip_2001_s_64 = VppIpRoute(self, "2001::", 64,
                                   [VppRoutePath(self.pg0.remote_ip6,
                                                 self.pg0.sw_if_index,
-                                                is_ip6=1)],
+                                                proto=DpoProto.DPO_PROTO_IP6)],
                                   is_ip6=1)
         ip_2002_s_64 = VppIpRoute(self, "2002::", 64,
                                   [VppRoutePath(self.pg0.remote_ip6,
                                                 0xffffffff,
-                                                is_ip6=1)],
+                                                proto=DpoProto.DPO_PROTO_IP6)],
                                   is_ip6=1)
         ip_2001_s_64.add_vpp_config()
         ip_2002_s_64.add_vpp_config()
diff --git a/test/test_gre.py b/test/test_gre.py
index 18b67dbd..1afc44fb 100644
--- a/test/test_gre.py
+++ b/test/test_gre.py
@@ -6,7 +6,7 @@ from logging import *
 from framework import VppTestCase, VppTestRunner
 from vpp_sub_interface import VppDot1QSubint
 from vpp_gre_interface import VppGreInterface, VppGre6Interface
-from vpp_ip_route import VppIpRoute, VppRoutePath
+from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto
 from vpp_papi_provider import L2_VTR_OP
 
 from scapy.packet import Raw
@@ -516,11 +516,12 @@ class TestGRE(VppTestCase):
         gre_if.admin_up()
         gre_if.config_ip6()
 
-        route_via_tun = VppIpRoute(self, "4004::1", 128,
-                                   [VppRoutePath("0::0",
-                                                 gre_if.sw_if_index,
-                                                 is_ip6=1)],
-                                   is_ip6=1)
+        route_via_tun = VppIpRoute(
+            self, "4004::1", 128,
+            [VppRoutePath("0::0",
+                          gre_if.sw_if_index,
+                          proto=DpoProto.DPO_PROTO_IP6)],
+            is_ip6=1)
 
         route_via_tun.add_vpp_config()
 
@@ -542,11 +543,12 @@ class TestGRE(VppTestCase):
         #
         # Add a route that resolves the tunnel's destination
         #
-        route_tun_dst = VppIpRoute(self, "1002::1", 128,
-                                   [VppRoutePath(self.pg2.remote_ip6,
-                                                 self.pg2.sw_if_index,
-                                                 is_ip6=1)],
-                                   is_ip6=1)
+        route_tun_dst = VppIpRoute(
+            self, "1002::1", 128,
+            [VppRoutePath(self.pg2.remote_ip6,
+                          self.pg2.sw_if_index,
+                          proto=DpoProto.DPO_PROTO_IP6)],
+            is_ip6=1)
         route_tun_dst.add_vpp_config()
 
         #
diff --git a/test/test_ip6.py b/test/test_ip6.py
index 593f6868..285ce181 100644
--- a/test/test_ip6.py
+++ b/test/test_ip6.py
@@ -8,7 +8,7 @@ from vpp_sub_interface import VppSubInterface, VppDot1QSubint
 from vpp_pg_interface import is_ipv6_misc
 from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, VppIpMRoute, \
     VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind, \
-    VppMplsRoute
+    VppMplsRoute, DpoProto
 from vpp_neighbor import find_nbr, VppNeighbor
 
 from scapy.packet import Raw
@@ -490,7 +490,7 @@ class TestIPv6(TestIPv6ND):
                                     inet=AF_INET6))
 
     def test_ns_duplicates(self):
-        """ ARP Duplicates"""
+        """ ND Duplicates"""
 
         #
         # Generate some hosts on the LAN
@@ -537,7 +537,7 @@ class TestIPv6(TestIPv6ND):
 
         #
         # remove the duplicate on pg1
-        # packet stream shoud generate ARPs out of pg1
+        # packet stream should generate NSs out of pg1
         #
         ns_pg1.remove_vpp_config()
 
@@ -1347,10 +1347,10 @@ class TestIP6LoadBalance(VppTestCase):
         route_3000_1 = VppIpRoute(self, "3000::1", 128,
                                   [VppRoutePath(self.pg1.remote_ip6,
                                                 self.pg1.sw_if_index,
-                                                is_ip6=1),
+                                                proto=DpoProto.DPO_PROTO_IP6),
                                    VppRoutePath(self.pg2.remote_ip6,
                                                 self.pg2.sw_if_index,
-                                                is_ip6=1)],
+                                                proto=DpoProto.DPO_PROTO_IP6)],
                                   is_ip6=1)
         route_3000_1.add_vpp_config()
 
@@ -1367,11 +1367,11 @@ class TestIP6LoadBalance(VppTestCase):
                                 [VppRoutePath(self.pg1.remote_ip6,
                                               self.pg1.sw_if_index,
                                               labels=[67],
-                                              is_ip6=1),
+                                              proto=DpoProto.DPO_PROTO_IP6),
                                  VppRoutePath(self.pg2.remote_ip6,
                                               self.pg2.sw_if_index,
                                               labels=[67],
-                                              is_ip6=1)])
+                                              proto=DpoProto.DPO_PROTO_IP6)])
         route_67.add_vpp_config()
 
         #
@@ -1441,20 +1441,20 @@ class TestIP6LoadBalance(VppTestCase):
         route_3000_2 = VppIpRoute(self, "3000::2", 128,
                                   [VppRoutePath(self.pg3.remote_ip6,
                                                 self.pg3.sw_if_index,
-                                                is_ip6=1),
+                                                proto=DpoProto.DPO_PROTO_IP6),
                                    VppRoutePath(self.pg4.remote_ip6,
                                                 self.pg4.sw_if_index,
-                                                is_ip6=1)],
+                                                proto=DpoProto.DPO_PROTO_IP6)],
                                   is_ip6=1)
         route_3000_2.add_vpp_config()
 
         route_4000_1 = VppIpRoute(self, "4000::1", 128,
                                   [VppRoutePath("3000::1",
                                                 0xffffffff,
-                                                is_ip6=1),
+                                                proto=DpoProto.DPO_PROTO_IP6),
                                    VppRoutePath("3000::2",
                                                 0xffffffff,
-                                                is_ip6=1)],
+                                                proto=DpoProto.DPO_PROTO_IP6)],
                                   is_ip6=1)
         route_4000_1.add_vpp_config()
 
@@ -1485,14 +1485,14 @@ class TestIP6LoadBalance(VppTestCase):
         route_5000_2 = VppIpRoute(self, "5000::2", 128,
                                   [VppRoutePath(self.pg3.remote_ip6,
                                                 self.pg3.sw_if_index,
-                                                is_ip6=1)],
+                                                proto=DpoProto.DPO_PROTO_IP6)],
                                   is_ip6=1)
         route_5000_2.add_vpp_config()
 
         route_6000_1 = VppIpRoute(self, "6000::1", 128,
                                   [VppRoutePath("5000::2",
                                                 0xffffffff,
-                                                is_ip6=1)],
+                                                proto=DpoProto.DPO_PROTO_IP6)],
                                   is_ip6=1)
         route_6000_1.add_vpp_config()
 
diff --git a/test/test_map.py b/test/test_map.py
index 9ac3948a..bbf4aec2 100644
--- a/test/test_map.py
+++ b/test/test_map.py
@@ -4,7 +4,7 @@ import unittest
 import socket
 
 from framework import VppTestCase, VppTestRunner
-from vpp_ip_route import VppIpRoute, VppRoutePath
+from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto
 
 from scapy.layers.l2 import Ether, Raw
 from scapy.layers.inet import IP, UDP, ICMP
@@ -75,7 +75,7 @@ class TestMAP(VppTestCase):
                                map_br_pfx_len,
                                [VppRoutePath(self.pg1.remote_ip6,
                                              self.pg1.sw_if_index,
-                                             is_ip6=1)],
+                                             proto=DpoProto.DPO_PROTO_IP6)],
                                is_ip6=1)
         map_route.add_vpp_config()
 
@@ -138,13 +138,12 @@ class TestMAP(VppTestCase):
         # Add a route to 4001::1. Expect the encapped traffic to be
         # sent via that routes next-hop
         #
-        pre_res_route = VppIpRoute(self,
-                                   "4001::1",
-                                   128,
-                                   [VppRoutePath(self.pg1.remote_hosts[2].ip6,
-                                                 self.pg1.sw_if_index,
-                                                 is_ip6=1)],
-                                   is_ip6=1)
+        pre_res_route = VppIpRoute(
+            self, "4001::1", 128,
+            [VppRoutePath(self.pg1.remote_hosts[2].ip6,
+                          self.pg1.sw_if_index,
+                          proto=DpoProto.DPO_PROTO_IP6)],
+            is_ip6=1)
         pre_res_route.add_vpp_config()
 
         self.send_and_assert_encapped(v4, map_src,
@@ -156,7 +155,7 @@ class TestMAP(VppTestCase):
         #
         pre_res_route.modify([VppRoutePath(self.pg1.remote_hosts[3].ip6,
                                            self.pg1.sw_if_index,
-                                           is_ip6=1)])
+                                           proto=DpoProto.DPO_PROTO_IP6)])
         pre_res_route.add_vpp_config()
 
         self.send_and_assert_encapped(v4, map_src,
diff --git a/test/test_mpls.py b/test/test_mpls.py
index e3d013af..b2226a74 100644
--- a/test/test_mpls.py
+++ b/test/test_mpls.py
@@ -6,7 +6,7 @@ import socket
 from framework import VppTestCase, VppTestRunner
 from vpp_ip_route import VppIpRoute, VppRoutePath, VppMplsRoute, \
     VppMplsIpBind, VppIpMRoute, VppMRoutePath, \
-    MRouteItfFlags, MRouteEntryFlags
+    MRouteItfFlags, MRouteEntryFlags, DpoProto
 from vpp_mpls_tunnel_interface import VppMPLSTunnelInterface
 
 from scapy.packet import Raw
@@ -16,6 +16,38 @@ from scapy.layers.inet6 import IPv6
 from scapy.contrib.mpls import MPLS
 
 
+def verify_filter(capture, sent):
+    if not len(capture) == len(sent):
+        # filter out any IPv6 RAs from the capture
+        for p in capture:
+            if p.haslayer(IPv6):
+                capture.remove(p)
+    return capture
+
+
+def verify_mpls_stack(tst, rx, mpls_labels, ttl=255, num=0):
+    # the rx'd packet has the MPLS label popped
+    eth = rx[Ether]
+    tst.assertEqual(eth.type, 0x8847)
+
+    rx_mpls = rx[MPLS]
+
+    for ii in range(len(mpls_labels)):
+        tst.assertEqual(rx_mpls.label, mpls_labels[ii])
+        tst.assertEqual(rx_mpls.cos, 0)
+        if ii == num:
+            tst.assertEqual(rx_mpls.ttl, ttl)
+        else:
+            tst.assertEqual(rx_mpls.ttl, 255)
+        if ii == len(mpls_labels) - 1:
+            tst.assertEqual(rx_mpls.s, 1)
+        else:
+            # not end of stack
+            tst.assertEqual(rx_mpls.s, 0)
+            # pop the label to expose the next
+            rx_mpls = rx_mpls[MPLS].payload
+
+
 class TestMPLS(VppTestCase):
     """ MPLS Test Case """
 
@@ -120,18 +152,9 @@ class TestMPLS(VppTestCase):
             pkts.append(p)
         return pkts
 
-    @staticmethod
-    def verify_filter(capture, sent):
-        if not len(capture) == len(sent):
-            # filter out any IPv6 RAs from the capture
-            for p in capture:
-                if p.haslayer(IPv6):
-                    capture.remove(p)
-        return capture
-
     def verify_capture_ip4(self, src_if, capture, sent, ping_resp=0):
         try:
-            capture = self.verify_filter(capture, sent)
+            capture = verify_filter(capture, sent)
 
             self.assertEqual(len(capture), len(sent))
 
@@ -158,33 +181,10 @@ class TestMPLS(VppTestCase):
         except:
             raise
 
-    def verify_mpls_stack(self, rx, mpls_labels, ttl=255, num=0):
-        # the rx'd packet has the MPLS label popped
-        eth = rx[Ether]
-        self.assertEqual(eth.type, 0x8847)
-
-        rx_mpls = rx[MPLS]
-
-        for ii in range(len(mpls_labels)):
-            self.assertEqual(rx_mpls.label, mpls_labels[ii])
-            self.assertEqual(rx_mpls.cos, 0)
-            if ii == num:
-                self.assertEqual(rx_mpls.ttl, ttl)
-            else:
-                self.assertEqual(rx_mpls.ttl, 255)
-
-            if ii == len(mpls_labels) - 1:
-                self.assertEqual(rx_mpls.s, 1)
-            else:
-                # not end of stack
-                self.assertEqual(rx_mpls.s, 0)
-                # pop the label to expose the next
-                rx_mpls = rx_mpls[MPLS].payload
-
     def verify_capture_labelled_ip4(self, src_if, capture, sent,
                                     mpls_labels):
         try:
-            capture = self.verify_filter(capture, sent)
+            capture = verify_filter(capture, sent)
 
             self.assertEqual(len(capture), len(sent))
 
@@ -195,8 +195,8 @@ class TestMPLS(VppTestCase):
                 rx_ip = rx[IP]
 
                 # the MPLS TTL is copied from the IP
-                self.verify_mpls_stack(
-                    rx, mpls_labels, rx_ip.ttl, len(mpls_labels) - 1)
+                verify_mpls_stack(self, rx, mpls_labels, rx_ip.ttl,
+                                  len(mpls_labels) - 1)
 
                 self.assertEqual(rx_ip.src, tx_ip.src)
                 self.assertEqual(rx_ip.dst, tx_ip.dst)
@@ -211,7 +211,7 @@ class TestMPLS(VppTestCase):
         if top is None:
             top = len(mpls_labels) - 1
         try:
-            capture = self.verify_filter(capture, sent)
+            capture = verify_filter(capture, sent)
 
             self.assertEqual(len(capture), len(sent))
 
@@ -222,8 +222,7 @@ class TestMPLS(VppTestCase):
                 rx_ip = rx[IP]
 
                 # the MPLS TTL is 255 since it enters a new tunnel
-                self.verify_mpls_stack(
-                    rx, mpls_labels, ttl, top)
+                verify_mpls_stack(self, rx, mpls_labels, ttl, top)
 
                 self.assertEqual(rx_ip.src, tx_ip.src)
                 self.assertEqual(rx_ip.dst, tx_ip.dst)
@@ -236,13 +235,13 @@ class TestMPLS(VppTestCase):
     def verify_capture_labelled(self, src_if, capture, sent,
                                 mpls_labels, ttl=254, num=0):
         try:
-            capture = self.verify_filter(capture, sent)
+            capture = verify_filter(capture, sent)
 
             self.assertEqual(len(capture), len(sent))
 
             for i in range(len(capture)):
                 rx = capture[i]
-                self.verify_mpls_stack(rx, mpls_labels, ttl, num)
+                verify_mpls_stack(self, rx, mpls_labels, ttl, num)
         except:
             raise
 
@@ -1049,7 +1048,7 @@ class TestMPLS(VppTestCase):
                           self.pg1.sw_if_index,
                           nh_table_id=1,
                           rpf_id=55,
-                          is_ip6=1)],
+                          proto=DpoProto.DPO_PROTO_IP6)],
             is_multicast=1)
 
         route_34_eos.add_vpp_config()
@@ -1440,19 +1439,20 @@ class TestMPLSPIC(VppTestCase):
         for ii in range(64):
             dst = "3000::%d" % ii
             local_label = 1600 + ii
-            vpn_routes.append(VppIpRoute(self, dst, 128,
-                                         [VppRoutePath(self.pg2.remote_ip6,
-                                                       0xffffffff,
-                                                       nh_table_id=1,
-                                                       is_resolve_attached=1,
-                                                       is_ip6=1),
-                                          VppRoutePath(self.pg3.remote_ip6,
-                                                       0xffffffff,
-                                                       nh_table_id=1,
-                                                       is_ip6=1,
-                                                       is_resolve_attached=1)],
-                                         table_id=1,
-                                         is_ip6=1))
+            vpn_routes.append(VppIpRoute(
+                self, dst, 128,
+                [VppRoutePath(self.pg2.remote_ip6,
+                              0xffffffff,
+                              nh_table_id=1,
+                              is_resolve_attached=1,
+                              proto=DpoProto.DPO_PROTO_IP6),
+                 VppRoutePath(self.pg3.remote_ip6,
+                              0xffffffff,
+                              nh_table_id=1,
+                              proto=DpoProto.DPO_PROTO_IP6,
+                              is_resolve_attached=1)],
+                table_id=1,
+                is_ip6=1))
             vpn_routes[ii].add_vpp_config()
 
             vpn_bindings.append(VppMplsIpBind(self, local_label, dst, 128,
@@ -1525,5 +1525,211 @@ class TestMPLSPIC(VppTestCase):
         self.assertNotEqual(0, len(rx1))
 
 
+class TestMPLSL2(VppTestCase):
+    """ MPLS-L2 """
+
+    def setUp(self):
+        super(TestMPLSL2, self).setUp()
+
+        # create 2 pg interfaces
+        self.create_pg_interfaces(range(2))
+
+        # use pg0 as the core facing interface
+        self.pg0.admin_up()
+        self.pg0.config_ip4()
+        self.pg0.resolve_arp()
+        self.pg0.enable_mpls()
+
+        # use the remaining interface(s) for customer-facing L2 links
+        for i in self.pg_interfaces[1:]:
+            i.admin_up()
+
+    def tearDown(self):
+        super(TestMPLSL2, self).tearDown()
+        for i in self.pg_interfaces[1:]:
+            i.admin_down()
+
+        self.pg0.disable_mpls()
+        self.pg0.unconfig_ip4()
+        self.pg0.admin_down()
+
+    def verify_capture_tunneled_ethernet(self, capture, sent, mpls_labels,
+                                         ttl=255, top=None):
+        if top is None:
+            top = len(mpls_labels) - 1
+
+        capture = verify_filter(capture, sent)
+
+        self.assertEqual(len(capture), len(sent))
+
+        for i in range(len(capture)):
+            tx = sent[i]
+            rx = capture[i]
+
+            # the MPLS TTL is 255 since it enters a new tunnel
+            verify_mpls_stack(self, rx, mpls_labels, ttl, top)
+
+            tx_eth = tx[Ether]
+            rx_eth = Ether(str(rx[MPLS].payload))
+
+            self.assertEqual(rx_eth.src, tx_eth.src)
+            self.assertEqual(rx_eth.dst, tx_eth.dst)
+
+    def test_vpws(self):
+        """ Virtual Private Wire Service """
+
+        #
+        # Create an MPLS tunnel that pushes 1 label
+        #
+        mpls_tun_1 = VppMPLSTunnelInterface(self,
+                                            [VppRoutePath(self.pg0.remote_ip4,
+                                                          self.pg0.sw_if_index,
+                                                          labels=[42])],
+                                            is_l2=1)
+        mpls_tun_1.add_vpp_config()
+        mpls_tun_1.admin_up()
+
+        #
+        # Create a label entry for 55 that does L2 input to the tunnel
+        #
+        route_55_eos = VppMplsRoute(
+            self, 55, 1,
+            [VppRoutePath("0.0.0.0",
+                          mpls_tun_1.sw_if_index,
+                          is_interface_rx=1,
+                          proto=DpoProto.DPO_PROTO_ETHERNET)])
+        route_55_eos.add_vpp_config()
+
+        #
+        # Cross-connect the tunnel with one of the customers L2 interfaces
+        #
+        self.vapi.sw_interface_set_l2_xconnect(self.pg1.sw_if_index,
+                                               mpls_tun_1.sw_if_index,
+                                               enable=1)
+        self.vapi.sw_interface_set_l2_xconnect(mpls_tun_1.sw_if_index,
+                                               self.pg1.sw_if_index,
+                                               enable=1)
+
+        #
+        # inject a packet from the core
+        #
+        pcore = (Ether(dst=self.pg0.local_mac,
+                       src=self.pg0.remote_mac) /
+                 MPLS(label=55, ttl=64) /
+                 Ether(dst="00:00:de:ad:ba:be",
+                       src="00:00:de:ad:be:ef") /
+                 IP(src="10.10.10.10", dst="11.11.11.11") /
+                 UDP(sport=1234, dport=1234) /
+                 Raw('\xa5' * 100))
+
+        self.pg0.add_stream(pcore * 65)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg1.get_capture(65)
+        tx = pcore[MPLS].payload
+
+        self.assertEqual(rx0[0][Ether].dst, tx[Ether].dst)
+        self.assertEqual(rx0[0][Ether].src, tx[Ether].src)
+
+        #
+        # Inject a packet from the customer/L2 side
+        #
+        self.pg1.add_stream(tx * 65)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg0.get_capture(65)
+
+        self.verify_capture_tunneled_ethernet(rx0, tx*65, [42])
+
+    def test_vpls(self):
+        """ Virtual Private LAN Service """
+        #
+        # Create an L2 MPLS tunnel
+        #
+        mpls_tun = VppMPLSTunnelInterface(self,
+                                          [VppRoutePath(self.pg0.remote_ip4,
+                                                        self.pg0.sw_if_index,
+                                                        labels=[42])],
+                                          is_l2=1)
+        mpls_tun.add_vpp_config()
+        mpls_tun.admin_up()
+
+        #
+        # Create a label entry for 55 that does L2 input to the tunnel
+        #
+        route_55_eos = VppMplsRoute(
+            self, 55, 1,
+            [VppRoutePath("0.0.0.0",
+                          mpls_tun.sw_if_index,
+                          is_interface_rx=1,
+                          proto=DpoProto.DPO_PROTO_ETHERNET)])
+        route_55_eos.add_vpp_config()
+
+        #
+        # add the tunnel to the customer's bridge-domain
+        #
+        self.vapi.sw_interface_set_l2_bridge(mpls_tun.sw_if_index,
+                                             bd_id=1)
+        self.vapi.sw_interface_set_l2_bridge(self.pg1.sw_if_index,
+                                             bd_id=1)
+
+        #
+        # Packet from the customer interface and from the core
+        #
+        p_cust = (Ether(dst="00:00:de:ad:ba:be",
+                        src="00:00:de:ad:be:ef") /
+                  IP(src="10.10.10.10", dst="11.11.11.11") /
+                  UDP(sport=1234, dport=1234) /
+                  Raw('\xa5' * 100))
+        p_core = (Ether(src="00:00:de:ad:ba:be",
+                        dst="00:00:de:ad:be:ef") /
+                  IP(dst="10.10.10.10", src="11.11.11.11") /
+                  UDP(sport=1234, dport=1234) /
+                  Raw('\xa5' * 100))
+
+        #
+        # The BD is learning, so send in one of each packet to learn
+        #
+        p_core_encap = (Ether(dst=self.pg0.local_mac,
+                              src=self.pg0.remote_mac) /
+                        MPLS(label=55, ttl=64) /
+                        p_core)
+
+        self.pg1.add_stream(p_cust)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        self.pg0.add_stream(p_core_encap)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        # we've learnt this so expect it to be forwarded
+        rx0 = self.pg1.get_capture(1)
+
+        self.assertEqual(rx0[0][Ether].dst, p_core[Ether].dst)
+        self.assertEqual(rx0[0][Ether].src, p_core[Ether].src)
+
+        #
+        # now a stream in each direction
+        #
+        self.pg1.add_stream(p_cust * 65)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg0.get_capture(65)
+
+        self.verify_capture_tunneled_ethernet(rx0, p_cust*65, [42])
+
+        #
+        # remove interfaces from customers bridge-domain
+        #
+        self.vapi.sw_interface_set_l2_bridge(mpls_tun.sw_if_index,
+                                             bd_id=1,
+                                             enable=0)
+        self.vapi.sw_interface_set_l2_bridge(self.pg1.sw_if_index,
+                                             bd_id=1,
+                                             enable=0)
+
 if __name__ == '__main__':
     unittest.main(testRunner=VppTestRunner)
diff --git a/test/test_p2p_ethernet.py b/test/test_p2p_ethernet.py
index 37a1d18b..8688f7e6 100644
--- a/test/test_p2p_ethernet.py
+++ b/test/test_p2p_ethernet.py
@@ -11,7 +11,7 @@ from scapy.layers.inet6 import IPv6
 
 from framework import VppTestCase, VppTestRunner, running_extended_tests
 from vpp_sub_interface import VppP2PSubint
-from vpp_ip_route import VppIpRoute, VppRoutePath
+from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto
 from util import mactobinary
 
 
@@ -219,7 +219,7 @@ class P2PEthernetIPV6(VppTestCase):
         route_8000 = VppIpRoute(self, "8000::", 64,
                                 [VppRoutePath(self.pg0.remote_ip6,
                                               self.pg0.sw_if_index,
-                                              is_ip6=1)],
+                                              proto=DpoProto.DPO_PROTO_IP6)],
                                 is_ip6=1)
         route_8000.add_vpp_config()
 
@@ -239,7 +239,7 @@ class P2PEthernetIPV6(VppTestCase):
         route_9001 = VppIpRoute(self, "9001::", 64,
                                 [VppRoutePath(self.pg1.remote_ip6,
                                               self.pg1.sw_if_index,
-                                              is_ip6=1)],
+                                              proto=DpoProto.DPO_PROTO_IP6)],
                                 is_ip6=1)
         route_9001.add_vpp_config()
 
@@ -264,7 +264,7 @@ class P2PEthernetIPV6(VppTestCase):
         route_3 = VppIpRoute(self, "9000::", 64,
                              [VppRoutePath(self.pg1._remote_hosts[0].ip6,
                                            self.pg1.sw_if_index,
-                                           is_ip6=1)],
+                                           proto=DpoProto.DPO_PROTO_IP6)],
                              is_ip6=1)
         route_3.add_vpp_config()
 
@@ -289,7 +289,7 @@ class P2PEthernetIPV6(VppTestCase):
         route_9001 = VppIpRoute(self, "9000::", 64,
                                 [VppRoutePath(self.pg1._remote_hosts[0].ip6,
                                               self.pg1.sw_if_index,
-                                              is_ip6=1)],
+                                              proto=DpoProto.DPO_PROTO_IP6)],
                                 is_ip6=1)
         route_9001.add_vpp_config()
 
@@ -310,19 +310,19 @@ class P2PEthernetIPV6(VppTestCase):
         route_8000 = VppIpRoute(self, "8000::", 64,
                                 [VppRoutePath(self.pg0.remote_ip6,
                                               self.pg0.sw_if_index,
-                                              is_ip6=1)],
+                                              proto=DpoProto.DPO_PROTO_IP6)],
                                 is_ip6=1)
         route_8000.add_vpp_config()
         route_8001 = VppIpRoute(self, "8001::", 64,
                                 [VppRoutePath(self.p2p_sub_ifs[0].remote_ip6,
                                               self.p2p_sub_ifs[0].sw_if_index,
-                                              is_ip6=1)],
+                                              proto=DpoProto.DPO_PROTO_IP6)],
                                 is_ip6=1)
         route_8001.add_vpp_config()
         route_8002 = VppIpRoute(self, "8002::", 64,
                                 [VppRoutePath(self.p2p_sub_ifs[1].remote_ip6,
                                               self.p2p_sub_ifs[1].sw_if_index,
-                                              is_ip6=1)],
+                                              proto=DpoProto.DPO_PROTO_IP6)],
                                 is_ip6=1)
         route_8002.add_vpp_config()
 
diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py
index badb3102..2c489e3c 100644
--- a/test/vpp_ip_route.py
+++ b/test/vpp_ip_route.py
@@ -29,6 +29,14 @@ class MRouteEntryFlags:
     MFIB_ENTRY_FLAG_INHERIT_ACCEPT = 8
 
 
+class DpoProto:
+    DPO_PROTO_IP4 = 0
+    DPO_PROTO_IP6 = 1
+    DPO_PROTO_MPLS = 2
+    DPO_PROTO_ETHERNET = 3
+    DPO_PROTO_NSH = 4
+
+
 def find_route(test, ip_addr, len, table_id=0, inet=AF_INET):
     if inet == AF_INET:
         s = 4
@@ -55,22 +63,24 @@ class VppRoutePath(object):
             nh_table_id=0,
             labels=[],
             nh_via_label=MPLS_LABEL_INVALID,
-            is_ip6=0,
             rpf_id=0,
             is_interface_rx=0,
             is_resolve_host=0,
-            is_resolve_attached=0):
+            is_resolve_attached=0,
+            proto=DpoProto.DPO_PROTO_IP4):
         self.nh_itf = nh_sw_if_index
         self.nh_table_id = nh_table_id
         self.nh_via_label = nh_via_label
         self.nh_labels = labels
         self.weight = 1
         self.rpf_id = rpf_id
-        self.is_ip4 = 1 if is_ip6 == 0 else 0
-        if self.is_ip4:
+        self.proto = proto
+        if self.proto is DpoProto.DPO_PROTO_IP6:
+            self.nh_addr = inet_pton(AF_INET6, nh_addr)
+        elif self.proto is DpoProto.DPO_PROTO_IP4:
             self.nh_addr = inet_pton(AF_INET, nh_addr)
         else:
-            self.nh_addr = inet_pton(AF_INET6, nh_addr)
+            self.nh_addr = inet_pton(AF_INET6, "::")
         self.is_resolve_host = is_resolve_host
         self.is_resolve_attached = is_resolve_attached
         self.is_interface_rx = is_interface_rx
@@ -401,7 +411,7 @@ class VppMplsRoute(VppObject):
             self._test.vapi.mpls_route_add_del(
                 self.local_label,
                 self.eos_bit,
-                path.is_ip4,
+                path.proto,
                 path.nh_addr,
                 path.nh_itf,
                 is_multicast=self.is_multicast,
@@ -420,7 +430,7 @@ class VppMplsRoute(VppObject):
         for path in self.paths:
             self._test.vapi.mpls_route_add_del(self.local_label,
                                                self.eos_bit,
-                                               1,
+                                               path.proto,
                                                path.nh_addr,
                                                path.nh_itf,
                                                is_rpf_id=path.is_rpf_id,
diff --git a/test/vpp_mpls_tunnel_interface.py b/test/vpp_mpls_tunnel_interface.py
index f2001574..0542b05c 100644
--- a/test/vpp_mpls_tunnel_interface.py
+++ b/test/vpp_mpls_tunnel_interface.py
@@ -9,13 +9,14 @@ class VppMPLSTunnelInterface(VppInterface):
     VPP MPLS Tunnel interface
     """
 
-    def __init__(self, test, paths, is_multicast=0):
+    def __init__(self, test, paths, is_multicast=0, is_l2=0):
         """ Create MPLS Tunnel interface """
         self._sw_if_index = 0
         super(VppMPLSTunnelInterface, self).__init__(test)
         self._test = test
         self.t_paths = paths
         self.is_multicast = is_multicast
+        self.is_l2 = is_l2
 
     def add_vpp_config(self):
         self._sw_if_index = 0xffffffff
@@ -29,7 +30,8 @@ class VppMPLSTunnelInterface(VppInterface):
                 path.weight,
                 next_hop_out_label_stack=path.nh_labels,
                 next_hop_n_out_labels=len(path.nh_labels),
-                is_multicast=self.is_multicast)
+                is_multicast=self.is_multicast,
+                l2_only=self.is_l2)
             self._sw_if_index = reply.sw_if_index
 
     def remove_vpp_config(self):
diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py
index 801a6c2d..3ba2ad4a 100644
--- a/test/vpp_papi_provider.py
+++ b/test/vpp_papi_provider.py
@@ -921,7 +921,7 @@ class VppPapiProvider(object):
             self,
             label,
             eos,
-            next_hop_proto_is_ip4,
+            next_hop_proto,
             next_hop_address,
             next_hop_sw_if_index=0xFFFFFFFF,
             table_id=0,
@@ -982,7 +982,7 @@ class VppPapiProvider(object):
              'mr_is_resolve_attached': is_resolve_attached,
              'mr_is_interface_rx': is_interface_rx,
              'mr_is_rpf_id': is_rpf_id,
-             'mr_next_hop_proto_is_ip4': next_hop_proto_is_ip4,
+             'mr_next_hop_proto': next_hop_proto,
              'mr_next_hop_weight': next_hop_weight,
              'mr_next_hop': next_hop_address,
              'mr_next_hop_n_out_labels': next_hop_n_out_labels,
-- 
cgit 1.2.3-korg


From 43161a873375ddf156cf6fbe8764bfc206b38fa0 Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Sat, 12 Aug 2017 02:12:00 -0700
Subject: PPPoE uses a midchain adjacency stack on an interface-tx DPO

1) Introduce an interface-tx DPO. This is a simple wrapper around a sw_if_index. Enhance the DPO stacking functions to allow per-instance next-nodes, and hence allow children to stack onto the interface's per-instance tx node and not on 'interface-output'.
2) Update the PPPoE code to use a midchain stack on an interface-tx DPO of the encap-interface. This removes the need for the pppoe_encap node (which is replaced by adj-midchain-tx), and the interface-output node is no longer used (see above). Since the PPPoE encap node is no longer needed, the PPPoE session does not need to be retrieved in the data-path, hence the cache misses are removed.

Change-Id: Id8b40f53daa14889a9c51d802e14fed7fba4399a
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/plugins/pppoe.am            |  23 +--
 src/plugins/pppoe/pppoe.c       | 144 ++++++++++---
 src/plugins/pppoe/pppoe.h       |   6 -
 src/plugins/pppoe/pppoe_encap.c | 384 ----------------------------------
 src/vnet.am                     |   3 +-
 src/vnet/adj/adj_midchain.c     | 126 ++++++++----
 src/vnet/dpo/dpo.c              | 107 ++++++----
 src/vnet/dpo/dpo.h              |  19 +-
 src/vnet/dpo/interface_dpo.c    | 446 ----------------------------------------
 src/vnet/dpo/interface_dpo.h    |  67 ------
 src/vnet/dpo/interface_rx_dpo.c | 445 +++++++++++++++++++++++++++++++++++++++
 src/vnet/dpo/interface_rx_dpo.h |  69 +++++++
 src/vnet/dpo/interface_tx_dpo.c |  92 +++++++++
 src/vnet/dpo/interface_tx_dpo.h |  33 +++
 src/vnet/fib/fib_path.c         |  14 +-
 src/vnet/fib/fib_test.c         |  14 +-
 test/test_pppoe.py              |   3 +-
 17 files changed, 952 insertions(+), 1043 deletions(-)
 delete mode 100644 src/plugins/pppoe/pppoe_encap.c
 delete mode 100644 src/vnet/dpo/interface_dpo.c
 delete mode 100644 src/vnet/dpo/interface_dpo.h
 create mode 100644 src/vnet/dpo/interface_rx_dpo.c
 create mode 100644 src/vnet/dpo/interface_rx_dpo.h
 create mode 100644 src/vnet/dpo/interface_tx_dpo.c
 create mode 100644 src/vnet/dpo/interface_tx_dpo.h

(limited to 'src/vnet/dpo')

diff --git a/src/plugins/pppoe.am b/src/plugins/pppoe.am
index 28bd20a0..06ed60b4 100644
--- a/src/plugins/pppoe.am
+++ b/src/plugins/pppoe.am
@@ -14,27 +14,26 @@
 vppapitestplugins_LTLIBRARIES += pppoe_test_plugin.la
 vppplugins_LTLIBRARIES += pppoe_plugin.la
 
-pppoe_plugin_la_SOURCES =		\
-    pppoe/pppoe_decap.c		    \
-    pppoe/pppoe_encap.c		    \
-    pppoe/pppoe_tap.c		    \
-    pppoe/pppoe_tap_node.c		\
-    pppoe/pppoe.c		        \
+pppoe_plugin_la_SOURCES =	\
+    pppoe/pppoe_decap.c		\
+    pppoe/pppoe_tap.c		\
+    pppoe/pppoe_tap_node.c	\
+    pppoe/pppoe.c		\
     pppoe/pppoe_api.c
 
-BUILT_SOURCES +=			   \
-    pppoe/pppoe.api.h		    \
+BUILT_SOURCES +=		\
+    pppoe/pppoe.api.h		\
     pppoe/pppoe.api.json
 
 API_FILES += pppoe/pppoe.api
 
 nobase_apiinclude_HEADERS +=	\
-    pppoe/pppoe_all_api_h.h		\
-    pppoe/pppoe_msg_enum.h		\
+    pppoe/pppoe_all_api_h.h	\
+    pppoe/pppoe_msg_enum.h	\
     pppoe/pppoe.api.h
 
-pppoe_test_plugin_la_SOURCES = \
-    pppoe/pppoe_test.c            \
+pppoe_test_plugin_la_SOURCES =	\
+    pppoe/pppoe_test.c		\
     pppoe/pppoe_plugin.api.h
 
 # vi:syntax=automake
diff --git a/src/plugins/pppoe/pppoe.c b/src/plugins/pppoe/pppoe.c
index cb587e29..e09ac7d9 100644
--- a/src/plugins/pppoe/pppoe.c
+++ b/src/plugins/pppoe/pppoe.c
@@ -24,11 +24,13 @@
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/fib/fib_entry.h>
 #include <vnet/fib/fib_table.h>
-#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/interface_tx_dpo.h>
 #include <vnet/plugin/plugin.h>
 #include <vpp/app/version.h>
 #include <vnet/ppp/packet.h>
 #include <pppoe/pppoe.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/adj/adj_mcast.h>
 
 #include <vppinfra/hash.h>
 #include <vppinfra/bihash_template.c>
@@ -85,7 +87,6 @@ pppoe_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
 VNET_DEVICE_CLASS (pppoe_device_class,static) = {
   .name = "PPPPOE",
   .format_device_name = format_pppoe_name,
-  .format_tx_trace = format_pppoe_encap_trace,
   .tx_function = dummy_interface_tx,
   .admin_up_down_function = pppoe_interface_admin_up_down,
 };
@@ -99,27 +100,19 @@ format_pppoe_header_with_length (u8 * s, va_list * args)
   return s;
 }
 
-/* *INDENT-OFF* */
-VNET_HW_INTERFACE_CLASS (pppoe_hw_class) =
-{
-  .name = "PPPPOE",
-  .format_header = format_pppoe_header_with_length,
-  .build_rewrite = default_build_rewrite,
-  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
-};
-/* *INDENT-ON* */
-
-#define foreach_copy_field                      \
-_(session_id)                                   \
-_(encap_if_index)                               \
-_(decap_fib_index)                              \
-_(client_ip)
-
-static void
-eth_pppoe_rewrite (pppoe_session_t * t, bool is_ip6)
+static u8 *
+pppoe_build_rewrite (vnet_main_t * vnm,
+		     u32 sw_if_index,
+		     vnet_link_t link_type, const void *dst_address)
 {
-  u8 *rw = 0;
   int len = sizeof (pppoe_header_t) + sizeof (ethernet_header_t);
+  pppoe_main_t *pem = &pppoe_main;
+  pppoe_session_t *t;
+  u32 session_id;
+  u8 *rw = 0;
+
+  session_id = pem->session_index_by_sw_if_index[sw_if_index];
+  t = pool_elt_at_index (pem->sessions, session_id);
 
   vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES);
 
@@ -134,21 +127,112 @@ eth_pppoe_rewrite (pppoe_session_t * t, bool is_ip6)
   pppoe->session_id = clib_host_to_net_u16 (t->session_id);
   pppoe->length = 0;		/* To be filled in at run-time */
 
-  if (!is_ip6)
+  switch (link_type)
     {
+    case VNET_LINK_IP4:
       pppoe->ppp_proto = clib_host_to_net_u16 (PPP_PROTOCOL_ip4);
+      break;
+    case VNET_LINK_IP6:
+      pppoe->ppp_proto = clib_host_to_net_u16 (PPP_PROTOCOL_ip6);
+      break;
+    default:
+      break;
     }
-  else
+
+  return rw;
+}
+
+/**
+ * @brief Fixup the adj rewrite post encap. Insert the packet's length
+ */
+static void
+pppoe_fixup (vlib_main_t * vm, ip_adjacency_t * adj, vlib_buffer_t * b0)
+{
+  pppoe_header_t *pppoe0;
+
+  pppoe0 = vlib_buffer_get_current (b0);
+
+  pppoe0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+					 - sizeof (pppoe_header_t)
+					 - sizeof (ethernet_header_t));
+}
+
+static void
+pppoe_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
+{
+  pppoe_main_t *pem = &pppoe_main;
+  dpo_id_t dpo = DPO_INVALID;
+  ip_adjacency_t *adj;
+  pppoe_session_t *t;
+  u32 session_id;
+
+  ASSERT (ADJ_INDEX_INVALID != ai);
+
+  adj = adj_get (ai);
+
+  switch (adj->lookup_next_index)
     {
-      pppoe->ppp_proto = clib_host_to_net_u16 (PPP_PROTOCOL_ip6);
+    case IP_LOOKUP_NEXT_ARP:
+    case IP_LOOKUP_NEXT_GLEAN:
+      adj_nbr_midchain_update_rewrite (ai, pppoe_fixup,
+				       ADJ_FLAG_NONE,
+				       pppoe_build_rewrite (vnm,
+							    sw_if_index,
+							    adj->ia_link,
+							    NULL));
+      break;
+    case IP_LOOKUP_NEXT_MCAST:
+      /*
+       * Construct a partial rewrite from the known ethernet mcast dest MAC
+       * There's no MAC fixup, so the last 2 parameters are 0
+       */
+      adj_mcast_midchain_update_rewrite (ai, pppoe_fixup,
+					 ADJ_FLAG_NONE,
+					 pppoe_build_rewrite (vnm,
+							      sw_if_index,
+							      adj->ia_link,
+							      NULL), 0, 0);
+      break;
+
+    case IP_LOOKUP_NEXT_DROP:
+    case IP_LOOKUP_NEXT_PUNT:
+    case IP_LOOKUP_NEXT_LOCAL:
+    case IP_LOOKUP_NEXT_REWRITE:
+    case IP_LOOKUP_NEXT_MIDCHAIN:
+    case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+    case IP_LOOKUP_NEXT_ICMP_ERROR:
+    case IP_LOOKUP_N_NEXT:
+      ASSERT (0);
+      break;
     }
 
-  t->rewrite = rw;
-  _vec_len (t->rewrite) = len;
+  session_id = pem->session_index_by_sw_if_index[sw_if_index];
+  t = pool_elt_at_index (pem->sessions, session_id);
+  interface_tx_dpo_add_or_lock (vnet_link_to_dpo_proto (adj->ia_link),
+				t->encap_if_index, &dpo);
+
+  adj_nbr_midchain_stack (ai, &dpo);
 
-  return;
+  dpo_reset (&dpo);
 }
 
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (pppoe_hw_class) =
+{
+  .name = "PPPPOE",
+  .format_header = format_pppoe_header_with_length,
+  .build_rewrite = pppoe_build_rewrite,
+  .update_adjacency = pppoe_update_adj,
+  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+/* *INDENT-ON* */
+
+#define foreach_copy_field                      \
+_(session_id)                                   \
+_(encap_if_index)                               \
+_(decap_fib_index)                              \
+_(client_ip)
+
 static bool
 pppoe_decap_next_is_valid (pppoe_main_t * pem, u32 is_ip6,
 			   u32 decap_fib_index)
@@ -231,8 +315,6 @@ int vnet_pppoe_add_del_session
 
       clib_memcpy (t->client_mac, a->client_mac, 6);
 
-      eth_pppoe_rewrite (t, is_ip6);
-
       /* update pppoe fib with session_index */
       result.fields.session_index = t - pem->sessions;
       pppoe_update_1 (&pem->session_table,
@@ -285,9 +367,6 @@ int vnet_pppoe_add_del_session
       vnet_sw_interface_set_flags (vnm, sw_if_index,
 				   VNET_SW_INTERFACE_FLAG_ADMIN_UP);
 
-      /* Set pppoe session output node */
-      hi->output_node_index = pppoe_encap_node.index;
-
       /* add reverse route for client ip */
       fib_table_entry_path_add (a->decap_fib_index, &pfx,
 				FIB_SOURCE_PLUGIN_HI, FIB_ENTRY_FLAG_NONE,
@@ -328,7 +407,6 @@ int vnet_pppoe_add_del_session
 				   sw_if_index, ~0, 1,
 				   FIB_ROUTE_PATH_FLAG_NONE);
 
-      vec_free (t->rewrite);
       pool_put (pem->sessions, t);
     }
 
diff --git a/src/plugins/pppoe/pppoe.h b/src/plugins/pppoe/pppoe.h
index 37d628eb..b06c068f 100644
--- a/src/plugins/pppoe/pppoe.h
+++ b/src/plugins/pppoe/pppoe.h
@@ -48,9 +48,6 @@ typedef struct
 
 typedef struct
 {
-  /* Rewrite string */
-  u8 *rewrite;
-
   /* pppoe session_id in HOST byte order */
   u16 session_id;
 
@@ -177,11 +174,8 @@ typedef struct
 extern pppoe_main_t pppoe_main;
 
 extern vlib_node_registration_t pppoe_input_node;
-extern vlib_node_registration_t pppoe_encap_node;
 extern vlib_node_registration_t pppoe_tap_dispatch_node;
 
-u8 *format_pppoe_encap_trace (u8 * s, va_list * args);
-
 typedef struct
 {
   u8 is_add;
diff --git a/src/plugins/pppoe/pppoe_encap.c b/src/plugins/pppoe/pppoe_encap.c
deleted file mode 100644
index 69bec61d..00000000
--- a/src/plugins/pppoe/pppoe_encap.c
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * Copyright (c) 2017 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vppinfra/error.h>
-#include <vppinfra/hash.h>
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-#include <pppoe/pppoe.h>
-
-/* Statistics (not all errors) */
-#define foreach_pppoe_encap_error    \
-_(ENCAPSULATED, "good packets encapsulated")
-
-static char * pppoe_encap_error_strings[] = {
-#define _(sym,string) string,
-  foreach_pppoe_encap_error
-#undef _
-};
-
-typedef enum {
-#define _(sym,str) PPPOE_ENCAP_ERROR_##sym,
-    foreach_pppoe_encap_error
-#undef _
-    PPPOE_ENCAP_N_ERROR,
-} pppoe_encap_error_t;
-
-#define foreach_pppoe_encap_next       \
-_(DROP, "error-drop")                  \
-_(INTERFACE, "interface-output" )      \
-
-typedef enum
-{
-#define _(s,n) PPPOE_ENCAP_NEXT_##s,
-  foreach_pppoe_encap_next
-#undef _
-    PPPOE_ENCAP_N_NEXT,
-} pppoe_encap_next_t;
-
-typedef struct {
-  u32 session_index;
-  u32 session_id;
-} pppoe_encap_trace_t;
-
-u8 * format_pppoe_encap_trace (u8 * s, va_list * args)
-{
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  pppoe_encap_trace_t * t
-      = va_arg (*args, pppoe_encap_trace_t *);
-
-  s = format (s, "PPPOE encap to pppoe_session%d session_id %d",
-	      t->session_index, t->session_id);
-  return s;
-}
-
-
-#define foreach_fixed_header2_offset            \
-        _(0) _(1)
-
-
-static uword
-pppoe_encap (vlib_main_t * vm,
-	      vlib_node_runtime_t * node,
-	      vlib_frame_t * from_frame)
-{
-  u32 n_left_from, next_index, * from, * to_next;
-  pppoe_main_t * pem = &pppoe_main;
-  vnet_main_t * vnm = pem->vnet_main;
-  vnet_interface_main_t * im = &vnm->interface_main;
-  u32 pkts_encapsulated = 0;
-  u32 thread_index = vlib_get_thread_index();
-  u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
-  u32 sw_if_index0 = 0, sw_if_index1 = 0;
-  u32 next0 = 0, next1 = 0;
-  pppoe_session_t * t0 = NULL, * t1 = NULL;
-
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-
-  next_index = node->cached_next_index;
-  stats_sw_if_index = node->runtime_data[0];
-  stats_n_packets = stats_n_bytes = 0;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index,
-			   to_next, n_left_to_next);
-
-      while (n_left_from >= 4 && n_left_to_next >= 2)
-	{
-          u32 bi0, bi1;
-	  vlib_buffer_t * b0, * b1;
-	  u32 len0, len1;
-	  ethernet_header_t * eth0, * eth1;
-          pppoe_header_t * pppoe0, * pppoe1;
-          u64 * copy_src0, * copy_dst0;
-          u64 * copy_src1, * copy_dst1;
-          u16 * copy_src_last0, * copy_dst_last0;
-          u16 * copy_src_last1, * copy_dst_last1;
-          u16 new_l0, new_l1;
-          u32 session_id0, session_id1;
-
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t * p2, * p3;
-
-	    p2 = vlib_get_buffer (vm, from[2]);
-	    p3 = vlib_get_buffer (vm, from[3]);
-
-	    vlib_prefetch_buffer_header (p2, LOAD);
-	    vlib_prefetch_buffer_header (p3, LOAD);
-
-	    CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
-	    CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
-	  }
-
-	  bi0 = from[0];
-	  bi1 = from[1];
-	  to_next[0] = bi0;
-	  to_next[1] = bi1;
-	  from += 2;
-	  to_next += 2;
-	  n_left_to_next -= 2;
-	  n_left_from -= 2;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-
-	  /* Get next node index and if-index from session */
-	  sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
-	  session_id0 = pem->session_index_by_sw_if_index[sw_if_index0];
-	  t0 = pool_elt_at_index(pem->sessions, session_id0);
-	  next0 = PPPOE_ENCAP_NEXT_INTERFACE;
-          vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_if_index;
-
-          /* Get next node index and if-index from session */
-	  sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
-	  session_id1 = pem->session_index_by_sw_if_index[sw_if_index1];
-	  t1 = pool_elt_at_index(pem->sessions, session_id1);
-	  next1 = PPPOE_ENCAP_NEXT_INTERFACE;
-          vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_if_index;
-
-          /* Apply the rewrite string. $$$$ vnet_rewrite? */
-          vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
-          vlib_buffer_advance (b1, -(word)_vec_len(t1->rewrite));
-
-          eth0 = (ethernet_header_t *)(vlib_buffer_get_current(b0));
-          eth1 = (ethernet_header_t *)(vlib_buffer_get_current(b1));
-
-	  /* Copy the fixed header */
-	  copy_dst0 = (u64 *) eth0;
-	  copy_src0 = (u64 *) t0->rewrite;
-	  copy_dst1 = (u64 *) eth1;
-	  copy_src1 = (u64 *) t1->rewrite;
-	  /* Copy first 8-bytes at a time */
-#define _(offs) copy_dst0[offs] = copy_src0[offs];
-	  foreach_fixed_header2_offset;
-#undef _
-	  /* Last 6 octets. Hopefully gcc will be our friend */
-          copy_dst_last0 = (u16 *)(&copy_dst0[2]);
-          copy_src_last0 = (u16 *)(&copy_src0[2]);
-          copy_dst_last0[0] = copy_src_last0[0];
-          copy_dst_last0[1] = copy_src_last0[1];
-          copy_dst_last0[2] = copy_src_last0[2];
-
-#define _(offs) copy_dst1[offs] = copy_src1[offs];
-	  foreach_fixed_header2_offset;
-#undef _
-	  /* Last 6 octets. Hopefully gcc will be our friend */
-          copy_dst_last1 = (u16 *)(&copy_dst1[2]);
-          copy_src_last1 = (u16 *)(&copy_src1[2]);
-          copy_dst_last1[0] = copy_src_last1[0];
-          copy_dst_last1[1] = copy_src_last1[1];
-          copy_dst_last1[2] = copy_src_last1[2];
-
-          /* Fix PPPoE length */
-	  new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
-					 - sizeof (*pppoe0) - sizeof(*eth0));
-	  pppoe0 = (pppoe_header_t *)(eth0 + 1);
-	  pppoe0->length = new_l0;
-
-	  new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b1)
-					 - sizeof (*pppoe1) - sizeof(*eth1));
-	  pppoe1 = (pppoe_header_t *)(eth1 + 1);
-	  pppoe1->length = new_l1;
-
-          pkts_encapsulated += 2;
- 	  len0 = vlib_buffer_length_in_chain (vm, b0);
- 	  len1 = vlib_buffer_length_in_chain (vm, b1);
-	  stats_n_packets += 2;
-	  stats_n_bytes += len0 + len1;
-
-	  /* Batch stats increment on the same pppoe session so counter is not
-	     incremented per packet. Note stats are still incremented for deleted
-	     and admin-down session where packets are dropped. It is not worthwhile
-	     to check for this rare case and affect normal path performance. */
-	  if (PREDICT_FALSE ((sw_if_index0 != stats_sw_if_index) ||
-			     (sw_if_index1 != stats_sw_if_index)))
-	    {
-	      stats_n_packets -= 2;
-	      stats_n_bytes -= len0 + len1;
-	      if (sw_if_index0 == sw_if_index1)
-	        {
-		  if (stats_n_packets)
-		    vlib_increment_combined_counter
-		      (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-		       thread_index, stats_sw_if_index,
-		       stats_n_packets, stats_n_bytes);
-		  stats_sw_if_index = sw_if_index0;
-		  stats_n_packets = 2;
-		  stats_n_bytes = len0 + len1;
-	        }
-	      else
-	        {
-		  vlib_increment_combined_counter
-		      (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-		       thread_index, sw_if_index0, 1, len0);
-		  vlib_increment_combined_counter
-		      (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-		       thread_index, sw_if_index1, 1, len1);
-		}
-	    }
-
-	  if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
-            {
-              pppoe_encap_trace_t *tr =
-                vlib_add_trace (vm, node, b0, sizeof (*tr));
-              tr->session_index = t0 - pem->sessions;
-              tr->session_id = t0->session_id;
-           }
-
-          if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
-            {
-              pppoe_encap_trace_t *tr =
-                vlib_add_trace (vm, node, b1, sizeof (*tr));
-              tr->session_index = t1 - pem->sessions;
-              tr->session_id = t1->session_id;
-            }
-
-	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
-					   to_next, n_left_to_next,
-					   bi0, bi1, next0, next1);
-	}
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	  u32 bi0;
-	  vlib_buffer_t * b0;
-	  ethernet_header_t * eth0;
-          pppoe_header_t * pppoe0;
-          u64 * copy_src0, * copy_dst0;
-          u16 * copy_src_last0, * copy_dst_last0;
-          u16 new_l0;
-          u32 len0;
-          u32 session_id0;
-
-	  bi0 = from[0];
-	  to_next[0] = bi0;
-	  from += 1;
-	  to_next += 1;
-	  n_left_from -= 1;
-	  n_left_to_next -= 1;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-
-	  /* Get next node index and if-index from session */
-	  sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
-	  session_id0 = pem->session_index_by_sw_if_index[sw_if_index0];
-	  t0 = pool_elt_at_index(pem->sessions, session_id0);
-	  next0 = PPPOE_ENCAP_NEXT_INTERFACE;
-          vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_if_index;
-
-          /* Apply the rewrite string. $$$$ vnet_rewrite? */
-          vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
-
-          eth0 = (ethernet_header_t *)(vlib_buffer_get_current(b0));
-	  /* Copy the fixed header */
-	  copy_dst0 = (u64 *) eth0;
-	  copy_src0 = (u64 *) t0->rewrite;
-
-	  /* Copy first 8-bytes at a time */
-#define _(offs) copy_dst0[offs] = copy_src0[offs];
-	  foreach_fixed_header2_offset;
-#undef _
-	  /* Last 6 octets. Hopefully gcc will be our friend */
-          copy_dst_last0 = (u16 *)(&copy_dst0[2]);
-          copy_src_last0 = (u16 *)(&copy_src0[2]);
-          copy_dst_last0[0] = copy_src_last0[0];
-          copy_dst_last0[1] = copy_src_last0[1];
-          copy_dst_last0[2] = copy_src_last0[2];
-
-          /* Fix PPPoE length */
-	  new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
-					 - sizeof (*pppoe0) - sizeof(*eth0));
-	  pppoe0 = (pppoe_header_t *)(eth0 + 1);
-	  pppoe0->length = new_l0;
-
-          pkts_encapsulated ++;
-	  len0 = vlib_buffer_length_in_chain (vm, b0);
-	  stats_n_packets += 1;
-	  stats_n_bytes += len0;
-
-	  /* Batch stats increment on the same pppoe session so counter is not
-	     incremented per packet. Note stats are still incremented for deleted
-	     and admin-down session where packets are dropped. It is not worthwhile
-	     to check for this rare case and affect normal path performance. */
-	  if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
-	    {
-	      stats_n_packets -= 1;
-	      stats_n_bytes -= len0;
-	      if (stats_n_packets)
-		vlib_increment_combined_counter
-		  (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-		   thread_index, stats_sw_if_index,
-		   stats_n_packets, stats_n_bytes);
-	      stats_n_packets = 1;
-	      stats_n_bytes = len0;
-	      stats_sw_if_index = sw_if_index0;
-	    }
-
-          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
-            {
-              pppoe_encap_trace_t *tr =
-                vlib_add_trace (vm, node, b0, sizeof (*tr));
-              tr->session_index = t0 - pem->sessions;
-              tr->session_id = t0->session_id;
-            }
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-					   to_next, n_left_to_next,
-					   bi0, next0);
-	}
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  /* Do we still need this now that session tx stats is kept? */
-  vlib_node_increment_counter (vm, node->node_index,
-                               PPPOE_ENCAP_ERROR_ENCAPSULATED,
-                               pkts_encapsulated);
-
-  /* Increment any remaining batch stats */
-  if (stats_n_packets)
-    {
-      vlib_increment_combined_counter
-	(im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-	 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
-      node->runtime_data[0] = stats_sw_if_index;
-    }
-
-  return from_frame->n_vectors;
-}
-
-VLIB_REGISTER_NODE (pppoe_encap_node) = {
-  .function = pppoe_encap,
-  .name = "pppoe-encap",
-  .vector_size = sizeof (u32),
-  .format_trace = format_pppoe_encap_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = ARRAY_LEN(pppoe_encap_error_strings),
-  .error_strings = pppoe_encap_error_strings,
-  .n_next_nodes = PPPOE_ENCAP_N_NEXT,
-  .next_nodes = {
-#define _(s,n) [PPPOE_ENCAP_NEXT_##s] = n,
-    foreach_pppoe_encap_next
-#undef _
-  },
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (pppoe_encap_node, pppoe_encap)
-
diff --git a/src/vnet.am b/src/vnet.am
index 9821069a..182ec1f4 100644
--- a/src/vnet.am
+++ b/src/vnet.am
@@ -1011,7 +1011,8 @@ libvnet_la_SOURCES +=				\
   vnet/dpo/lookup_dpo.c   			\
   vnet/dpo/classify_dpo.c   			\
   vnet/dpo/replicate_dpo.c   			\
-  vnet/dpo/interface_dpo.c   			\
+  vnet/dpo/interface_rx_dpo.c  			\
+  vnet/dpo/interface_tx_dpo.c  			\
   vnet/dpo/mpls_disposition.c   		\
   vnet/dpo/mpls_label_dpo.c
 
diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
index a93a1c3e..e9a510b0 100644
--- a/src/vnet/adj/adj_midchain.c
+++ b/src/vnet/adj/adj_midchain.c
@@ -65,8 +65,7 @@ adj_midchain_tx_inline (vlib_main_t * vm,
 	/* set up to enqueue to our disposition with index = next_index */
 	vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 
-
-	while (n_left_from >= 4 && n_left_to_next > 2)
+	while (n_left_from >= 8 && n_left_to_next > 4)
 	{
 	    u32 bi0, adj_index0, next0;
 	    const ip_adjacency_t * adj0;
@@ -76,49 +75,75 @@ adj_midchain_tx_inline (vlib_main_t * vm,
 	    const ip_adjacency_t * adj1;
 	    const dpo_id_t *dpo1;
 	    vlib_buffer_t * b1;
+	    u32 bi2, adj_index2, next2;
+	    const ip_adjacency_t * adj2;
+	    const dpo_id_t *dpo2;
+	    vlib_buffer_t * b2;
+	    u32 bi3, adj_index3, next3;
+	    const ip_adjacency_t * adj3;
+	    const dpo_id_t *dpo3;
+	    vlib_buffer_t * b3;
 
 	    /* Prefetch next iteration. */
 	    {
-		vlib_buffer_t * p2, * p3;
-
-		p2 = vlib_get_buffer (vm, from[2]);
-		p3 = vlib_get_buffer (vm, from[3]);
-
-		vlib_prefetch_buffer_header (p2, LOAD);
-		vlib_prefetch_buffer_header (p3, LOAD);
-
-		CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
-		CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+		vlib_buffer_t * p4, * p5;
+		vlib_buffer_t * p6, * p7;
+
+		p4 = vlib_get_buffer (vm, from[4]);
+		p5 = vlib_get_buffer (vm, from[5]);
+		p6 = vlib_get_buffer (vm, from[6]);
+		p7 = vlib_get_buffer (vm, from[7]);
+
+		vlib_prefetch_buffer_header (p4, LOAD);
+		vlib_prefetch_buffer_header (p5, LOAD);
+		vlib_prefetch_buffer_header (p6, LOAD);
+		vlib_prefetch_buffer_header (p7, LOAD);
 	    }
 
 	    bi0 = from[0];
 	    to_next[0] = bi0;
 	    bi1 = from[1];
 	    to_next[1] = bi1;
+	    bi2 = from[2];
+	    to_next[2] = bi2;
+	    bi3 = from[3];
+	    to_next[3] = bi3;
 
-	    from += 2;
-	    to_next += 2;
-	    n_left_from -= 2;
-	    n_left_to_next -= 2;
+	    from += 4;
+	    to_next += 4;
+	    n_left_from -= 4;
+	    n_left_to_next -= 4;
 
 	    b0 = vlib_get_buffer(vm, bi0);
 	    b1 = vlib_get_buffer(vm, bi1);
+	    b2 = vlib_get_buffer(vm, bi2);
+	    b3 = vlib_get_buffer(vm, bi3);
 
 	    /* Follow the DPO on which the midchain is stacked */
 	    adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
 	    adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+	    adj_index2 = vnet_buffer(b2)->ip.adj_index[VLIB_TX];
+	    adj_index3 = vnet_buffer(b3)->ip.adj_index[VLIB_TX];
 
 	    adj0 = adj_get(adj_index0);
 	    adj1 = adj_get(adj_index1);
+	    adj2 = adj_get(adj_index2);
+	    adj3 = adj_get(adj_index3);
 
 	    dpo0 = &adj0->sub_type.midchain.next_dpo;
 	    dpo1 = &adj1->sub_type.midchain.next_dpo;
+	    dpo2 = &adj2->sub_type.midchain.next_dpo;
+	    dpo3 = &adj3->sub_type.midchain.next_dpo;
 
 	    next0 = dpo0->dpoi_next_node;
 	    next1 = dpo1->dpoi_next_node;
+	    next2 = dpo2->dpoi_next_node;
+	    next3 = dpo3->dpoi_next_node;
 
-	    vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
-	    vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+            vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+            vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+            vnet_buffer(b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+            vnet_buffer(b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
 
 	    if (interface_count)
 	    {
@@ -134,6 +159,18 @@ adj_midchain_tx_inline (vlib_main_t * vm,
 						 adj1->rewrite_header.sw_if_index,
 						 1,
 						 vlib_buffer_length_in_chain (vm, b1));
+		vlib_increment_combined_counter (im->combined_sw_if_counters
+						 + VNET_INTERFACE_COUNTER_TX,
+						 thread_index,
+						 adj2->rewrite_header.sw_if_index,
+						 1,
+						 vlib_buffer_length_in_chain (vm, b2));
+		vlib_increment_combined_counter (im->combined_sw_if_counters
+						 + VNET_INTERFACE_COUNTER_TX,
+						 thread_index,
+						 adj3->rewrite_header.sw_if_index,
+						 1,
+						 vlib_buffer_length_in_chain (vm, b3));
 	    }
 
 	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -148,11 +185,23 @@ adj_midchain_tx_inline (vlib_main_t * vm,
 							      b1, sizeof (*tr));
 		tr->ai = adj_index1;
 	    }
+	    if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+		adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+							      b2, sizeof (*tr));
+		tr->ai = adj_index2;
+	    }
+	    if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+		adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+							      b3, sizeof (*tr));
+		tr->ai = adj_index3;
+	    }
 
-	    vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+	    vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
 					     to_next, n_left_to_next,
-					     bi0, bi1,
-					     next0, next1);
+					     bi0, bi1, bi2, bi3,
+					     next0, next1, next2, next3);
 	}
 	while (n_left_from > 0 && n_left_to_next > 0)
 	{
@@ -175,7 +224,7 @@ adj_midchain_tx_inline (vlib_main_t * vm,
 	    adj0 = adj_get(adj_index0);
 	    dpo0 = &adj0->sub_type.midchain.next_dpo;
 	    next0 = dpo0->dpoi_next_node;
-	    vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+            vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
 	    if (interface_count)
 	    {
@@ -392,6 +441,17 @@ adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj)
             adj_midchain_tx_node.index);
 }
 
+static u32
+adj_nbr_midchain_get_feature_node (ip_adjacency_t *adj)
+{
+    if (adj->ia_flags & ADJ_FLAG_MIDCHAIN_NO_COUNT)
+    {
+        return (adj_midchain_tx_no_count_feature_node[adj->ia_link]);
+    }
+
+    return (adj_midchain_tx_feature_node[adj->ia_link]);
+}
+
 /**
  * adj_midchain_setup
  *
@@ -414,10 +474,7 @@ adj_midchain_setup (adj_index_t adj_index,
     adj->ia_flags |= flags;
 
     arc_index = adj_midchain_get_feature_arc_index_for_link_type (adj);
-    feature_index = (flags & ADJ_FLAG_MIDCHAIN_NO_COUNT) ?
-                    adj_midchain_tx_no_count_feature_node[adj->ia_link] :
-                    adj_midchain_tx_feature_node[adj->ia_link];
-
+    feature_index = adj_nbr_midchain_get_feature_node(adj);
     tx_node = adj_nbr_midchain_get_tx_node(adj);
 
     vnet_feature_enable_disable_with_index (arc_index, feature_index,
@@ -432,8 +489,8 @@ adj_midchain_setup (adj_index_t adj_index,
      * need to get to the stacked child's node.
      */
     dpo_stack_from_node(tx_node,
-			&adj->sub_type.midchain.next_dpo,
-			drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
+                        &adj->sub_type.midchain.next_dpo,
+                        drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
 }
 
 /**
@@ -495,10 +552,9 @@ adj_nbr_midchain_unstack (adj_index_t adj_index)
      * stack on the drop
      */
     dpo_stack(DPO_ADJACENCY_MIDCHAIN,
-	      vnet_link_to_dpo_proto(adj->ia_link),
-	      &adj->sub_type.midchain.next_dpo,
-	      drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
-
+              vnet_link_to_dpo_proto(adj->ia_link),
+              &adj->sub_type.midchain.next_dpo,
+              drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
     CLIB_MEMORY_BARRIER();
 }
 
@@ -537,9 +593,9 @@ format_adj_midchain (u8* s, va_list *ap)
 		format_vnet_rewrite,
 		&adj->rewrite_header, sizeof (adj->rewrite_data), indent);
     s = format (s, "\n%Ustacked-on:\n%U%U",
-		format_white_space, indent,
-		format_white_space, indent+2,
-		format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2);
+                format_white_space, indent,
+                format_white_space, indent+2,
+                format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2);
 
     return (s);
 }
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
index aa770838..bd18b66b 100644
--- a/src/vnet/dpo/dpo.c
+++ b/src/vnet/dpo/dpo.c
@@ -37,7 +37,8 @@
 #include <vnet/dpo/classify_dpo.h>
 #include <vnet/dpo/ip_null_dpo.h>
 #include <vnet/dpo/replicate_dpo.h>
-#include <vnet/dpo/interface_dpo.h>
+#include <vnet/dpo/interface_rx_dpo.h>
+#include <vnet/dpo/interface_tx_dpo.h>
 #include <vnet/dpo/mpls_disposition.h>
 
 /**
@@ -275,6 +276,29 @@ dpo_is_adj (const dpo_id_t *dpo)
 	    (dpo->dpoi_type == DPO_ADJACENCY_GLEAN));
 }
 
+static u32 *
+dpo_default_get_next_node (const dpo_id_t *dpo)
+{
+    u32 *node_indices = NULL;
+    const char *node_name;
+    u32 ii = 0;
+
+    node_name = dpo_nodes[dpo->dpoi_type][dpo->dpoi_proto][ii];
+    while (NULL != node_name)
+    {
+        vlib_node_t *node;
+
+        node = vlib_get_node_by_name(vlib_get_main(), (u8*) node_name);
+        ASSERT(NULL != node);
+        vec_add1(node_indices, node->index);
+
+        ++ii;
+        node_name = dpo_nodes[dpo->dpoi_type][dpo->dpoi_proto][ii];
+    }
+
+    return (node_indices);
+}
+
 void
 dpo_register (dpo_type_t type,
 	      const dpo_vft_t *vft,
@@ -282,6 +306,10 @@ dpo_register (dpo_type_t type,
 {
     vec_validate(dpo_vfts, type);
     dpo_vfts[type] = *vft;
+    if (NULL == dpo_vfts[type].dv_get_next_node)
+    {
+        dpo_vfts[type].dv_get_next_node = dpo_default_get_next_node;
+    }
 
     vec_validate(dpo_nodes, type);
     dpo_nodes[type] = nodes;
@@ -340,24 +368,25 @@ dpo_get_next_node (dpo_type_t child_type,
      */
     if (~0 == dpo_edges[child_type][child_proto][parent_type][parent_proto])
     {
-        vlib_node_t *parent_node, *child_node;
+        vlib_node_t *child_node;
+        u32 *parent_indices;
         vlib_main_t *vm;
-        u32 edge ,pp, cc;
+        u32 edge, *pi, cc;
 
         vm = vlib_get_main();
 
-        vlib_worker_thread_barrier_sync(vm);
-
+        ASSERT(NULL != dpo_vfts[parent_type].dv_get_next_node);
         ASSERT(NULL != dpo_nodes[child_type]);
         ASSERT(NULL != dpo_nodes[child_type][child_proto]);
-        ASSERT(NULL != dpo_nodes[parent_type]);
-        ASSERT(NULL != dpo_nodes[parent_type][parent_proto]);
 
         cc = 0;
+        parent_indices = dpo_vfts[parent_type].dv_get_next_node(parent_dpo);
+
+        vlib_worker_thread_barrier_sync(vm);
 
         /*
-         * create a graph arc from each of the parent's registered node types,
-         * to each of the childs.
+         * create a graph arc from each of the child's registered node types,
+         * to each of the parent's.
          */
         while (NULL != dpo_nodes[child_type][child_proto][cc])
         {
@@ -365,17 +394,9 @@ dpo_get_next_node (dpo_type_t child_type,
                 vlib_get_node_by_name(vm,
                                       (u8*) dpo_nodes[child_type][child_proto][cc]);
 
-            pp = 0;
-
-            while (NULL != dpo_nodes[parent_type][parent_proto][pp])
+            vec_foreach(pi, parent_indices)
             {
-                parent_node =
-                    vlib_get_node_by_name(vm,
-                                          (u8*) dpo_nodes[parent_type][parent_proto][pp]);
-
-                edge = vlib_node_add_next(vm,
-                                          child_node->index,
-                                          parent_node->index);
+                edge = vlib_node_add_next(vm, child_node->index, *pi);
 
                 if (~0 == dpo_edges[child_type][child_proto][parent_type][parent_proto])
                 {
@@ -385,12 +406,12 @@ dpo_get_next_node (dpo_type_t child_type,
                 {
                     ASSERT(dpo_edges[child_type][child_proto][parent_type][parent_proto] == edge);
                 }
-                pp++;
             }
             cc++;
         }
 
         vlib_worker_thread_barrier_release(vm);
+        vec_free(parent_indices);
     }
 
     return (dpo_edges[child_type][child_proto][parent_type][parent_proto]);
@@ -451,38 +472,39 @@ dpo_stack_from_node (u32 child_node_index,
                      dpo_id_t *dpo,
                      const dpo_id_t *parent)
 {
-    dpo_proto_t parent_proto;
-    vlib_node_t *parent_node;
     dpo_type_t parent_type;
+    u32 *parent_indices;
     vlib_main_t *vm;
-    u32 edge;
+    u32 edge, *pi;
 
+    edge = 0;
     parent_type = parent->dpoi_type;
-    parent_proto = parent->dpoi_proto;
-
     vm = vlib_get_main();
 
-    ASSERT(NULL != dpo_nodes[parent_type]);
-    ASSERT(NULL != dpo_nodes[parent_type][parent_proto]);
+    ASSERT(NULL != dpo_vfts[parent_type].dv_get_next_node);
+    parent_indices = dpo_vfts[parent_type].dv_get_next_node(parent);
+    ASSERT(parent_indices);
 
-    parent_node =
-        vlib_get_node_by_name(vm, (u8*) dpo_nodes[parent_type][parent_proto][0]);
-
-    edge = vlib_node_get_next(vm,
-                              child_node_index,
-                              parent_node->index);
-
-    if (~0 == edge)
+    /*
+     * This loop is purposefully written with the worker thread lock in the
+     * inner loop because;
+     *  1) the likelihood that the edge does not exist is smaller
+     *  2) the likelihood there is more than one node is even smaller
+     * so we are optimising for not need to take the lock
+     */
+    vec_foreach(pi, parent_indices)
     {
-        vlib_worker_thread_barrier_sync(vm);
+        edge = vlib_node_get_next(vm, child_node_index, *pi);
 
-        edge = vlib_node_add_next(vm,
-                                  child_node_index,
-                                  parent_node->index);
+        if (~0 == edge)
+        {
+            vlib_worker_thread_barrier_sync(vm);
 
-        vlib_worker_thread_barrier_release(vm);
-    }
+            edge = vlib_node_add_next(vm, child_node_index, *pi);
 
+            vlib_worker_thread_barrier_release(vm);
+        }
+    }
     dpo_stack_i(edge, dpo, parent);
 }
 
@@ -498,7 +520,8 @@ dpo_module_init (vlib_main_t * vm)
     lookup_dpo_module_init();
     ip_null_dpo_module_init();
     replicate_module_init();
-    interface_dpo_module_init();
+    interface_rx_dpo_module_init();
+    interface_tx_dpo_module_init();
     mpls_disp_dpo_module_init();
 
     return (NULL);
diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h
index 42fc51d4..33562968 100644
--- a/src/vnet/dpo/dpo.h
+++ b/src/vnet/dpo/dpo.h
@@ -112,7 +112,8 @@ typedef enum dpo_type_t_ {
     DPO_MPLS_LABEL,
     DPO_MPLS_DISPOSITION,
     DPO_MFIB_ENTRY,
-    DPO_INTERFACE,
+    DPO_INTERFACE_RX,
+    DPO_INTERFACE_TX,
     DPO_LAST,
 } __attribute__((packed)) dpo_type_t;
 
@@ -138,7 +139,8 @@ typedef enum dpo_type_t_ {
     [DPO_MPLS_LABEL] = "dpo-mpls-label", \
     [DPO_MPLS_DISPOSITION] = "dpo-mpls-diposition", \
     [DPO_MFIB_ENTRY] = "dpo-mfib_entry", \
-    [DPO_INTERFACE] = "dpo-interface"	\
+    [DPO_INTERFACE_RX] = "dpo-interface-rx",	\
+    [DPO_INTERFACE_TX] = "dpo-interface-tx"	\
 }
 
 /**
@@ -331,6 +333,12 @@ typedef void (*dpo_unlock_fn_t)(dpo_id_t *dpo);
  */
 typedef void (*dpo_mem_show_t)(void);
 
+/**
+ * @brief Given a DPO instance return a vector of node indices that
+ * the type/instance will use.
+ */
+typedef u32* (*dpo_get_next_node_t)(const dpo_id_t *dpo);
+
 /**
  * @brief A virtual function table regisitered for a DPO type
  */
@@ -352,6 +360,13 @@ typedef struct dpo_vft_t_
      * A show memory usage function
      */
     dpo_mem_show_t dv_mem_show;
+    /**
+     * A function to get the next VLIB node given an instance
+     * of the DPO. If this is null, then the node's name MUST be
+     * retreiveable from the nodes names array passed in the register
+     * function
+     */
+    dpo_get_next_node_t dv_get_next_node;
 } dpo_vft_t;
 
 
diff --git a/src/vnet/dpo/interface_dpo.c b/src/vnet/dpo/interface_dpo.c
deleted file mode 100644
index 780bfa2a..00000000
--- a/src/vnet/dpo/interface_dpo.c
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/dpo/interface_dpo.h>
-#include <vnet/fib/fib_node.h>
-
-/*
- * The 'DB' of interface DPOs.
- * There is only one  per-interface per-protocol, so this is a per-interface
- * vector
- */
-static index_t *interface_dpo_db[DPO_PROTO_NUM];
-
-static interface_dpo_t *
-interface_dpo_alloc (void)
-{
-    interface_dpo_t *ido;
-
-    pool_get(interface_dpo_pool, ido);
-
-    return (ido);
-}
-
-static inline interface_dpo_t *
-interface_dpo_get_from_dpo (const dpo_id_t *dpo)
-{
-    ASSERT(DPO_INTERFACE == dpo->dpoi_type);
-
-    return (interface_dpo_get(dpo->dpoi_index));
-}
-
-static inline index_t
-interface_dpo_get_index (interface_dpo_t *ido)
-{
-    return (ido - interface_dpo_pool);
-}
-
-static void
-interface_dpo_lock (dpo_id_t *dpo)
-{
-    interface_dpo_t *ido;
-
-    ido = interface_dpo_get_from_dpo(dpo);
-    ido->ido_locks++;
-}
-
-static void
-interface_dpo_unlock (dpo_id_t *dpo)
-{
-    interface_dpo_t *ido;
-
-    ido = interface_dpo_get_from_dpo(dpo);
-    ido->ido_locks--;
-
-    if (0 == ido->ido_locks)
-    {
-	interface_dpo_db[ido->ido_proto][ido->ido_sw_if_index] =
-            INDEX_INVALID;
-        pool_put(interface_dpo_pool, ido);
-    }
-}
-
-/*
- * interface_dpo_add_or_lock
- *
- * Add/create and lock a new or lock an existing for the interface DPO
- * on the interface and protocol given
- */
-void
-interface_dpo_add_or_lock (dpo_proto_t proto,
-                           u32 sw_if_index,
-                           dpo_id_t *dpo)
-{
-    interface_dpo_t *ido;
-
-    vec_validate_init_empty(interface_dpo_db[proto],
-                            sw_if_index,
-                            INDEX_INVALID);
-
-    if (INDEX_INVALID == interface_dpo_db[proto][sw_if_index])
-    {
-	ido = interface_dpo_alloc();
-
-        ido->ido_sw_if_index = sw_if_index;
-        ido->ido_proto = proto;
-
-	interface_dpo_db[proto][sw_if_index] =
-            interface_dpo_get_index(ido);
-    }
-    else
-    {
-	ido = interface_dpo_get(interface_dpo_db[proto][sw_if_index]);
-    }
-
-    dpo_set(dpo, DPO_INTERFACE, proto, interface_dpo_get_index(ido));
-}
-
-
-static clib_error_t *
-interface_dpo_interface_state_change (vnet_main_t * vnm,
-                                      u32 sw_if_index,
-                                      u32 flags)
-{
-    /*
-     */
-    return (NULL);
-}
-
-VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(
-    interface_dpo_interface_state_change);
-
-/**
- * @brief Registered callback for HW interface state changes
- */
-static clib_error_t *
-interface_dpo_hw_interface_state_change (vnet_main_t * vnm,
-                                         u32 hw_if_index,
-                                         u32 flags)
-{
-    return (NULL);
-}
-
-VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(
-    interface_dpo_hw_interface_state_change);
-
-static clib_error_t *
-interface_dpo_interface_delete (vnet_main_t * vnm,
-                                u32 sw_if_index,
-                                u32 is_add)
-{
-    return (NULL);
-}
-
-VNET_SW_INTERFACE_ADD_DEL_FUNCTION(
-    interface_dpo_interface_delete);
-
-u8*
-format_interface_dpo (u8* s, va_list *ap)
-{
-    index_t index = va_arg(*ap, index_t);
-    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
-    vnet_main_t * vnm = vnet_get_main();
-    interface_dpo_t *ido = interface_dpo_get(index);
-
-    return (format(s, "%U-dpo: %U",
-                   format_vnet_sw_interface_name,
-                   vnm,
-                   vnet_get_sw_interface(vnm, ido->ido_sw_if_index),
-                   format_dpo_proto, ido->ido_proto));
-}
-
-static void
-interface_dpo_mem_show (void)
-{
-    fib_show_memory_usage("Interface",
-			  pool_elts(interface_dpo_pool),
-			  pool_len(interface_dpo_pool),
-			  sizeof(interface_dpo_t));
-}
-
-
-const static dpo_vft_t interface_dpo_vft = {
-    .dv_lock = interface_dpo_lock,
-    .dv_unlock = interface_dpo_unlock,
-    .dv_format = format_interface_dpo,
-    .dv_mem_show = interface_dpo_mem_show,
-};
-
-/**
- * @brief The per-protocol VLIB graph nodes that are assigned to a glean
- *        object.
- *
- * this means that these graph nodes are ones from which a glean is the
- * parent object in the DPO-graph.
- */
-const static char* const interface_dpo_ip4_nodes[] =
-{
-    "interface-dpo-ip4",
-    NULL,
-};
-const static char* const interface_dpo_ip6_nodes[] =
-{
-    "interface-dpo-ip4",
-    NULL,
-};
-const static char* const interface_dpo_l2_nodes[] =
-{
-    "interface-dpo-l2",
-    NULL,
-};
-
-const static char* const * const interface_dpo_nodes[DPO_PROTO_NUM] =
-{
-    [DPO_PROTO_IP4]  = interface_dpo_ip4_nodes,
-    [DPO_PROTO_IP6]  = interface_dpo_ip6_nodes,
-    [DPO_PROTO_ETHERNET]  = interface_dpo_l2_nodes,
-    [DPO_PROTO_MPLS] = NULL,
-};
-
-void
-interface_dpo_module_init (void)
-{
-    dpo_register(DPO_INTERFACE,
-                 &interface_dpo_vft,
-                 interface_dpo_nodes);
-}
-
-/**
- * @brief Interface DPO trace data
- */
-typedef struct interface_dpo_trace_t_
-{
-    u32 sw_if_index;
-} interface_dpo_trace_t;
-
-typedef enum interface_dpo_next_t_
-{
-    INTERFACE_DPO_DROP = 0,
-    INTERFACE_DPO_INPUT = 1,
-} interface_dpo_next_t;
-
-always_inline uword
-interface_dpo_inline (vlib_main_t * vm,
-                      vlib_node_runtime_t * node,
-                      vlib_frame_t * from_frame)
-{
-    u32 n_left_from, next_index, * from, * to_next;
-    u32 thread_index = vlib_get_thread_index ();
-    vnet_interface_main_t *im;
-
-    im = &vnet_get_main ()->interface_main;
-    from = vlib_frame_vector_args (from_frame);
-    n_left_from = from_frame->n_vectors;
-
-    next_index = node->cached_next_index;
-
-    while (n_left_from > 0)
-    {
-        u32 n_left_to_next;
-
-        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
-
-	while (n_left_from >= 4 && n_left_to_next > 2)
-	{
-	    const interface_dpo_t *ido0, *ido1;
-	    u32 bi0, idoi0, bi1, idoi1;
-	    vlib_buffer_t *b0, *b1;
-
-	    bi0 = from[0];
-	    to_next[0] = bi0;
-	    bi1 = from[1];
-	    to_next[1] = bi1;
-	    from += 2;
-	    to_next += 2;
-	    n_left_from -= 2;
-	    n_left_to_next -= 2;
-
-	    b0 = vlib_get_buffer (vm, bi0);
-	    b1 = vlib_get_buffer (vm, bi1);
-
-	    idoi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
-	    idoi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
-	    ido0 = interface_dpo_get(idoi0);
-	    ido1 = interface_dpo_get(idoi1);
-
-	    vnet_buffer(b0)->sw_if_index[VLIB_RX] = ido0->ido_sw_if_index;
-	    vnet_buffer(b1)->sw_if_index[VLIB_RX] = ido1->ido_sw_if_index;
-
-            vlib_increment_combined_counter (im->combined_sw_if_counters
-                                             + VNET_INTERFACE_COUNTER_RX,
-                                             thread_index,
-                                             ido0->ido_sw_if_index,
-                                             1,
-                                             vlib_buffer_length_in_chain (vm, b0));
-            vlib_increment_combined_counter (im->combined_sw_if_counters
-                                             + VNET_INTERFACE_COUNTER_RX,
-                                             thread_index,
-                                             ido1->ido_sw_if_index,
-                                             1,
-                                             vlib_buffer_length_in_chain (vm, b1));
-
-	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-		interface_dpo_trace_t *tr0;
-
-                tr0 = vlib_add_trace (vm, node, b0, sizeof (*tr0));
-		tr0->sw_if_index = ido0->ido_sw_if_index;
-	    }
-	    if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-		interface_dpo_trace_t *tr1;
-
-                tr1 = vlib_add_trace (vm, node, b1, sizeof (*tr1));
-		tr1->sw_if_index = ido1->ido_sw_if_index;
-	    }
-
-	    vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
-					    n_left_to_next, bi0, bi1,
-                                            INTERFACE_DPO_INPUT,
-                                            INTERFACE_DPO_INPUT);
-	}
-
-	while (n_left_from > 0 && n_left_to_next > 0)
-	{
-	    const interface_dpo_t * ido0;
-	    vlib_buffer_t * b0;
-	    u32 bi0, idoi0;
-
-	    bi0 = from[0];
-	    to_next[0] = bi0;
-	    from += 1;
-	    to_next += 1;
-	    n_left_from -= 1;
-	    n_left_to_next -= 1;
-
-	    b0 = vlib_get_buffer (vm, bi0);
-
-	    idoi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
-	    ido0 = interface_dpo_get(idoi0);
-
-            /* Swap the RX interface of the packet to the one the
-             * interface DPR represents */
-	    vnet_buffer(b0)->sw_if_index[VLIB_RX] = ido0->ido_sw_if_index;
-
-            /* Bump the interface's RX coutners */
-            vlib_increment_combined_counter (im->combined_sw_if_counters
-                                             + VNET_INTERFACE_COUNTER_RX,
-                                             thread_index,
-                                             ido0->ido_sw_if_index,
-                                             1,
-                                             vlib_buffer_length_in_chain (vm, b0));
-
-	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-		interface_dpo_trace_t *tr;
-
-                tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
-		tr->sw_if_index = ido0->ido_sw_if_index;
-	    }
-
-	    vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
-					    n_left_to_next, bi0,
-                                            INTERFACE_DPO_INPUT);
-	}
-        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-    return from_frame->n_vectors;
-}
-
-static u8 *
-format_interface_dpo_trace (u8 * s, va_list * args)
-{
-    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-    interface_dpo_trace_t * t = va_arg (*args, interface_dpo_trace_t *);
-    uword indent = format_get_indent (s);
-    s = format (s, "%U sw_if_index:%d",
-                format_white_space, indent,
-                t->sw_if_index);
-    return s;
-}
-
-static uword
-interface_dpo_ip4 (vlib_main_t * vm,
-                   vlib_node_runtime_t * node,
-                   vlib_frame_t * from_frame)
-{
-    return (interface_dpo_inline(vm, node, from_frame));
-}
-
-static uword
-interface_dpo_ip6 (vlib_main_t * vm,
-                   vlib_node_runtime_t * node,
-                   vlib_frame_t * from_frame)
-{
-    return (interface_dpo_inline(vm, node, from_frame));
-}
-
-static uword
-interface_dpo_l2 (vlib_main_t * vm,
-                   vlib_node_runtime_t * node,
-                   vlib_frame_t * from_frame)
-{
-    return (interface_dpo_inline(vm, node, from_frame));
-}
-
-VLIB_REGISTER_NODE (interface_dpo_ip4_node) = {
-    .function = interface_dpo_ip4,
-    .name = "interface-dpo-ip4",
-    .vector_size = sizeof (u32),
-    .format_trace = format_interface_dpo_trace,
-
-    .n_next_nodes = 2,
-    .next_nodes = {
-        [INTERFACE_DPO_DROP] = "ip4-drop",
-        [INTERFACE_DPO_INPUT] = "ip4-input",
-    },
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_ip4_node,
-                              interface_dpo_ip4)
-
-VLIB_REGISTER_NODE (interface_dpo_ip6_node) = {
-    .function = interface_dpo_ip6,
-    .name = "interface-dpo-ip6",
-    .vector_size = sizeof (u32),
-    .format_trace = format_interface_dpo_trace,
-
-    .n_next_nodes = 2,
-    .next_nodes = {
-        [INTERFACE_DPO_DROP] = "ip6-drop",
-        [INTERFACE_DPO_INPUT] = "ip6-input",
-    },
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_ip6_node,
-                              interface_dpo_ip6)
-
-VLIB_REGISTER_NODE (interface_dpo_l2_node) = {
-    .function = interface_dpo_l2,
-    .name = "interface-dpo-l2",
-    .vector_size = sizeof (u32),
-    .format_trace = format_interface_dpo_trace,
-
-    .n_next_nodes = 2,
-    .next_nodes = {
-        [INTERFACE_DPO_DROP] = "error-drop",
-        [INTERFACE_DPO_INPUT] = "l2-input",
-    },
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_l2_node,
-                              interface_dpo_l2)
-
diff --git a/src/vnet/dpo/interface_dpo.h b/src/vnet/dpo/interface_dpo.h
deleted file mode 100644
index 1538dfbb..00000000
--- a/src/vnet/dpo/interface_dpo.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * @brief
- * The data-path object representing interfaceing the packet, i.e. it's for-us
- */
-
-#ifndef __INTERFACE_DPO_H__
-#define __INTERFACE_DPO_H__
-
-#include <vnet/dpo/dpo.h>
-
-typedef struct interface_dpo_t_
-{
-    /**
-     * The Software interface index that the packets will be given
-     * as the ingress/rx interface
-     */
-    u32 ido_sw_if_index;
-
-    /**
-     * next VLIB node. A '<proto>-input' node.
-     */
-    u32 ido_next_node;
-
-    /**
-     * DPO protocol that the packets will have as they 'ingress'
-     * on this interface
-     */
-    dpo_proto_t ido_proto;
-
-    /**
-     * number of locks.
-     */
-    u16 ido_locks;
-} interface_dpo_t;
-
-extern void interface_dpo_add_or_lock (dpo_proto_t proto,
-                                       u32 sw_if_index,
-                                       dpo_id_t *dpo);
-
-extern void interface_dpo_module_init(void);
-
-/**
- * @brief pool of all interface DPOs
- */
-interface_dpo_t *interface_dpo_pool;
-
-static inline interface_dpo_t *
-interface_dpo_get (index_t index)
-{
-    return (pool_elt_at_index(interface_dpo_pool, index));
-}
-
-#endif
diff --git a/src/vnet/dpo/interface_rx_dpo.c b/src/vnet/dpo/interface_rx_dpo.c
new file mode 100644
index 00000000..a624f514
--- /dev/null
+++ b/src/vnet/dpo/interface_rx_dpo.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/dpo/interface_rx_dpo.h>
+#include <vnet/fib/fib_node.h>
+
+/*
+ * The 'DB' of interface-rx DPOs.
+ * There is only one DPO per-interface per-protocol, so this is a
+ * per-protocol array of vectors, each indexed by sw_if_index.
+ */
+static index_t *interface_rx_dpo_db[DPO_PROTO_NUM];
+
+/* Take an interface-rx DPO from the pool with its lock count reset to 0 */
+static interface_rx_dpo_t *
+interface_rx_dpo_alloc (void)
+{
+    interface_rx_dpo_t *ido;
+    pool_get(interface_rx_dpo_pool, ido);
+    ido->ido_locks = 0; /* pool_get() does not promise zeroed storage */
+    return (ido);
+}
+
+static inline interface_rx_dpo_t *
+interface_rx_dpo_get_from_dpo (const dpo_id_t *dpo)
+{
+    ASSERT(DPO_INTERFACE_RX == dpo->dpoi_type);
+
+    return (interface_rx_dpo_get(dpo->dpoi_index));
+}
+
+static inline index_t
+interface_rx_dpo_get_index (interface_rx_dpo_t *ido)
+{
+    return (ido - interface_rx_dpo_pool);
+}
+
+static void
+interface_rx_dpo_lock (dpo_id_t *dpo)
+{
+    interface_rx_dpo_t *ido;
+
+    ido = interface_rx_dpo_get_from_dpo(dpo);
+    ido->ido_locks++;
+}
+
+static void
+interface_rx_dpo_unlock (dpo_id_t *dpo)
+{
+    interface_rx_dpo_t *ido;
+    /* Drop one reference on the interface-rx DPO identified by 'dpo' */
+    ido = interface_rx_dpo_get_from_dpo(dpo);
+    ido->ido_locks--;
+    /* Last reference gone: invalidate the DB slot, return object to the pool */
+    if (0 == ido->ido_locks)
+    {
+        interface_rx_dpo_db[ido->ido_proto][ido->ido_sw_if_index] =
+            INDEX_INVALID;
+        pool_put(interface_rx_dpo_pool, ido);
+    }
+}
+
+/*
+ * interface_rx_dpo_add_or_lock
+ *
+ * Create and lock a new interface-rx DPO, or lock the existing one,
+ * for the given interface and protocol.
+ */
+void
+interface_rx_dpo_add_or_lock (dpo_proto_t proto,
+                              u32 sw_if_index,
+                              dpo_id_t *dpo)
+{
+    interface_rx_dpo_t *ido;
+    /* Make sure the per-protocol DB has a slot for sw_if_index */
+    vec_validate_init_empty(interface_rx_dpo_db[proto],
+                            sw_if_index,
+                            INDEX_INVALID);
+    /* An INDEX_INVALID slot means no DPO exists yet for this proto/interface */
+    if (INDEX_INVALID == interface_rx_dpo_db[proto][sw_if_index])
+    {
+        ido = interface_rx_dpo_alloc();
+
+        ido->ido_sw_if_index = sw_if_index;
+        ido->ido_proto = proto;
+
+        interface_rx_dpo_db[proto][sw_if_index] =
+            interface_rx_dpo_get_index(ido);
+    }
+    else
+    {
+        ido = interface_rx_dpo_get(interface_rx_dpo_db[proto][sw_if_index]);
+    }
+    /* Fill the caller's 'dpo' with the type, protocol and pool index */
+    dpo_set(dpo, DPO_INTERFACE_RX, proto, interface_rx_dpo_get_index(ido));
+}
+
+
+static clib_error_t *
+interface_rx_dpo_interface_state_change (vnet_main_t * vnm,
+                                         u32 sw_if_index,
+                                         u32 flags)
+{
+    /*
+     */
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(
+    interface_rx_dpo_interface_state_change);
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+interface_rx_dpo_hw_interface_state_change (vnet_main_t * vnm,
+                                            u32 hw_if_index,
+                                            u32 flags)
+{
+    return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(
+    interface_rx_dpo_hw_interface_state_change);
+
+static clib_error_t *
+interface_rx_dpo_interface_delete (vnet_main_t * vnm,
+                                   u32 sw_if_index,
+                                   u32 is_add)
+{
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(
+    interface_rx_dpo_interface_delete);
+
+u8*
+format_interface_rx_dpo (u8* s, va_list *ap)
+{
+    index_t index = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+    vnet_main_t * vnm = vnet_get_main();
+    interface_rx_dpo_t *ido = interface_rx_dpo_get(index);
+
+    return (format(s, "%U-dpo: %U",
+                   format_vnet_sw_interface_name,
+                   vnm,
+                   vnet_get_sw_interface(vnm, ido->ido_sw_if_index),
+                   format_dpo_proto, ido->ido_proto));
+}
+
+static void
+interface_rx_dpo_mem_show (void)
+{
+    fib_show_memory_usage("Interface",
+                          pool_elts(interface_rx_dpo_pool),
+                          pool_len(interface_rx_dpo_pool),
+                          sizeof(interface_rx_dpo_t));
+}
+
+
+const static dpo_vft_t interface_rx_dpo_vft = {
+    .dv_lock = interface_rx_dpo_lock,
+    .dv_unlock = interface_rx_dpo_unlock,
+    .dv_format = format_interface_rx_dpo,
+    .dv_mem_show = interface_rx_dpo_mem_show,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to an
+ *        interface-rx DPO object.
+ *
+ * This means that these graph nodes are ones from which an interface-rx
+ * DPO is the parent object in the DPO-graph.
+ */
+const static char* const interface_rx_dpo_ip4_nodes[] =
+{
+    "interface-rx-dpo-ip4",
+    NULL,
+};
+const static char* const interface_rx_dpo_ip6_nodes[] =
+{
+    "interface-rx-dpo-ip6",
+    NULL,
+};
+const static char* const interface_rx_dpo_l2_nodes[] =
+{
+    "interface-rx-dpo-l2",
+    NULL,
+};
+
+const static char* const * const interface_rx_dpo_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = interface_rx_dpo_ip4_nodes,
+    [DPO_PROTO_IP6]  = interface_rx_dpo_ip6_nodes,
+    [DPO_PROTO_ETHERNET]  = interface_rx_dpo_l2_nodes,
+    [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+interface_rx_dpo_module_init (void)
+{
+    dpo_register(DPO_INTERFACE_RX,
+                 &interface_rx_dpo_vft,
+                 interface_rx_dpo_nodes);
+}
+
+/**
+ * @brief Interface DPO trace data
+ */
+typedef struct interface_rx_dpo_trace_t_
+{
+    u32 sw_if_index;
+} interface_rx_dpo_trace_t;
+
+typedef enum interface_rx_dpo_next_t_
+{
+    INTERFACE_RX_DPO_DROP = 0,
+    INTERFACE_RX_DPO_INPUT = 1,
+} interface_rx_dpo_next_t;
+
+always_inline uword
+interface_rx_dpo_inline (vlib_main_t * vm,
+                         vlib_node_runtime_t * node,
+                         vlib_frame_t * from_frame)
+{
+    u32 n_left_from, next_index, * from, * to_next;
+    u32 thread_index = vlib_get_thread_index ();
+    vnet_interface_main_t *im;
+
+    im = &vnet_get_main ()->interface_main;
+    from = vlib_frame_vector_args (from_frame);
+    n_left_from = from_frame->n_vectors;
+
+    next_index = node->cached_next_index;
+
+    while (n_left_from > 0)
+    {
+        u32 n_left_to_next;
+
+        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+        while (n_left_from >= 4 && n_left_to_next > 2)
+        {
+            const interface_rx_dpo_t *ido0, *ido1;
+            u32 bi0, idoi0, bi1, idoi1;
+            vlib_buffer_t *b0, *b1;
+
+            bi0 = from[0];
+            to_next[0] = bi0;
+            bi1 = from[1];
+            to_next[1] = bi1;
+            from += 2;
+            to_next += 2;
+            n_left_from -= 2;
+            n_left_to_next -= 2;
+
+            b0 = vlib_get_buffer (vm, bi0);
+            b1 = vlib_get_buffer (vm, bi1);
+
+            idoi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            idoi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+            ido0 = interface_rx_dpo_get(idoi0);
+            ido1 = interface_rx_dpo_get(idoi1);
+
+            vnet_buffer(b0)->sw_if_index[VLIB_RX] = ido0->ido_sw_if_index;
+            vnet_buffer(b1)->sw_if_index[VLIB_RX] = ido1->ido_sw_if_index;
+
+            vlib_increment_combined_counter (im->combined_sw_if_counters
+                                             + VNET_INTERFACE_COUNTER_RX,
+                                             thread_index,
+                                             ido0->ido_sw_if_index,
+                                             1,
+                                             vlib_buffer_length_in_chain (vm, b0));
+            vlib_increment_combined_counter (im->combined_sw_if_counters
+                                             + VNET_INTERFACE_COUNTER_RX,
+                                             thread_index,
+                                             ido1->ido_sw_if_index,
+                                             1,
+                                             vlib_buffer_length_in_chain (vm, b1));
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                interface_rx_dpo_trace_t *tr0;
+
+                tr0 = vlib_add_trace (vm, node, b0, sizeof (*tr0));
+                tr0->sw_if_index = ido0->ido_sw_if_index;
+            }
+            if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                interface_rx_dpo_trace_t *tr1;
+
+                tr1 = vlib_add_trace (vm, node, b1, sizeof (*tr1));
+                tr1->sw_if_index = ido1->ido_sw_if_index;
+            }
+
+            vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+                                            n_left_to_next, bi0, bi1,
+                                            INTERFACE_RX_DPO_INPUT,
+                                            INTERFACE_RX_DPO_INPUT);
+        }
+
+        while (n_left_from > 0 && n_left_to_next > 0)
+        {
+            const interface_rx_dpo_t * ido0;
+            vlib_buffer_t * b0;
+            u32 bi0, idoi0;
+
+            bi0 = from[0];
+            to_next[0] = bi0;
+            from += 1;
+            to_next += 1;
+            n_left_from -= 1;
+            n_left_to_next -= 1;
+
+            b0 = vlib_get_buffer (vm, bi0);
+
+            idoi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            ido0 = interface_rx_dpo_get(idoi0);
+
+            /* Swap the RX interface of the packet to the one the
+             * interface DPO represents */
+            vnet_buffer(b0)->sw_if_index[VLIB_RX] = ido0->ido_sw_if_index;
+
+            /* Bump the interface's RX counters */
+            vlib_increment_combined_counter (im->combined_sw_if_counters
+                                             + VNET_INTERFACE_COUNTER_RX,
+                                             thread_index,
+                                             ido0->ido_sw_if_index,
+                                             1,
+                                             vlib_buffer_length_in_chain (vm, b0));
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                interface_rx_dpo_trace_t *tr;
+
+                tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+                tr->sw_if_index = ido0->ido_sw_if_index;
+            }
+
+            vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+                                            n_left_to_next, bi0,
+                                            INTERFACE_RX_DPO_INPUT);
+        }
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+    return from_frame->n_vectors;
+}
+
+static u8 *
+format_interface_rx_dpo_trace (u8 * s, va_list * args)
+{
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    interface_rx_dpo_trace_t * t = va_arg (*args, interface_rx_dpo_trace_t *);
+    uword indent = format_get_indent (s);
+    s = format (s, "%U sw_if_index:%d",
+                format_white_space, indent,
+                t->sw_if_index);
+    return s;
+}
+
+static uword
+interface_rx_dpo_ip4 (vlib_main_t * vm,
+                      vlib_node_runtime_t * node,
+                      vlib_frame_t * from_frame)
+{
+    return (interface_rx_dpo_inline(vm, node, from_frame));
+}
+
+static uword
+interface_rx_dpo_ip6 (vlib_main_t * vm,
+                      vlib_node_runtime_t * node,
+                      vlib_frame_t * from_frame)
+{
+    return (interface_rx_dpo_inline(vm, node, from_frame));
+}
+
+static uword
+interface_rx_dpo_l2 (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * from_frame)
+{
+    return (interface_rx_dpo_inline(vm, node, from_frame));
+}
+
+VLIB_REGISTER_NODE (interface_rx_dpo_ip4_node) = {
+    .function = interface_rx_dpo_ip4,
+    .name = "interface-rx-dpo-ip4",
+    .vector_size = sizeof (u32),
+    .format_trace = format_interface_rx_dpo_trace,
+
+    .n_next_nodes = 2,
+    .next_nodes = {
+        [INTERFACE_RX_DPO_DROP] = "ip4-drop",
+        [INTERFACE_RX_DPO_INPUT] = "ip4-input",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (interface_rx_dpo_ip4_node,
+                              interface_rx_dpo_ip4)
+
+VLIB_REGISTER_NODE (interface_rx_dpo_ip6_node) = {
+    .function = interface_rx_dpo_ip6,
+    .name = "interface-rx-dpo-ip6",
+    .vector_size = sizeof (u32),
+    .format_trace = format_interface_rx_dpo_trace,
+
+    .n_next_nodes = 2,
+    .next_nodes = {
+        [INTERFACE_RX_DPO_DROP] = "ip6-drop",
+        [INTERFACE_RX_DPO_INPUT] = "ip6-input",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (interface_rx_dpo_ip6_node,
+                              interface_rx_dpo_ip6)
+
+VLIB_REGISTER_NODE (interface_rx_dpo_l2_node) = {
+    .function = interface_rx_dpo_l2,
+    .name = "interface-rx-dpo-l2",
+    .vector_size = sizeof (u32),
+    .format_trace = format_interface_rx_dpo_trace,
+
+    .n_next_nodes = 2,
+    .next_nodes = {
+        [INTERFACE_RX_DPO_DROP] = "error-drop",
+        [INTERFACE_RX_DPO_INPUT] = "l2-input",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (interface_rx_dpo_l2_node,
+                              interface_rx_dpo_l2)
diff --git a/src/vnet/dpo/interface_rx_dpo.h b/src/vnet/dpo/interface_rx_dpo.h
new file mode 100644
index 00000000..edecce08
--- /dev/null
+++ b/src/vnet/dpo/interface_rx_dpo.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERFACE_RX_DPO_H__
+#define __INTERFACE_RX_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+/**
+ * @brief
+ * The data-path object representing a change of receive interface.
+ * If a packet encounters an object of this type in the data-path, its
+ * RX interface is changed.
+ */
+typedef struct interface_rx_dpo_t_
+{
+    /**
+     * The Software interface index that the packets will be given
+     * as the ingress/rx interface
+     */
+    u32 ido_sw_if_index;
+
+    /**
+     * next VLIB node. A '<proto>-input' node.
+     */
+    u32 ido_next_node;
+
+    /**
+     * DPO protocol that the packets will have as they 'ingress'
+     * on this interface
+     */
+    dpo_proto_t ido_proto;
+
+    /**
+     * number of locks.
+     */
+    u16 ido_locks;
+} interface_rx_dpo_t;
+
+extern void interface_rx_dpo_add_or_lock (dpo_proto_t proto,
+                                          u32 sw_if_index,
+                                          dpo_id_t *dpo);
+
+extern void interface_rx_dpo_module_init(void);
+
+/**
+ * @brief pool of all interface-rx DPOs
+ */
+interface_rx_dpo_t *interface_rx_dpo_pool;
+
+static inline interface_rx_dpo_t *
+interface_rx_dpo_get (index_t index)
+{
+    return (pool_elt_at_index(interface_rx_dpo_pool, index));
+}
+
+#endif
diff --git a/src/vnet/dpo/interface_tx_dpo.c b/src/vnet/dpo/interface_tx_dpo.c
new file mode 100644
index 00000000..f7c8bfda
--- /dev/null
+++ b/src/vnet/dpo/interface_tx_dpo.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/dpo/interface_tx_dpo.h>
+#include <vnet/adj/rewrite.h>
+
+/*
+ * We neither lock nor unlock these DPOs, since there is nothing to lock;
+ * all we do is construct DPO object wrappers around a sw_if_index.
+ */
+static void
+interface_tx_dpo_lock (dpo_id_t *dpo)
+{
+}
+
+static void
+interface_tx_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+/*
+ * interface_tx_dpo_add_or_lock: set 'dpo' to a DPO_INTERFACE_TX object of
+ * protocol 'proto' whose index is 'sw_if_index' itself. The only call made
+ * here is dpo_set; the wrapper is built directly around the sw_if_index.
+ */
+void
+interface_tx_dpo_add_or_lock (dpo_proto_t proto,
+                              u32 sw_if_index,
+                              dpo_id_t *dpo)
+{
+    dpo_set(dpo, DPO_INTERFACE_TX, proto, sw_if_index);
+}
+
+/* Format an interface-tx DPO as "<sw interface name>-dpo:". */
+u8*
+format_interface_tx_dpo (u8* s, va_list *ap)
+{
+    index_t sw_if_index = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32); /* consumed, not used */
+    vnet_main_t *vnm = vnet_get_main();
+    vnet_sw_interface_t *si = vnet_get_sw_interface(vnm, sw_if_index);
+
+    s = format(s, "%U-dpo:", format_vnet_sw_interface_name, vnm, si);
+    return (s);
+}
+
+static void
+interface_tx_dpo_mem_show (void)
+{
+}
+
+/*
+ * Build a one-element vector holding the TX node index of the sw interface
+ * whose index is stored in dpo->dpoi_index, and return that vector.
+ */
+u32*
+interface_tx_dpo_get_next_node (const dpo_id_t *dpo)
+{
+    vnet_main_t *vnm = vnet_get_main();
+    u32 *next_nodes = NULL;
+    u32 tx_node = vnet_tx_node_index_for_sw_interface(vnm, dpo->dpoi_index);
+
+    vec_add1(next_nodes, tx_node);
+    return (next_nodes);
+}
+
+static const dpo_vft_t interface_tx_dpo_vft = { /* for DPO_INTERFACE_TX */
+    .dv_lock = interface_tx_dpo_lock,
+    .dv_unlock = interface_tx_dpo_unlock,
+    .dv_format = format_interface_tx_dpo,
+    .dv_mem_show = interface_tx_dpo_mem_show,
+    .dv_get_next_node = interface_tx_dpo_get_next_node,
+};
+
+void
+interface_tx_dpo_module_init (void)
+{
+    dpo_register(DPO_INTERFACE_TX, &interface_tx_dpo_vft, NULL);
+}
+
diff --git a/src/vnet/dpo/interface_tx_dpo.h b/src/vnet/dpo/interface_tx_dpo.h
new file mode 100644
index 00000000..0c560ada
--- /dev/null
+++ b/src/vnet/dpo/interface_tx_dpo.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing transmitting the packet on an interface.
+ * This is a convenient DPO wrapper around a simple interface transmit and thus
+ * allows us to represent direct interface transmit in the DPO model.
+ */
+
+#ifndef __INTERFACE_TX_DPO_H__
+#define __INTERFACE_TX_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+extern void interface_tx_dpo_add_or_lock (dpo_proto_t proto,
+                                          u32 sw_if_index,
+                                          dpo_id_t *dpo);
+
+extern void interface_tx_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 58050ccb..cf5a463d 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -21,7 +21,7 @@
 #include <vnet/dpo/receive_dpo.h>
 #include <vnet/dpo/load_balance_map.h>
 #include <vnet/dpo/lookup_dpo.h>
-#include <vnet/dpo/interface_dpo.h>
+#include <vnet/dpo/interface_rx_dpo.h>
 #include <vnet/dpo/mpls_disposition.h>
 
 #include <vnet/adj/adj.h>
@@ -1707,9 +1707,9 @@ fib_path_resolve (fib_node_index_t path_index)
 	/*
 	 * Resolve via a receive DPO.
 	 */
-	interface_dpo_add_or_lock(path->fp_nh_proto,
-                                  path->intf_rx.fp_interface,
-                                  &path->fp_dpo);
+	interface_rx_dpo_add_or_lock(path->fp_nh_proto,
+                                     path->intf_rx.fp_interface,
+                                     &path->fp_dpo);
 	break;
     }
     case FIB_PATH_TYPE_EXCLUSIVE:
@@ -2041,9 +2041,9 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
             /*
              * Create the adj needed for sending IP multicast traffic
              */
-            interface_dpo_add_or_lock(fib_forw_chain_type_to_dpo_proto(fct),
-                                      path->attached.fp_interface,
-                                      dpo);
+            interface_rx_dpo_add_or_lock(fib_forw_chain_type_to_dpo_proto(fct),
+                                         path->attached.fp_interface,
+                                         dpo);
             break;
         case FIB_PATH_TYPE_RECEIVE:
         case FIB_PATH_TYPE_SPECIAL:
diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c
index 7ca4cb3a..6867cca8 100644
--- a/src/vnet/fib/fib_test.c
+++ b/src/vnet/fib/fib_test.c
@@ -25,7 +25,7 @@
 #include <vnet/dpo/receive_dpo.h>
 #include <vnet/dpo/ip_null_dpo.h>
 #include <vnet/bfd/bfd_main.h>
-#include <vnet/dpo/interface_dpo.h>
+#include <vnet/dpo/interface_rx_dpo.h>
 #include <vnet/dpo/replicate_dpo.h>
 
 #include <vnet/mpls/mpls.h>
@@ -407,7 +407,7 @@ fib_test_validate_rep_v (const replicate_t *rep,
 	    }
 	    break;
 	case FT_REP_INTF:
-            FIB_TEST_LB((DPO_INTERFACE == dpo->dpoi_type),
+            FIB_TEST_LB((DPO_INTERFACE_RX == dpo->dpoi_type),
                         "bucket %d stacks on %U",
                         bucket,
                         format_dpo_type, dpo->dpoi_type);
@@ -589,7 +589,7 @@ fib_test_validate_lb_v (const load_balance_t *lb,
 			exp->adj.adj);
 	    break;
 	case FT_LB_INTF:
-	    FIB_TEST_I((DPO_INTERFACE == dpo->dpoi_type),
+	    FIB_TEST_I((DPO_INTERFACE_RX == dpo->dpoi_type),
 		       "bucket %d stacks on %U",
 		       bucket,
 		       format_dpo_type, dpo->dpoi_type);
@@ -8523,7 +8523,7 @@ lfib_test (void)
      */
     dpo_id_t idpo = DPO_INVALID;
 
-    interface_dpo_add_or_lock(DPO_PROTO_IP4,
+    interface_rx_dpo_add_or_lock(DPO_PROTO_IP4,
                               tm->hw[0]->sw_if_index,
                               &idpo);
 
@@ -8667,9 +8667,9 @@ lfib_test (void)
     FIB_TEST(lb_count == pool_elts(load_balance_pool),
 	     "Load-balance resources freed %d of %d",
              lb_count, pool_elts(load_balance_pool));
-    FIB_TEST(0 == pool_elts(interface_dpo_pool),
-	     "interface_dpo resources freed %d of %d",
-             0, pool_elts(interface_dpo_pool));
+    FIB_TEST(0 == pool_elts(interface_rx_dpo_pool),
+	     "interface_rx_dpo resources freed %d of %d",
+             0, pool_elts(interface_rx_dpo_pool));
 
     return (0);
 }
diff --git a/test/test_pppoe.py b/test/test_pppoe.py
index 0baf4546..1d0aeffd 100644
--- a/test/test_pppoe.py
+++ b/test/test_pppoe.py
@@ -281,7 +281,7 @@ class TestPPPoE(VppTestCase):
         #
         self.vapi.cli("clear trace")
         tx2 = self.create_stream_ip4(self.pg1, self.pg0,
-                                     self.pg0.remote_ip4, self.dst_ip)
+                                     self.pg0.remote_ip4, self.dst_ip, 65)
         self.pg1.add_stream(tx2)
 
         self.pg_enable_capture(self.pg_interfaces)
@@ -293,6 +293,7 @@ class TestPPPoE(VppTestCase):
         self.logger.info(self.vapi.cli("show pppoe fib"))
         self.logger.info(self.vapi.cli("show pppoe session"))
         self.logger.info(self.vapi.cli("show ip fib"))
+        self.logger.info(self.vapi.cli("show adj"))
 
         #
         # test case cleanup
-- 
cgit 1.2.3-korg


From 1500254bee11355bbd69cc1dd9705be4f002f2bd Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Sun, 10 Sep 2017 04:39:11 -0700
Subject: FIB table add/delete API

part 2;
  - this adds the code to create an IP and MPLS table via the API.
  - but the enforcement that the table must be created before it is used is still missing, this is so that CSIT can pass.

Change-Id: Id124d884ade6cb7da947225200e3bb193454c555
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/plugins/nat/nat.c                         |  17 +-
 src/plugins/nat/nat64.c                       |  13 +-
 src/vnet/classify/vnet_classify.c             |  16 +-
 src/vnet/dhcp/dhcp4_proxy_node.c              |   9 +-
 src/vnet/dhcp/dhcp6_proxy_node.c              |   9 +-
 src/vnet/dhcp/dhcp_proxy.c                    |  19 ++-
 src/vnet/dpo/lookup_dpo.c                     |  20 ++-
 src/vnet/dpo/mpls_label_dpo.c                 |  12 +-
 src/vnet/ethernet/arp.c                       | 127 +++++++++++----
 src/vnet/fib/fib_api.h                        |   1 -
 src/vnet/fib/fib_entry.c                      |  15 +-
 src/vnet/fib/fib_entry.h                      |   1 +
 src/vnet/fib/fib_entry_src_mpls.c             |   7 +-
 src/vnet/fib/fib_table.c                      |  43 +++--
 src/vnet/fib/fib_table.h                      |  32 +++-
 src/vnet/fib/fib_test.c                       |  27 ++--
 src/vnet/fib/ip4_fib.c                        |  41 +++--
 src/vnet/fib/ip4_fib.h                        |   5 +-
 src/vnet/fib/ip6_fib.c                        |  41 +++--
 src/vnet/fib/ip6_fib.h                        |   5 +-
 src/vnet/fib/mpls_fib.c                       |  16 +-
 src/vnet/fib/mpls_fib.h                       |   5 +-
 src/vnet/interface_api.c                      | 177 ++++++++++++++++----
 src/vnet/ip/ip.h                              |   7 +
 src/vnet/ip/ip4.h                             |  13 ++
 src/vnet/ip/ip4_forward.c                     | 101 +-----------
 src/vnet/ip/ip4_source_and_port_range_check.c |  11 +-
 src/vnet/ip/ip6.h                             |  13 ++
 src/vnet/ip/ip6_forward.c                     | 103 +-----------
 src/vnet/ip/ip6_neighbor.c                    | 108 +++++++++----
 src/vnet/ip/ip_api.c                          | 122 +++++++++++---
 src/vnet/ip/lookup.c                          | 225 ++++++++++++++++++++++++++
 src/vnet/lisp-gpe/interface.c                 |  11 +-
 src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c        |   9 +-
 src/vnet/lisp-gpe/lisp_gpe_sub_interface.c    |  11 +-
 src/vnet/mfib/ip4_mfib.c                      |  12 +-
 src/vnet/mfib/ip4_mfib.h                      |   5 +-
 src/vnet/mfib/ip6_mfib.c                      |  12 +-
 src/vnet/mfib/ip6_mfib.h                      |   5 +-
 src/vnet/mfib/mfib_entry.c                    |  11 ++
 src/vnet/mfib/mfib_entry.h                    |   2 +
 src/vnet/mfib/mfib_table.c                    |  88 ++++++++--
 src/vnet/mfib/mfib_table.h                    |  29 +++-
 src/vnet/mfib/mfib_test.c                     |  11 +-
 src/vnet/mfib/mfib_types.h                    |   8 +-
 src/vnet/mpls/interface.c                     |  26 ++-
 src/vnet/mpls/mpls.c                          |  76 ++++++++-
 src/vnet/mpls/mpls.h                          |  16 +-
 src/vnet/mpls/mpls_api.c                      |  66 ++++++--
 src/vnet/srv6/sr_policy_rewrite.c             |   6 +-
 src/vnet/srv6/sr_steering.c                   |   6 +-
 src/vpp/api/api.c                             |   5 +-
 src/vpp/api/custom_dump.c                     |   3 -
 test/test_dhcp.py                             |  24 ++-
 test/test_gre.py                              |   8 +-
 test/test_ip4.py                              |  11 +-
 test/test_ip4_vrf_multi_instance.py           |   4 +-
 test/test_ip6.py                              |   7 +-
 test/test_ip6_vrf_multi_instance.py           |   4 +-
 test/test_ip_mcast.py                         |  98 ++++++++++-
 test/test_mpls.py                             |  48 +++++-
 test/test_nat.py                              |  13 ++
 test/test_neighbor.py                         |  66 +++++++-
 test/vpp_ip_route.py                          |  73 +++++++++
 test/vpp_papi_provider.py                     |  46 ++++--
 65 files changed, 1643 insertions(+), 538 deletions(-)

(limited to 'src/vnet/dpo')

diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c
index aa7ef10a..8aecac6d 100644
--- a/src/plugins/nat/nat.c
+++ b/src/plugins/nat/nat.c
@@ -167,7 +167,8 @@ void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id)
   ap->addr = *addr;
   if (vrf_id != ~0)
     ap->fib_index =
-      fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id);
+      fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
+                                         FIB_SOURCE_PLUGIN_HI);
   else
     ap->fib_index = ~0;
 #define _(N, i, n, s) \
@@ -625,7 +626,8 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
         return VNET_API_ERROR_INVALID_VALUE;
 
       fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
-                                                     vrf_id);
+                                                     vrf_id,
+                                                     FIB_SOURCE_PLUGIN_HI);
 
       /* Find external address in allocated addresses and reserve port for
          address and port pair mapping when dynamic translations enabled */
@@ -754,7 +756,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
       if (!m)
         return VNET_API_ERROR_NO_SUCH_ENTRY;
 
-      fib_table_unlock (m->fib_index, FIB_PROTOCOL_IP4);
+      fib_table_unlock (m->fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_PLUGIN_HI);
 
       /* Free external address port */
       if (!sm->static_mapping_only)
@@ -874,7 +876,8 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm)
     }
 
   if (a->fib_index != ~0)
-    fib_table_unlock(a->fib_index, FIB_PROTOCOL_IP4);
+    fib_table_unlock(a->fib_index, FIB_PROTOCOL_IP4,
+                     FIB_SOURCE_PLUGIN_HI);
 
   /* Delete sessions using address */
   if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
@@ -2151,10 +2154,12 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
   sm->max_translations_per_user = max_translations_per_user;
   sm->outside_vrf_id = outside_vrf_id;
   sm->outside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
-                                                             outside_vrf_id);
+                                                             outside_vrf_id,
+                                                             FIB_SOURCE_PLUGIN_HI);
   sm->inside_vrf_id = inside_vrf_id;
   sm->inside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
-                                                            inside_vrf_id);
+                                                            inside_vrf_id,
+                                                            FIB_SOURCE_PLUGIN_HI);
   sm->static_mapping_only = static_mapping_only;
   sm->static_mapping_connection_tracking = static_mapping_connection_tracking;
 
diff --git a/src/plugins/nat/nat64.c b/src/plugins/nat/nat64.c
index b04901fa..bfcfa9b3 100644
--- a/src/plugins/nat/nat64.c
+++ b/src/plugins/nat/nat64.c
@@ -107,7 +107,8 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add)
       a->fib_index = 0;
       if (vrf_id != ~0)
 	a->fib_index =
-	  fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id);
+	  fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
+					     FIB_SOURCE_PLUGIN_HI);
 #define _(N, i, n, s) \
       clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535);
       foreach_snat_protocol
@@ -119,7 +120,8 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add)
 	return VNET_API_ERROR_NO_SUCH_ENTRY;
 
       if (a->fib_index)
-	fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6);
+	fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6,
+			  FIB_SOURCE_PLUGIN_HI);
 
 #define _(N, id, n, s) \
       clib_bitmap_free (a->busy_##n##_port_bitmap);
@@ -353,8 +355,8 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
 {
   nat64_main_t *nm = &nat64_main;
   nat64_db_bib_entry_t *bibe;
-  u32 fib_index =
-    fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id);
+  u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
+						     FIB_SOURCE_PLUGIN_HI);
   snat_protocol_t p = ip_proto_to_snat_proto (proto);
   ip46_address_t addr;
   int i;
@@ -644,7 +646,8 @@ nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, u8 is_add)
 	{
 	  vec_add2 (nm->pref64, p, 1);
 	  p->fib_index =
-	    fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id);
+	    fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
+					       FIB_SOURCE_PLUGIN_HI);
 	  p->vrf_id = vrf_id;
 	}
 
diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c
index 879fba3c..57d86748 100644
--- a/src/vnet/classify/vnet_classify.c
+++ b/src/vnet/classify/vnet_classify.c
@@ -368,10 +368,10 @@ vnet_classify_entry_claim_resource (vnet_classify_entry_t *e)
     switch (e->action)
     {
     case CLASSIFY_ACTION_SET_IP4_FIB_INDEX:
-        fib_table_lock (e->metadata, FIB_PROTOCOL_IP4);
+        fib_table_lock (e->metadata, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY);
         break;
     case CLASSIFY_ACTION_SET_IP6_FIB_INDEX:
-        fib_table_lock (e->metadata, FIB_PROTOCOL_IP6);
+        fib_table_lock (e->metadata, FIB_PROTOCOL_IP6, FIB_SOURCE_CLASSIFY);
         break;
     }
 }
@@ -382,10 +382,10 @@ vnet_classify_entry_release_resource (vnet_classify_entry_t *e)
     switch (e->action)
     {
     case CLASSIFY_ACTION_SET_IP4_FIB_INDEX:
-        fib_table_unlock (e->metadata, FIB_PROTOCOL_IP4);
+        fib_table_unlock (e->metadata, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY);
         break;
     case CLASSIFY_ACTION_SET_IP6_FIB_INDEX:
-        fib_table_unlock (e->metadata, FIB_PROTOCOL_IP6);
+        fib_table_unlock (e->metadata, FIB_PROTOCOL_IP6, FIB_SOURCE_CLASSIFY);
         break;
     }
 }
@@ -2096,9 +2096,13 @@ int vnet_classify_add_del_session (vnet_classify_main_t * cm,
   e->flags = 0;
   e->action = action;
   if (e->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX)
-    e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, metadata);
+    e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+                                                     metadata,
+                                                     FIB_SOURCE_CLASSIFY);
   else if (e->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
-    e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, metadata);
+    e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6,
+                                                     metadata,
+                                                     FIB_SOURCE_CLASSIFY);
   else
     e->metadata = 0;
 
diff --git a/src/vnet/dhcp/dhcp4_proxy_node.c b/src/vnet/dhcp/dhcp4_proxy_node.c
index 1b59cdea..339a7885 100644
--- a/src/vnet/dhcp/dhcp4_proxy_node.c
+++ b/src/vnet/dhcp/dhcp4_proxy_node.c
@@ -785,7 +785,8 @@ dhcp4_proxy_set_server (ip46_address_t *addr,
     return VNET_API_ERROR_INVALID_SRC_ADDRESS;
 
   rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
-                                                   rx_table_id);
+                                                   rx_table_id,
+                                                   FIB_SOURCE_DHCP);
 
   if (is_del)
     {
@@ -795,7 +796,7 @@ dhcp4_proxy_set_server (ip46_address_t *addr,
           fib_table_entry_special_remove(rx_fib_index,
                                          &all_1s,
                                          FIB_SOURCE_DHCP);
-          fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4);
+          fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP);
       }
     }
   else
@@ -808,10 +809,10 @@ dhcp4_proxy_set_server (ip46_address_t *addr,
                                       &all_1s,
                                       FIB_SOURCE_DHCP,
                                       FIB_ENTRY_FLAG_LOCAL);
-          fib_table_lock (rx_fib_index, FIB_PROTOCOL_IP4);
+          fib_table_lock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP);
       }
   }
-  fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4);
+  fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP);
 
   return (rc);
 }
diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c
index 9c2f5220..ce7a8fca 100644
--- a/src/vnet/dhcp/dhcp6_proxy_node.c
+++ b/src/vnet/dhcp/dhcp6_proxy_node.c
@@ -841,7 +841,8 @@ dhcp6_proxy_set_server (ip46_address_t *addr,
     return VNET_API_ERROR_INVALID_SRC_ADDRESS;
 
   rx_fib_index = mfib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6,
-                                                    rx_table_id);
+                                                    rx_table_id,
+                                                    MFIB_SOURCE_DHCP);
 
   if (is_del)
     {
@@ -851,7 +852,7 @@ dhcp6_proxy_set_server (ip46_address_t *addr,
           mfib_table_entry_delete(rx_fib_index,
                                   &all_dhcp_servers,
                                   MFIB_SOURCE_DHCP);
-          mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6);
+          mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP);
       }
     }
   else
@@ -885,11 +886,11 @@ dhcp6_proxy_set_server (ip46_address_t *addr,
                                  MFIB_SOURCE_DHCP,
                                  MFIB_RPF_ID_NONE,
                                  MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF);
-         mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6);
+         mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP);
      }
     }
 
-  mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6);
+  mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP);
 
   return (rc);
 }
diff --git a/src/vnet/dhcp/dhcp_proxy.c b/src/vnet/dhcp/dhcp_proxy.c
index ba7f354e..1784906b 100644
--- a/src/vnet/dhcp/dhcp_proxy.c
+++ b/src/vnet/dhcp/dhcp_proxy.c
@@ -29,9 +29,9 @@ dhcp_proxy_rx_table_lock (fib_protocol_t proto,
                           u32 fib_index)
 {
     if (FIB_PROTOCOL_IP4 == proto)
-        fib_table_lock(fib_index, proto);
+        fib_table_lock(fib_index, proto, FIB_SOURCE_DHCP);
     else
-        mfib_table_lock(fib_index, proto);
+        mfib_table_lock(fib_index, proto, MFIB_SOURCE_DHCP);
 }
 
 static void
@@ -39,9 +39,9 @@ dhcp_proxy_rx_table_unlock (fib_protocol_t proto,
                             u32 fib_index)
 {
     if (FIB_PROTOCOL_IP4 == proto)
-        fib_table_unlock(fib_index, proto);
+        fib_table_unlock(fib_index, proto, FIB_SOURCE_DHCP);
     else
-        mfib_table_unlock(fib_index, proto);
+        mfib_table_unlock(fib_index, proto, MFIB_SOURCE_DHCP);
 }
 
  u32
@@ -169,7 +169,7 @@ dhcp_proxy_server_del (fib_protocol_t proto,
       if (~0 != index)
       {
           server = &proxy->dhcp_servers[index];
-          fib_table_unlock (server->server_fib_index, proto);
+          fib_table_unlock (server->server_fib_index, proto, FIB_SOURCE_DHCP);
 
           vec_del1(proxy->dhcp_servers, index);
 
@@ -228,7 +228,8 @@ dhcp_proxy_server_add (fib_protocol_t proto,
   dhcp_server_t server = {
       .dhcp_server = *addr,
       .server_fib_index = fib_table_find_or_create_and_lock(proto,
-                                                            server_table_id),
+                                                            server_table_id,
+                                                            FIB_SOURCE_DHCP),
   };
 
   vec_add1(proxy->dhcp_servers, server);
@@ -297,9 +298,11 @@ int dhcp_proxy_set_vss (fib_protocol_t proto,
   int rc = 0;
   
   if (proto == FIB_PROTOCOL_IP4)
-      rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id);
+      rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id,
+                                                       FIB_SOURCE_DHCP);
   else
-      rx_fib_index = mfib_table_find_or_create_and_lock(proto, tbl_id);
+      rx_fib_index = mfib_table_find_or_create_and_lock(proto, tbl_id,
+                                                        MFIB_SOURCE_DHCP);
   v = dhcp_get_vss_info(dm, rx_fib_index, proto);
 
   if (NULL != v)
diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
index 26363a2f..af189eda 100644
--- a/src/vnet/dpo/lookup_dpo.c
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -135,11 +135,15 @@ lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index,
     {
         if (LOOKUP_UNICAST == cast)
         {
-            fib_table_lock(fib_index, dpo_proto_to_fib(proto));
+            fib_table_lock(fib_index,
+                           dpo_proto_to_fib(proto),
+                           FIB_SOURCE_RR);
         }
         else
         {
-            mfib_table_lock(fib_index, dpo_proto_to_fib(proto));
+            mfib_table_lock(fib_index,
+                            dpo_proto_to_fib(proto),
+                            MFIB_SOURCE_RR);
         }
     }
     lookup_dpo_add_or_lock_i(fib_index, proto, cast, input, table_config, dpo);
@@ -161,13 +165,15 @@ lookup_dpo_add_or_lock_w_table_id (u32 table_id,
         {
             fib_index =
                 fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto),
-                                                  table_id);
+                                                  table_id,
+                                                  FIB_SOURCE_RR);
         }
         else
         {
             fib_index =
                 mfib_table_find_or_create_and_lock(dpo_proto_to_fib(proto),
-                                                   table_id);
+                                                   table_id,
+                                                   MFIB_SOURCE_RR);
         }
     }
 
@@ -238,12 +244,14 @@ lookup_dpo_unlock (dpo_id_t *dpo)
             if (LOOKUP_UNICAST == lkd->lkd_cast)
             {
                 fib_table_unlock(lkd->lkd_fib_index,
-                                 dpo_proto_to_fib(lkd->lkd_proto));
+                                 dpo_proto_to_fib(lkd->lkd_proto),
+                                 FIB_SOURCE_RR);
             }
             else
             {
                 mfib_table_unlock(lkd->lkd_fib_index,
-                                  dpo_proto_to_fib(lkd->lkd_proto));
+                                  dpo_proto_to_fib(lkd->lkd_proto),
+                                  MFIB_SOURCE_RR);
             }
         }
         pool_put(lookup_dpo_pool, lkd);
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
index b178a902..2a6e7dd5 100644
--- a/src/vnet/dpo/mpls_label_dpo.c
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -105,10 +105,18 @@ format_mpls_label_dpo (u8 *s, va_list *args)
     mpls_label_dpo_t *mld;
     u32 ii;
 
-    mld = mpls_label_dpo_get(index);
-
     s = format(s, "mpls-label:[%d]:", index);
 
+    if (pool_is_free_index(mpls_label_dpo_pool, index))
+    {
+        /*
+         * the packet trace can be printed after the DPO has been deleted
+         */
+        return (s);
+    }
+
+    mld = mpls_label_dpo_get(index);
+
     for (ii = 0; ii < mld->mld_n_labels; ii++)
     {
 	hdr.label_exp_s_ttl =
diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c
index c84ff47b..08e91373 100644
--- a/src/vnet/ethernet/arp.c
+++ b/src/vnet/ethernet/arp.c
@@ -522,6 +522,24 @@ arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
     }
 }
 
+/* Add e's /32 adj-fib (FIB_SOURCE_ADJ) to table fib_index, then lock the table. */
+static void
+arp_adj_fib_add (ethernet_arp_ip4_entry_t * e, uint32_t fib_index)
+{
+  fib_prefix_t host_pfx = {
+    .fp_proto = FIB_PROTOCOL_IP4,
+    .fp_len = 32,
+    .fp_addr.ip4 = e->ip4_address,
+  };
+
+  e->fib_entry_index =
+    fib_table_entry_path_add (fib_index, &host_pfx, FIB_SOURCE_ADJ,
+			      FIB_ENTRY_FLAG_ATTACHED, DPO_PROTO_IP4,
+			      &host_pfx.fp_addr, e->sw_if_index, ~0, 1,
+			      NULL, FIB_ROUTE_PATH_FLAG_NONE);
+  fib_table_lock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
+}
+
 int
 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
 					 vnet_arp_set_ip4_over_ethernet_rpc_args_t
@@ -576,21 +594,9 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
 
       if (!is_no_fib_entry)
 	{
-	  fib_prefix_t pfx = {
-	    .fp_len = 32,
-	    .fp_proto = FIB_PROTOCOL_IP4,
-	    .fp_addr.ip4 = a->ip4,
-	  };
-	  u32 fib_index;
-
-	  fib_index =
-	    ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
-	  e->fib_entry_index =
-	    fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
-				      FIB_ENTRY_FLAG_ATTACHED,
-				      DPO_PROTO_IP4, &pfx.fp_addr,
-				      e->sw_if_index, ~0, 1, NULL,
-				      FIB_ROUTE_PATH_FLAG_NONE);
+	  arp_adj_fib_add (e,
+			   ip4_fib_table_get_index_for_sw_if_index
+			   (e->sw_if_index));
 	}
       else
 	{
@@ -1561,6 +1567,65 @@ arp_add_del_interface_address (ip4_main_t * im,
     }
 }
 
+/*
+ * Remove e's /32 adj-fib path (FIB_SOURCE_ADJ) from table fib_index and drop
+ * one FIB_SOURCE_ADJ lock on it; nothing is done if e has no FIB entry.
+ */
+void
+arp_adj_fib_remove (ethernet_arp_ip4_entry_t * e, uint32_t fib_index)
+{
+  if (FIB_NODE_INDEX_INVALID != e->fib_entry_index)
+    {
+      fib_prefix_t pfx = {
+	.fp_len = 32,
+	.fp_proto = FIB_PROTOCOL_IP4,
+	.fp_addr.ip4 = e->ip4_address,
+      };
+
+      /* act on the table the caller names, not the interface's current one */
+      fib_table_entry_path_remove (fib_index, &pfx, FIB_SOURCE_ADJ,
+				   DPO_PROTO_IP4, &pfx.fp_addr,
+				   e->sw_if_index, ~0, 1,
+				   FIB_ROUTE_PATH_FLAG_NONE);
+      fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
+    }
+}
+
+static void
+arp_table_bind (ip4_main_t * im,
+		uword opaque,
+		u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_arp_interface_t *eai;
+  ethernet_arp_ip4_entry_t *e;
+  hash_pair_t *pair;
+
+  /*
+   * Table-bind callback: the IP table that sw_if_index is bound to has
+   * changed from old_fib_index to new_fib_index, so reinstall all of the
+   * interface's adj-fibs. The im and opaque arguments are not used.
+   */
+  /* nothing to do if the per-interface ARP vector has no slot for it */
+  if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
+    return;
+
+  eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+
+  /* *INDENT-OFF* */
+  hash_foreach_pair (pair, eai->arp_entries,
+  ({
+    e = pool_elt_at_index(am->ip4_entry_pool,
+                          pair->value[0]);
+    /*
+     * remove the adj-fib from the old table and add to the new
+     */
+    arp_adj_fib_remove(e, old_fib_index);
+    arp_adj_fib_add(e, new_fib_index);
+  }));
+  /* *INDENT-ON* */
+}
+
 static clib_error_t *
 ethernet_arp_init (vlib_main_t * vm)
 {
@@ -1606,6 +1671,11 @@ ethernet_arp_init (vlib_main_t * vm)
   cb.function_opaque = 0;
   vec_add1 (im->add_del_interface_address_callbacks, cb);
 
+  ip4_table_bind_callback_t cbt;
+  cbt.function = arp_table_bind;
+  cbt.function_opaque = 0;
+  vec_add1 (im->table_bind_callbacks, cbt);
+
   return 0;
 }
 
@@ -1616,24 +1686,9 @@ arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
 {
   ethernet_arp_main_t *am = &ethernet_arp_main;
 
-  if (FIB_NODE_INDEX_INVALID != e->fib_entry_index)
-    {
-      fib_prefix_t pfx = {
-	.fp_len = 32,
-	.fp_proto = FIB_PROTOCOL_IP4,
-	.fp_addr.ip4 = e->ip4_address,
-      };
-      u32 fib_index;
-
-      fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
-
-      fib_table_entry_path_remove (fib_index, &pfx,
-				   FIB_SOURCE_ADJ,
-				   DPO_PROTO_IP4,
-				   &pfx.fp_addr,
-				   e->sw_if_index, ~0, 1,
-				   FIB_ROUTE_PATH_FLAG_NONE);
-    }
+  arp_adj_fib_remove (e,
+		      ip4_fib_table_get_index_for_sw_if_index
+		      (e->sw_if_index));
   hash_unset (eai->arp_entries, e->ip4_address.as_u32);
   pool_put (am->ip4_entry_pool, e);
 }
@@ -1693,7 +1748,11 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
        * does in response to interface events. unset is only done
        * by the control plane.
        */
-      if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
+      if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
+	{
+	  e->flags &= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
+	}
+      else if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
 	{
 	  arp_entry_free (eai, e);
 	}
diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h
index d07d6cae..f5a107ca 100644
--- a/src/vnet/fib/fib_api.h
+++ b/src/vnet/fib/fib_api.h
@@ -23,7 +23,6 @@ add_del_route_check (fib_protocol_t table_proto,
 		     u32 next_hop_sw_if_index,
 		     dpo_proto_t next_hop_table_proto,
 		     u32 next_hop_table_id,
-		     u8 create_missing_tables,
                      u8 is_rpf_id,
 		     u32 * fib_index, u32 * next_hop_fib_index);
 
diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c
index 2027f2be..4cb6cf60 100644
--- a/src/vnet/fib/fib_entry.c
+++ b/src/vnet/fib/fib_entry.c
@@ -89,6 +89,17 @@ fib_entry_get_default_chain_type (const fib_entry_t *fib_entry)
     return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
 }
 
+u8 *
+format_fib_source (u8 * s, va_list * args)
+{
+    fib_source_t source = va_arg (*args, int);
+    /* name only: callers add any label and separators themselves */
+    s = format (s, "%s",
+                fib_source_names[source]);
+
+    return (s);
+}
+
 u8 *
 format_fib_entry (u8 * s, va_list * args)
 {
@@ -114,8 +125,8 @@ format_fib_entry (u8 * s, va_list * args)
 
 	FOR_EACH_SRC_ADDED(fib_entry, src, source,
         ({
-	    s = format (s, "\n  src:%s ",
-			fib_source_names[source]);
+	    s = format (s, "\n  src:%U ",
+			format_fib_source, source);
 	    s = fib_entry_src_format(fib_entry, source, s);
 	    s = format (s, " refs:%d ", src->fes_ref_count);
 	    if (FIB_ENTRY_FLAG_NONE != src->fes_entry_flags) {
diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h
index 93b8016d..2f6e37fe 100644
--- a/src/vnet/fib/fib_entry.h
+++ b/src/vnet/fib/fib_entry.h
@@ -431,6 +431,7 @@ typedef struct fib_entry_t_ {
 #define FIB_ENTRY_FORMAT_DETAIL2 (0x2)
 
 extern u8 *format_fib_entry (u8 * s, va_list * args);
+extern u8 *format_fib_source (u8 * s, va_list * args);
 
 extern fib_node_index_t fib_entry_create_special(u32 fib_index,
 						 const fib_prefix_t *prefix,
diff --git a/src/vnet/fib/fib_entry_src_mpls.c b/src/vnet/fib/fib_entry_src_mpls.c
index a616458f..6fdd5c0a 100644
--- a/src/vnet/fib/fib_entry_src_mpls.c
+++ b/src/vnet/fib/fib_entry_src_mpls.c
@@ -94,7 +94,9 @@ fib_entry_src_mpls_set_data (fib_entry_src_t *src,
 	    fib_table_entry_delete_index(src->mpls.fesm_lfes[eos],
 					 FIB_SOURCE_SPECIAL);
         }
-        fib_table_unlock(MPLS_FIB_DEFAULT_TABLE_ID, FIB_PROTOCOL_MPLS);
+        fib_table_unlock(MPLS_FIB_DEFAULT_TABLE_ID,
+                         FIB_PROTOCOL_MPLS,
+                         FIB_SOURCE_MPLS);
         src->mpls.fesm_label = label;
     }
     else
@@ -113,7 +115,8 @@ fib_entry_src_mpls_set_data (fib_entry_src_t *src,
         {
             fib_index =
 		fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS,
-						  MPLS_FIB_DEFAULT_TABLE_ID);
+						  MPLS_FIB_DEFAULT_TABLE_ID,
+                                                  FIB_SOURCE_MPLS);
         }
 	else
 	{
diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c
index 6b6cc5cb..75d15628 100644
--- a/src/vnet/fib/fib_table.c
+++ b/src/vnet/fib/fib_table.c
@@ -1039,7 +1039,8 @@ fib_table_find (fib_protocol_t proto,
 
 u32
 fib_table_find_or_create_and_lock (fib_protocol_t proto,
-				   u32 table_id)
+				   u32 table_id,
+                                   fib_source_t src)
 {
     fib_table_t *fib_table;
     fib_node_index_t fi;
@@ -1047,13 +1048,13 @@ fib_table_find_or_create_and_lock (fib_protocol_t proto,
     switch (proto)
     {
     case FIB_PROTOCOL_IP4:
-	fi = ip4_fib_table_find_or_create_and_lock(table_id);
+	fi = ip4_fib_table_find_or_create_and_lock(table_id, src);
         break;
     case FIB_PROTOCOL_IP6:
-	fi = ip6_fib_table_find_or_create_and_lock(table_id);
+	fi = ip6_fib_table_find_or_create_and_lock(table_id, src);
         break;
     case FIB_PROTOCOL_MPLS:
-	fi = mpls_fib_table_find_or_create_and_lock(table_id);
+	fi = mpls_fib_table_find_or_create_and_lock(table_id, src);
         break;
     default:
         return (~0);        
@@ -1070,6 +1071,7 @@ fib_table_find_or_create_and_lock (fib_protocol_t proto,
 
 u32
 fib_table_create_and_lock (fib_protocol_t proto,
+                           fib_source_t src,
                            const char *const fmt,
                            ...)
 {
@@ -1082,13 +1084,13 @@ fib_table_create_and_lock (fib_protocol_t proto,
     switch (proto)
     {
     case FIB_PROTOCOL_IP4:
-	fi = ip4_fib_table_create_and_lock();
+	fi = ip4_fib_table_create_and_lock(src);
         break;
     case FIB_PROTOCOL_IP6:
-	fi = ip6_fib_table_create_and_lock();
+	fi = ip6_fib_table_create_and_lock(src);
         break;
      case FIB_PROTOCOL_MPLS:
-	fi = mpls_fib_table_create_and_lock();
+	fi = mpls_fib_table_create_and_lock(src);
         break;
    default:
         return (~0);        
@@ -1143,26 +1145,43 @@ fib_table_walk (u32 fib_index,
 
 void
 fib_table_unlock (u32 fib_index,
-		  fib_protocol_t proto)
+		  fib_protocol_t proto,
+                  fib_source_t source)
 {
     fib_table_t *fib_table;
 
     fib_table = fib_table_get(fib_index, proto);
-    fib_table->ft_locks--;
+    fib_table->ft_locks[source]--;
+    fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]--;
 
-    if (0 == fib_table->ft_locks)
+    if (0 == fib_table->ft_locks[source])
     {
+        /*
+         * The source no longer needs the table. flush any routes
+         * from it just in case
+         */
+        fib_table_flush(fib_index, proto, source);
+    }
+
+    if (0 == fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS])
+    {
+        /*
+         * no more locks from any source - kill it
+         */
 	fib_table_destroy(fib_table);
     }
 }
+
 void
 fib_table_lock (u32 fib_index,
-		fib_protocol_t proto)
+		fib_protocol_t proto,
+                fib_source_t source)
 {
     fib_table_t *fib_table;
 
     fib_table = fib_table_get(fib_index, proto);
-    fib_table->ft_locks++;
+    fib_table->ft_locks[source]++;
+    fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]++;
 }
 
 u32
diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h
index 579740e9..6b7011b3 100644
--- a/src/vnet/fib/fib_table.h
+++ b/src/vnet/fib/fib_table.h
@@ -22,6 +22,12 @@
 #include <vnet/mpls/mpls.h>
 #include <vnet/mpls/packet.h>
 
+/**
+ * Keep a lock per-source and a total
+ */
+#define FIB_TABLE_N_LOCKS (FIB_SOURCE_MAX+1)
+#define FIB_TABLE_TOTAL_LOCKS FIB_SOURCE_MAX
+
 /**
  * @brief 
  *   A protocol Independent FIB table
@@ -34,9 +40,9 @@ typedef struct fib_table_t_
     fib_protocol_t ft_proto;
 
     /**
-     * number of locks on the table
+     * per-source lock counts, plus the total at FIB_TABLE_TOTAL_LOCKS
      */
-    u16 ft_locks;
+    u16 ft_locks[FIB_TABLE_N_LOCKS];
 
     /**
      * Table ID (hash key) for this FIB.
@@ -628,9 +634,13 @@ extern u32 fib_table_find(fib_protocol_t proto, u32 table_id);
  *
  * @return fib_index
  *  The index of the FIB
+ *
+ * @param source
+ *  The ID of the client/source.
  */
 extern u32 fib_table_find_or_create_and_lock(fib_protocol_t proto,
-					     u32 table_id);
+					     u32 table_id,
+                                             fib_source_t source);
 
 /**
  * @brief
@@ -643,10 +653,14 @@ extern u32 fib_table_find_or_create_and_lock(fib_protocol_t proto,
  * @param fmt
  *  A string to describe the table
  *
+ * @param source
+ *  The ID of the client/source.
+ *
  * @return fib_index
  *  The index of the FIB
  */
 extern u32 fib_table_create_and_lock(fib_protocol_t proto,
+                                     fib_source_t source,
                                      const char *const fmt,
                                      ...);
 
@@ -704,9 +718,13 @@ extern void fib_table_set_flow_hash_config(u32 fib_index,
  *
  * @paran proto
  *  The protocol of the FIB (and thus the entries therein)
+ *
+ * @param source
+ *  The ID of the client/source.
  */ 
 extern void fib_table_unlock(u32 fib_index,
-			     fib_protocol_t proto);
+			     fib_protocol_t proto,
+                             fib_source_t source);
 
 /**
  * @brief
@@ -718,9 +736,13 @@ extern void fib_table_unlock(u32 fib_index,
  *
  * @paran proto
  *  The protocol of the FIB (and thus the entries therein)
+ *
+ * @param source
+ *  The ID of the client/source.
  */ 
 extern void fib_table_lock(u32 fib_index,
-			   fib_protocol_t proto);
+			   fib_protocol_t proto,
+                           fib_source_t source);
 
 /**
  * @brief
diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c
index 6867cca8..572d7f0d 100644
--- a/src/vnet/fib/fib_test.c
+++ b/src/vnet/fib/fib_test.c
@@ -739,7 +739,8 @@ fib_test_v4 (void)
     lb_count = pool_elts(load_balance_pool);
 
     /* Find or create FIB table 11 */
-    fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11);
+    fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11,
+                                                  FIB_SOURCE_API);
 
     for (ii = 0; ii < 4; ii++)
     {
@@ -4150,7 +4151,7 @@ fib_test_v4 (void)
                                              FIB_SOURCE_INTERFACE)),
              "NO INterface Source'd prefixes");
 
-    fib_table_unlock(fib_index, FIB_PROTOCOL_IP4);
+    fib_table_unlock(fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_API);
 
     FIB_TEST((0  == fib_path_list_db_size()), "path list DB population:%d",
     	     fib_path_list_db_size());
@@ -4201,7 +4202,8 @@ fib_test_v6 (void)
     dpo_drop = drop_dpo_get(DPO_PROTO_IP6);
 
     /* Find or create FIB table 11 */
-    fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 11);
+    fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 11,
+                                                  FIB_SOURCE_API);
 
     for (ii = 0; ii < 4; ii++)
     {
@@ -5025,7 +5027,7 @@ fib_test_v6 (void)
     /*
      * now remove the VRF
      */
-    fib_table_unlock(fib_index, FIB_PROTOCOL_IP6);
+    fib_table_unlock(fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_API);
 
     FIB_TEST((0 == fib_path_list_db_size()),   "path list DB population:%d",
 	     fib_path_list_db_size());
@@ -5157,7 +5159,9 @@ fib_test_ae (void)
      */
     u32 import_fib_index1;
 
-    import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11);
+    import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
+                                                          11,
+                                                          FIB_SOURCE_CLI);
 
     /*
      * Add an attached route in the import FIB
@@ -5233,7 +5237,8 @@ fib_test_ae (void)
      */
     u32 import_fib_index2;
 
-    import_fib_index2 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 12);
+    import_fib_index2 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 12,
+                                                          FIB_SOURCE_CLI);
 
     /*
      * Add an attached route in the import FIB
@@ -5595,8 +5600,8 @@ fib_test_ae (void)
 			   &local_pfx,
 			   FIB_SOURCE_API);
 
-    fib_table_unlock(import_fib_index1, FIB_PROTOCOL_IP4);
-    fib_table_unlock(import_fib_index2, FIB_PROTOCOL_IP4);
+    fib_table_unlock(import_fib_index1, FIB_PROTOCOL_IP4, FIB_SOURCE_CLI);
+    fib_table_unlock(import_fib_index2, FIB_PROTOCOL_IP4, FIB_SOURCE_CLI);
 
     FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
 	     adj_nbr_db_size());
@@ -8168,9 +8173,10 @@ lfib_test (void)
     /*
      * MPLS enable an interface so we get the MPLS table created
      */
+    mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API);
     mpls_sw_interface_enable_disable(&mpls_main,
                                      tm->hw[0]->sw_if_index,
-                                     1);
+                                     1, 1);
 
     ip46_address_t nh_10_10_10_1 = {
 	.ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
@@ -8662,7 +8668,8 @@ lfib_test (void)
      */
     mpls_sw_interface_enable_disable(&mpls_main,
                                      tm->hw[0]->sw_if_index,
-                                     0);
+                                     0, 1);
+    mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API);
 
     FIB_TEST(lb_count == pool_elts(load_balance_pool),
 	     "Load-balance resources freed %d of %d",
diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c
index d563bafd..865e2dd5 100644
--- a/src/vnet/fib/ip4_fib.c
+++ b/src/vnet/fib/ip4_fib.c
@@ -101,7 +101,8 @@ static const ip4_fib_table_special_prefix_t ip4_specials[] = {
 
 
 static u32
-ip4_create_fib_with_table_id (u32 table_id)
+ip4_create_fib_with_table_id (u32 table_id,
+                              fib_source_t src)
 {
     fib_table_t *fib_table;
     ip4_fib_t *v4_fib;
@@ -128,7 +129,7 @@ ip4_create_fib_with_table_id (u32 table_id)
     v4_fib->fwd_classify_table_index = ~0;
     v4_fib->rev_classify_table_index = ~0;
     
-    fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4);
+    fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4, src);
 
     ip4_mtrie_init(&v4_fib->mtrie);
 
@@ -198,23 +199,24 @@ ip4_fib_table_destroy (u32 fib_index)
 
 
 u32
-ip4_fib_table_find_or_create_and_lock (u32 table_id)
+ip4_fib_table_find_or_create_and_lock (u32 table_id,
+                                       fib_source_t src)
 {
     u32 index;
 
     index = ip4_fib_index_from_table_id(table_id);
     if (~0 == index)
-	return ip4_create_fib_with_table_id(table_id);
+	return ip4_create_fib_with_table_id(table_id, src);
 
-    fib_table_lock(index, FIB_PROTOCOL_IP4);
+    fib_table_lock(index, FIB_PROTOCOL_IP4, src);
 
     return (index);
 }
 
 u32
-ip4_fib_table_create_and_lock (void)
+ip4_fib_table_create_and_lock (fib_source_t src)
 {
-    return (ip4_create_fib_with_table_id(~0));
+    return (ip4_create_fib_with_table_id(~0, src));
 }
 
 u32
@@ -525,17 +527,32 @@ ip4_show_fib (vlib_main_t * vm,
     pool_foreach (fib_table, im4->fibs,
     ({
 	ip4_fib_t *fib = pool_elt_at_index(im4->v4_fibs, fib_table->ft_index);
+        fib_source_t source;
+        u8 *s = NULL;
 
 	if (table_id >= 0 && table_id != (int)fib->table_id)
 	    continue;
 	if (fib_index != ~0 && fib_index != (int)fib->index)
 	    continue;
 
-	vlib_cli_output (vm, "%U, fib_index:%d, flow hash:[%U] locks:%d", 
-			 format_fib_table_name, fib->index, FIB_PROTOCOL_IP4,
-			 fib->index,
-			 format_ip_flow_hash_config, fib_table->ft_flow_hash_config,
-                         fib_table->ft_locks);
+	s = format(s, "%U, fib_index:%d, flow hash:[%U] locks:[",
+                   format_fib_table_name, fib->index,
+                   FIB_PROTOCOL_IP4,
+                   fib->index,
+                   format_ip_flow_hash_config,
+                   fib_table->ft_flow_hash_config);
+	FOR_EACH_FIB_SOURCE(source)
+        {
+            if (0 != fib_table->ft_locks[source])
+            {
+                s = format(s, "%U:%d, ",
+                           format_fib_source, source,
+                           fib_table->ft_locks[source]);
+            }
+        }
+        s = format (s, "]");
+        vlib_cli_output (vm, "%V", s);
+        vec_free(s);
 
 	/* Show summary? */
 	if (! verbose)
diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h
index 006163b4..495b45cc 100644
--- a/src/vnet/fib/ip4_fib.h
+++ b/src/vnet/fib/ip4_fib.h
@@ -127,8 +127,9 @@ ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst)
  * @returns A pointer to the retrieved or created fib.
  *
  */
-extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id);
-extern u32 ip4_fib_table_create_and_lock(void);
+extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id,
+                                                 fib_source_t src);
+extern u32 ip4_fib_table_create_and_lock(fib_source_t src);
 
 
 static inline 
diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c
index 8fde6f9f..3ddb8453 100644
--- a/src/vnet/fib/ip6_fib.c
+++ b/src/vnet/fib/ip6_fib.c
@@ -50,7 +50,8 @@ vnet_ip6_fib_init (u32 fib_index)
 }
 
 static u32
-create_fib_with_table_id (u32 table_id)
+create_fib_with_table_id (u32 table_id,
+                          fib_source_t src)
 {
     fib_table_t *fib_table;
     ip6_fib_t *v6_fib;
@@ -77,29 +78,30 @@ create_fib_with_table_id (u32 table_id)
     fib_table->ft_flow_hash_config = IP_FLOW_HASH_DEFAULT;
 
     vnet_ip6_fib_init(fib_table->ft_index);
-    fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6);
+    fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6, src);
 
     return (fib_table->ft_index);
 }
 
 u32
-ip6_fib_table_find_or_create_and_lock (u32 table_id)
+ip6_fib_table_find_or_create_and_lock (u32 table_id,
+                                       fib_source_t src)
 {
     uword * p;
 
     p = hash_get (ip6_main.fib_index_by_table_id, table_id);
     if (NULL == p)
-	return create_fib_with_table_id(table_id);
+	return create_fib_with_table_id(table_id, src);
     
-    fib_table_lock(p[0], FIB_PROTOCOL_IP6);
+    fib_table_lock(p[0], FIB_PROTOCOL_IP6, src);
 
     return (p[0]);
 }
 
 u32
-ip6_fib_table_create_and_lock (void)
+ip6_fib_table_create_and_lock (fib_source_t src)
 {
-    return (create_fib_with_table_id(~0));
+    return (create_fib_with_table_id(~0, src));
 }
 
 void
@@ -588,16 +590,33 @@ ip6_show_fib (vlib_main_t * vm,
 
     pool_foreach (fib_table, im6->fibs,
     ({
+        fib_source_t source;
+        u8 *s = NULL;
+
 	fib = pool_elt_at_index(im6->v6_fibs, fib_table->ft_index);
 	if (table_id >= 0 && table_id != (int)fib->table_id)
 	    continue;
 	if (fib_index != ~0 && fib_index != (int)fib->index)
 	    continue;
 
-	vlib_cli_output (vm, "%s, fib_index:%d, flow hash:[%U] locks:%d", 
-			 fib_table->ft_desc, fib->index,
-			 format_ip_flow_hash_config, fib_table->ft_flow_hash_config,
-                         fib_table->ft_locks);
+	s = format(s, "%U, fib_index:%d, flow hash:[%U] locks:[",
+                   format_fib_table_name, fib->index,
+                   FIB_PROTOCOL_IP6,
+                   fib->index,
+                   format_ip_flow_hash_config,
+                   fib_table->ft_flow_hash_config);
+	FOR_EACH_FIB_SOURCE(source)
+        {
+            if (0 != fib_table->ft_locks[source])
+            {
+                s = format(s, "%U:%d, ",
+                           format_fib_source, source,
+                           fib_table->ft_locks[source]);
+            }
+        }
+        s = format (s, "]");
+        vlib_cli_output (vm, "%V", s);
+        vec_free(s);
 
 	/* Show summary? */
 	if (! verbose)
diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h
index aad8305c..9728eecc 100644
--- a/src/vnet/fib/ip6_fib.h
+++ b/src/vnet/fib/ip6_fib.h
@@ -144,8 +144,9 @@ ip6_src_lookup_for_packet (ip6_main_t * im,
  * \returns A pointer to the retrieved or created fib.
  *
  */
-extern u32 ip6_fib_table_find_or_create_and_lock(u32 table_id);
-extern u32 ip6_fib_table_create_and_lock(void);
+extern u32 ip6_fib_table_find_or_create_and_lock(u32 table_id,
+                                                 fib_source_t src);
+extern u32 ip6_fib_table_create_and_lock(fib_source_t src);
 
 static inline ip6_fib_t *
 ip6_fib_get (fib_node_index_t index)
diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c
index ca6271fe..4eeef7ab 100644
--- a/src/vnet/fib/mpls_fib.c
+++ b/src/vnet/fib/mpls_fib.c
@@ -83,7 +83,8 @@ mpls_fib_index_from_table_id (u32 table_id)
 }
 
 static u32
-mpls_fib_create_with_table_id (u32 table_id)
+mpls_fib_create_with_table_id (u32 table_id,
+                               fib_source_t src)
 {
     dpo_id_t dpo = DPO_INVALID;
     fib_table_t *fib_table;
@@ -107,7 +108,7 @@ mpls_fib_create_with_table_id (u32 table_id)
     fib_table->ft_table_id = table_id;
     fib_table->ft_flow_hash_config = MPLS_FLOW_HASH_DEFAULT;
     
-    fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS);
+    fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS, src);
 
     if (INDEX_INVALID == mpls_fib_drop_dpo_index)
     {
@@ -220,22 +221,23 @@ mpls_fib_create_with_table_id (u32 table_id)
 }
 
 u32
-mpls_fib_table_find_or_create_and_lock (u32 table_id)
+mpls_fib_table_find_or_create_and_lock (u32 table_id,
+                                        fib_source_t src)
 {
     u32 index;
 
     index = mpls_fib_index_from_table_id(table_id);
     if (~0 == index)
-	return mpls_fib_create_with_table_id(table_id);
+	return mpls_fib_create_with_table_id(table_id, src);
 
-    fib_table_lock(index, FIB_PROTOCOL_MPLS);
+    fib_table_lock(index, FIB_PROTOCOL_MPLS, src);
 
     return (index);
 }
 u32
-mpls_fib_table_create_and_lock (void)
+mpls_fib_table_create_and_lock (fib_source_t src)
 {
-    return (mpls_fib_create_with_table_id(~0));
+    return (mpls_fib_create_with_table_id(~0, src));
 }
 
 void
diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h
index dfb8b7fc..29cd1d20 100644
--- a/src/vnet/fib/mpls_fib.h
+++ b/src/vnet/fib/mpls_fib.h
@@ -59,8 +59,9 @@ mpls_fib_get (fib_node_index_t index)
     return (pool_elt_at_index(mpls_main.mpls_fibs, index));
 }
 
-extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id);
-extern u32 mpls_fib_table_create_and_lock(void);
+extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id,
+                                                  fib_source_t src);
+extern u32 mpls_fib_table_create_and_lock(fib_source_t src);
 // extern mpls_fib_t * mpls_fib_find(u32 table_id);
 extern u32 mpls_fib_index_from_table_id(u32 table_id);
 
diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c
index 113728cd..419fef94 100644
--- a/src/vnet/interface_api.c
+++ b/src/vnet/interface_api.c
@@ -320,68 +320,203 @@ stats_dsunlock (void)
 static void
 vl_api_sw_interface_set_table_t_handler (vl_api_sw_interface_set_table_t * mp)
 {
-  int rv = 0;
-  u32 table_id = ntohl (mp->vrf_id);
-  u32 sw_if_index = ntohl (mp->sw_if_index);
   vl_api_sw_interface_set_table_reply_t *rmp;
-  CLIB_UNUSED (ip_interface_address_t * ia);
-  u32 fib_index;
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+  u32 table_id = ntohl (mp->vrf_id);
+  int rv = 0;
 
   VALIDATE_SW_IF_INDEX (mp);
 
   stats_dslock_with_hint (1 /* release hint */ , 4 /* tag */ );
 
   if (mp->is_ipv6)
+    rv = ip_table_bind (FIB_PROTOCOL_IP6, sw_if_index, table_id, 1);
+  else
+    rv = ip_table_bind (FIB_PROTOCOL_IP4, sw_if_index, table_id, 1);
+
+  stats_dsunlock ();
+
+  BAD_SW_IF_INDEX_LABEL;
+
+  REPLY_MACRO (VL_API_SW_INTERFACE_SET_TABLE_REPLY);
+}
+
+/*
+ * Bind an interface to the unicast and multicast tables identified by
+ * table_id. FIB_PROTOCOL_IP6 selects the IPv6 tables, any other fproto
+ * the IPv4 ones. is_api selects which source owns the table locks
+ * taken here: the API sources if non-zero, the CLI sources otherwise.
+ *
+ * Returns 0 on success, or VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE,
+ * leaving the binding unchanged, if the interface has an address.
+ */
+int
+ip_table_bind (fib_protocol_t fproto,
+	       uint32_t sw_if_index, uint32_t table_id, u8 is_api)
+{
+  CLIB_UNUSED (ip_interface_address_t * ia);
+  u32 fib_index, mfib_index;
+  fib_source_t src;
+  mfib_source_t msrc;
+  int rv = 0;
+
+  if (is_api)
+    {
+      src = FIB_SOURCE_API;
+      msrc = MFIB_SOURCE_API;
+    }
+  else
+    {
+      src = FIB_SOURCE_CLI;
+      msrc = MFIB_SOURCE_CLI;
+    }
+
+  /*
+   * This is temporary whilst I do the song and dance with the CSIT version
+   */
+  if (0 != table_id)
     {
+      fib_index = fib_table_find_or_create_and_lock (fproto, table_id, src);
+      mfib_index =
+	mfib_table_find_or_create_and_lock (fproto, table_id, msrc);
+    }
+  else
+    {
+      fib_index = 0;
+      mfib_index = 0;
+    }
+
+  /*
+   * Returning an error if the table does not exist is what we want in the end.
+   */
+  /* fib_index = fib_table_find (fproto, table_id); */
+  /* mfib_index = mfib_table_find (fproto, table_id); */
+
+  /* if (~0 == fib_index || ~0 == mfib_index) */
+  /*   { */
+  /*     return (VNET_API_ERROR_NO_SUCH_FIB); */
+  /*   } */
+
+  if (FIB_PROTOCOL_IP6 == fproto)
+    {
+      /*
+       * If the interface already has an IP address, then a change in
+       * VRF is not allowed. The IP address applied must first be removed.
+       * We do not do that automatically here, since VPP has no knowledge
+       * of whether those subnets are valid in the destination VRF.
+       */
       /* *INDENT-OFF* */
       foreach_ip_interface_address (&ip6_main.lookup_main,
 				    ia, sw_if_index,
 				    1 /* honor unnumbered */ ,
       ({
         rv = VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE;
         goto done;
       }));
       /* *INDENT-ON* */
 
-      fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6,
-						     table_id);
       vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
-      ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
-
-      fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6,
-						      table_id);
       vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index);
-      ip6_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index;
+
+      /*
+       * tell those that are interested that the binding is changing.
+       */
+      ip6_table_bind_callback_t *cb;
+      vec_foreach (cb, ip6_main.table_bind_callbacks)
+	cb->function (&ip6_main, cb->function_opaque,
+		      sw_if_index,
+		      fib_index,
+		      ip6_main.fib_index_by_sw_if_index[sw_if_index]);
+
+      if (0 == table_id)
+	{
+	  /* reset back to default */
+	  if (0 != ip6_main.fib_index_by_sw_if_index[sw_if_index])
+	    fib_table_unlock (ip6_main.fib_index_by_sw_if_index[sw_if_index],
+			      FIB_PROTOCOL_IP6, src);
+	  if (0 != ip6_main.mfib_index_by_sw_if_index[sw_if_index])
+	    mfib_table_unlock (ip6_main.mfib_index_by_sw_if_index
+			       [sw_if_index], FIB_PROTOCOL_IP6, msrc);
+
+	}
+      else
+	{
+	  /* we need to lock the table now it's in use */
+	  fib_table_lock (fib_index, FIB_PROTOCOL_IP6, src);
+	  mfib_table_lock (mfib_index, FIB_PROTOCOL_IP6, msrc);
+	}
+
+      ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+      ip6_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index;
     }
   else
     {
+      /*
+       * If the interface already has an IP address, then a change in
+       * VRF is not allowed. The IP address applied must first be removed.
+       * We do not do that automatically here, since VPP has no knowledge
+       * of whether those subnets are valid in the destination VRF.
+       */
       /* *INDENT-OFF* */
       foreach_ip_interface_address (&ip4_main.lookup_main,
 				    ia, sw_if_index,
 				    1 /* honor unnumbered */ ,
       ({
         rv = VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE;
         goto done;
       }));
       /* *INDENT-ON* */
 
-      fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
-						     table_id);
       vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
-      ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
-
-      fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
-						      table_id);
       vec_validate (ip4_main.mfib_index_by_sw_if_index, sw_if_index);
-      ip4_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index;
-    }
 
-done:
-  stats_dsunlock ();
+      /*
+       * tell those that are interested that the binding is changing.
+       */
+      ip4_table_bind_callback_t *cb;
+      vec_foreach (cb, ip4_main.table_bind_callbacks)
+	cb->function (&ip4_main, cb->function_opaque,
+		      sw_if_index,
+		      fib_index,
+		      ip4_main.fib_index_by_sw_if_index[sw_if_index]);
+
+      if (0 == table_id)
+	{
+	  /* reset back to default */
+	  if (0 != ip4_main.fib_index_by_sw_if_index[sw_if_index])
+	    fib_table_unlock (ip4_main.fib_index_by_sw_if_index[sw_if_index],
+			      FIB_PROTOCOL_IP4, src);
+	  if (0 != ip4_main.mfib_index_by_sw_if_index[sw_if_index])
+	    mfib_table_unlock (ip4_main.mfib_index_by_sw_if_index
+			       [sw_if_index], FIB_PROTOCOL_IP4, msrc);
 
-  BAD_SW_IF_INDEX_LABEL;
+	}
+      else
+	{
+	  /* we need to lock the table now it's in use */
+	  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+							 table_id, src);
 
-  REPLY_MACRO (VL_API_SW_INTERFACE_SET_TABLE_REPLY);
+	  mfib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+							   table_id, msrc);
+	}
+
+      ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+      ip4_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index;
+    }
+
+  /*
+   * Temporary. undo the locks from the find and create at the start.
+   * Error exits jump here as well so that they do not leak those locks.
+   */
+done:
+  if (0 != table_id)
+    {
+      fib_table_unlock (fib_index, fproto, src);
+      mfib_table_unlock (mfib_index, fproto, msrc);
+    }
+
+  return (rv);
 }
 
 static void
diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h
index 70b4ccd8..7aae73ff 100644
--- a/src/vnet/ip/ip.h
+++ b/src/vnet/ip/ip.h
@@ -184,6 +184,13 @@ void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index);
 extern vlib_node_registration_t ip4_inacl_node;
 extern vlib_node_registration_t ip6_inacl_node;
 
+void ip_table_create (fib_protocol_t fproto, uint32_t table_id, u8 is_api);
+
+void ip_table_delete (fib_protocol_t fproto, uint32_t table_id, u8 is_api);
+
+int ip_table_bind (fib_protocol_t fproto,
+		   uint32_t sw_if_index, uint32_t table_id, u8 is_api);
+
 #endif /* included_ip_main_h */
 
 /*
diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
index 8f9a8e27..decb840b 100644
--- a/src/vnet/ip/ip4.h
+++ b/src/vnet/ip/ip4.h
@@ -72,6 +72,16 @@ typedef struct
   uword function_opaque;
 } ip4_add_del_interface_address_callback_t;
 
+typedef void (ip4_table_bind_function_t)
+  (struct ip4_main_t * im,
+   uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index);
+
+typedef struct
+{
+  ip4_table_bind_function_t *function;
+  uword function_opaque;
+} ip4_table_bind_callback_t;
+
 /**
  * @brief IPv4 main type.
  *
@@ -117,6 +127,9 @@ typedef struct ip4_main_t
     ip4_add_del_interface_address_callback_t
     * add_del_interface_address_callbacks;
 
+  /** Functions to call when interface to table binding changes. */
+  ip4_table_bind_callback_t *table_bind_callbacks;
+
   /** Template used to generate IP4 ARP packets. */
   vlib_packet_template_t ip4_arp_request_packet_template;
 
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 2d48e8a9..ec4287bb 100755
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -1198,8 +1198,10 @@ ip4_lookup_init (vlib_main_t * vm)
   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
 
   /* Create FIB with index 0 and table id of 0. */
-  fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
-  mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
+  fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
+				     FIB_SOURCE_DEFAULT_ROUTE);
+  mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
+				      MFIB_SOURCE_DEFAULT_ROUTE);
 
   {
     pg_node_t *pn;
@@ -2794,101 +2796,6 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = {
 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
 /* *INDENT-ON */
 
-static clib_error_t *
-add_del_interface_table (vlib_main_t * vm,
-			 unformat_input_t * input, vlib_cli_command_t * cmd)
-{
-  vnet_main_t *vnm = vnet_get_main ();
-  ip_interface_address_t *ia;
-  clib_error_t *error = 0;
-  u32 sw_if_index, table_id;
-
-  sw_if_index = ~0;
-
-  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
-    {
-      error = clib_error_return (0, "unknown interface `%U'",
-				 format_unformat_error, input);
-      goto done;
-    }
-
-  if (unformat (input, "%d", &table_id))
-    ;
-  else
-    {
-      error = clib_error_return (0, "expected table id `%U'",
-				 format_unformat_error, input);
-      goto done;
-    }
-
-  /*
-   * If the interface already has in IP address, then a change int
-   * VRF is not allowed. The IP address applied must first be removed.
-   * We do not do that automatically here, since VPP has no knowledge
-   * of whether thoses subnets are valid in the destination VRF.
-   */
-  /* *INDENT-OFF* */
-  foreach_ip_interface_address (&ip4_main.lookup_main,
-                                ia, sw_if_index,
-                                1 /* honor unnumbered */,
-  ({
-      ip4_address_t * a;
-
-      a = ip_interface_address_get_address (&ip4_main.lookup_main, ia);
-      error = clib_error_return (0, "interface %U has address %U",
-                                 format_vnet_sw_if_index_name, vnm,
-                                 sw_if_index,
-                                 format_ip4_address, a);
-      goto done;
-   }));
-   /* *INDENT-ON* */
-
-{
-  ip4_main_t *im = &ip4_main;
-  u32 fib_index;
-
-  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
-
-  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
-  im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
-
-  fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
-  vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
-  im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
-}
-
-done:
-return error;
-}
-
-/*?
- * Place the indicated interface into the supplied IPv4 FIB table (also known
- * as a VRF). If the FIB table does not exist, this command creates it. To
- * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
- * FIB table will only be displayed if a route has been added to the table, or
- * an IP Address is assigned to an interface in the table (which adds a route
- * automatically).
- *
- * @note IP addresses added after setting the interface IP table are added to
- * the indicated FIB table. If an IP address is added prior to changing the
- * table then this is an error. The control plane must remove these addresses
- * first and then change the table. VPP will not automatically move the
- * addresses from the old to the new table as it does not know the validity
- * of such a change.
- *
- * @cliexpar
- * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
- * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
-{
-  .path = "set interface ip table",
-  .function = add_del_interface_table,
-  .short_help = "set interface ip table <interface> <table-id>",
-};
-/* *INDENT-ON* */
-
 int
 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
 {
diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c
index ae836a11..9aa880ae 100644
--- a/src/vnet/ip/ip4_source_and_port_range_check.c
+++ b/src/vnet/ip/ip4_source_and_port_range_check.c
@@ -1126,6 +1126,14 @@ ip6_source_and_port_range_check_add_del (ip6_address_t * address,
 					 u16 * low_ports,
 					 u16 * high_ports, int is_add)
 {
+  uint32_t fib_index;
+
+  fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
+
+  ASSERT (~0 != fib_index);
+
+  fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY);
+
   return 0;
 }
 
@@ -1138,7 +1146,8 @@ ip4_source_and_port_range_check_add_del (ip4_address_t * address,
 {
   u32 fib_index;
 
-  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id);
+  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
+						 FIB_SOURCE_CLASSIFY);
 
   if (is_add == 0)
     {
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
index fa922725..8aef53a9 100644
--- a/src/vnet/ip/ip6.h
+++ b/src/vnet/ip/ip6.h
@@ -103,6 +103,16 @@ typedef struct
   uword function_opaque;
 } ip6_add_del_interface_address_callback_t;
 
+typedef void (ip6_table_bind_function_t)
+  (struct ip6_main_t * im,
+   uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index);
+
+typedef struct
+{
+  ip6_table_bind_function_t *function;
+  uword function_opaque;
+} ip6_table_bind_callback_t;
+
 /**
  * Enumeration of the FIB table instance types
  */
@@ -183,6 +193,9 @@ typedef struct ip6_main_t
     ip6_add_del_interface_address_callback_t
     * add_del_interface_address_callbacks;
 
+  /** Functions to call when interface to table binding changes. */
+  ip6_table_bind_callback_t *table_bind_callbacks;
+
   /* Template used to generate IP6 neighbor solicitation packets. */
   vlib_packet_template_t discover_neighbor_packet_template;
 
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 5832bd0b..1002f6b6 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -2999,8 +2999,10 @@ ip6_lookup_init (vlib_main_t * vm)
 			 im->lookup_table_nbuckets, im->lookup_table_size);
 
   /* Create FIB with index 0 and table id of 0. */
-  fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0);
-  mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0);
+  fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
+				     FIB_SOURCE_DEFAULT_ROUTE);
+  mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
+				      MFIB_SOURCE_DEFAULT_ROUTE);
 
   {
     pg_node_t *pn;
@@ -3045,103 +3047,6 @@ ip6_lookup_init (vlib_main_t * vm)
 
 VLIB_INIT_FUNCTION (ip6_lookup_init);
 
-static clib_error_t *
-add_del_ip6_interface_table (vlib_main_t * vm,
-			     unformat_input_t * input,
-			     vlib_cli_command_t * cmd)
-{
-  vnet_main_t *vnm = vnet_get_main ();
-  ip_interface_address_t *ia;
-  clib_error_t *error = 0;
-  u32 sw_if_index, table_id;
-
-  sw_if_index = ~0;
-
-  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
-    {
-      error = clib_error_return (0, "unknown interface `%U'",
-				 format_unformat_error, input);
-      goto done;
-    }
-
-  if (unformat (input, "%d", &table_id))
-    ;
-  else
-    {
-      error = clib_error_return (0, "expected table id `%U'",
-				 format_unformat_error, input);
-      goto done;
-    }
-
-  /*
-   * If the interface already has in IP address, then a change int
-   * VRF is not allowed. The IP address applied must first be removed.
-   * We do not do that automatically here, since VPP has no knowledge
-   * of whether thoses subnets are valid in the destination VRF.
-   */
-  /* *INDENT-OFF* */
-  foreach_ip_interface_address (&ip6_main.lookup_main,
-                                ia, sw_if_index,
-                                1 /* honor unnumbered */,
-  ({
-      ip4_address_t * a;
-
-      a = ip_interface_address_get_address (&ip6_main.lookup_main, ia);
-      error = clib_error_return (0, "interface %U has address %U",
-                                 format_vnet_sw_if_index_name, vnm,
-                                 sw_if_index,
-                                 format_ip6_address, a);
-      goto done;
-  }));
-  /* *INDENT-ON* */
-
-  {
-    u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6,
-						       table_id);
-
-    vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
-    ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
-
-    fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6,
-						    table_id);
-
-    vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index);
-    ip6_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index;
-  }
-
-
-done:
-  return error;
-}
-
-/*?
- * Place the indicated interface into the supplied IPv6 FIB table (also known
- * as a VRF). If the FIB table does not exist, this command creates it. To
- * display the current IPv6 FIB table, use the command '<em>show ip6 fib</em>'.
- * FIB table will only be displayed if a route has been added to the table, or
- * an IP Address is assigned to an interface in the table (which adds a route
- * automatically).
- *
- * @note IP addresses added after setting the interface IP table are added to
- * the indicated FIB table. If an IP address is added prior to changing the
- * table then this is an error. The control plane must remove these addresses
- * first and then change the table. VPP will not automatically move the
- * addresses from the old to the new table as it does not know the validity
- * of such a change.
- *
- * @cliexpar
- * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
- * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2}
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) =
-{
-  .path = "set interface ip6 table",
-  .function = add_del_ip6_interface_table,
-  .short_help = "set interface ip6 table <interface> <table-id>"
-};
-/* *INDENT-ON* */
-
 void
 ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip,
 						  u8 * mac)
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
index 62cf23ac..56f33ac8 100644
--- a/src/vnet/ip/ip6_neighbor.c
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -250,6 +250,26 @@ format_ip6_neighbor_ip6_entry (u8 * s, va_list * va)
   return s;
 }
 
+static void
+ip6_neighbor_adj_fib_remove (ip6_neighbor_t * n, uint32_t fib_index)
+{
+  if (FIB_NODE_INDEX_INVALID != n->fib_entry_index)
+    {
+      fib_prefix_t pfx = {
+	.fp_len = 128,
+	.fp_proto = FIB_PROTOCOL_IP6,
+	.fp_addr.ip6 = n->key.ip6_address,
+      };
+      fib_table_entry_path_remove (fib_index,
+				   &pfx,
+				   FIB_SOURCE_ADJ,
+				   DPO_PROTO_IP6,
+				   &pfx.fp_addr,
+				   n->key.sw_if_index, ~0,
+				   1, FIB_ROUTE_PATH_FLAG_NONE);
+    }
+}
+
 static clib_error_t *
 ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm,
 				   u32 sw_if_index, u32 flags)
@@ -273,22 +293,10 @@ ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm,
 	{
 	  n = pool_elt_at_index (nm->neighbor_pool, to_delete[i]);
 	  mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
-	  if (FIB_NODE_INDEX_INVALID != n->fib_entry_index)
-	    {
-	      fib_prefix_t pfx = {
-		.fp_len = 128,
-		.fp_proto = FIB_PROTOCOL_IP6,
-		.fp_addr.ip6 = n->key.ip6_address,
-	      };
-	      fib_table_entry_path_remove
-		(ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index),
-		 &pfx,
-		 FIB_SOURCE_ADJ,
-		 DPO_PROTO_IP6,
-		 &pfx.fp_addr,
-		 n->key.sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
-	      pool_put (nm->neighbor_pool, n);
-	    }
+	  ip6_neighbor_adj_fib_remove (n,
+				       ip6_fib_table_get_index_for_sw_if_index
+				       (n->key.sw_if_index));
+	  pool_put (nm->neighbor_pool, n);
 	}
       vec_free (to_delete);
     }
@@ -579,6 +587,24 @@ ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
     }
 }
 
+
+static void
+ip6_neighbor_adj_fib_add (ip6_neighbor_t * n, uint32_t fib_index)
+{
+  fib_prefix_t pfx = {
+    .fp_len = 128,
+    .fp_proto = FIB_PROTOCOL_IP6,
+    .fp_addr.ip6 = n->key.ip6_address,
+  };
+
+  n->fib_entry_index =
+    fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
+			      FIB_ENTRY_FLAG_ATTACHED,
+			      DPO_PROTO_IP6, &pfx.fp_addr,
+			      n->key.sw_if_index, ~0, 1, NULL,
+			      FIB_ROUTE_PATH_FLAG_NONE);
+}
+
 int
 vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
 				u32 sw_if_index,
@@ -633,21 +659,9 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
        */
       if (!is_no_fib_entry)
 	{
-	  fib_prefix_t pfx = {
-	    .fp_len = 128,
-	    .fp_proto = FIB_PROTOCOL_IP6,
-	    .fp_addr.ip6 = k.ip6_address,
-	  };
-	  u32 fib_index;
-
-	  fib_index =
-	    ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index);
-	  n->fib_entry_index =
-	    fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
-				      FIB_ENTRY_FLAG_ATTACHED,
-				      DPO_PROTO_IP6, &pfx.fp_addr,
-				      n->key.sw_if_index, ~0, 1, NULL,
-				      FIB_ROUTE_PATH_FLAG_NONE);
+	  ip6_neighbor_adj_fib_add (n,
+				    ip6_fib_table_get_index_for_sw_if_index
+				    (n->key.sw_if_index));
 	}
       else
 	{
@@ -3843,6 +3857,33 @@ ip6_set_neighbor_limit (u32 neighbor_limit)
   return 0;
 }
 
+static void
+ip6_neighbor_table_bind (ip6_main_t * im,
+			 uword opaque,
+			 u32 sw_if_index,
+			 u32 new_fib_index, u32 old_fib_index)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  ip6_neighbor_t *n = NULL;
+  u32 i, *to_re_add = 0;
+
+  /* *INDENT-OFF* */
+  pool_foreach (n, nm->neighbor_pool,
+  ({
+    if (n->key.sw_if_index == sw_if_index)
+      vec_add1 (to_re_add, n - nm->neighbor_pool);
+  }));
+  /* *INDENT-ON* */
+
+  for (i = 0; i < vec_len (to_re_add); i++)
+    {
+      n = pool_elt_at_index (nm->neighbor_pool, to_re_add[i]);
+      ip6_neighbor_adj_fib_remove (n, old_fib_index);
+      ip6_neighbor_adj_fib_add (n, new_fib_index);
+    }
+  vec_free (to_re_add);
+}
+
 static clib_error_t *
 ip6_neighbor_init (vlib_main_t * vm)
 {
@@ -3874,6 +3915,11 @@ ip6_neighbor_init (vlib_main_t * vm)
   cb.function_opaque = 0;
   vec_add1 (im->add_del_interface_address_callbacks, cb);
 
+  ip6_table_bind_callback_t cbt;
+  cbt.function = ip6_neighbor_table_bind;
+  cbt.function_opaque = 0;
+  vec_add1 (im->table_bind_callbacks, cbt);
+
   mhash_init (&nm->pending_resolutions_by_address,
 	      /* value size */ sizeof (uword),
 	      /* key size */ sizeof (ip6_address_t));
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index bba65ab4..384ec3e0 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -699,12 +699,58 @@ vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp,
   REPLY_MACRO (VL_API_IP_NEIGHBOR_ADD_DEL_REPLY);
 }
 
+void
+ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api)
+{
+  u32 fib_index, mfib_index;
+
+  /*
+   * ignore action on the default table - this is always present
+   * and cannot be added nor deleted from the API
+   */
+  if (0 != table_id)
+    {
+      /*
+       * The API holds only one lock on the table.
+       * i.e. it can be added many times via the API but needs to be
+       * deleted only once.
+       * The FIB index for unicast and multicast is not necessarily the
+       * same, since internal VPP systems (like LISP and SR) create
+       * their own unicast tables.
+       */
+      fib_index = fib_table_find (fproto, table_id);
+      mfib_index = mfib_table_find (fproto, table_id);
+
+      if (~0 != fib_index)
+	{
+	  fib_table_unlock (fib_index, fproto,
+			    (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI));
+	}
+      if (~0 != mfib_index)
+	{
+	  mfib_table_unlock (mfib_index, fproto,
+			     (is_api ? MFIB_SOURCE_API : MFIB_SOURCE_CLI));
+	}
+    }
+}
+
 void
 vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp)
 {
   vl_api_ip_table_add_del_reply_t *rmp;
+  fib_protocol_t fproto = (mp->is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4);
+  u32 table_id = ntohl (mp->table_id);
   int rv = 0;
 
+  if (mp->is_add)
+    {
+      ip_table_create (fproto, table_id, 1);
+    }
+  else
+    {
+      ip_table_delete (fproto, table_id, 1);
+    }
+
   REPLY_MACRO (VL_API_IP_TABLE_ADD_DEL_REPLY);
 }
 
@@ -866,18 +912,21 @@ add_del_route_check (fib_protocol_t table_proto,
 		     u32 next_hop_sw_if_index,
 		     dpo_proto_t next_hop_table_proto,
 		     u32 next_hop_table_id,
-		     u8 create_missing_tables,
 		     u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index)
 {
   vnet_main_t *vnm = vnet_get_main ();
 
+  /* Temporary whilst I do the CSIT dance */
+  u8 create_missing_tables = 1;
+
   *fib_index = fib_table_find (table_proto, ntohl (table_id));
   if (~0 == *fib_index)
     {
       if (create_missing_tables)
 	{
 	  *fib_index = fib_table_find_or_create_and_lock (table_proto,
-							  ntohl (table_id));
+							  ntohl (table_id),
+							  FIB_SOURCE_API);
 	}
       else
 	{
@@ -918,12 +967,14 @@ add_del_route_check (fib_protocol_t table_proto,
 		*next_hop_fib_index =
 		  mfib_table_find_or_create_and_lock (fib_nh_proto,
 						      ntohl
-						      (next_hop_table_id));
+						      (next_hop_table_id),
+						      MFIB_SOURCE_API);
 	      else
 		*next_hop_fib_index =
 		  fib_table_find_or_create_and_lock (fib_nh_proto,
 						     ntohl
-						     (next_hop_table_id));
+						     (next_hop_table_id),
+						     FIB_SOURCE_API);
 	    }
 	  else
 	    {
@@ -948,8 +999,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
 			    mp->next_hop_sw_if_index,
 			    DPO_PROTO_IP4,
 			    mp->next_hop_table_id,
-			    mp->create_vrf_if_needed, 0,
-			    &fib_index, &next_hop_fib_index);
+			    0, &fib_index, &next_hop_fib_index);
 
   if (0 != rv)
     return (rv);
@@ -1008,8 +1058,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
 			    mp->next_hop_sw_if_index,
 			    DPO_PROTO_IP6,
 			    mp->next_hop_table_id,
-			    mp->create_vrf_if_needed, 0,
-			    &fib_index, &next_hop_fib_index);
+			    0, &fib_index, &next_hop_fib_index);
 
   if (0 != rv)
     return (rv);
@@ -1074,27 +1123,57 @@ vl_api_ip_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
   REPLY_MACRO (VL_API_IP_ADD_DEL_ROUTE_REPLY);
 }
 
+void
+ip_table_create (fib_protocol_t fproto, u32 table_id, u8 is_api)
+{
+  u32 fib_index, mfib_index;
+
+  /*
+   * ignore action on the default table - this is always present
+   * and cannot be added nor deleted from the API
+   */
+  if (0 != table_id)
+    {
+      /*
+       * The API holds only one lock on the table.
+       * i.e. it can be added many times via the API but needs to be
+       * deleted only once.
+       * The FIB index for unicast and multicast is not necessarily the
+       * same, since internal VPP systems (like LISP and SR) create
+       * their own unicast tables.
+       */
+      fib_index = fib_table_find (fproto, table_id);
+      mfib_index = mfib_table_find (fproto, table_id);
+
+      if (~0 == fib_index)
+	{
+	  fib_table_find_or_create_and_lock (fproto, table_id,
+					     (is_api ?
+					      FIB_SOURCE_API :
+					      FIB_SOURCE_CLI));
+	}
+      if (~0 == mfib_index)
+	{
+	  mfib_table_find_or_create_and_lock (fproto, table_id,
+					      (is_api ?
+					       MFIB_SOURCE_API :
+					       MFIB_SOURCE_CLI));
+	}
+    }
+}
+
 static int
 add_del_mroute_check (fib_protocol_t table_proto,
 		      u32 table_id,
-		      u32 next_hop_sw_if_index,
-		      u8 is_local, u8 create_missing_tables, u32 * fib_index)
+		      u32 next_hop_sw_if_index, u8 is_local, u32 * fib_index)
 {
   vnet_main_t *vnm = vnet_get_main ();
 
   *fib_index = mfib_table_find (table_proto, ntohl (table_id));
   if (~0 == *fib_index)
     {
-      if (create_missing_tables)
-	{
-	  *fib_index = mfib_table_find_or_create_and_lock (table_proto,
-							   ntohl (table_id));
-	}
-      else
-	{
-	  /* No such VRF, and we weren't asked to create one */
-	  return VNET_API_ERROR_NO_SUCH_FIB;
-	}
+      /* No such table */
+      return VNET_API_ERROR_NO_SUCH_FIB;
     }
 
   if (~0 != ntohl (next_hop_sw_if_index))
@@ -1163,8 +1242,7 @@ api_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp)
   rv = add_del_mroute_check (fproto,
 			     mp->table_id,
 			     mp->next_hop_sw_if_index,
-			     mp->is_local,
-			     mp->create_vrf_if_needed, &fib_index);
+			     mp->is_local, &fib_index);
 
   if (0 != rv)
     return (rv);
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
index 5537bb04..667c6791 100755
--- a/src/vnet/ip/lookup.c
+++ b/src/vnet/ip/lookup.c
@@ -687,6 +687,78 @@ done:
   return error;
 }
 
+clib_error_t *
+vnet_ip_table_cmd (vlib_main_t * vm,
+		   unformat_input_t * main_input,
+		   vlib_cli_command_t * cmd, fib_protocol_t fproto)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  u32 table_id, is_add;
+
+  is_add = 1;
+  table_id = ~0;
+
+  /* Get a line of input. */
+  if (!unformat_user (main_input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%d", &table_id))
+	;
+      else if (unformat (line_input, "del"))
+	is_add = 0;
+      else if (unformat (line_input, "add"))
+	is_add = 1;
+      else
+	{
+	  error = unformat_parse_error (line_input);
+	  goto done;
+	}
+    }
+
+  if (~0 == table_id)
+    {
+      error = clib_error_return (0, "No table id");
+      goto done;
+    }
+  else if (0 == table_id)
+    {
+      error = clib_error_return (0, "Can't change the default table");
+      goto done;
+    }
+  else
+    {
+      if (is_add)
+	{
+	  ip_table_create (fproto, table_id, 0);
+	}
+      else
+	{
+	  ip_table_delete (fproto, table_id, 0);
+	}
+    }
+
+done:
+  unformat_free (line_input);
+  return error;
+}
+
+clib_error_t *
+vnet_ip4_table_cmd (vlib_main_t * vm,
+		    unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+  return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP4));
+}
+
+clib_error_t *
+vnet_ip6_table_cmd (vlib_main_t * vm,
+		    unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+  return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6));
+}
+
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = {
   .path = "ip",
@@ -749,6 +821,159 @@ VLIB_CLI_COMMAND (ip_route_command, static) = {
   .function = vnet_ip_route_cmd,
   .is_mp_safe = 1,
 };
+
+/* *INDENT-ON* */
+/*?
+ * This command is used to add or delete IPv4 Tables. All
+ * Tables must be explicitly added before they can be used. Creating a
+ * table will add both unicast and multicast FIBs
+ *
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip4_table_command, static) = {
+  .path = "ip table",
+  .short_help = "ip table [add|del] <table-id>",
+  .function = vnet_ip4_table_cmd,
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-ON* */
+/*?
+ * This command is used to add or delete IPv6 Tables. All
+ * Tables must be explicitly added before they can be used. Creating a
+ * table will add both unicast and multicast FIBs
+ *
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_table_command, static) = {
+  .path = "ip6 table",
+  .short_help = "ip6 table [add|del] <table-id>",
+  .function = vnet_ip6_table_cmd,
+  .is_mp_safe = 1,
+};
+
+static clib_error_t *
+ip_table_bind_cmd (vlib_main_t * vm,
+                   unformat_input_t * input,
+                   vlib_cli_command_t * cmd,
+                   fib_protocol_t fproto)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  u32 sw_if_index, table_id;
+  int rv;
+
+  sw_if_index = ~0;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  if (unformat (input, "%d", &table_id))
+    ;
+  else
+    {
+      error = clib_error_return (0, "expected table id `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  rv = ip_table_bind (fproto, sw_if_index, table_id, 0);
+
+  if (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE == rv)
+    {
+      error = clib_error_return (0, "IP addresses are still present on %U",
+                                 format_vnet_sw_if_index_name,
+                                 vnet_get_main(),
+                                 sw_if_index);
+    }
+  else if (VNET_API_ERROR_NO_SUCH_FIB == rv)
+    {
+      error = clib_error_return (0, "no such table %d", table_id);
+    }
+  else if (0 != rv)
+    {
+      error = clib_error_return (0, "unknown error");
+    }
+
+ done:
+  return error;
+}
+
+static clib_error_t *
+ip4_table_bind_cmd (vlib_main_t * vm,
+                    unformat_input_t * input,
+                    vlib_cli_command_t * cmd)
+{
+  return (ip_table_bind_cmd (vm , input, cmd, FIB_PROTOCOL_IP4));
+}
+
+static clib_error_t *
+ip6_table_bind_cmd (vlib_main_t * vm,
+                    unformat_input_t * input,
+                    vlib_cli_command_t * cmd)
+{
+  return (ip_table_bind_cmd (vm , input, cmd, FIB_PROTOCOL_IP6));
+}
+
+/*?
+ * Place the indicated interface into the supplied IPv4 FIB table (also known
+ * as a VRF). The FIB table must already exist (see '<em>ip table</em>'). To
+ * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
+ * FIB table will only be displayed if a route has been added to the table, or
+ * an IP Address is assigned to an interface in the table (which adds a route
+ * automatically).
+ *
+ * @note IP addresses added after setting the interface IP table are added to
+ * the indicated FIB table. If an IP address is added prior to changing the
+ * table then this is an error. The control plane must remove these addresses
+ * first and then change the table. VPP will not automatically move the
+ * addresses from the old to the new table as it does not know the validity
+ * of such a change.
+ *
+ * @cliexpar
+ * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
+ * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
+{
+  .path = "set interface ip table",
+  .function = ip4_table_bind_cmd,
+  .short_help = "set interface ip table <interface> <table-id>",
+};
+/* *INDENT-ON* */
+
+/*?
+ * Place the indicated interface into the supplied IPv6 FIB table (also known
+ * as a VRF). The FIB table must already exist (see '<em>ip6 table</em>'). To
+ * display the current IPv6 FIB table, use the command '<em>show ip6 fib</em>'.
+ * FIB table will only be displayed if a route has been added to the table, or
+ * an IP Address is assigned to an interface in the table (which adds a route
+ * automatically).
+ *
+ * @note IP addresses added after setting the interface IP table are added to
+ * the indicated FIB table. If an IP address is added prior to changing the
+ * table then this is an error. The control plane must remove these addresses
+ * first and then change the table. VPP will not automatically move the
+ * addresses from the old to the new table as it does not know the validity
+ * of such a change.
+ *
+ * @cliexpar
+ * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
+ * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) =
+{
+  .path = "set interface ip6 table",
+  .function = ip6_table_bind_cmd,
+  .short_help = "set interface ip6 table <interface> <table-id>"
+};
 /* *INDENT-ON* */
 
 clib_error_t *
diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c
index e832c23f..a0c05e85 100644
--- a/src/vnet/lisp-gpe/interface.c
+++ b/src/vnet/lisp-gpe/interface.c
@@ -505,12 +505,14 @@ lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id)
 {
   fib_node_index_t fib_index;
 
-  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
+  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id,
+						 FIB_SOURCE_LISP);
   vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
   ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
   ip4_sw_interface_enable_disable (sw_if_index, 1);
 
-  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id);
+  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id,
+						 FIB_SOURCE_LISP);
   vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
   ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
   ip6_sw_interface_enable_disable (sw_if_index, 1);
@@ -530,7 +532,7 @@ lisp_gpe_tenant_del_default_routes (u32 table_id)
 
     fib_index = fib_table_find (prefix.fp_proto, table_id);
     fib_table_entry_special_remove (fib_index, &prefix, FIB_SOURCE_LISP);
-    fib_table_unlock (fib_index, prefix.fp_proto);
+    fib_table_unlock (fib_index, prefix.fp_proto, FIB_SOURCE_LISP);
   }
 }
 
@@ -549,7 +551,8 @@ lisp_gpe_tenant_add_default_routes (u32 table_id)
     /*
      * Add a deafult route that results in a control plane punt DPO
      */
-    fib_index = fib_table_find_or_create_and_lock (prefix.fp_proto, table_id);
+    fib_index = fib_table_find_or_create_and_lock (prefix.fp_proto, table_id,
+						   FIB_SOURCE_LISP);
     fib_table_entry_special_dpo_add (fib_index, &prefix, FIB_SOURCE_LISP,
 				     FIB_ENTRY_FLAG_EXCLUSIVE,
 				     lisp_cp_dpo_get (fib_proto_to_dpo
diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
index d7d3cb86..0a8dc039 100644
--- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
+++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
@@ -66,6 +66,7 @@ ip_dst_fib_add_route (u32 dst_fib_index, const ip_prefix_t * dst_prefix)
       /* create a new src FIB.  */
       src_fib_index =
 	fib_table_create_and_lock (dst_fib_prefix.fp_proto,
+				   FIB_SOURCE_LISP,
 				   "LISP-src for [%d,%U]",
 				   dst_fib_index,
 				   format_fib_prefix, &dst_fib_prefix);
@@ -180,7 +181,8 @@ ip_src_dst_fib_del_route (u32 src_fib_index,
        */
       fib_table_entry_special_remove (dst_fib_index,
 				      &dst_fib_prefix, FIB_SOURCE_LISP);
-      fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto);
+      fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto,
+			FIB_SOURCE_LISP);
     }
 }
 
@@ -544,7 +546,8 @@ add_ip_fwd_entry (lisp_gpe_main_t * lgm,
   lfe->tenant = lisp_gpe_tenant_find_or_create (lfe->key->vni);
   lfe->eid_table_id = a->table_id;
   lfe->eid_fib_index = fib_table_find_or_create_and_lock (fproto,
-							  lfe->eid_table_id);
+							  lfe->eid_table_id,
+							  FIB_SOURCE_LISP);
   lfe->is_src_dst = a->is_src_dst;
 
   if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type)
@@ -578,7 +581,7 @@ del_ip_fwd_entry_i (lisp_gpe_main_t * lgm, lisp_gpe_fwd_entry_t * lfe)
 
   fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ?
 	    FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
-  fib_table_unlock (lfe->eid_fib_index, fproto);
+  fib_table_unlock (lfe->eid_fib_index, fproto, FIB_SOURCE_LISP);
 
   hash_unset_mem (lgm->lisp_gpe_fwd_entries, lfe->key);
   clib_mem_free (lfe->key);
diff --git a/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c b/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c
index b234d9dc..26664f53 100644
--- a/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c
+++ b/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c
@@ -89,13 +89,15 @@ lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id)
 {
   fib_node_index_t fib_index;
 
-  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
+  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id,
+						 FIB_SOURCE_LISP);
   ASSERT (FIB_NODE_INDEX_INVALID != fib_index);
 
   vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
   ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
 
-  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id);
+  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id,
+						 FIB_SOURCE_LISP);
   ASSERT (FIB_NODE_INDEX_INVALID != fib_index);
 
   vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
@@ -105,9 +107,13 @@ lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id)
 static void
 lisp_gpe_sub_interface_unset_table (u32 sw_if_index, u32 table_id)
 {
+  fib_table_unlock (ip4_main.fib_index_by_sw_if_index[sw_if_index],
+		    FIB_PROTOCOL_IP4, FIB_SOURCE_LISP);
   ip4_main.fib_index_by_sw_if_index[sw_if_index] = 0;
   ip4_sw_interface_enable_disable (sw_if_index, 0);
 
+  fib_table_unlock (ip6_main.fib_index_by_sw_if_index[sw_if_index],
+		    FIB_PROTOCOL_IP6, FIB_SOURCE_LISP);
   ip6_main.fib_index_by_sw_if_index[sw_if_index] = 0;
   ip6_sw_interface_enable_disable (sw_if_index, 0);
 }
@@ -185,6 +191,7 @@ lisp_gpe_sub_interface_unlock (index_t l3si)
 
   l3s = lisp_gpe_sub_interface_get_i (l3si);
 
+  ASSERT (0 != l3s->locks);
   l3s->locks--;
 
   if (0 == l3s->locks)
diff --git a/src/vnet/mfib/ip4_mfib.c b/src/vnet/mfib/ip4_mfib.c
index 1849a3a4..b2482580 100644
--- a/src/vnet/mfib/ip4_mfib.c
+++ b/src/vnet/mfib/ip4_mfib.c
@@ -33,7 +33,8 @@ static const mfib_prefix_t ip4_specials[] = {
 };
 
 static u32
-ip4_create_mfib_with_table_id (u32 table_id)
+ip4_create_mfib_with_table_id (u32 table_id,
+                               mfib_source_t src)
 {
     mfib_table_t *mfib_table;
 
@@ -53,7 +54,7 @@ ip4_create_mfib_with_table_id (u32 table_id)
         mfib_table->v4.table_id =
             table_id;
 
-    mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP4);
+    mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP4, src);
 
     /*
      * add the special entries into the new FIB
@@ -113,14 +114,15 @@ ip4_mfib_table_destroy (ip4_mfib_t *mfib)
 }
 
 u32
-ip4_mfib_table_find_or_create_and_lock (u32 table_id)
+ip4_mfib_table_find_or_create_and_lock (u32 table_id,
+                                        mfib_source_t src)
 {
     u32 index;
 
     index = ip4_mfib_index_from_table_id(table_id);
     if (~0 == index)
-        return ip4_create_mfib_with_table_id(table_id);
-    mfib_table_lock(index, FIB_PROTOCOL_IP4);
+        return ip4_create_mfib_with_table_id(table_id, src);
+    mfib_table_lock(index, FIB_PROTOCOL_IP4, src);
 
     return (index);
 }
diff --git a/src/vnet/mfib/ip4_mfib.h b/src/vnet/mfib/ip4_mfib.h
index ea682651..e31fb744 100644
--- a/src/vnet/mfib/ip4_mfib.h
+++ b/src/vnet/mfib/ip4_mfib.h
@@ -72,8 +72,9 @@ ip4_mfib_get (u32 index)
  * @returns A pointer to the retrieved or created fib.
  *
  */
-extern u32 ip4_mfib_table_find_or_create_and_lock(u32 table_id);
-extern u32 ip4_mfib_table_create_and_lock(void);
+extern u32 ip4_mfib_table_find_or_create_and_lock(u32 table_id,
+                                                  mfib_source_t src);
+extern u32 ip4_mfib_table_create_and_lock(mfib_source_t src);
 
 static inline
 u32 ip4_mfib_index_from_table_id (u32 table_id)
diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c
index 5e48e919..e4861330 100644
--- a/src/vnet/mfib/ip6_mfib.c
+++ b/src/vnet/mfib/ip6_mfib.c
@@ -151,7 +151,8 @@ static const ip6_mfib_special_t ip6_mfib_specials[] =
 
 
 static u32
-ip6_create_mfib_with_table_id (u32 table_id)
+ip6_create_mfib_with_table_id (u32 table_id,
+                               mfib_source_t src)
 {
     mfib_table_t *mfib_table;
     mfib_prefix_t pfx = {
@@ -182,7 +183,7 @@ ip6_create_mfib_with_table_id (u32 table_id)
         mfib_table->v6.table_id =
             table_id;
 
-    mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP6);
+    mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP6, src);
 
     mfib_table->v6.rhead =
         clib_mem_alloc_aligned (sizeof(*mfib_table->v6.rhead),
@@ -297,14 +298,15 @@ ip6_mfib_interface_enable_disable (u32 sw_if_index, int is_enable)
 }
 
 u32
-ip6_mfib_table_find_or_create_and_lock (u32 table_id)
+ip6_mfib_table_find_or_create_and_lock (u32 table_id,
+                                        mfib_source_t src)
 {
     u32 index;
 
     index = ip6_mfib_index_from_table_id(table_id);
     if (~0 == index)
-        return ip6_create_mfib_with_table_id(table_id);
-    mfib_table_lock(index, FIB_PROTOCOL_IP6);
+        return ip6_create_mfib_with_table_id(table_id, src);
+    mfib_table_lock(index, FIB_PROTOCOL_IP6, src);
 
     return (index);
 }
diff --git a/src/vnet/mfib/ip6_mfib.h b/src/vnet/mfib/ip6_mfib.h
index adaa7ec2..ea81b553 100644
--- a/src/vnet/mfib/ip6_mfib.h
+++ b/src/vnet/mfib/ip6_mfib.h
@@ -79,8 +79,9 @@ ip6_mfib_get (u32 index)
  * @returns A pointer to the retrieved or created fib.
  *
  */
-extern u32 ip6_mfib_table_find_or_create_and_lock(u32 table_id);
-extern u32 ip6_mfib_table_create_and_lock(void);
+extern u32 ip6_mfib_table_find_or_create_and_lock(u32 table_id,
+                                                  mfib_source_t src);
+extern u32 ip6_mfib_table_create_and_lock(mfib_source_t src);
 
 
 static inline
diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c
index 804e10ab..2302b9a1 100644
--- a/src/vnet/mfib/mfib_entry.c
+++ b/src/vnet/mfib/mfib_entry.c
@@ -334,6 +334,17 @@ mfib_entry_get_best_src (const mfib_entry_t *mfib_entry)
     return (bsrc);
 }
 
+int
+mfib_entry_is_sourced (fib_node_index_t mfib_entry_index,
+                       mfib_source_t source)
+{
+    mfib_entry_t *mfib_entry;
+
+    mfib_entry = mfib_entry_get(mfib_entry_index);
+
+    return (NULL != mfib_entry_src_find(mfib_entry, source, NULL));
+}
+
 static void
 mfib_entry_src_flush (mfib_entry_src_t *msrc)
 {
diff --git a/src/vnet/mfib/mfib_entry.h b/src/vnet/mfib/mfib_entry.h
index d4377878..96ee49f7 100644
--- a/src/vnet/mfib/mfib_entry.h
+++ b/src/vnet/mfib/mfib_entry.h
@@ -130,6 +130,8 @@ extern void mfib_entry_unlock(fib_node_index_t fib_entry_index);
 extern void mfib_entry_get_prefix(fib_node_index_t fib_entry_index,
                                   mfib_prefix_t *pfx);
 extern u32 mfib_entry_get_fib_index(fib_node_index_t fib_entry_index);
+extern int mfib_entry_is_sourced(fib_node_index_t fib_entry_index,
+                                 mfib_source_t source);
 
 extern void mfib_entry_contribute_forwarding(
     fib_node_index_t mfib_entry_index,
diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c
index 7ffe8941..e5550adc 100644
--- a/src/vnet/mfib/mfib_table.c
+++ b/src/vnet/mfib/mfib_table.c
@@ -424,7 +424,8 @@ mfib_table_find (fib_protocol_t proto,
 
 u32
 mfib_table_find_or_create_and_lock (fib_protocol_t proto,
-                                    u32 table_id)
+                                    u32 table_id,
+                                    mfib_source_t src)
 {
     mfib_table_t *mfib_table;
     fib_node_index_t fi;
@@ -432,10 +433,10 @@ mfib_table_find_or_create_and_lock (fib_protocol_t proto,
     switch (proto)
     {
     case FIB_PROTOCOL_IP4:
-        fi = ip4_mfib_table_find_or_create_and_lock(table_id);
+        fi = ip4_mfib_table_find_or_create_and_lock(table_id, src);
         break;
     case FIB_PROTOCOL_IP6:
-        fi = ip6_mfib_table_find_or_create_and_lock(table_id);
+        fi = ip6_mfib_table_find_or_create_and_lock(table_id, src);
         break;
     case FIB_PROTOCOL_MPLS:
     default:
@@ -451,6 +452,59 @@ mfib_table_find_or_create_and_lock (fib_protocol_t proto,
     return (fi);
 }
 
+/**
+ * @brief Table flush context. Store the indices of matching MFIB entries
+ * that need to be removed.
+ */
+typedef struct mfib_table_flush_ctx_t_
+{
+    /**
+     * The list of entries to flush
+     */
+    fib_node_index_t *mftf_entries;
+
+    /**
+     * The source we are flushing
+     */
+    mfib_source_t mftf_source;
+} mfib_table_flush_ctx_t;
+
+static int
+mfib_table_flush_cb (fib_node_index_t mfib_entry_index,
+                     void *arg)
+{
+    mfib_table_flush_ctx_t *ctx = arg;
+
+    if (mfib_entry_is_sourced(mfib_entry_index, ctx->mftf_source))
+    {
+        vec_add1(ctx->mftf_entries, mfib_entry_index);
+    }
+    return (1);
+}
+
+void
+mfib_table_flush (u32 mfib_index,
+                  fib_protocol_t proto,
+                  mfib_source_t source)
+{
+    fib_node_index_t *mfib_entry_index;
+    mfib_table_flush_ctx_t ctx = {
+        .mftf_entries = NULL,
+        .mftf_source = source,
+    };
+
+    mfib_table_walk(mfib_index, proto,
+                    mfib_table_flush_cb,
+                    &ctx);
+
+    vec_foreach(mfib_entry_index, ctx.mftf_entries)
+    {
+        mfib_table_entry_delete_index(*mfib_entry_index, source);
+    }
+
+    vec_free(ctx.mftf_entries);
+}
+
 static void
 mfib_table_destroy (mfib_table_t *mfib_table)
 {
@@ -472,27 +526,43 @@ mfib_table_destroy (mfib_table_t *mfib_table)
 
 void
 mfib_table_unlock (u32 fib_index,
-                   fib_protocol_t proto)
+                   fib_protocol_t proto,
+                   mfib_source_t source)
 {
     mfib_table_t *mfib_table;
 
     mfib_table = mfib_table_get(fib_index, proto);
-    mfib_table->mft_locks--;
+    mfib_table->mft_locks[source]--;
+    mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]--;
+
+    if (0 == mfib_table->mft_locks[source])
+    {
+        /*
+         * The source no longer needs the table. Flush any routes
+         * from it just in case
+         */
+        mfib_table_flush(fib_index, proto, source);
+    }
 
-    if (0 == mfib_table->mft_locks)
+    if (0 == mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS])
     {
-        mfib_table_destroy(mfib_table);
+        /*
+         * no more locks from any source - kill it
+         */
+	mfib_table_destroy(mfib_table);
     }
 }
 
 void
 mfib_table_lock (u32 fib_index,
-                 fib_protocol_t proto)
+                 fib_protocol_t proto,
+                 mfib_source_t source)
 {
     mfib_table_t *mfib_table;
 
     mfib_table = mfib_table_get(fib_index, proto);
-    mfib_table->mft_locks++;
+    mfib_table->mft_locks[source]++;
+    mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]++;
 }
 
 void
diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h
index 83aa04ef..c6b0b097 100644
--- a/src/vnet/mfib/mfib_table.h
+++ b/src/vnet/mfib/mfib_table.h
@@ -22,6 +22,12 @@
 
 #include <vnet/mfib/mfib_types.h>
 
+/**
+ * Keep a lock per-source and a total
+ */
+#define MFIB_TABLE_N_LOCKS (MFIB_N_SOURCES+1)
+#define MFIB_TABLE_TOTAL_LOCKS MFIB_N_SOURCES
+
 /**
  * @brief
  *   A protocol Independent IP multicast FIB table
@@ -47,7 +53,7 @@ typedef struct mfib_table_t_
     /**
      * number of locks on the table
      */
-    u16 mft_locks;
+    u16 mft_locks[MFIB_TABLE_N_LOCKS];
 
     /**
      * Table ID (hash key) for this FIB.
@@ -259,7 +265,8 @@ extern fib_node_index_t mfib_table_entry_special_add(u32 fib_index,
  *  the source to flush
  */
 extern void mfib_table_flush(u32 fib_index,
-                             fib_protocol_t proto);
+                             fib_protocol_t proto,
+                             mfib_source_t source);
 
 /**
  * @brief
@@ -307,9 +314,13 @@ extern u32 mfib_table_find(fib_protocol_t proto, u32 table_id);
  *
  * @return fib_index
  *  The index of the FIB
+ *
+ * @param source
+ *  The ID of the client/source.
  */
 extern u32 mfib_table_find_or_create_and_lock(fib_protocol_t proto,
-                                              u32 table_id);
+                                              u32 table_id,
+                                              mfib_source_t source);
 
 
 /**
@@ -321,9 +332,13 @@ extern u32 mfib_table_find_or_create_and_lock(fib_protocol_t proto,
  *
  * @paran proto
  *  The protocol of the FIB (and thus the entries therein)
+ *
+ * @param source
+ *  The ID of the client/source.
  */
 extern void mfib_table_unlock(u32 fib_index,
-                              fib_protocol_t proto);
+                              fib_protocol_t proto,
+                              mfib_source_t source);
 
 /**
  * @brief
@@ -335,9 +350,13 @@ extern void mfib_table_unlock(u32 fib_index,
  *
  * @paran proto
  *  The protocol of the FIB (and thus the entries therein)
+ *
+ * @param source
+ *  The ID of the client/source.
  */
 extern void mfib_table_lock(u32 fib_index,
-                            fib_protocol_t proto);
+                            fib_protocol_t proto,
+                            mfib_source_t source);
 
 /**
  * @brief
diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c
index 57787eca..3055844d 100644
--- a/src/vnet/mfib/mfib_test.c
+++ b/src/vnet/mfib/mfib_test.c
@@ -22,6 +22,7 @@
 #include <vnet/fib/fib_path_list.h>
 #include <vnet/fib/fib_test.h>
 #include <vnet/fib/fib_table.h>
+#include <vnet/fib/mpls_fib.h>
 
 #include <vnet/dpo/replicate_dpo.h>
 #include <vnet/adj/adj_mcast.h>
@@ -366,7 +367,7 @@ mfib_test_i (fib_protocol_t PROTO,
     MFIB_TEST(3 == adj_mcast_db_size(), "3 MCAST adjs");
 
     /* Find or create FIB table 11 */
-    fib_index = mfib_table_find_or_create_and_lock(PROTO, 11);
+    fib_index = mfib_table_find_or_create_and_lock(PROTO, 11, MFIB_SOURCE_API);
 
     mfib_prefix_t pfx_dft = {
         .fp_len = 0,
@@ -1113,9 +1114,10 @@ mfib_test_i (fib_protocol_t PROTO,
     /*
      * MPLS enable an interface so we get the MPLS table created
      */
+    mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API);
     mpls_sw_interface_enable_disable(&mpls_main,
                                      tm->hw[0]->sw_if_index,
-                                     1);
+                                     1, 0);
 
     lfei = fib_table_entry_update_one_path(0, // default MPLS Table
                                            &pfx_3500,
@@ -1192,7 +1194,7 @@ mfib_test_i (fib_protocol_t PROTO,
     /*
      * Unlock the table - it's the last lock so should be gone thereafter
      */
-    mfib_table_unlock(fib_index, PROTO);
+    mfib_table_unlock(fib_index, PROTO, MFIB_SOURCE_API);
 
     MFIB_TEST((FIB_NODE_INDEX_INVALID ==
                mfib_table_find(PROTO, fib_index)),
@@ -1207,7 +1209,8 @@ mfib_test_i (fib_protocol_t PROTO,
      */
     mpls_sw_interface_enable_disable(&mpls_main,
                                      tm->hw[0]->sw_if_index,
-                                     0);
+                                     0, 0);
+    mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API);
 
     /*
      * test we've leaked no resources
diff --git a/src/vnet/mfib/mfib_types.h b/src/vnet/mfib/mfib_types.h
index 863fad16..50aede04 100644
--- a/src/vnet/mfib/mfib_types.h
+++ b/src/vnet/mfib/mfib_types.h
@@ -166,9 +166,10 @@ typedef enum mfib_source_t_
     MFIB_SOURCE_VXLAN,
     MFIB_SOURCE_DHCP,
     MFIB_SOURCE_SRv6,
-    MFIB_SOURCE_DEFAULT_ROUTE,
     MFIB_SOURCE_GTPU,
     MFIB_SOURCE_VXLAN_GPE,
+    MFIB_SOURCE_RR,
+    MFIB_SOURCE_DEFAULT_ROUTE,
 } mfib_source_t;
 
 #define MFIB_SOURCE_NAMES {                        \
@@ -178,11 +179,14 @@ typedef enum mfib_source_t_
     [MFIB_SOURCE_DHCP] = "DHCP",                   \
     [MFIB_SOURCE_VXLAN] = "VXLAN",                 \
     [MFIB_SOURCE_SRv6] = "SRv6",                   \
-    [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \
     [MFIB_SOURCE_GTPU] = "GTPU",                   \
     [MFIB_SOURCE_VXLAN_GPE] = "VXLAN-GPE",         \
+    [MFIB_SOURCE_RR] = "Recursive-resolution",     \
+    [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \
 }
 
+#define MFIB_N_SOURCES (MFIB_SOURCE_DEFAULT_ROUTE + 1) /* last source + 1 */
+
 /**
  * \brief Compare two prefixes for equality
  */
diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c
index a085aaa2..d7c8e7d3 100644
--- a/src/vnet/mpls/interface.c
+++ b/src/vnet/mpls/interface.c
@@ -35,25 +35,33 @@ mpls_sw_interface_is_enabled (u32 sw_if_index)
     return (mm->mpls_enabled_by_sw_if_index[sw_if_index]);
 }
 
-void
+int
 mpls_sw_interface_enable_disable (mpls_main_t * mm,
                                   u32 sw_if_index,
-                                  u8 is_enable)
+                                  u8 is_enable,
+                                  u8 is_api)
 {
   fib_node_index_t lfib_index;
 
   vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0);
 
+  lfib_index = fib_table_find(FIB_PROTOCOL_MPLS,
+                              MPLS_FIB_DEFAULT_TABLE_ID);
+
+  if (~0 == lfib_index)
+       return VNET_API_ERROR_NO_SUCH_FIB;
+
   /*
    * enable/disable only on the 1<->0 transition
    */
   if (is_enable)
     {
       if (1 != ++mm->mpls_enabled_by_sw_if_index[sw_if_index])
-        return;
+          return (0);
+
+      fib_table_lock(lfib_index, FIB_PROTOCOL_MPLS,
+                     (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI));
 
-      lfib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS,
-						     MPLS_FIB_DEFAULT_TABLE_ID);
       vec_validate(mm->fib_index_by_sw_if_index, 0);
       mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index;
     }
@@ -61,15 +69,17 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm,
     {
       ASSERT(mm->mpls_enabled_by_sw_if_index[sw_if_index] > 0);
       if (0 != --mm->mpls_enabled_by_sw_if_index[sw_if_index])
-        return;
+          return (0);
 
       fib_table_unlock(mm->fib_index_by_sw_if_index[sw_if_index],
-		       FIB_PROTOCOL_MPLS);
+		       FIB_PROTOCOL_MPLS,
+                       (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI));
     }
 
   vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled",
                                sw_if_index, !is_enable, 0, 0);
 
+  return (0);
 }
 
 static clib_error_t *
@@ -101,7 +111,7 @@ mpls_interface_enable_disable (vlib_main_t * vm,
       goto done;
     }
 
-  mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable);
+  mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable, 0);
 
  done:
   return error;
diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c
index 5021ac23..7bdfd8c7 100644
--- a/src/vnet/mpls/mpls.c
+++ b/src/vnet/mpls/mpls.c
@@ -536,6 +536,78 @@ VLIB_CLI_COMMAND (mpls_local_label_command, static) = {
   .short_help = "Create/Delete MPL local labels",
 };
 
+clib_error_t *
+vnet_mpls_table_cmd (vlib_main_t * vm,
+                     unformat_input_t * main_input,
+                     vlib_cli_command_t * cmdo)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  u32 table_id, is_add;
+
+  is_add = 1;
+  table_id = ~0;
+
+  /* Get a line of input. */
+  if (!unformat_user (main_input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%d", &table_id))
+	;
+      else if (unformat (line_input, "del"))
+	is_add = 0;
+      else if (unformat (line_input, "add"))
+	is_add = 1;
+      else
+	{
+	  error = unformat_parse_error (line_input);
+	  goto done;
+	}
+    }
+
+  if (~0 == table_id)
+    {
+      error = clib_error_return (0, "No table id");
+      goto done;
+    }
+  else if (0 == table_id)
+    {
+      error = clib_error_return (0, "Can't change the default table");
+      goto done;
+    }
+  else
+    {
+      if (is_add)
+        {
+          mpls_table_create (table_id, 0);
+        }
+      else
+        {
+          mpls_table_delete (table_id, 0);
+        }
+    }
+
+ done:
+  unformat_free (line_input);
+  return error;
+}
+
+/* *INDENT-ON* */
+/*?
+ * This command is used to add or delete MPLS Tables. All
+ * Tables must be explicitly added before they can be used.
+ * The default table (ID 0) cannot be changed with this command.
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (mpls_table_command, static) = {
+  .path = "mpls table",
+  .short_help = "mpls table [add|del] <table-id>",
+  .function = vnet_mpls_table_cmd,
+  .is_mp_safe = 1,
+};
+
 int
 mpls_fib_reset_labels (u32 fib_id)
 {
@@ -546,12 +618,8 @@ mpls_fib_reset_labels (u32 fib_id)
 static clib_error_t *
 mpls_init (vlib_main_t * vm)
 {
-  mpls_main_t * mm = &mpls_main;
   clib_error_t * error;
 
-  mm->vlib_main = vm;
-  mm->vnet_main = vnet_get_main();
-
   if ((error = vlib_call_init_function (vm, ip_main_init)))
     return error;
 
diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h
index b0125e60..31cb1746 100644
--- a/src/vnet/mpls/mpls.h
+++ b/src/vnet/mpls/mpls.h
@@ -56,10 +56,6 @@ typedef struct {
 
   /* IP4 enabled count by software interface */
   u8 * mpls_enabled_by_sw_if_index;
-
-  /* convenience */
-  vlib_main_t * vlib_main;
-  vnet_main_t * vnet_main;
 } mpls_main_t;
 
 extern mpls_main_t mpls_main;
@@ -77,8 +73,6 @@ extern vlib_node_registration_t mpls_midchain_node;
 
 /* Parse mpls protocol as 0xXXXX or protocol name.
    In either host or network byte order. */
-unformat_function_t unformat_mpls_protocol_host_byte_order;
-unformat_function_t unformat_mpls_protocol_net_byte_order;
 unformat_function_t unformat_mpls_label_net_byte_order;
 unformat_function_t unformat_mpls_unicast_label;
 
@@ -86,9 +80,10 @@ unformat_function_t unformat_mpls_unicast_label;
 unformat_function_t unformat_mpls_header;
 unformat_function_t unformat_pg_mpls_header;
 
-void mpls_sw_interface_enable_disable (mpls_main_t * mm,
-				       u32 sw_if_index,
-				       u8 is_enable);
+int mpls_sw_interface_enable_disable (mpls_main_t * mm,
+                                      u32 sw_if_index,
+                                      u8 is_enable,
+                                      u8 is_api);
 
 u8 mpls_sw_interface_is_enabled (u32 sw_if_index);
 
@@ -103,4 +98,7 @@ mpls_fib_index_cmp(void * a1, void * a2);
 int
 mpls_label_cmp(void * a1, void * a2);
 
+void mpls_table_create(uint32_t table_id, u8 is_api);
+void mpls_table_delete(uint32_t table_id, u8 is_api);
+
 #endif /* included_vnet_mpls_h */
diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c
index a44b1a25..38f5b014 100644
--- a/src/vnet/mpls/mpls_api.c
+++ b/src/vnet/mpls/mpls_api.c
@@ -58,6 +58,29 @@ _(MPLS_FIB_DUMP, mpls_fib_dump)
 extern void stats_dslock_with_hint (int hint, int tag);
 extern void stats_dsunlock (void);
 
+void
+mpls_table_delete (u32 table_id, u8 is_api)
+{
+  u32 fib_index;
+
+  /*
+   * The MPLS default table must also be explicitly created via the API.
+   * So in contrast to IP, it gets no special treatment here.
+   *
+   * The API holds only one lock on the table.
+   * i.e. it can be added many times via the API but needs to be
+   * deleted only once.
+   */
+  fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id);
+
+  if (~0 != fib_index)
+    {
+      fib_table_unlock (fib_index,
+			FIB_PROTOCOL_MPLS,
+			(is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI));
+    }
+}
+
 void
 vl_api_mpls_table_add_del_t_handler (vl_api_mpls_table_add_del_t * mp)
 {
@@ -68,6 +91,13 @@ vl_api_mpls_table_add_del_t_handler (vl_api_mpls_table_add_del_t * mp)
   vnm = vnet_get_main ();
   vnm->api_errno = 0;
 
+  if (mp->mt_is_add)
+    mpls_table_create (ntohl (mp->mt_table_id), 1);
+  else
+    mpls_table_delete (ntohl (mp->mt_table_id), 1);
+
+  rv = (rv == 0) ? vnm->api_errno : rv;
+
   REPLY_MACRO (VL_API_MPLS_TABLE_ADD_DEL_REPLY);
 }
 
@@ -82,14 +112,7 @@ mpls_ip_bind_unbind_handler (vnet_main_t * vnm,
 
   if (~0 == mpls_fib_index)
     {
-      if (mp->mb_create_table_if_needed)
-	{
-	  mpls_fib_index =
-	    fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS,
-					       ntohl (mp->mb_mpls_table_id));
-	}
-      else
-	return VNET_API_ERROR_NO_SUCH_FIB;
+      return VNET_API_ERROR_NO_SUCH_FIB;
     }
 
   ip_fib_index = fib_table_find ((mp->mb_is_ip4 ?
@@ -170,7 +193,6 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm,
 			    mp->mr_next_hop_sw_if_index,
 			    pfx.fp_payload_proto,
 			    mp->mr_next_hop_table_id,
-			    mp->mr_create_table_if_needed,
 			    mp->mr_is_rpf_id,
 			    &fib_index, &next_hop_fib_index);
 
@@ -235,6 +257,32 @@ vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp)
   REPLY_MACRO (VL_API_MPLS_ROUTE_ADD_DEL_REPLY);
 }
 
+void
+mpls_table_create (u32 table_id, u8 is_api)
+{
+  u32 fib_index;
+
+  /*
+   * The MPLS default table must also be explicitly created via the API.
+   * So in contrast to IP, it gets no special treatment here.
+   */
+
+  /*
+   * The API holds only one lock on the table.
+   * i.e. it can be added many times via the API but needs to be
+   * deleted only once.
+   */
+  fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id);
+
+  if (~0 == fib_index)
+    {
+      fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS,
+					 table_id,
+					 (is_api ?
+					  FIB_SOURCE_API : FIB_SOURCE_CLI));
+    }
+}
+
 static void
 vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp)
 {
diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c
index f427bbf3..2f90993a 100755
--- a/src/vnet/srv6/sr_policy_rewrite.c
+++ b/src/vnet/srv6/sr_policy_rewrite.c
@@ -595,8 +595,10 @@ sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
   if (sm->fib_table_ip6 == (u32) ~ 0)
     {
       sm->fib_table_ip6 = fib_table_create_and_lock (FIB_PROTOCOL_IP6,
+						     FIB_SOURCE_SR,
 						     "SRv6 steering of IP6 prefixes through BSIDs");
       sm->fib_table_ip4 = fib_table_create_and_lock (FIB_PROTOCOL_IP6,
+						     FIB_SOURCE_SR,
 						     "SRv6 steering of IP4 prefixes through BSIDs");
     }
 
@@ -684,8 +686,8 @@ sr_policy_del (ip6_address_t * bsid, u32 index)
   /* If FIB empty unlock it */
   if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies))
     {
-      fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6);
-      fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6);
+      fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6, FIB_SOURCE_SR);
+      fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6, FIB_SOURCE_SR);
       sm->fib_table_ip6 = (u32) ~ 0;
       sm->fib_table_ip4 = (u32) ~ 0;
     }
diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c
index 57fe21f6..cf4e81ab 100755
--- a/src/vnet/srv6/sr_steering.c
+++ b/src/vnet/srv6/sr_steering.c
@@ -159,8 +159,10 @@ sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index,
 	  /* If no more SR policies or steering policies */
 	  if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies))
 	    {
-	      fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6);
-	      fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6);
+	      fib_table_unlock (sm->fib_table_ip6,
+				FIB_PROTOCOL_IP6, FIB_SOURCE_SR);
+	      fib_table_unlock (sm->fib_table_ip4,
+				FIB_PROTOCOL_IP6, FIB_SOURCE_SR);
 	      sm->fib_table_ip6 = (u32) ~ 0;
 	      sm->fib_table_ip4 = (u32) ~ 0;
 	    }
diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c
index f9c3129c..044ddb5b 100644
--- a/src/vpp/api/api.c
+++ b/src/vpp/api/api.c
@@ -699,8 +699,9 @@ static void
 
   VALIDATE_SW_IF_INDEX (mp);
 
-  mpls_sw_interface_enable_disable (&mpls_main,
-				    ntohl (mp->sw_if_index), mp->enable);
+  rv = mpls_sw_interface_enable_disable (&mpls_main,
+					 ntohl (mp->sw_if_index),
+					 mp->enable, 1);
 
   BAD_SW_IF_INDEX_LABEL;
   REPLY_MACRO (VL_API_SW_INTERFACE_SET_MPLS_ENABLE_REPLY);
diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c
index 1353fe28..be74b83a 100644
--- a/src/vpp/api/custom_dump.c
+++ b/src/vpp/api/custom_dump.c
@@ -560,9 +560,6 @@ static void *vl_api_ip_add_del_route_t_print
   if (mp->table_id != 0)
     s = format (s, "vrf %d ", ntohl (mp->table_id));
 
-  if (mp->create_vrf_if_needed)
-    s = format (s, "create-vrf ");
-
   if (mp->next_hop_weight != 1)
     s = format (s, "weight %d ", mp->next_hop_weight);
 
diff --git a/test/test_dhcp.py b/test/test_dhcp.py
index 6fc29182..fe97f6c9 100644
--- a/test/test_dhcp.py
+++ b/test/test_dhcp.py
@@ -6,7 +6,7 @@ import struct
 
 from framework import VppTestCase, VppTestRunner
 from vpp_neighbor import VppNeighbor
-from vpp_ip_route import find_route
+from vpp_ip_route import find_route, VppIpTable
 from util import mk_ll_addr
 
 from scapy.layers.l2 import Ether, getmacbyip, ARP
@@ -34,9 +34,19 @@ class TestDHCP(VppTestCase):
 
         # create 3 pg interfaces
         self.create_pg_interfaces(range(4))
+        self.tables = []
 
         # pg0 and 1 are IP configured in VRF 0 and 1.
         # pg2 and 3 are non IP-configured in VRF 0 and 1
+        table_id = 0
+        for table_id in range(1, 4):
+            tbl4 = VppIpTable(self, table_id)
+            tbl4.add_vpp_config()
+            self.tables.append(tbl4)
+            tbl6 = VppIpTable(self, table_id, is_ip6=1)
+            tbl6.add_vpp_config()
+            self.tables.append(tbl6)
+
         table_id = 0
         for i in self.pg_interfaces[:2]:
             i.admin_up()
@@ -56,11 +66,15 @@ class TestDHCP(VppTestCase):
             table_id += 1
 
     def tearDown(self):
-        super(TestDHCP, self).tearDown()
-        for i in self.pg_interfaces:
+        for i in self.pg_interfaces[:2]:
             i.unconfig_ip4()
             i.unconfig_ip6()
+
+        for i in self.pg_interfaces:
+            i.set_table_ip4(0)
+            i.set_table_ip6(0)
             i.admin_down()
+        super(TestDHCP, self).tearDown()
 
     def send_and_assert_no_replies(self, intf, pkts, remark):
         intf.add_stream(pkts)
@@ -667,6 +681,8 @@ class TestDHCP(VppTestCase):
                                         "DHCP cleanup VRF 0")
         self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1,
                                         "DHCP cleanup VRF 1")
+        self.pg2.unconfig_ip4()
+        self.pg3.unconfig_ip4()
 
     def test_dhcp6_proxy(self):
         """ DHCPv6 Proxy"""
@@ -1045,6 +1061,8 @@ class TestDHCP(VppTestCase):
                                     server_table_id=0,
                                     is_ipv6=1,
                                     is_add=0)
+        self.pg2.unconfig_ip6()
+        self.pg3.unconfig_ip6()
 
     def test_dhcp_client(self):
         """ DHCP Client"""
diff --git a/test/test_gre.py b/test/test_gre.py
index 1afc44fb..9046b05f 100644
--- a/test/test_gre.py
+++ b/test/test_gre.py
@@ -6,7 +6,7 @@ from logging import *
 from framework import VppTestCase, VppTestRunner
 from vpp_sub_interface import VppDot1QSubint
 from vpp_gre_interface import VppGreInterface, VppGre6Interface
-from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto
+from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto, VppIpTable
 from vpp_papi_provider import L2_VTR_OP
 
 from scapy.packet import Raw
@@ -30,6 +30,9 @@ class TestGRE(VppTestCase):
 
         # create 3 pg interfaces - set one in a non-default table.
         self.create_pg_interfaces(range(3))
+
+        self.tbl = VppIpTable(self, 1)
+        self.tbl.add_vpp_config()
         self.pg1.set_table_ip4(1)
 
         for i in self.pg_interfaces:
@@ -43,11 +46,12 @@ class TestGRE(VppTestCase):
         self.pg2.resolve_ndp()
 
     def tearDown(self):
-        super(TestGRE, self).tearDown()
         for i in self.pg_interfaces:
             i.unconfig_ip4()
             i.unconfig_ip6()
             i.admin_down()
+        self.pg1.set_table_ip4(0)
+        super(TestGRE, self).tearDown()
 
     def create_stream_ip4(self, src_if, src_ip, dst_ip):
         pkts = []
diff --git a/test/test_ip4.py b/test/test_ip4.py
index 7a7098c3..55d16735 100644
--- a/test/test_ip4.py
+++ b/test/test_ip4.py
@@ -6,7 +6,8 @@ import unittest
 from framework import VppTestCase, VppTestRunner
 from vpp_sub_interface import VppSubInterface, VppDot1QSubint, VppDot1ADSubint
 from vpp_ip_route import VppIpRoute, VppRoutePath, VppIpMRoute, \
-    VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind
+    VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind, \
+    VppMplsTable
 
 from scapy.packet import Raw
 from scapy.layers.l2 import Ether, Dot1Q, ARP
@@ -774,6 +775,8 @@ class TestIPLoadBalance(VppTestCase):
         super(TestIPLoadBalance, self).setUp()
 
         self.create_pg_interfaces(range(5))
+        mpls_tbl = VppMplsTable(self, 0)
+        mpls_tbl.add_vpp_config()
 
         for i in self.pg_interfaces:
             i.admin_up()
@@ -782,11 +785,11 @@ class TestIPLoadBalance(VppTestCase):
             i.enable_mpls()
 
     def tearDown(self):
-        super(TestIPLoadBalance, self).tearDown()
         for i in self.pg_interfaces:
             i.disable_mpls()
             i.unconfig_ip4()
             i.admin_down()
+        super(TestIPLoadBalance, self).tearDown()
 
     def send_and_expect_load_balancing(self, input, pkts, outputs):
         input.add_stream(pkts)
@@ -966,6 +969,8 @@ class TestIPVlan0(VppTestCase):
         super(TestIPVlan0, self).setUp()
 
         self.create_pg_interfaces(range(2))
+        mpls_tbl = VppMplsTable(self, 0)
+        mpls_tbl.add_vpp_config()
 
         for i in self.pg_interfaces:
             i.admin_up()
@@ -974,11 +979,11 @@ class TestIPVlan0(VppTestCase):
             i.enable_mpls()
 
     def tearDown(self):
-        super(TestIPVlan0, self).tearDown()
         for i in self.pg_interfaces:
             i.disable_mpls()
             i.unconfig_ip4()
             i.admin_down()
+        super(TestIPVlan0, self).tearDown()
 
     def send_and_expect(self, input, pkts, output):
         input.add_stream(pkts)
diff --git a/test/test_ip4_vrf_multi_instance.py b/test/test_ip4_vrf_multi_instance.py
index b73ac948..5a8d6760 100644
--- a/test/test_ip4_vrf_multi_instance.py
+++ b/test/test_ip4_vrf_multi_instance.py
@@ -172,9 +172,10 @@ class TestIp4VrfMultiInst(VppTestCase):
             pg_if = self.pg_if_by_vrf_id[vrf_id][0]
             dest_addr = pg_if.remote_hosts[0].ip4n
             dest_addr_len = 24
+            self.vapi.ip_table_add_del(vrf_id, is_add=1)
             self.vapi.ip_add_del_route(
                 dest_addr, dest_addr_len, pg_if.local_ip4n,
-                table_id=vrf_id, create_vrf_if_needed=1, is_multipath=1)
+                table_id=vrf_id, is_multipath=1)
             self.logger.info("IPv4 VRF ID %d created" % vrf_id)
             if vrf_id not in self.vrf_list:
                 self.vrf_list.append(vrf_id)
@@ -216,6 +217,7 @@ class TestIp4VrfMultiInst(VppTestCase):
         self.logger.info("IPv4 VRF ID %d reset" % vrf_id)
         self.logger.debug(self.vapi.ppcli("show ip fib"))
         self.logger.debug(self.vapi.ppcli("show ip arp"))
+        self.vapi.ip_table_add_del(vrf_id, is_add=0)
 
     def create_stream(self, src_if, packet_sizes):
         """
diff --git a/test/test_ip6.py b/test/test_ip6.py
index 285ce181..aad3713c 100644
--- a/test/test_ip6.py
+++ b/test/test_ip6.py
@@ -8,7 +8,7 @@ from vpp_sub_interface import VppSubInterface, VppDot1QSubint
 from vpp_pg_interface import is_ipv6_misc
 from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, VppIpMRoute, \
     VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind, \
-    VppMplsRoute, DpoProto
+    VppMplsRoute, DpoProto, VppMplsTable
 from vpp_neighbor import find_nbr, VppNeighbor
 
 from scapy.packet import Raw
@@ -1260,6 +1260,9 @@ class TestIP6LoadBalance(VppTestCase):
 
         self.create_pg_interfaces(range(5))
 
+        mpls_tbl = VppMplsTable(self, 0)
+        mpls_tbl.add_vpp_config()
+
         for i in self.pg_interfaces:
             i.admin_up()
             i.config_ip6()
@@ -1267,11 +1270,11 @@ class TestIP6LoadBalance(VppTestCase):
             i.enable_mpls()
 
     def tearDown(self):
-        super(TestIP6LoadBalance, self).tearDown()
         for i in self.pg_interfaces:
             i.unconfig_ip6()
             i.admin_down()
             i.disable_mpls()
+        super(TestIP6LoadBalance, self).tearDown()
 
     def send_and_expect_load_balancing(self, input, pkts, outputs):
         input.add_stream(pkts)
diff --git a/test/test_ip6_vrf_multi_instance.py b/test/test_ip6_vrf_multi_instance.py
index af80b5ba..769cb2e5 100644
--- a/test/test_ip6_vrf_multi_instance.py
+++ b/test/test_ip6_vrf_multi_instance.py
@@ -187,9 +187,10 @@ class TestIP6VrfMultiInst(VppTestCase):
             pg_if = self.pg_if_by_vrf_id[vrf_id][0]
             dest_addr = pg_if.remote_hosts[0].ip6n
             dest_addr_len = 64
+            self.vapi.ip_table_add_del(vrf_id, is_add=1, is_ipv6=1)
             self.vapi.ip_add_del_route(
                 dest_addr, dest_addr_len, pg_if.local_ip6n, is_ipv6=1,
-                table_id=vrf_id, create_vrf_if_needed=1, is_multipath=1)
+                table_id=vrf_id, is_multipath=1)
             self.logger.info("IPv6 VRF ID %d created" % vrf_id)
             if vrf_id not in self.vrf_list:
                 self.vrf_list.append(vrf_id)
@@ -232,6 +233,7 @@ class TestIP6VrfMultiInst(VppTestCase):
         self.logger.info("IPv6 VRF ID %d reset" % vrf_id)
         self.logger.debug(self.vapi.ppcli("show ip6 fib"))
         self.logger.debug(self.vapi.ppcli("show ip6 neighbors"))
+        self.vapi.ip_table_add_del(vrf_id, is_add=0, is_ipv6=1)
 
     def create_stream(self, src_if, packet_sizes):
         """
diff --git a/test/test_ip_mcast.py b/test/test_ip_mcast.py
index 276555d6..7cad683c 100644
--- a/test/test_ip_mcast.py
+++ b/test/test_ip_mcast.py
@@ -5,7 +5,7 @@ import unittest
 from framework import VppTestCase, VppTestRunner
 from vpp_sub_interface import VppSubInterface, VppDot1QSubint, VppDot1ADSubint
 from vpp_ip_route import VppIpMRoute, VppMRoutePath, VppMFibSignal, \
-    MRouteItfFlags, MRouteEntryFlags
+    MRouteItfFlags, MRouteEntryFlags, VppIpTable
 
 from scapy.packet import Raw
 from scapy.layers.l2 import Ether
@@ -44,16 +44,37 @@ class TestIPMcast(VppTestCase):
         super(TestIPMcast, self).setUp()
 
         # create 8 pg interfaces
-        self.create_pg_interfaces(range(8))
+        self.create_pg_interfaces(range(9))
 
         # setup interfaces
-        for i in self.pg_interfaces:
+        for i in self.pg_interfaces[:8]:
             i.admin_up()
             i.config_ip4()
             i.config_ip6()
             i.resolve_arp()
             i.resolve_ndp()
 
+        # one more in a vrf
+        tbl4 = VppIpTable(self, 10)
+        tbl4.add_vpp_config()
+        self.pg8.set_table_ip4(10)
+        self.pg8.config_ip4()
+
+        tbl6 = VppIpTable(self, 10, is_ip6=1)
+        tbl6.add_vpp_config()
+        self.pg8.set_table_ip6(10)
+        self.pg8.config_ip6()
+
+    def tearDown(self):
+        for i in self.pg_interfaces:
+            i.unconfig_ip4()
+            i.unconfig_ip6()
+            i.admin_down()
+
+        self.pg8.set_table_ip4(0)
+        self.pg8.set_table_ip6(0)
+        super(TestIPMcast, self).tearDown()
+
     def create_stream_ip4(self, src_if, src_ip, dst_ip, payload_size=0):
         pkts = []
         # default to small packet sizes
@@ -663,6 +684,77 @@ class TestIPMcast(VppTestCase):
         #
         route_232_1_1_1.remove_vpp_config()
 
+    def test_ip_mcast_vrf(self):
+        """ IP Multicast Replication in non-default table"""
+
+        #
+        # An (S,G).
+        # one accepting interface, pg8, 2 forwarding interfaces
+        #
+        route_1_1_1_1_232_1_1_1 = VppIpMRoute(
+            self,
+            "1.1.1.1",
+            "232.1.1.1", 64,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            [VppMRoutePath(self.pg8.sw_if_index,
+                           MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT),
+             VppMRoutePath(self.pg1.sw_if_index,
+                           MRouteItfFlags.MFIB_ITF_FLAG_FORWARD),
+             VppMRoutePath(self.pg2.sw_if_index,
+                           MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)],
+            table_id=10)
+        route_1_1_1_1_232_1_1_1.add_vpp_config()
+
+        #
+        # a stream that matches the route for (1.1.1.1,232.1.1.1)
+        #  small packets
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip4(self.pg8, "1.1.1.1", "232.1.1.1")
+        self.pg8.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        # We expect replications on Pg1 & 2
+        self.verify_capture_ip4(self.pg1, tx)
+        self.verify_capture_ip4(self.pg2, tx)
+
+    def test_ip6_mcast_vrf(self):
+        """ IPv6 Multicast Replication in non-default table"""
+
+        #
+        # An (S,G).
+        # one accepting interface, pg8, 2 forwarding interfaces
+        #
+        route_2001_ff01_1 = VppIpMRoute(
+            self,
+            "2001::1",
+            "ff01::1", 256,
+            MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE,
+            [VppMRoutePath(self.pg8.sw_if_index,
+                           MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT),
+             VppMRoutePath(self.pg1.sw_if_index,
+                           MRouteItfFlags.MFIB_ITF_FLAG_FORWARD),
+             VppMRoutePath(self.pg2.sw_if_index,
+                           MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)],
+            table_id=10,
+            is_ip6=1)
+        route_2001_ff01_1.add_vpp_config()
+
+        #
+        # a stream that matches the route for (2001::1, ff01::1)
+        #
+        self.vapi.cli("clear trace")
+        tx = self.create_stream_ip6(self.pg8, "2001::1", "ff01::1")
+        self.pg8.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        # We expect replications on Pg1 & 2
+        self.verify_capture_ip6(self.pg1, tx)
+        self.verify_capture_ip6(self.pg2, tx)
 
 if __name__ == '__main__':
     unittest.main(testRunner=VppTestRunner)
diff --git a/test/test_mpls.py b/test/test_mpls.py
index b2226a74..460a32d1 100644
--- a/test/test_mpls.py
+++ b/test/test_mpls.py
@@ -6,7 +6,7 @@ import socket
 from framework import VppTestCase, VppTestRunner
 from vpp_ip_route import VppIpRoute, VppRoutePath, VppMplsRoute, \
     VppMplsIpBind, VppIpMRoute, VppMRoutePath, \
-    MRouteItfFlags, MRouteEntryFlags, DpoProto
+    MRouteItfFlags, MRouteEntryFlags, DpoProto, VppIpTable, VppMplsTable
 from vpp_mpls_tunnel_interface import VppMPLSTunnelInterface
 
 from scapy.packet import Raw
@@ -60,9 +60,23 @@ class TestMPLS(VppTestCase):
         # setup both interfaces
         # assign them different tables.
         table_id = 0
+        self.tables = []
+
+        tbl = VppMplsTable(self, 0)
+        tbl.add_vpp_config()
+        self.tables.append(tbl)
 
         for i in self.pg_interfaces:
             i.admin_up()
+
+            if table_id != 0:
+                tbl = VppIpTable(self, table_id)
+                tbl.add_vpp_config()
+                self.tables.append(tbl)
+                tbl = VppIpTable(self, table_id, is_ip6=1)
+                tbl.add_vpp_config()
+                self.tables.append(tbl)
+
             i.set_table_ip4(table_id)
             i.set_table_ip6(table_id)
             i.config_ip4()
@@ -73,12 +87,15 @@ class TestMPLS(VppTestCase):
             table_id += 1
 
     def tearDown(self):
-        super(TestMPLS, self).tearDown()
         for i in self.pg_interfaces:
             i.unconfig_ip4()
             i.unconfig_ip6()
             i.ip6_disable()
+            i.set_table_ip4(0)
+            i.set_table_ip6(0)
+            i.disable_mpls()
             i.admin_down()
+        super(TestMPLS, self).tearDown()
 
     # the default of 64 matches the IP packet TTL default
     def create_stream_labelled_ip4(
@@ -1092,6 +1109,9 @@ class TestMPLSDisabled(VppTestCase):
         # create 2 pg interfaces
         self.create_pg_interfaces(range(2))
 
+        self.tbl = VppMplsTable(self, 0)
+        self.tbl.add_vpp_config()
+
         # PG0 is MPLS enalbed
         self.pg0.admin_up()
         self.pg0.config_ip4()
@@ -1102,11 +1122,13 @@ class TestMPLSDisabled(VppTestCase):
         self.pg1.admin_up()
 
     def tearDown(self):
-        super(TestMPLSDisabled, self).tearDown()
         for i in self.pg_interfaces:
             i.unconfig_ip4()
             i.admin_down()
 
+        self.pg0.disable_mpls()
+        super(TestMPLSDisabled, self).tearDown()
+
     def send_and_assert_no_replies(self, intf, pkts, remark):
         intf.add_stream(pkts)
         self.pg_enable_capture(self.pg_interfaces)
@@ -1174,6 +1196,13 @@ class TestMPLSPIC(VppTestCase):
         # create 2 pg interfaces
         self.create_pg_interfaces(range(4))
 
+        mpls_tbl = VppMplsTable(self, 0)
+        mpls_tbl.add_vpp_config()
+        tbl4 = VppIpTable(self, 1)
+        tbl4.add_vpp_config()
+        tbl6 = VppIpTable(self, 1, is_ip6=1)
+        tbl6.add_vpp_config()
+
         # core links
         self.pg0.admin_up()
         self.pg0.config_ip4()
@@ -1201,14 +1230,15 @@ class TestMPLSPIC(VppTestCase):
         self.pg3.resolve_ndp()
 
     def tearDown(self):
-        super(TestMPLSPIC, self).tearDown()
         self.pg0.disable_mpls()
+        self.pg1.disable_mpls()
         for i in self.pg_interfaces:
             i.unconfig_ip4()
             i.unconfig_ip6()
             i.set_table_ip4(0)
             i.set_table_ip6(0)
             i.admin_down()
+        super(TestMPLSPIC, self).tearDown()
 
     def test_mpls_ibgp_pic(self):
         """ MPLS iBGP PIC edge convergence
@@ -1534,24 +1564,30 @@ class TestMPLSL2(VppTestCase):
         # create 2 pg interfaces
         self.create_pg_interfaces(range(2))
 
+        # create the default MPLS table
+        self.tables = []
+        tbl = VppMplsTable(self, 0)
+        tbl.add_vpp_config()
+        self.tables.append(tbl)
+
         # use pg0 as the core facing interface
         self.pg0.admin_up()
         self.pg0.config_ip4()
         self.pg0.resolve_arp()
         self.pg0.enable_mpls()
 
-        # use the other 2 for customer facg L2 links
+        # use the other 2 for customer facing L2 links
         for i in self.pg_interfaces[1:]:
             i.admin_up()
 
     def tearDown(self):
-        super(TestMPLSL2, self).tearDown()
         for i in self.pg_interfaces[1:]:
             i.admin_down()
 
         self.pg0.disable_mpls()
         self.pg0.unconfig_ip4()
         self.pg0.admin_down()
+        super(TestMPLSL2, self).tearDown()
 
     def verify_capture_tunneled_ethernet(self, capture, sent, mpls_labels,
                                          ttl=255, top=None):
diff --git a/test/test_nat.py b/test/test_nat.py
index 1f2d17ab..73e9e217 100644
--- a/test/test_nat.py
+++ b/test/test_nat.py
@@ -549,6 +549,8 @@ class TestNAT44(MethodHolder):
             cls.pg0.configure_ipv4_neighbors()
 
             cls.overlapping_interfaces = list(list(cls.pg_interfaces[4:7]))
+            cls.vapi.ip_table_add_del(10, is_add=1)
+            cls.vapi.ip_table_add_del(20, is_add=1)
 
             cls.pg4._local_ip4 = "172.16.255.1"
             cls.pg4._local_ip4n = socket.inet_pton(socket.AF_INET, i.local_ip4)
@@ -1797,6 +1799,8 @@ class TestNAT44(MethodHolder):
 
         self.pg0.unconfig_ip4()
         self.pg1.unconfig_ip4()
+        self.vapi.ip_table_add_del(vrf_id1, is_add=1)
+        self.vapi.ip_table_add_del(vrf_id2, is_add=1)
         self.pg0.set_table_ip4(vrf_id1)
         self.pg1.set_table_ip4(vrf_id2)
         self.pg0.config_ip4()
@@ -1825,6 +1829,13 @@ class TestNAT44(MethodHolder):
         capture = self.pg2.get_capture(len(pkts))
         self.verify_capture_out(capture, nat_ip2)
 
+        self.pg0.unconfig_ip4()
+        self.pg1.unconfig_ip4()
+        self.pg0.set_table_ip4(0)
+        self.pg1.set_table_ip4(0)
+        self.vapi.ip_table_add_del(vrf_id1, is_add=0)
+        self.vapi.ip_table_add_del(vrf_id2, is_add=0)
+
     def test_vrf_feature_independent(self):
         """ NAT44 tenant VRF independent address pool mode """
 
@@ -3042,6 +3053,8 @@ class TestNAT64(MethodHolder):
             cls.ip6_interfaces.append(cls.pg_interfaces[2])
             cls.ip4_interfaces = list(cls.pg_interfaces[1:2])
 
+            cls.vapi.ip_table_add_del(cls.vrf1_id, is_add=1, is_ipv6=1)
+
             cls.pg_interfaces[2].set_table_ip6(cls.vrf1_id)
 
             cls.pg0.generate_remote_hosts(2)
diff --git a/test/test_neighbor.py b/test/test_neighbor.py
index 1c7cc267..68dde2fb 100644
--- a/test/test_neighbor.py
+++ b/test/test_neighbor.py
@@ -5,7 +5,8 @@ from socket import AF_INET, AF_INET6, inet_pton
 
 from framework import VppTestCase, VppTestRunner
 from vpp_neighbor import VppNeighbor, find_nbr
-from vpp_ip_route import VppIpRoute, VppRoutePath, find_route
+from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, \
+    VppIpTable
 
 from scapy.packet import Raw
 from scapy.layers.l2 import Ether, ARP, Dot1Q
@@ -39,11 +40,13 @@ class ARPTestCase(VppTestCase):
         self.pg1.config_ip6()
 
         # pg3 in a different VRF
+        self.tbl = VppIpTable(self, 1)
+        self.tbl.add_vpp_config()
+
         self.pg3.set_table_ip4(1)
         self.pg3.config_ip4()
 
     def tearDown(self):
-        super(ARPTestCase, self).tearDown()
         self.pg0.unconfig_ip4()
         self.pg0.unconfig_ip6()
 
@@ -51,10 +54,13 @@ class ARPTestCase(VppTestCase):
         self.pg1.unconfig_ip6()
 
         self.pg3.unconfig_ip4()
+        self.pg3.set_table_ip4(0)
 
         for i in self.pg_interfaces:
             i.admin_down()
 
+        super(ARPTestCase, self).tearDown()
+
     def verify_arp_req(self, rx, smac, sip, dip):
         ether = rx[Ether]
         self.assertEqual(ether.dst, "ff:ff:ff:ff:ff:ff")
@@ -1080,6 +1086,62 @@ class ARPTestCase(VppTestCase):
                        self.pg0.remote_ip4,
                        self.pg1.remote_hosts[1].ip4)
 
+    def test_arp_static(self):
+        """ ARP Static"""
+        self.pg2.generate_remote_hosts(3)
+
+        #
+        # Add a static ARP entry
+        #
+        static_arp = VppNeighbor(self,
+                                 self.pg2.sw_if_index,
+                                 self.pg2.remote_hosts[1].mac,
+                                 self.pg2.remote_hosts[1].ip4,
+                                 is_static=1)
+        static_arp.add_vpp_config()
+
+        #
+        # Add the connected prefix to the interface
+        #
+        self.pg2.config_ip4()
+
+        #
+        # We should now find the adj-fib
+        #
+        self.assertTrue(find_nbr(self,
+                                 self.pg2.sw_if_index,
+                                 self.pg2.remote_hosts[1].ip4,
+                                 is_static=1))
+        self.assertTrue(find_route(self,
+                                   self.pg2.remote_hosts[1].ip4,
+                                   32))
+
+        #
+        # remove the connected
+        #
+        self.pg2.unconfig_ip4()
+
+        #
+        # put the interface into table 1
+        #
+        self.pg2.set_table_ip4(1)
+
+        #
+        # configure the same connected and expect to find the
+        # adj fib in the new table
+        #
+        self.pg2.config_ip4()
+        self.assertTrue(find_route(self,
+                                   self.pg2.remote_hosts[1].ip4,
+                                   32,
+                                   table_id=1))
+
+        #
+        # clean-up
+        #
+        self.pg2.unconfig_ip4()
+        self.pg2.set_table_ip4(0)
+
 
 if __name__ == '__main__':
     unittest.main(testRunner=VppTestRunner)
diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py
index 2c489e3c..b7993793 100644
--- a/test/vpp_ip_route.py
+++ b/test/vpp_ip_route.py
@@ -54,6 +54,46 @@ def find_route(test, ip_addr, len, table_id=0, inet=AF_INET):
     return False
 
 
+class VppIpTable(VppObject):
+
+    def __init__(self,
+                 test,
+                 table_id,
+                 is_ip6=0):
+        self._test = test
+        self.table_id = table_id
+        self.is_ip6 = is_ip6
+
+    def add_vpp_config(self):
+        self._test.vapi.ip_table_add_del(
+            self.table_id,
+            is_ipv6=self.is_ip6,
+            is_add=1)
+        self._test.registry.register(self, self._test.logger)
+
+    def remove_vpp_config(self):
+        self._test.vapi.ip_table_add_del(
+            self.table_id,
+            is_ipv6=self.is_ip6,
+            is_add=0)
+
+    def query_vpp_config(self):
+        # find the default route
+        return find_route(self._test,
+                          "::" if self.is_ip6 else "0.0.0.0",
+                          0,
+                          self.table_id,
+                          inet=AF_INET6 if self.is_ip6 == 1 else AF_INET)
+
+    def __str__(self):
+        return self.object_id()
+
+    def object_id(self):
+        return ("table-%s-%d" %
+                ("v6" if self.is_ip6 == 1 else "v4",
+                 self.table_id))
+
+
 class VppRoutePath(object):
 
     def __init__(
@@ -391,6 +431,39 @@ class VppMplsIpBind(VppObject):
                    self.dest_addr_len))
 
 
+class VppMplsTable(VppObject):
+
+    def __init__(self,
+                 test,
+                 table_id):
+        self._test = test
+        self.table_id = table_id
+
+    def add_vpp_config(self):
+        self._test.vapi.mpls_table_add_del(
+            self.table_id,
+            is_add=1)
+        self._test.registry.register(self, self._test.logger)
+
+    def remove_vpp_config(self):
+        self._test.vapi.mpls_table_add_del(
+            self.table_id,
+            is_add=0)
+
+    def query_vpp_config(self):
+        # treat any entry in the MPLS FIB dump as evidence the table exists
+        dump = self._test.vapi.mpls_fib_dump()
+        if len(dump):
+            return True
+        return False
+
+    def __str__(self):
+        return self.object_id()
+
+    def object_id(self):
+        return ("table-mpls-%d" % (self.table_id))
+
+
 class VppMplsRoute(VppObject):
     """
     MPLS Route/LSP
diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py
index b70da026..519aff80 100644
--- a/test/vpp_papi_provider.py
+++ b/test/vpp_papi_provider.py
@@ -652,6 +652,24 @@ class VppPapiProvider(object):
         return self.api(self.papi.delete_loopback,
                         {'sw_if_index': sw_if_index, })
 
+    def ip_table_add_del(self,
+                         table_id,
+                         is_add=1,
+                         is_ipv6=0):
+        """
+
+        :param table_id:
+        :param is_add:  (Default value = 1)
+        :param is_ipv6:  (Default value = 0)
+
+        """
+
+        return self.api(
+            self.papi.ip_table_add_del,
+            {'table_id': table_id,
+             'is_add': is_add,
+             'is_ipv6': is_ipv6})
+
     def ip_add_del_route(
             self,
             dst_address,
@@ -664,7 +682,6 @@ class VppPapiProvider(object):
             next_hop_n_out_labels=0,
             next_hop_out_label_stack=[],
             next_hop_via_label=MPLS_LABEL_INVALID,
-            create_vrf_if_needed=0,
             is_resolve_host=0,
             is_resolve_attached=0,
             classify_table_index=0xFFFFFFFF,
@@ -687,7 +704,6 @@ class VppPapiProvider(object):
         :param vrf_id:  (Default value = 0)
         :param lookup_in_vrf:  (Default value = 0)
         :param classify_table_index:  (Default value = 0xFFFFFFFF)
-        :param create_vrf_if_needed:  (Default value = 0)
         :param is_add:  (Default value = 1)
         :param is_drop:  (Default value = 0)
         :param is_ipv6:  (Default value = 0)
@@ -707,7 +723,6 @@ class VppPapiProvider(object):
              'table_id': table_id,
              'classify_table_index': classify_table_index,
              'next_hop_table_id': next_hop_table_id,
-             'create_vrf_if_needed': create_vrf_if_needed,
              'is_add': is_add,
              'is_drop': is_drop,
              'is_unreach': is_unreach,
@@ -912,6 +927,22 @@ class VppPapiProvider(object):
     def mpls_fib_dump(self):
         return self.api(self.papi.mpls_fib_dump, {})
 
+    def mpls_table_add_del(
+            self,
+            table_id,
+            is_add=1):
+        """
+
+        :param table_id:
+        :param is_add:  (Default value = 1)
+
+        """
+
+        return self.api(
+            self.papi.mpls_table_add_del,
+            {'mt_table_id': table_id,
+             'mt_is_add': is_add})
+
     def mpls_route_add_del(
             self,
             label,
@@ -925,7 +956,6 @@ class VppPapiProvider(object):
             next_hop_n_out_labels=0,
             next_hop_out_label_stack=[],
             next_hop_via_label=MPLS_LABEL_INVALID,
-            create_vrf_if_needed=0,
             is_resolve_host=0,
             is_resolve_attached=0,
             is_interface_rx=0,
@@ -947,7 +977,6 @@ class VppPapiProvider(object):
         :param vrf_id:  (Default value = 0)
         :param lookup_in_vrf:  (Default value = 0)
         :param classify_table_index:  (Default value = 0xFFFFFFFF)
-        :param create_vrf_if_needed:  (Default value = 0)
         :param is_add:  (Default value = 1)
         :param is_drop:  (Default value = 0)
         :param is_ipv6:  (Default value = 0)
@@ -968,7 +997,6 @@ class VppPapiProvider(object):
              'mr_eos': eos,
              'mr_table_id': table_id,
              'mr_classify_table_index': classify_table_index,
-             'mr_create_table_if_needed': create_vrf_if_needed,
              'mr_is_add': is_add,
              'mr_is_classify': is_classify,
              'mr_is_multipath': is_multipath,
@@ -994,7 +1022,6 @@ class VppPapiProvider(object):
             table_id=0,
             ip_table_id=0,
             is_ip4=1,
-            create_vrf_if_needed=0,
             is_bind=1):
         """
         """
@@ -1003,7 +1030,6 @@ class VppPapiProvider(object):
             {'mb_mpls_table_id': table_id,
              'mb_label': label,
              'mb_ip_table_id': ip_table_id,
-             'mb_create_table_if_needed': create_vrf_if_needed,
              'mb_is_bind': is_bind,
              'mb_is_ip4': is_ip4,
              'mb_address_length': dst_address_length,
@@ -1020,7 +1046,6 @@ class VppPapiProvider(object):
             next_hop_n_out_labels=0,
             next_hop_out_label_stack=[],
             next_hop_via_label=MPLS_LABEL_INVALID,
-            create_vrf_if_needed=0,
             is_add=1,
             l2_only=0,
             is_multicast=0):
@@ -1034,7 +1059,6 @@ class VppPapiProvider(object):
         :param vrf_id:  (Default value = 0)
         :param lookup_in_vrf:  (Default value = 0)
         :param classify_table_index:  (Default value = 0xFFFFFFFF)
-        :param create_vrf_if_needed:  (Default value = 0)
         :param is_add:  (Default value = 1)
         :param is_drop:  (Default value = 0)
         :param is_ipv6:  (Default value = 0)
@@ -1844,7 +1868,6 @@ class VppPapiProvider(object):
                           i_flags,
                           rpf_id=0,
                           table_id=0,
-                          create_vrf_if_needed=0,
                           is_add=1,
                           is_ipv6=0,
                           is_local=0):
@@ -1857,7 +1880,6 @@ class VppPapiProvider(object):
              'itf_flags': i_flags,
              'table_id': table_id,
              'rpf_id': rpf_id,
-             'create_vrf_if_needed': create_vrf_if_needed,
              'is_add': is_add,
              'is_ipv6': is_ipv6,
              'is_local': is_local,
-- 
cgit 1.2.3-korg