From 6f6311560380d0e992f710558e213df1b098ef94 Mon Sep 17 00:00:00 2001
From: Neale Ranns <nranns@cisco.com>
Date: Tue, 3 Oct 2017 08:20:21 -0700
Subject: Distributed Virtual Router Support

A distributed virtual router works by attempting to switch a packet, but on failing to find a local consumer (i.e. the packet is destined to a locally attached host) then the packet is sent unmodified 'upstream' to where the rest of the 'distributed' router is present. When L3 switching a packet this means the L2 header must not be modified. This patch adds a 'l2-bridge' object to the L3 FIB which re-injects packets from the L3 path back into the L2 path - use with extreme caution.

Change-Id: I069724eb45956647d7980cbe40a80a788ee6ee82
Signed-off-by: Neale Ranns <nranns@cisco.com>
---
 src/vnet.am                  |   3 +-
 src/vnet/dpo/dpo.c           |   2 +
 src/vnet/dpo/dpo.h           |   4 +-
 src/vnet/dpo/l2_bridge_dpo.c | 375 +++++++++++++++++++++++++++++++++++++++++++
 src/vnet/dpo/l2_bridge_dpo.h |  56 +++++++
 src/vnet/fib/fib_api.h       |   1 +
 src/vnet/fib/fib_path.c      |  64 +++++---
 src/vnet/fib/fib_table.c     |   3 +-
 src/vnet/fib/fib_test.c      | 130 ++++++---------
 src/vnet/fib/fib_test.h      |   2 +
 src/vnet/ip/ip.api           |   1 +
 src/vnet/ip/ip_api.c         |   5 +
 src/vnet/mpls/mpls_api.c     |   8 +-
 13 files changed, 545 insertions(+), 109 deletions(-)
 create mode 100644 src/vnet/dpo/l2_bridge_dpo.c
 create mode 100644 src/vnet/dpo/l2_bridge_dpo.h

(limited to 'src')

diff --git a/src/vnet.am b/src/vnet.am
index 055d3a7f906..aa3ada1dada 100644
--- a/src/vnet.am
+++ b/src/vnet.am
@@ -1015,7 +1015,8 @@ libvnet_la_SOURCES +=				\
   vnet/dpo/interface_rx_dpo.c  			\
   vnet/dpo/interface_tx_dpo.c  			\
   vnet/dpo/mpls_disposition.c   		\
-  vnet/dpo/mpls_label_dpo.c
+  vnet/dpo/mpls_label_dpo.c			\
+  vnet/dpo/l2_bridge_dpo.c
 
 nobase_include_HEADERS +=			\
   vnet/dpo/load_balance.h			\
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
index bd18b66bfd5..e94f347466e 100644
--- a/src/vnet/dpo/dpo.c
+++ b/src/vnet/dpo/dpo.c
@@ -40,6 +40,7 @@
 #include <vnet/dpo/interface_rx_dpo.h>
 #include <vnet/dpo/interface_tx_dpo.h>
 #include <vnet/dpo/mpls_disposition.h>
+#include <vnet/dpo/l2_bridge_dpo.h>
 
 /**
  * Array of char* names for the DPO types and protos
@@ -523,6 +524,7 @@ dpo_module_init (vlib_main_t * vm)
     interface_rx_dpo_module_init();
     interface_tx_dpo_module_init();
     mpls_disp_dpo_module_init();
+    l2_bridge_dpo_module_init();
 
     return (NULL);
 }
diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h
index 33562968272..d1309c19031 100644
--- a/src/vnet/dpo/dpo.h
+++ b/src/vnet/dpo/dpo.h
@@ -114,6 +114,7 @@ typedef enum dpo_type_t_ {
     DPO_MFIB_ENTRY,
     DPO_INTERFACE_RX,
     DPO_INTERFACE_TX,
+    DPO_L2_BRIDGE,
     DPO_LAST,
 } __attribute__((packed)) dpo_type_t;
 
@@ -140,7 +141,8 @@ typedef enum dpo_type_t_ {
     [DPO_MPLS_DISPOSITION] = "dpo-mpls-diposition", \
     [DPO_MFIB_ENTRY] = "dpo-mfib_entry", \
     [DPO_INTERFACE_RX] = "dpo-interface-rx",	\
-    [DPO_INTERFACE_TX] = "dpo-interface-tx"	\
+    [DPO_INTERFACE_TX] = "dpo-interface-tx",	\
+    [DPO_L2_BRIDGE] = "dpo-l2-bridge"	\
 }
 
 /**
diff --git a/src/vnet/dpo/l2_bridge_dpo.c b/src/vnet/dpo/l2_bridge_dpo.c
new file mode 100644
index 00000000000..169478148c3
--- /dev/null
+++ b/src/vnet/dpo/l2_bridge_dpo.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/dpo/l2_bridge_dpo.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/ethernet/ethernet.h>
+
+/*
+ * The 'DB' of L2 bridge DPOs.
+ * There is only one per-interface, so this is a per-interface vector
+ */
+static index_t *l2_bridge_dpo_db;
+
+static l2_bridge_dpo_t *
+l2_bridge_dpo_alloc (void)
+{
+    l2_bridge_dpo_t *l2b;
+
+    pool_get(l2_bridge_dpo_pool, l2b);
+
+    return (l2b);
+}
+
+static inline l2_bridge_dpo_t *
+l2_bridge_dpo_get_from_dpo (const dpo_id_t *dpo)
+{
+    ASSERT(DPO_L2_BRIDGE == dpo->dpoi_type);
+
+    return (l2_bridge_dpo_get(dpo->dpoi_index));
+}
+
+static inline index_t
+l2_bridge_dpo_get_index (l2_bridge_dpo_t *l2b)
+{
+    return (l2b - l2_bridge_dpo_pool);
+}
+
+static void
+l2_bridge_dpo_lock (dpo_id_t *dpo)
+{
+    l2_bridge_dpo_t *l2b;
+
+    l2b = l2_bridge_dpo_get_from_dpo(dpo);
+    l2b->l2b_locks++;
+}
+
+static void
+l2_bridge_dpo_unlock (dpo_id_t *dpo)
+{
+    l2_bridge_dpo_t *l2b;
+
+    l2b = l2_bridge_dpo_get_from_dpo(dpo);
+    l2b->l2b_locks--;
+
+    if (0 == l2b->l2b_locks)
+    {
+        l2_bridge_dpo_db[l2b->l2b_sw_if_index] = INDEX_INVALID;
+        pool_put(l2_bridge_dpo_pool, l2b);
+    }
+}
+
+/*
+ * l2_bridge_dpo_add_or_lock
+ *
+ * Create and lock a new L2 bridge DPO for the given interface, or lock
+ * the existing one. The result is returned through the dpo argument.
+ */
+void
+l2_bridge_dpo_add_or_lock (u32 sw_if_index,
+                           dpo_id_t *dpo)
+{
+    l2_bridge_dpo_t *l2b;
+
+    vec_validate_init_empty(l2_bridge_dpo_db,
+                            sw_if_index,
+                            INDEX_INVALID);
+
+    if (INDEX_INVALID == l2_bridge_dpo_db[sw_if_index])
+    {
+        l2b = l2_bridge_dpo_alloc();
+
+        l2b->l2b_sw_if_index = sw_if_index;
+
+        l2_bridge_dpo_db[sw_if_index] =
+            l2_bridge_dpo_get_index(l2b);
+    }
+    else
+    {
+        l2b = l2_bridge_dpo_get(l2_bridge_dpo_db[sw_if_index]);
+    }
+
+    dpo_set(dpo, DPO_L2_BRIDGE, DPO_PROTO_ETHERNET, l2_bridge_dpo_get_index(l2b));
+}
+
+
+static clib_error_t *
+l2_bridge_dpo_interface_state_change (vnet_main_t * vnm,
+                                      u32 sw_if_index,
+                                      u32 flags)
+{
+    /* No-op: nothing is done for an admin up/down change; always NULL.
+     */
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(
+    l2_bridge_dpo_interface_state_change);
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+l2_bridge_dpo_hw_interface_state_change (vnet_main_t * vnm,
+                                         u32 hw_if_index,
+                                         u32 flags)
+{
+    return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(
+    l2_bridge_dpo_hw_interface_state_change);
+
+static clib_error_t *
+l2_bridge_dpo_interface_delete (vnet_main_t * vnm,
+                                u32 sw_if_index,
+                                u32 is_add)
+{
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(
+    l2_bridge_dpo_interface_delete);
+
+u8*
+format_l2_bridge_dpo (u8* s, va_list *ap)
+{
+    index_t index = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+    vnet_main_t * vnm = vnet_get_main();
+    l2_bridge_dpo_t *l2b = l2_bridge_dpo_get(index);
+
+    return (format(s, "l2-bridge-%U-dpo",
+                   format_vnet_sw_interface_name,
+                   vnm,
+                   vnet_get_sw_interface(vnm, l2b->l2b_sw_if_index)));
+}
+
+static void
+l2_bridge_dpo_mem_show (void)
+{
+    fib_show_memory_usage("L2-bridge",
+                          pool_elts(l2_bridge_dpo_pool),
+                          pool_len(l2_bridge_dpo_pool),
+                          sizeof(l2_bridge_dpo_t));
+}
+
+
+const static dpo_vft_t l2_bridge_dpo_vft = {
+    .dv_lock = l2_bridge_dpo_lock,
+    .dv_unlock = l2_bridge_dpo_unlock,
+    .dv_format = format_l2_bridge_dpo,
+    .dv_mem_show = l2_bridge_dpo_mem_show,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to an L2 bridge
+ *        object.
+ *
+ * This means that these graph nodes are ones from which an L2 bridge is the
+ * parent object in the DPO-graph.
+ */
+const static char* const l2_bridge_dpo_l2_nodes[] =
+{
+    "l2-bridge-dpo",
+    NULL,
+};
+
+const static char* const * const l2_bridge_dpo_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_ETHERNET]  = l2_bridge_dpo_l2_nodes,
+};
+
+void
+l2_bridge_dpo_module_init (void)
+{
+    dpo_register(DPO_L2_BRIDGE,
+                 &l2_bridge_dpo_vft,
+                 l2_bridge_dpo_nodes);
+}
+
+/**
+ * @brief L2 bridge DPO trace data
+ */
+typedef struct l2_bridge_dpo_trace_t_
+{
+    u32 sw_if_index;
+} l2_bridge_dpo_trace_t;
+
+typedef enum l2_bridge_dpo_next_t_
+{
+    L2_BRIDGE_DPO_DROP = 0,
+    L2_BRIDGE_DPO_OUTPUT = 1,
+} l2_bridge_dpo_next_t;
+
+always_inline uword
+l2_bridge_dpo_inline (vlib_main_t * vm,
+                      vlib_node_runtime_t * node,
+                      vlib_frame_t * from_frame)
+{
+    u32 n_left_from, next_index, * from, * to_next;
+
+    from = vlib_frame_vector_args (from_frame);
+    n_left_from = from_frame->n_vectors;
+
+    next_index = node->cached_next_index;
+
+    while (n_left_from > 0)
+    {
+        u32 n_left_to_next;
+
+        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+        while (n_left_from >= 4 && n_left_to_next > 2)
+        {
+            const l2_bridge_dpo_t *l2b0, *l2b1;
+            u32 bi0, l2bi0, bi1, l2bi1;
+            vlib_buffer_t *b0, *b1;
+            u8 len0, len1;
+
+            bi0 = from[0];
+            to_next[0] = bi0;
+            bi1 = from[1];
+            to_next[1] = bi1;
+            from += 2;
+            to_next += 2;
+            n_left_from -= 2;
+            n_left_to_next -= 2;
+
+            b0 = vlib_get_buffer (vm, bi0);
+            b1 = vlib_get_buffer (vm, bi1);
+
+            l2bi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            l2bi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+            l2b0 = l2_bridge_dpo_get(l2bi0);
+            l2b1 = l2_bridge_dpo_get(l2bi1);
+
+            vnet_buffer(b0)->sw_if_index[VLIB_TX] = l2b0->l2b_sw_if_index;
+            vnet_buffer(b1)->sw_if_index[VLIB_TX] = l2b1->l2b_sw_if_index;
+
+            len0 = ((u8*)vlib_buffer_get_current(b0) -
+                    (u8*)ethernet_buffer_get_header(b0));
+            len1 = ((u8*)vlib_buffer_get_current(b1) -
+                    (u8*)ethernet_buffer_get_header(b1));
+            vnet_buffer(b0)->l2.l2_len = len0;
+            vnet_buffer(b1)->l2.l2_len = len1;
+
+            vlib_buffer_advance(b0, -len0);
+            vlib_buffer_advance(b1, -len1);
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                l2_bridge_dpo_trace_t *tr0;
+
+                tr0 = vlib_add_trace (vm, node, b0, sizeof (*tr0));
+                tr0->sw_if_index = l2b0->l2b_sw_if_index;
+            }
+            if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                l2_bridge_dpo_trace_t *tr1;
+
+                tr1 = vlib_add_trace (vm, node, b1, sizeof (*tr1));
+                tr1->sw_if_index = l2b1->l2b_sw_if_index;
+            }
+
+            vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+                                            n_left_to_next, bi0, bi1,
+                                            L2_BRIDGE_DPO_OUTPUT,
+                                            L2_BRIDGE_DPO_OUTPUT);
+        }
+
+        while (n_left_from > 0 && n_left_to_next > 0)
+        {
+            const l2_bridge_dpo_t * l2b0;
+            vlib_buffer_t * b0;
+            u32 bi0, l2bi0;
+            u8 len0;
+
+            bi0 = from[0];
+            to_next[0] = bi0;
+            from += 1;
+            to_next += 1;
+            n_left_from -= 1;
+            n_left_to_next -= 1;
+
+            b0 = vlib_get_buffer (vm, bi0);
+
+            l2bi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            l2b0 = l2_bridge_dpo_get(l2bi0);
+
+            vnet_buffer(b0)->sw_if_index[VLIB_TX] = l2b0->l2b_sw_if_index;
+
+            /*
+             * rewind the buffer to the start of its ethernet header
+             */
+            len0 = ((u8*)vlib_buffer_get_current(b0) -
+                    (u8*)ethernet_buffer_get_header(b0));
+            vnet_buffer(b0)->l2.l2_len = len0;
+            vlib_buffer_advance(b0, -len0);
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                l2_bridge_dpo_trace_t *tr;
+
+                tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+                tr->sw_if_index = l2b0->l2b_sw_if_index;
+            }
+
+            vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+                                            n_left_to_next, bi0,
+                                            L2_BRIDGE_DPO_OUTPUT);
+        }
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+    return from_frame->n_vectors;
+}
+
+static u8 *
+format_l2_bridge_dpo_trace (u8 * s, va_list * args)
+{
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    l2_bridge_dpo_trace_t * t = va_arg (*args, l2_bridge_dpo_trace_t *);
+    uword indent = format_get_indent (s);
+    s = format (s, "%U sw_if_index:%d",
+                format_white_space, indent,
+                t->sw_if_index);
+    return s;
+}
+
+static uword
+l2_bridge_dpo_l2 (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_frame_t * from_frame)
+{
+    return (l2_bridge_dpo_inline(vm, node, from_frame));
+}
+
+
+VLIB_REGISTER_NODE (l2_bridge_dpo_l2_node) = {
+    .function = l2_bridge_dpo_l2,
+    .name = "l2-bridge-dpo",
+    .vector_size = sizeof (u32),
+    .format_trace = format_l2_bridge_dpo_trace,
+
+    .n_next_nodes = 2,
+    .next_nodes = {
+        [L2_BRIDGE_DPO_DROP] = "error-drop",
+        [L2_BRIDGE_DPO_OUTPUT] = "l2-output",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_bridge_dpo_l2_node,
+                              l2_bridge_dpo_l2)
diff --git a/src/vnet/dpo/l2_bridge_dpo.h b/src/vnet/dpo/l2_bridge_dpo.h
new file mode 100644
index 00000000000..0a20dd79335
--- /dev/null
+++ b/src/vnet/dpo/l2_bridge_dpo.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __L2_BRIDGE_DPO_H__
+#define __L2_BRIDGE_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+/**
+ * @brief
+ * The data-path object representing an L2 bridge.
+ * If a packet encounters an object of this type in the L3 data-path, it
+ * is injected back into the L2 bridge.
+ */
+typedef struct l2_bridge_dpo_t_
+{
+    /**
+     * The software interface index that the packets will be output on
+     */
+    u32 l2b_sw_if_index;
+
+    /**
+     * number of locks.
+     */
+    u16 l2b_locks;
+} l2_bridge_dpo_t;
+
+extern void l2_bridge_dpo_add_or_lock (u32 sw_if_index,
+                                       dpo_id_t *dpo);
+
+extern void l2_bridge_dpo_module_init(void);
+
+/**
+ * @brief pool of all L2 bridge DPOs (NOTE(review): should be 'extern' here)
+ */
+l2_bridge_dpo_t *l2_bridge_dpo_pool;
+
+static inline l2_bridge_dpo_t *
+l2_bridge_dpo_get (index_t index)
+{
+    return (pool_elt_at_index(l2_bridge_dpo_pool, index));
+}
+
+#endif
diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h
index f5a107ca352..c369e8f8588 100644
--- a/src/vnet/fib/fib_api.h
+++ b/src/vnet/fib/fib_api.h
@@ -40,6 +40,7 @@ add_del_route_t_handler (u8 is_multipath,
 			 u8 is_resolve_attached,
 			 u8 is_interface_rx,
                          u8 is_rpf_id,
+                         u8 is_l2_bridged,
 			 u32 fib_index,
 			 const fib_prefix_t * prefix,
 			 dpo_proto_t next_hop_proto,
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index f126333425a..889d17def9c 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -23,6 +23,7 @@
 #include <vnet/dpo/lookup_dpo.h>
 #include <vnet/dpo/interface_rx_dpo.h>
 #include <vnet/dpo/mpls_disposition.h>
+#include <vnet/dpo/l2_bridge_dpo.h>
 
 #include <vnet/adj/adj.h>
 #include <vnet/adj/adj_mcast.h>
@@ -771,11 +772,18 @@ fib_path_unresolve (fib_path_t *path)
 	}
 	break;
     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
-    case FIB_PATH_TYPE_ATTACHED:
 	adj_child_remove(path->fp_dpo.dpoi_index,
 			 path->fp_sibling);
         adj_unlock(path->fp_dpo.dpoi_index);
         break;
+    case FIB_PATH_TYPE_ATTACHED:
+        if (DPO_PROTO_ETHERNET != path->fp_nh_proto)
+        {
+            adj_child_remove(path->fp_dpo.dpoi_index,
+                             path->fp_sibling);
+            adj_unlock(path->fp_dpo.dpoi_index);
+        }
+        break;
     case FIB_PATH_TYPE_EXCLUSIVE:
 	dpo_reset(&path->exclusive.fp_ex_dpo);
         break;
@@ -1594,28 +1602,35 @@ fib_path_resolve (fib_node_index_t path_index)
 	fib_path_attached_next_hop_set(path);
 	break;
     case FIB_PATH_TYPE_ATTACHED:
-	/*
-	 * path->attached.fp_interface
-	 */
-	if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
-					   path->attached.fp_interface))
-	{
-	    path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
-	}
-        dpo_set(&path->fp_dpo,
-                DPO_ADJACENCY,
-                path->fp_nh_proto,
-                fib_path_attached_get_adj(path,
-                                          dpo_proto_to_link(path->fp_nh_proto)));
-
-	/*
-	 * become a child of the adjacency so we receive updates
-	 * when the interface state changes
-	 */
-	path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
-					 FIB_NODE_TYPE_PATH,
-					 fib_path_get_index(path));
+        if (DPO_PROTO_ETHERNET == path->fp_nh_proto)
+        {
+            l2_bridge_dpo_add_or_lock(path->attached.fp_interface,
+                                      &path->fp_dpo);
+        }
+        else
+        {
+            /*
+             * path->attached.fp_interface
+             */
+            if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
+                                               path->attached.fp_interface))
+            {
+                path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+            }
+            dpo_set(&path->fp_dpo,
+                    DPO_ADJACENCY,
+                    path->fp_nh_proto,
+                    fib_path_attached_get_adj(path,
+                                              dpo_proto_to_link(path->fp_nh_proto)));
 
+            /*
+             * become a child of the adjacency so we receive updates
+             * when the interface state changes
+             */
+            path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
+                                             FIB_NODE_TYPE_PATH,
+                                             fib_path_get_index(path));
+        }
 	break;
     case FIB_PATH_TYPE_RECURSIVE:
     {
@@ -1996,6 +2011,11 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
 	    dpo_copy(dpo, &path->exclusive.fp_ex_dpo);
 	    break;
         case FIB_PATH_TYPE_ATTACHED:
+            if (DPO_PROTO_ETHERNET == path->fp_nh_proto)
+            {
+                dpo_copy(dpo, &path->fp_dpo);
+                break;
+            }
 	    switch (fct)
 	    {
 	    case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c
index 6daa61c2612..d5625d83673 100644
--- a/src/vnet/fib/fib_table.c
+++ b/src/vnet/fib/fib_table.c
@@ -481,7 +481,8 @@ fib_table_route_path_fixup (const fib_prefix_t *prefix,
     }
     if (fib_prefix_is_host(prefix) &&
 	ip46_address_is_zero(&path->frp_addr) &&
-	path->frp_sw_if_index != ~0)
+	path->frp_sw_if_index != ~0 &&
+        path->frp_proto != DPO_PROTO_ETHERNET)
     {
 	path->frp_addr = prefix->fp_addr;
         path->frp_flags |= FIB_ROUTE_PATH_ATTACHED;
diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c
index 64d9047163b..03c9ee75f48 100644
--- a/src/vnet/fib/fib_test.c
+++ b/src/vnet/fib/fib_test.c
@@ -27,9 +27,11 @@
 #include <vnet/bfd/bfd_main.h>
 #include <vnet/dpo/interface_rx_dpo.h>
 #include <vnet/dpo/replicate_dpo.h>
+#include <vnet/dpo/l2_bridge_dpo.h>
 
 #include <vnet/mpls/mpls.h>
 
+#include <vnet/fib/fib_test.h>
 #include <vnet/fib/fib_path_list.h>
 #include <vnet/fib/fib_entry_src.h>
 #include <vnet/fib/fib_walk.h>
@@ -266,83 +268,6 @@ fib_test_build_rewrite (u8 *eth_addr)
     return (rewrite);
 }
 
-typedef enum fib_test_lb_bucket_type_t_ {
-    FT_LB_LABEL_O_ADJ,
-    FT_LB_LABEL_STACK_O_ADJ,
-    FT_LB_LABEL_O_LB,
-    FT_LB_O_LB,
-    FT_LB_SPECIAL,
-    FT_LB_ADJ,
-    FT_LB_INTF,
-} fib_test_lb_bucket_type_t;
-
-typedef struct fib_test_lb_bucket_t_ {
-    fib_test_lb_bucket_type_t type;
-
-    union
-    {
-	struct
-	{
-	    mpls_eos_bit_t eos;
-	    mpls_label_t label;
-	    u8 ttl;
-	    adj_index_t adj;
-	} label_o_adj;
-	struct
-	{
-	    mpls_eos_bit_t eos;
-	    mpls_label_t label_stack[8];
-	    u8 label_stack_size;
-	    u8 ttl;
-	    adj_index_t adj;
-	} label_stack_o_adj;
-	struct
-	{
-	    mpls_eos_bit_t eos;
-	    mpls_label_t label;
-	    u8 ttl;
-	    index_t lb;
-	} label_o_lb;
-	struct
-	{
-	    index_t adj;
-	} adj;
-	struct
-	{
-	    index_t lb;
-	} lb;
-	struct
-	{
-	    index_t adj;
-	} special;
-    };
-} fib_test_lb_bucket_t;
-
-typedef enum fib_test_rep_bucket_type_t_ {
-    FT_REP_LABEL_O_ADJ,
-    FT_REP_DISP_MFIB_LOOKUP,
-    FT_REP_INTF,
-} fib_test_rep_bucket_type_t;
-
-typedef struct fib_test_rep_bucket_t_ {
-    fib_test_rep_bucket_type_t type;
-
-    union
-    {
-	struct
-	{
-	    mpls_eos_bit_t eos;
-	    mpls_label_t label;
-	    u8 ttl;
-	    adj_index_t adj;
-	} label_o_adj;
- 	struct
-	{
-	    adj_index_t adj;
-	} adj;
-   };
-} fib_test_rep_bucket_t;
-
 #define FIB_TEST_LB(_cond, _comment, _args...)			\
 {								\
     if (!FIB_TEST_I(_cond, _comment, ##_args)) {		\
@@ -598,6 +523,16 @@ fib_test_validate_lb_v (const load_balance_t *lb,
 			bucket,
 			exp->adj.adj);
 	    break;
+	case FT_LB_L2:
+	    FIB_TEST_I((DPO_L2_BRIDGE == dpo->dpoi_type),
+		       "bucket %d stacks on %U",
+		       bucket,
+		       format_dpo_type, dpo->dpoi_type);
+	    FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index),
+			"bucket %d stacks on adj %d",
+			bucket,
+			exp->adj.adj);
+	    break;
 	case FT_LB_O_LB:
 	    FIB_TEST_I((DPO_LOAD_BALANCE == dpo->dpoi_type),
                        "bucket %d stacks on %U",
@@ -4066,6 +4001,45 @@ fib_test_v4 (void)
              "Table and LB newhash config match: %U",
              format_ip_flow_hash_config, lb->lb_hash_config);
 
+    /*
+     * A route via an L2 Bridge
+     */
+    fei = fib_table_entry_path_add(fib_index,
+                                   &pfx_10_10_10_3_s_32,
+                                   FIB_SOURCE_API,
+                                   FIB_ENTRY_FLAG_NONE,
+                                   DPO_PROTO_ETHERNET,
+                                   &zero_addr,
+                                   tm->hw[0]->sw_if_index,
+                                   ~0,
+                                   1,
+                                   NULL,
+                                   FIB_ROUTE_PATH_FLAG_NONE);
+    dpo_id_t l2_dpo = DPO_INVALID;
+    l2_bridge_dpo_add_or_lock(tm->hw[0]->sw_if_index, &l2_dpo);
+    fib_test_lb_bucket_t ip_o_l2 = {
+        .type = FT_LB_L2,
+        .adj = {
+            .adj = l2_dpo.dpoi_index,
+        },
+    };
+
+    FIB_TEST(fib_test_validate_entry(fei,
+                                     FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                     1,
+                                     &ip_o_l2),
+             "10.10.10.3 via L2 on Eth0");
+    fib_table_entry_path_remove(fib_index,
+                                &pfx_10_10_10_3_s_32,
+                                FIB_SOURCE_API,
+                                DPO_PROTO_ETHERNET,
+                                &zero_addr,
+                                tm->hw[0]->sw_if_index,
+                                fib_index,
+                                1,
+                                FIB_ROUTE_PATH_FLAG_NONE);
+    dpo_reset(&l2_dpo);
+
     /*
      * CLEANUP
      *    remove adj-fibs: 
@@ -4165,6 +4139,8 @@ fib_test_v4 (void)
     	     pool_elts(load_balance_map_pool));
     FIB_TEST((lb_count == pool_elts(load_balance_pool)), "LB pool size is %d",
              pool_elts(load_balance_pool));
+    FIB_TEST((0 == pool_elts(l2_bridge_dpo_pool)), "L2 DPO pool size is %d",
+             pool_elts(l2_bridge_dpo_pool));
 
     return 0;
 }
diff --git a/src/vnet/fib/fib_test.h b/src/vnet/fib/fib_test.h
index 3692f57386d..5adc52ec658 100644
--- a/src/vnet/fib/fib_test.h
+++ b/src/vnet/fib/fib_test.h
@@ -26,6 +26,7 @@ typedef enum fib_test_lb_bucket_type_t_ {
     FT_LB_SPECIAL,
     FT_LB_ADJ,
     FT_LB_INTF,
+    FT_LB_L2,
 } fib_test_lb_bucket_type_t;
 
 typedef struct fib_test_lb_bucket_t_ {
@@ -72,6 +73,7 @@ typedef struct fib_test_lb_bucket_t_ {
 
 typedef enum fib_test_rep_bucket_type_t_ {
     FT_REP_LABEL_O_ADJ,
+    FT_REP_DISP_MFIB_LOOKUP,
     FT_REP_INTF,
 } fib_test_rep_bucket_type_t;
 
diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api
index f26d79436a5..4b7019f087e 100644
--- a/src/vnet/ip/ip.api
+++ b/src/vnet/ip/ip.api
@@ -397,6 +397,7 @@ autoreply define ip_add_del_route
   u8 is_multipath;
   u8 is_resolve_host;
   u8 is_resolve_attached;
+  u8 is_l2_bridged;
   /* Is last/not-last message in group of multiple add/del messages. */
   u8 not_last;
   u8 next_hop_weight;
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index 6981c84cb0e..c34ec57dc84 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -769,6 +769,7 @@ add_del_route_t_handler (u8 is_multipath,
 			 u8 is_resolve_attached,
 			 u8 is_interface_rx,
 			 u8 is_rpf_id,
+			 u8 is_l2_bridged,
 			 u32 fib_index,
 			 const fib_prefix_t * prefix,
 			 dpo_proto_t next_hop_proto,
@@ -806,6 +807,8 @@ add_del_route_t_handler (u8 is_multipath,
       path.frp_local_label = next_hop_via_label;
       path.frp_eos = MPLS_NON_EOS;
     }
+  if (is_l2_bridged)
+    path.frp_proto = DPO_PROTO_ETHERNET;
   if (is_resolve_host)
     path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST;
   if (is_resolve_attached)
@@ -1043,6 +1046,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
 				   mp->classify_table_index,
 				   mp->is_resolve_host,
 				   mp->is_resolve_attached, 0, 0,
+				   mp->is_l2_bridged,
 				   fib_index, &pfx, DPO_PROTO_IP4,
 				   &nh,
 				   ntohl (mp->next_hop_sw_if_index),
@@ -1102,6 +1106,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
 				   mp->classify_table_index,
 				   mp->is_resolve_host,
 				   mp->is_resolve_attached, 0, 0,
+				   mp->is_l2_bridged,
 				   fib_index, &pfx, DPO_PROTO_IP6,
 				   &nh, ntohl (mp->next_hop_sw_if_index),
 				   next_hop_fib_index,
diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c
index 762c40ffa8f..e41466e6cf5 100644
--- a/src/vnet/mpls/mpls_api.c
+++ b/src/vnet/mpls/mpls_api.c
@@ -223,13 +223,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm,
 				   0,	// mp->is_unreach,
 				   0,	// mp->is_prohibit,
 				   0,	// mp->is_local,
-				   mp->mr_is_multicast,
-				   mp->mr_is_classify,
-				   mp->mr_classify_table_index,
-				   mp->mr_is_resolve_host,
-				   mp->mr_is_resolve_attached,
-				   mp->mr_is_interface_rx,
-				   mp->mr_is_rpf_id,
+				   mp->mr_is_multicast, mp->mr_is_classify, mp->mr_classify_table_index, mp->mr_is_resolve_host, mp->mr_is_resolve_attached, mp->mr_is_interface_rx, mp->mr_is_rpf_id, 0,	// l2_bridged
 				   fib_index, &pfx,
 				   mp->mr_next_hop_proto,
 				   &nh, ntohl (mp->mr_next_hop_sw_if_index),
-- 
cgit