summaryrefslogtreecommitdiffstats
path: root/src/vnet/l2
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/l2')
-rw-r--r--src/vnet/l2/dir.dox24
-rw-r--r--src/vnet/l2/feat_bitmap.c185
-rw-r--r--src/vnet/l2/feat_bitmap.h96
-rw-r--r--src/vnet/l2/l2.api38
-rw-r--r--src/vnet/l2/l2_api.c140
-rw-r--r--src/vnet/l2/l2_bd.c1079
-rw-r--r--src/vnet/l2/l2_bd.h150
-rw-r--r--src/vnet/l2/l2_bvi.c40
-rw-r--r--src/vnet/l2/l2_bvi.h117
-rw-r--r--src/vnet/l2/l2_classify.h116
-rw-r--r--src/vnet/l2/l2_efp_filter.c614
-rw-r--r--src/vnet/l2/l2_efp_filter.h33
-rw-r--r--src/vnet/l2/l2_fib.c857
-rw-r--r--src/vnet/l2/l2_fib.h341
-rw-r--r--src/vnet/l2/l2_flood.c568
-rw-r--r--src/vnet/l2/l2_flood.h35
-rw-r--r--src/vnet/l2/l2_fwd.c544
-rw-r--r--src/vnet/l2/l2_fwd.h36
-rw-r--r--src/vnet/l2/l2_input.c1116
-rw-r--r--src/vnet/l2/l2_input.h266
-rw-r--r--src/vnet/l2/l2_input_acl.c434
-rw-r--r--src/vnet/l2/l2_input_classify.c655
-rw-r--r--src/vnet/l2/l2_input_vtr.c401
-rw-r--r--src/vnet/l2/l2_input_vtr.h54
-rw-r--r--src/vnet/l2/l2_learn.c597
-rw-r--r--src/vnet/l2/l2_learn.h64
-rw-r--r--src/vnet/l2/l2_output.c708
-rw-r--r--src/vnet/l2/l2_output.h285
-rw-r--r--src/vnet/l2/l2_output_acl.c358
-rw-r--r--src/vnet/l2/l2_output_classify.c657
-rw-r--r--src/vnet/l2/l2_patch.c452
-rw-r--r--src/vnet/l2/l2_rw.c719
-rw-r--r--src/vnet/l2/l2_rw.h95
-rw-r--r--src/vnet/l2/l2_vtr.c770
-rw-r--r--src/vnet/l2/l2_vtr.h270
-rw-r--r--src/vnet/l2/l2_xcrw.c591
-rw-r--r--src/vnet/l2/l2_xcrw.h91
37 files changed, 13596 insertions, 0 deletions
diff --git a/src/vnet/l2/dir.dox b/src/vnet/l2/dir.dox
new file mode 100644
index 00000000000..8497a2f64cc
--- /dev/null
+++ b/src/vnet/l2/dir.dox
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+@dir
+@brief Layer 2 Forwarding Code.
+
+This directory contains the source code for basic Layer 2 forwarding.
+
+*/
+/*? %%clicmd:group_label Layer 2 CLI %% ?*/
diff --git a/src/vnet/l2/feat_bitmap.c b/src/vnet/l2/feat_bitmap.c
new file mode 100644
index 00000000000..6c046467f2c
--- /dev/null
+++ b/src/vnet/l2/feat_bitmap.c
@@ -0,0 +1,185 @@
+/*
+ * feat_bitmap.c: bitmap for managing feature invocation
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+
+/*
+ * Drop node for feature bitmaps
+ * For features that just do a drop, or are not yet implemented.
+ * Initial feature dispatch nodes don't need to set b0->error
+ * in case of a possible drop because that will be done here.
+ *The next node is always error-drop.
+ */
+
+static vlib_node_registration_t feat_bitmap_drop_node;
+
+#define foreach_feat_bitmap_drop_error \
+_(NO_FWD, "L2 feature forwarding disabled") \
+_(NYI, "L2 feature not implemented")
+
+typedef enum
+{
+#define _(sym,str) FEAT_BITMAP_DROP_ERROR_##sym,
+ foreach_feat_bitmap_drop_error
+#undef _
+ FEAT_BITMAP_DROP_N_ERROR,
+} feat_bitmap_drop_error_t;
+
+static char *feat_bitmap_drop_error_strings[] = {
+#define _(sym,string) string,
+ foreach_feat_bitmap_drop_error
+#undef _
+};
+
+typedef enum
+{
+ FEAT_BITMAP_DROP_NEXT_DROP,
+ FEAT_BITMAP_DROP_N_NEXT,
+} feat_bitmap_drop_next_t;
+
+typedef struct
+{
+ u32 feature_bitmap;
+} feat_bitmap_drop_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_feat_bitmap_drop_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ feat_bitmap_drop_trace_t *t = va_arg (*args, feat_bitmap_drop_trace_t *);
+
+ s =
+ format (s, "feat_bitmap_drop: feature bitmap 0x%08x", t->feature_bitmap);
+ return s;
+}
+
+static uword
+feat_bitmap_drop_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ feat_bitmap_drop_next_t next_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ feat_bitmap_drop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->feature_bitmap = vnet_buffer (b0)->l2.feature_bitmap;
+ }
+
+ if (vnet_buffer (b0)->l2.feature_bitmap == 1)
+ {
+ /*
+ * If we are executing the last feature, this is the
+ * No forwarding catch-all
+ */
+ b0->error = node->errors[FEAT_BITMAP_DROP_ERROR_NO_FWD];
+ }
+ else
+ {
+ b0->error = node->errors[FEAT_BITMAP_DROP_ERROR_NYI];
+ }
+ next0 = FEAT_BITMAP_DROP_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+clib_error_t *
+feat_bitmap_drop_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (feat_bitmap_drop_init);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = {
+ .function = feat_bitmap_drop_node_fn,
+ .name = "feature-bitmap-drop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_feat_bitmap_drop_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(feat_bitmap_drop_error_strings),
+ .error_strings = feat_bitmap_drop_error_strings,
+
+ .n_next_nodes = FEAT_BITMAP_DROP_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [FEAT_BITMAP_DROP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/feat_bitmap.h b/src/vnet/l2/feat_bitmap.h
new file mode 100644
index 00000000000..c6e02ecc7c9
--- /dev/null
+++ b/src/vnet/l2/feat_bitmap.h
@@ -0,0 +1,96 @@
+/*
+ * feat_bitmap.h: bitmap for managing feature invocation
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_feat_bitmap_h
+#define included_vnet_l2_feat_bitmap_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+/*
+ * The feature bitmap is a way of organizing input and output feature graph nodes.
+ * The set of features to be executed are arranged in a bitmap with one bit per
+ * feature and each bit positioned in the same order that the features should be
+ * executed. Features can be dynamically removed from the set by masking off their
+ * corresponding bits. The bitmap is stored in packet context. Each feature clears
+ * its bit and then calls feat_bitmap_get_next_node_index() to go to the next
+ * graph node.
+ */
+
+
+/* 32 features in a u32 bitmap */
+#define FEAT_MAX 32
+
+/**
+ Initialize the feature next-node indexes of a graph node.
+ Should be called by the init function of each feature graph node.
+*/
+always_inline void
+feat_bitmap_init_next_nodes (vlib_main_t * vm, u32 node_index, /* the current graph node index */
+ u32 num_features, /* number of entries in feat_names */
+ char **feat_names, /* array of feature graph node names */
+ u32 * next_nodes) /* array of 32 next indexes to init */
+{
+ u32 idx;
+
+ ASSERT (num_features <= FEAT_MAX);
+
+ for (idx = 0; idx < num_features; idx++)
+ {
+ if (vlib_get_node_by_name (vm, (u8 *) feat_names[idx]))
+ {
+ next_nodes[idx] =
+ vlib_node_add_named_next (vm, node_index, feat_names[idx]);
+ }
+ else
+ { // Node may be in plugin which is not installed, use drop node
+ next_nodes[idx] =
+ vlib_node_add_named_next (vm, node_index, "feature-bitmap-drop");
+ }
+ }
+
+ /* All unassigned bits go to the drop node */
+ for (; idx < FEAT_MAX; idx++)
+ {
+ next_nodes[idx] = vlib_node_add_named_next (vm, node_index,
+ "feature-bitmap-drop");
+ }
+}
+
+/**
+ Return the graph node index for the feature corresponding to the
+ first set bit in the bitmap.
+*/
+always_inline
+ u32 feat_bitmap_get_next_node_index (u32 * next_nodes, u32 bitmap)
+{
+ u32 first_bit;
+
+ count_leading_zeros (first_bit, bitmap);
+ first_bit = uword_bits - 1 - first_bit;
+ return next_nodes[first_bit];
+}
+
+#endif /* included_vnet_l2_feat_bitmap_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api
new file mode 100644
index 00000000000..5fce7944b58
--- /dev/null
+++ b/src/vnet/l2/l2.api
@@ -0,0 +1,38 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Reply to l2_xconnect_dump
+ @param context - sender context which was passed in the request
+ @param rx_sw_if_index - Receive interface index
+ @param tx_sw_if_index - Transmit interface index
+ */
+define l2_xconnect_details
+{
+ u32 context;
+ u32 rx_sw_if_index;
+ u32 tx_sw_if_index;
+};
+
+/** \brief Dump L2 XConnects
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define l2_xconnect_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c
new file mode 100644
index 00000000000..ca4f593f1ec
--- /dev/null
+++ b/src/vnet/l2/l2_api.c
@@ -0,0 +1,140 @@
+/*
+ *------------------------------------------------------------------
+ * l2_api.c - layer 2 forwarding api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/l2/l2_input.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(L2_XCONNECT_DUMP, l2_xconnect_dump)
+
+static void
+send_l2_xconnect_details (unix_shared_memory_queue_t * q, u32 context,
+ u32 rx_sw_if_index, u32 tx_sw_if_index)
+{
+ vl_api_l2_xconnect_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_L2_XCONNECT_DETAILS);
+ mp->context = context;
+ mp->rx_sw_if_index = htonl (rx_sw_if_index);
+ mp->tx_sw_if_index = htonl (tx_sw_if_index);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ l2input_main_t *l2im = &l2input_main;
+ vnet_sw_interface_t *swif;
+ l2_input_config_t *config;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (swif, im->sw_interfaces,
+ ({
+ config = vec_elt_at_index (l2im->configs, swif->sw_if_index);
+ if (config->xconnect)
+ send_l2_xconnect_details (q, mp->context, swif->sw_if_index,
+ config->output_sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+
+/*
+ * vpe_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has alread mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_l2;
+#undef _
+}
+
+static clib_error_t *
+l2_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (l2_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c
new file mode 100644
index 00000000000..22f83d0b3e4
--- /dev/null
+++ b/src/vnet/l2/l2_bd.c
@@ -0,0 +1,1079 @@
+/*
+ * l2_bd.c : layer 2 bridge domain
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vlib/cli.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/format.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/l2/l2_learn.h>
+#include <vnet/l2/l2_fib.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/vec.h>
+
+/**
+ * @file
+ * @brief Ethernet Bridge Domain.
+ *
+ * Code in this file manages Layer 2 bridge domains.
+ *
+ */
+
+bd_main_t bd_main;
+
+/**
+ Init bridge domain if not done already.
+ For feature bitmap, set all bits except ARP termination
+*/
+void
+bd_validate (l2_bridge_domain_t * bd_config)
+{
+ if (!bd_is_valid (bd_config))
+ {
+ bd_config->feature_bitmap = ~L2INPUT_FEAT_ARP_TERM;
+ bd_config->bvi_sw_if_index = ~0;
+ bd_config->members = 0;
+ bd_config->flood_count = 0;
+ bd_config->tun_master_count = 0;
+ bd_config->tun_normal_count = 0;
+ bd_config->mac_by_ip4 = 0;
+ bd_config->mac_by_ip6 = hash_create_mem (0, sizeof (ip6_address_t),
+ sizeof (uword));
+ }
+}
+
+u32
+bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id)
+{
+ uword *p;
+ u32 rv;
+
+ if (bd_id == ~0)
+ {
+ bd_id = 0;
+ while (hash_get (bdm->bd_index_by_bd_id, bd_id))
+ bd_id++;
+ }
+ else
+ {
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p)
+ return (p[0]);
+ }
+
+ rv = clib_bitmap_first_clear (bdm->bd_index_bitmap);
+
+ /* mark this index busy */
+ bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, rv, 1);
+
+ hash_set (bdm->bd_index_by_bd_id, bd_id, rv);
+
+ vec_validate (l2input_main.bd_configs, rv);
+ l2input_main.bd_configs[rv].bd_id = bd_id;
+
+ return rv;
+}
+
+int
+bd_delete_bd_index (bd_main_t * bdm, u32 bd_id)
+{
+ uword *p;
+ u32 bd_index;
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p == 0)
+ return -1;
+
+ bd_index = p[0];
+
+ /* mark this index clear */
+ bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, bd_index, 0);
+ hash_unset (bdm->bd_index_by_bd_id, bd_id);
+
+ l2input_main.bd_configs[bd_index].bd_id = ~0;
+ l2input_main.bd_configs[bd_index].feature_bitmap = 0;
+
+ return 0;
+}
+
+static void
+update_flood_count (l2_bridge_domain_t * bd_config)
+{
+ bd_config->flood_count = vec_len (bd_config->members) -
+ (bd_config->tun_master_count ? bd_config->tun_normal_count : 0);
+}
+
+void
+bd_add_member (l2_bridge_domain_t * bd_config, l2_flood_member_t * member)
+{
+ u32 ix;
+ vnet_sw_interface_t *sw_if = vnet_get_sw_interface
+ (vnet_get_main (), member->sw_if_index);
+
+ /*
+ * Add one element to the vector
+ * vector is ordered [ bvi, normal/tun_masters..., tun_normals... ]
+ * When flooding, the bvi interface (if present) must be the last member
+ * processed due to how BVI processing can change the packet. To enable
+ * this order, we make the bvi interface the first in the vector and
+ * flooding walks the vector in reverse.
+ */
+ switch (sw_if->flood_class)
+ {
+ case VNET_FLOOD_CLASS_TUNNEL_MASTER:
+ bd_config->tun_master_count++;
+ /* Fall through */
+ default:
+ /* Fall through */
+ case VNET_FLOOD_CLASS_NORMAL:
+ ix = (member->flags & L2_FLOOD_MEMBER_BVI) ? 0 :
+ vec_len (bd_config->members) - bd_config->tun_normal_count;
+ break;
+ case VNET_FLOOD_CLASS_TUNNEL_NORMAL:
+ ix = vec_len (bd_config->members);
+ bd_config->tun_normal_count++;
+ break;
+ }
+
+ vec_insert_elts (bd_config->members, member, 1, ix);
+ update_flood_count (bd_config);
+}
+
+#define BD_REMOVE_ERROR_OK 0
+#define BD_REMOVE_ERROR_NOT_FOUND 1
+
+u32
+bd_remove_member (l2_bridge_domain_t * bd_config, u32 sw_if_index)
+{
+ u32 ix;
+
+ /* Find and delete the member */
+ vec_foreach_index (ix, bd_config->members)
+ {
+ l2_flood_member_t *m = vec_elt_at_index (bd_config->members, ix);
+ if (m->sw_if_index == sw_if_index)
+ {
+ vnet_sw_interface_t *sw_if = vnet_get_sw_interface
+ (vnet_get_main (), sw_if_index);
+
+ if (sw_if->flood_class != VNET_FLOOD_CLASS_NORMAL)
+ {
+ if (sw_if->flood_class == VNET_FLOOD_CLASS_TUNNEL_MASTER)
+ bd_config->tun_master_count--;
+ else if (sw_if->flood_class == VNET_FLOOD_CLASS_TUNNEL_NORMAL)
+ bd_config->tun_normal_count--;
+ }
+ vec_del1 (bd_config->members, ix);
+ update_flood_count (bd_config);
+
+ return BD_REMOVE_ERROR_OK;
+ }
+ }
+
+ return BD_REMOVE_ERROR_NOT_FOUND;
+}
+
+
+clib_error_t *
+l2bd_init (vlib_main_t * vm)
+{
+ bd_main_t *bdm = &bd_main;
+ u32 bd_index;
+ bdm->bd_index_by_bd_id = hash_create (0, sizeof (uword));
+ /*
+ * create a dummy bd with bd_id of 0 and bd_index of 0 with feature set
+ * to packet drop only. Thus, packets received from any L2 interface with
+ * uninitialized bd_index of 0 can be dropped safely.
+ */
+ bd_index = bd_find_or_add_bd_index (bdm, 0);
+ ASSERT (bd_index == 0);
+ l2input_main.bd_configs[0].feature_bitmap = L2INPUT_FEAT_DROP;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2bd_init);
+
+
+/**
+ Set the learn/forward/flood flags for the bridge domain.
+ Return 0 if ok, non-zero if for an error.
+*/
+u32
+bd_set_flags (vlib_main_t * vm, u32 bd_index, u32 flags, u32 enable)
+{
+
+ l2_bridge_domain_t *bd_config;
+ u32 feature_bitmap = 0;
+
+ vec_validate (l2input_main.bd_configs, bd_index);
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+
+ bd_validate (bd_config);
+
+ if (flags & L2_LEARN)
+ {
+ feature_bitmap |= L2INPUT_FEAT_LEARN;
+ }
+ if (flags & L2_FWD)
+ {
+ feature_bitmap |= L2INPUT_FEAT_FWD;
+ }
+ if (flags & L2_FLOOD)
+ {
+ feature_bitmap |= L2INPUT_FEAT_FLOOD;
+ }
+ if (flags & L2_UU_FLOOD)
+ {
+ feature_bitmap |= L2INPUT_FEAT_UU_FLOOD;
+ }
+ if (flags & L2_ARP_TERM)
+ {
+ feature_bitmap |= L2INPUT_FEAT_ARP_TERM;
+ }
+
+ if (enable)
+ {
+ bd_config->feature_bitmap |= feature_bitmap;
+ }
+ else
+ {
+ bd_config->feature_bitmap &= ~feature_bitmap;
+ }
+
+ return 0;
+}
+
+/**
+ Set the mac age for the bridge domain.
+*/
+void
+bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age)
+{
+ l2_bridge_domain_t *bd_config;
+ int enable = 0;
+
+ vec_validate (l2input_main.bd_configs, bd_index);
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+ bd_config->mac_age = age;
+
+ /* check if there is at least one bd with mac aging enabled */
+ vec_foreach (bd_config, l2input_main.bd_configs)
+ if (bd_config->bd_id != ~0 && bd_config->mac_age != 0)
+ enable = 1;
+
+ vlib_process_signal_event (vm, l2fib_mac_age_scanner_process_node.index,
+ enable ? L2_MAC_AGE_PROCESS_EVENT_START :
+ L2_MAC_AGE_PROCESS_EVENT_STOP, 0);
+}
+
+/**
+ Set bridge-domain learn enable/disable.
+ The CLI format is:
+ set bridge-domain learn <bd_id> [disable]
+*/
+static clib_error_t *
+bd_learn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the bridge domain flag */
+ if (bd_set_flags (vm, bd_index, L2_LEARN, enable))
+ {
+ error =
+ clib_error_return (0, "bridge-domain id %d out of range", bd_index);
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 learning can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage bridge-domains. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable learning (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain learn 200}
+ * Example of how to disable learning (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain learn 200 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_learn_cli, static) = {
+ .path = "set bridge-domain learn",
+ .short_help = "set bridge-domain learn <bridge-domain-id> [disable]",
+ .function = bd_learn,
+};
+/* *INDENT-ON* */
+
+/**
+ Set bridge-domain forward enable/disable.
+ The CLI format is:
+ set bridge-domain forward <bd_index> [disable]
+*/
+static clib_error_t *
+bd_fwd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the bridge domain flag */
+ if (bd_set_flags (vm, bd_index, L2_FWD, enable))
+ {
+ error =
+ clib_error_return (0, "bridge-domain id %d out of range", bd_index);
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+
+/*?
+ * Layer 2 unicast forwarding can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage bridge-domains. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable forwarding (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain forward 200}
+ * Example of how to disable forwarding (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain forward 200 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_fwd_cli, static) = {
+ .path = "set bridge-domain forward",
+ .short_help = "set bridge-domain forward <bridge-domain-id> [disable]",
+ .function = bd_fwd,
+};
+/* *INDENT-ON* */
+
+/**
+ Set bridge-domain flood enable/disable.
+ The CLI format is:
+ set bridge-domain flood <bd_index> [disable]
+*/
+static clib_error_t *
+bd_flood (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the bridge domain flag */
+ if (bd_set_flags (vm, bd_index, L2_FLOOD, enable))
+ {
+ error =
+ clib_error_return (0, "bridge-domain id %d out of range", bd_index);
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 flooding can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage bridge-domains. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable flooding (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain flood 200}
+ * Example of how to disable flooding (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain flood 200 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_flood_cli, static) = {
+ .path = "set bridge-domain flood",
+ .short_help = "set bridge-domain flood <bridge-domain-id> [disable]",
+ .function = bd_flood,
+};
+/* *INDENT-ON* */
+
+/**
+ Set bridge-domain unkown-unicast flood enable/disable.
+ The CLI format is:
+ set bridge-domain uu-flood <bd_index> [disable]
+*/
+static clib_error_t *
+bd_uu_flood (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the bridge domain flag */
+ if (bd_set_flags (vm, bd_index, L2_UU_FLOOD, enable))
+ {
+ error =
+ clib_error_return (0, "bridge-domain id %d out of range", bd_index);
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 unknown-unicast flooding can be enabled and disabled on each
+ * bridge-domain. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable unknown-unicast flooding (where 200 is the
+ * bridge-domain-id):
+ * @cliexcmd{set bridge-domain uu-flood 200}
+ * Example of how to disable unknown-unicast flooding (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain uu-flood 200 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_uu_flood_cli, static) = {
+ .path = "set bridge-domain uu-flood",
+ .short_help = "set bridge-domain uu-flood <bridge-domain-id> [disable]",
+ .function = bd_uu_flood,
+};
+/* *INDENT-ON* */
+
+/**
+ Set bridge-domain arp term enable/disable.
+ The CLI format is:
+ set bridge-domain arp term <bridge-domain-id> [disable]
+*/
+static clib_error_t *
+bd_arp_term (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p)
+ bd_index = *p;
+ else
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ enable = 0;
+
+ /* set the bridge domain flag */
+ if (bd_set_flags (vm, bd_index, L2_ARP_TERM, enable))
+ {
+ error =
+ clib_error_return (0, "bridge-domain id %d out of range", bd_index);
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+static clib_error_t *
+bd_mac_age (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 age;
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ if (!unformat (input, "%u", &age))
+ {
+ error =
+ clib_error_return (0, "expecting ageing time in minutes but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ /* set the bridge domain flag */
+ if (age > 255)
+ {
+ error =
+ clib_error_return (0, "mac aging time cannot be bigger than 255");
+ goto done;
+ }
+ bd_set_mac_age (vm, bd_index, (u8) age);
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 mac aging can be enabled and disabled on each
+ * bridge-domain. Use this command to set or disable mac aging
+ * on specific bridge-domains. It is disabled by default.
+ *
+ * @cliexpar
+ * Example of how to set mac aging (where 200 is the bridge-domain-id and
+ * 5 is aging time in minutes):
+ * @cliexcmd{set bridge-domain mac-age 200 5}
+ * Example of how to disable mac aging (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain flood 200 0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_mac_age_cli, static) = {
+ .path = "set bridge-domain mac-age",
+ .short_help = "set bridge-domain mac-age <bridge-domain-id> <mins>",
+ .function = bd_mac_age,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Modify whether or not an existing bridge-domain should terminate and respond
+ * to ARP Requests. ARP Termination is disabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable ARP termination (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain arp term 200}
+ * Example of how to disable ARP termination (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain arp term 200 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_arp_term_cli, static) = {
+ .path = "set bridge-domain arp term",
+ .short_help = "set bridge-domain arp term <bridge-domain-id> [disable]",
+ .function = bd_arp_term,
+};
+/* *INDENT-ON* */
+
+
+/**
+ * Add/delete IP address to MAC address mapping.
+ *
+ * The clib hash implementation stores uword entries in the hash table.
+ * The hash table mac_by_ip4 is keyed via IP4 address and store the
+ * 6-byte MAC address directly in the hash table entry uword.
+ *
+ * @warning This only works for 64-bit processor with 8-byte uword;
+ * which means this code *WILL NOT WORK* for a 32-bit prcessor with
+ * 4-byte uword.
+ */
+u32
+bd_add_del_ip_mac (u32 bd_index,
+ u8 * ip_addr, u8 * mac_addr, u8 is_ip6, u8 is_add)
+{
+ l2input_main_t *l2im = &l2input_main;
+ l2_bridge_domain_t *bd_cfg = l2input_bd_config_from_index (l2im, bd_index);
+ u64 new_mac = *(u64 *) mac_addr;
+ u64 *old_mac;
+ u16 *mac16 = (u16 *) & new_mac;
+
+ ASSERT (sizeof (uword) == sizeof (u64)); /* make sure uword is 8 bytes */
+
+ mac16[3] = 0; /* Clear last 2 unsed bytes of the 8-byte MAC address */
+ if (is_ip6)
+ {
+ ip6_address_t *ip6_addr_key;
+ hash_pair_t *hp;
+ old_mac = (u64 *) hash_get_mem (bd_cfg->mac_by_ip6, ip_addr);
+ if (is_add)
+ {
+ if (old_mac == 0)
+ { /* new entry - allocate and craete ip6 address key */
+ ip6_addr_key = clib_mem_alloc (sizeof (ip6_address_t));
+ clib_memcpy (ip6_addr_key, ip_addr, sizeof (ip6_address_t));
+ }
+ else if (*old_mac == new_mac)
+ { /* same mac entry already exist for ip6 address */
+ return 0;
+ }
+ else
+ { /* updat mac for ip6 address */
+ hp = hash_get_pair (bd_cfg->mac_by_ip6, ip_addr);
+ ip6_addr_key = (ip6_address_t *) hp->key;
+ }
+ hash_set_mem (bd_cfg->mac_by_ip6, ip6_addr_key, new_mac);
+ }
+ else
+ {
+ if (old_mac && (*old_mac == new_mac))
+ {
+ hp = hash_get_pair (bd_cfg->mac_by_ip6, ip_addr);
+ ip6_addr_key = (ip6_address_t *) hp->key;
+ hash_unset_mem (bd_cfg->mac_by_ip6, ip_addr);
+ clib_mem_free (ip6_addr_key);
+ }
+ else
+ return 1;
+ }
+ }
+ else
+ {
+ ip4_address_t ip4_addr = *(ip4_address_t *) ip_addr;
+ old_mac = (u64 *) hash_get (bd_cfg->mac_by_ip4, ip4_addr.as_u32);
+ if (is_add)
+ {
+ if (old_mac && (*old_mac == new_mac))
+ return 0; /* mac entry already exist */
+ hash_set (bd_cfg->mac_by_ip4, ip4_addr.as_u32, new_mac);
+ }
+ else
+ {
+ if (old_mac && (*old_mac == new_mac))
+ hash_unset (bd_cfg->mac_by_ip4, ip4_addr.as_u32);
+ else
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/**
+ Set bridge-domain arp entry add/delete.
+ The CLI format is:
+ set bridge-domain arp entry <bridge-domain-id> <ip-addr> <mac-addr> [del]
+*/
+static clib_error_t *
+bd_arp_entry (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u8 is_add = 1;
+ u8 is_ip6 = 0;
+ u8 ip_addr[16];
+ u8 mac_addr[6];
+ uword *p;
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p)
+ bd_index = *p;
+ else
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ if (unformat (input, "%U", unformat_ip4_address, ip_addr))
+ {
+ is_ip6 = 0;
+ }
+ else if (unformat (input, "%U", unformat_ip6_address, ip_addr))
+ {
+ is_ip6 = 1;
+ }
+ else
+ {
+ error = clib_error_return (0, "expecting IP address but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat (input, "%U", unformat_ethernet_address, mac_addr))
+ {
+ error = clib_error_return (0, "expecting MAC address but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (unformat (input, "del"))
+ {
+ is_add = 0;
+ }
+
+ /* set the bridge domain flagAdd IP-MAC entry into bridge domain */
+ if (bd_add_del_ip_mac (bd_index, ip_addr, mac_addr, is_ip6, is_add))
+ {
+ error = clib_error_return (0, "MAC %s for IP %U and MAC %U failed",
+ is_add ? "add" : "del",
+ is_ip6 ?
+ format_ip4_address : format_ip6_address,
+ ip_addr, format_ethernet_address, mac_addr);
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Add an ARP entry to an existing bridge-domain.
+ *
+ * @cliexpar
+ * Example of how to add an ARP entry (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain arp entry 200 192.168.72.45 52:54:00:3b:83:1a}
+ * Example of how to delete an ARP entry (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain arp entry 200 192.168.72.45 52:54:00:3b:83:1a del}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_arp_entry_cli, static) = {
+ .path = "set bridge-domain arp entry",
+ .short_help = "set bridge-domain arp entry <bridge-domain-id> <ip-addr> <mac-addr> [del]",
+ .function = bd_arp_entry,
+};
+/* *INDENT-ON* */
+
+u8 *
+format_vtr (u8 * s, va_list * args)
+{
+ u32 vtr_op = va_arg (*args, u32);
+ u32 dot1q = va_arg (*args, u32);
+ u32 tag1 = va_arg (*args, u32);
+ u32 tag2 = va_arg (*args, u32);
+ switch (vtr_op)
+ {
+ case L2_VTR_DISABLED:
+ return format (s, "none");
+ case L2_VTR_PUSH_1:
+ return format (s, "push-1 %s %d", dot1q ? "dot1q" : "dot1ad", tag1);
+ case L2_VTR_PUSH_2:
+ return format (s, "push-2 %s %d %d", dot1q ? "dot1q" : "dot1ad", tag1,
+ tag2);
+ case L2_VTR_POP_1:
+ return format (s, "pop-1");
+ case L2_VTR_POP_2:
+ return format (s, "pop-2");
+ case L2_VTR_TRANSLATE_1_1:
+ return format (s, "trans-1-1 %s %d", dot1q ? "dot1q" : "dot1ad", tag1);
+ case L2_VTR_TRANSLATE_1_2:
+ return format (s, "trans-1-2 %s %d %d", dot1q ? "dot1q" : "dot1ad",
+ tag1, tag2);
+ case L2_VTR_TRANSLATE_2_1:
+ return format (s, "trans-2-1 %s %d", dot1q ? "dot1q" : "dot1ad", tag1);
+ case L2_VTR_TRANSLATE_2_2:
+ return format (s, "trans-2-2 %s %d %d", dot1q ? "dot1q" : "dot1ad",
+ tag1, tag2);
+ default:
+ return format (s, "none");
+ }
+}
+
+/**
+ Show bridge-domain state.
+ The CLI format is:
+ show bridge-domain [<bd_index>]
+*/
+static clib_error_t *
+bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u32 bd_index = ~0;
+ l2_bridge_domain_t *bd_config;
+ u32 start, end;
+ u32 printed;
+ u32 detail = 0;
+ u32 intf = 0;
+ u32 arp = 0;
+ u32 bd_id = ~0;
+ uword *p;
+
+ start = 0;
+ end = vec_len (l2input_main.bd_configs);
+
+ if (unformat (input, "%d", &bd_id))
+ {
+ if (unformat (input, "detail"))
+ detail = 1;
+ else if (unformat (input, "det"))
+ detail = 1;
+ if (unformat (input, "int"))
+ intf = 1;
+ if (unformat (input, "arp"))
+ arp = 1;
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p)
+ bd_index = *p;
+ else
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ vec_validate (l2input_main.bd_configs, bd_index);
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+ if (bd_is_valid (bd_config))
+ {
+ start = bd_index;
+ end = start + 1;
+ }
+ else
+ {
+ vlib_cli_output (vm, "bridge-domain %d not in use", bd_id);
+ goto done;
+ }
+ }
+
+ /* Show all bridge-domains that have been initialized */
+ printed = 0;
+ for (bd_index = start; bd_index < end; bd_index++)
+ {
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+ if (bd_is_valid (bd_config))
+ {
+ if (!printed)
+ {
+ printed = 1;
+ vlib_cli_output (vm,
+ "%=5s %=7s %=10s %=10s %=10s %=10s %=10s %=14s",
+ "ID", "Index", "Learning", "U-Forwrd",
+ "UU-Flood", "Flooding", "ARP-Term",
+ "BVI-Intf");
+ }
+
+ vlib_cli_output (vm,
+ "%=5d %=7d %=10s %=10s %=10s %=10s %=10s %=14U",
+ bd_config->bd_id, bd_index,
+ bd_config->feature_bitmap & L2INPUT_FEAT_LEARN ?
+ "on" : "off",
+ bd_config->feature_bitmap & L2INPUT_FEAT_FWD ? "on"
+ : "off",
+ bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD ?
+ "on" : "off",
+ bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD ?
+ "on" : "off",
+ bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM ?
+ "on" : "off", format_vnet_sw_if_index_name_with_NA,
+ vnm, bd_config->bvi_sw_if_index);
+
+ if (detail || intf)
+ {
+ /* Show all member interfaces */
+ int i;
+ vec_foreach_index (i, bd_config->members)
+ {
+ l2_flood_member_t *member =
+ vec_elt_at_index (bd_config->members, i);
+ u32 vtr_opr, dot1q, tag1, tag2;
+ if (i == 0)
+ {
+ vlib_cli_output (vm, "\n%=30s%=7s%=5s%=5s%=9s%=30s",
+ "Interface", "Index", "SHG", "BVI",
+ "TxFlood", "VLAN-Tag-Rewrite");
+ }
+ l2vtr_get (vm, vnm, member->sw_if_index, &vtr_opr, &dot1q,
+ &tag1, &tag2);
+ vlib_cli_output (vm, "%=30U%=7d%=5d%=5s%=9s%=30U",
+ format_vnet_sw_if_index_name, vnm,
+ member->sw_if_index, member->sw_if_index,
+ member->shg,
+ member->flags & L2_FLOOD_MEMBER_BVI ? "*" :
+ "-", i < bd_config->flood_count ? "*" : "-",
+ format_vtr, vtr_opr, dot1q, tag1, tag2);
+ }
+ }
+
+ if ((detail || arp) &&
+ (bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM))
+ {
+ u32 ip4_addr;
+ ip6_address_t *ip6_addr;
+ u64 mac_addr;
+ vlib_cli_output (vm,
+ "\n IP4/IP6 to MAC table for ARP Termination");
+
+ /* *INDENT-OFF* */
+ hash_foreach (ip4_addr, mac_addr, bd_config->mac_by_ip4,
+ ({
+ vlib_cli_output (vm, "%=40U => %=20U",
+ format_ip4_address, &ip4_addr,
+ format_ethernet_address, &mac_addr);
+ }));
+
+ hash_foreach_mem (ip6_addr, mac_addr, bd_config->mac_by_ip6,
+ ({
+ vlib_cli_output (vm, "%=40U => %=20U",
+ format_ip6_address, ip6_addr,
+ format_ethernet_address, &mac_addr);
+ }));
+ /* *INDENT-ON* */
+ }
+ }
+ }
+
+ if (!printed)
+ {
+ vlib_cli_output (vm, "no bridge-domains in use");
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Show a summary of all the bridge-domain instances or detailed view of a
+ * single bridge-domain. Bridge-domains are created by adding an interface
+ * to a bridge using the '<em>set interface l2 bridge</em>' command.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of displaying all bridge-domains:
+ * @cliexstart{show bridge-domain}
+ * ID Index Learning U-Forwrd UU-Flood Flooding ARP-Term BVI-Intf
+ * 0 0 off off off off off local0
+ * 200 1 on on on on off N/A
+ * @cliexend
+ *
+ * Example of displaying details of a single bridge-domains:
+ * @cliexstart{show bridge-domain 200 detail}
+ * ID Index Learning U-Forwrd UU-Flood Flooding ARP-Term BVI-Intf
+ * 200 1 on on on on off N/A
+ *
+ * Interface Index SHG BVI VLAN-Tag-Rewrite
+ * GigabitEthernet0/8/0.200 3 0 - none
+ * GigabitEthernet0/9/0.200 4 0 - none
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_show_cli, static) = {
+ .path = "show bridge-domain",
+ .short_help = "show bridge-domain [bridge-domain-id [detail|int|arp]]",
+ .function = bd_show,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_bd.h b/src/vnet/l2/l2_bd.h
new file mode 100644
index 00000000000..4bb9bc9b24c
--- /dev/null
+++ b/src/vnet/l2/l2_bd.h
@@ -0,0 +1,150 @@
+/*
+ * l2_bd.h : layer 2 bridge domain
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2bd_h
+#define included_l2bd_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+typedef struct
+{
+ /* hash bd_id -> bd_index */
+ uword *bd_index_by_bd_id;
+
+ /* Busy bd_index bitmap */
+ uword *bd_index_bitmap;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} bd_main_t;
+
+bd_main_t bd_main;
+
+/* Bridge domain member */
+
+#define L2_FLOOD_MEMBER_NORMAL 0
+#define L2_FLOOD_MEMBER_BVI 1
+
+typedef struct
+{
+ u32 sw_if_index; /* the output L2 interface */
+ u8 flags; /* 0=normal, 1=bvi */
+ u8 shg; /* split horizon group number */
+ u16 spare;
+} l2_flood_member_t;
+
+
+/* Per-bridge domain configuration */
+
+typedef struct
+{
+ u32 feature_bitmap;
+ /*
+ * Contains bit enables for flooding, learning, and forwarding.
+ * All other feature bits should always be set.
+ *
+ * identity of the bridge-domain's BVI interface
+ * set to ~0 if there is no BVI
+ */
+ u32 bvi_sw_if_index;
+
+ /* bridge domain id, not to be confused with bd_index */
+ u32 bd_id;
+
+ /* Vector of member ports */
+ l2_flood_member_t *members;
+
+ /* First flood_count member ports are flooded */
+ u32 flood_count;
+
+ /* Tunnel Master (Multicast vxlan) are always flooded */
+ u32 tun_master_count;
+
+ /* Tunnels (Unicast vxlan) are flooded if there are no masters */
+ u32 tun_normal_count;
+
+ /* hash ip4/ip6 -> mac for arp/nd termination */
+ uword *mac_by_ip4;
+ uword *mac_by_ip6;
+
+ /* mac aging */
+ u8 mac_age;
+
+} l2_bridge_domain_t;
+
+/* Return 1 if bridge domain has been initialized */
+always_inline u32
+bd_is_valid (l2_bridge_domain_t * bd_config)
+{
+ return (bd_config->feature_bitmap != 0);
+}
+
+/* Init bridge domain if not done already */
+void bd_validate (l2_bridge_domain_t * bd_config);
+
+
+void
+bd_add_member (l2_bridge_domain_t * bd_config, l2_flood_member_t * member);
+
+u32 bd_remove_member (l2_bridge_domain_t * bd_config, u32 sw_if_index);
+
+
+#define L2_LEARN (1<<0)
+#define L2_FWD (1<<1)
+#define L2_FLOOD (1<<2)
+#define L2_UU_FLOOD (1<<3)
+#define L2_ARP_TERM (1<<4)
+
+u32 bd_set_flags (vlib_main_t * vm, u32 bd_index, u32 flags, u32 enable);
+void bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age);
+
+/**
+ * \brief Get or create a bridge domain.
+ *
+ * Get or create a bridge domain with the given bridge domain ID.
+ *
+ * \param bdm bd_main pointer.
+ * \param bd_id The bridge domain ID or ~0 if an arbitrary unused bridge domain should be used.
+ * \return The bridge domain index in \c l2input_main->l2_bridge_domain_t vector.
+ */
+u32 bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id);
+
+/**
+ * \brief Delete a bridge domain.
+ *
+ * Delete an existing bridge domain with the given bridge domain ID.
+ *
+ * \param bdm bd_main pointer.
+ * \param bd_id The bridge domain ID.
+ * \return 0 on success and -1 if the bridge domain does not exist.
+ */
+int bd_delete_bd_index (bd_main_t * bdm, u32 bd_id);
+
+u32 bd_add_del_ip_mac (u32 bd_index,
+ u8 * ip_addr, u8 * mac_addr, u8 is_ip6, u8 is_add);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_bvi.c b/src/vnet/l2/l2_bvi.c
new file mode 100644
index 00000000000..f239743a9c4
--- /dev/null
+++ b/src/vnet/l2/l2_bvi.c
@@ -0,0 +1,40 @@
+/*
+ * l2_bvi.c : layer 2 Bridged Virtual Interface
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/l2_fwd.h>
+#include <vnet/l2/l2_flood.h>
+#include <vnet/l2/l2_bvi.h>
+
+
+/* Call the L2 nodes that need the ethertype mapping */
+void
+l2bvi_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index)
+{
+ l2fwd_register_input_type (vm, type, node_index);
+ l2flood_register_input_type (vm, type, node_index);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_bvi.h b/src/vnet/l2/l2_bvi.h
new file mode 100644
index 00000000000..dd1130a6c29
--- /dev/null
+++ b/src/vnet/l2/l2_bvi.h
@@ -0,0 +1,117 @@
+/*
+ * l2_bvi.h : layer 2 Bridged Virtual Interface
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2bvi_h
+#define included_l2bvi_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/sparse_vec.h>
+
+#include <vnet/l2/l2_input.h>
+
+#define TO_BVI_ERR_OK 0
+#define TO_BVI_ERR_BAD_MAC 1
+#define TO_BVI_ERR_ETHERTYPE 2
+
+/**
+ * Send a packet from L2 processing to L3 via the BVI interface.
+ * Set next0 to the proper L3 input node.
+ * Return an error if the packet isn't what we expect.
+ */
+
+static_always_inline u32
+l2_to_bvi (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ vlib_buffer_t * b0,
+ u32 bvi_sw_if_index, next_by_ethertype_t * l3_next, u32 * next0)
+{
+ u8 l2_len;
+ u16 ethertype;
+ u8 *l3h;
+ ethernet_header_t *e0;
+ vnet_hw_interface_t *hi;
+
+ e0 = vlib_buffer_get_current (b0);
+ hi = vnet_get_sup_hw_interface (vnet_main, bvi_sw_if_index);
+
+ /* Perform L3 my-mac filter */
+ if ((!ethernet_address_cast (e0->dst_address)) &&
+ (!eth_mac_equal ((u8 *) e0, hi->hw_address)))
+ {
+ return TO_BVI_ERR_BAD_MAC;
+ }
+
+ /* Save L2 header position which may be changed due to packet replication */
+ vnet_buffer (b0)->ethernet.start_of_ethernet_header = b0->current_data;
+
+ /* Strip L2 header */
+ l2_len = vnet_buffer (b0)->l2.l2_len;
+ vlib_buffer_advance (b0, l2_len);
+
+ l3h = vlib_buffer_get_current (b0);
+ ethertype = clib_net_to_host_u16 (*(u16 *) (l3h - 2));
+
+ /* Set the input interface to be the BVI interface */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = bvi_sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
+
+ /* Go to appropriate L3 input node */
+ if (ethertype == ETHERNET_TYPE_IP4)
+ {
+ *next0 = l3_next->input_next_ip4;
+ }
+ else if (ethertype == ETHERNET_TYPE_IP6)
+ {
+ *next0 = l3_next->input_next_ip6;
+ }
+ else
+ {
+ /* uncommon ethertype, check table */
+ u32 i0;
+
+ i0 = sparse_vec_index (l3_next->input_next_by_type, ethertype);
+ *next0 = vec_elt (l3_next->input_next_by_type, i0);
+
+ if (i0 == SPARSE_VEC_INVALID_INDEX)
+ {
+ return TO_BVI_ERR_ETHERTYPE;
+ }
+ }
+
+ /* increment BVI RX interface stat */
+ vlib_increment_combined_counter
+ (vnet_main->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_main->cpu_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX],
+ 1, vlib_buffer_length_in_chain (vlib_main, b0));
+ return TO_BVI_ERR_OK;
+}
+
+void
+l2bvi_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index);
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_classify.h b/src/vnet/l2/l2_classify.h
new file mode 100644
index 00000000000..184187ff879
--- /dev/null
+++ b/src/vnet/l2/l2_classify.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_vnet_l2_input_classify_h__
+#define __included_vnet_l2_input_classify_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+#include <vnet/classify/vnet_classify.h>
+
+typedef enum
+{
+ L2_INPUT_CLASSIFY_NEXT_DROP,
+ L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT,
+ L2_INPUT_CLASSIFY_NEXT_IP4_INPUT,
+ L2_INPUT_CLASSIFY_NEXT_IP6_INPUT,
+ L2_INPUT_CLASSIFY_NEXT_LI,
+ L2_INPUT_CLASSIFY_N_NEXT,
+} l2_input_classify_next_t;
+
+typedef enum
+{
+ L2_INPUT_CLASSIFY_TABLE_IP4,
+ L2_INPUT_CLASSIFY_TABLE_IP6,
+ L2_INPUT_CLASSIFY_TABLE_OTHER,
+ L2_INPUT_CLASSIFY_N_TABLES,
+} l2_input_classify_table_id_t;
+
+typedef enum
+{
+ L2_OUTPUT_CLASSIFY_NEXT_DROP,
+ L2_OUTPUT_CLASSIFY_N_NEXT,
+} l2_output_classify_next_t;
+
+typedef enum
+{
+ L2_OUTPUT_CLASSIFY_TABLE_IP4,
+ L2_OUTPUT_CLASSIFY_TABLE_IP6,
+ L2_OUTPUT_CLASSIFY_TABLE_OTHER,
+ L2_OUTPUT_CLASSIFY_N_TABLES,
+} l2_output_classify_table_id_t;
+
+typedef struct _l2_classify_main
+{
+ /* Next nodes for each feature */
+ u32 feat_next_node_index[32];
+
+ /* Per-address-family classifier table vectors */
+ u32 *classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_N_TABLES];
+
+ /* Next nodes for features and output interfaces */
+ l2_output_next_nodes_st next_nodes;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ vnet_classify_main_t *vnet_classify_main;
+} l2_input_classify_main_t;
+
+typedef struct _l2_classify_main l2_output_classify_main_t;
+
+extern l2_input_classify_main_t l2_input_classify_main;
+extern vlib_node_registration_t l2_input_classify_node;
+
+extern l2_output_classify_main_t l2_output_classify_main;
+extern vlib_node_registration_t l2_output_classify_node;
+
+void vnet_l2_input_classify_enable_disable (u32 sw_if_index,
+ int enable_disable);
+
+int vnet_l2_input_classify_set_tables (u32 sw_if_index, u32 ip4_table_index,
+ u32 ip6_table_index,
+ u32 other_table_index);
+
+void vnet_l2_output_classify_enable_disable (u32 sw_if_index,
+ int enable_disable);
+
+int vnet_l2_output_classify_set_tables (u32 sw_if_index, u32 ip4_table_index,
+ u32 ip6_table_index,
+ u32 other_table_index);
+
+#endif /* __included_vnet_l2_input_classify_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_efp_filter.c b/src/vnet/l2/l2_efp_filter.c
new file mode 100644
index 00000000000..2db4dc69c9c
--- /dev/null
+++ b/src/vnet/l2/l2_efp_filter.c
@@ -0,0 +1,614 @@
+/*
+ * l2_efp_filter.c : layer 2 egress EFP Filter processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/cache.h>
+
+/**
+ * @file
+ * @brief EFP-filter - Ethernet Flow Point Filter.
+ *
+ * It is possible to transmit a packet out a subinterface with VLAN tags
+ * that are not compatible with that subinterface. In other words, if that
+ * packet arrived on the output port, it would not be classified as coming
+ * from the output subinterface. This can happen in various ways: through
+ * misconfiguration, by putting subinterfaces with different VLAN encaps in
+ * the same bridge-domain, etc. The EFP Filter Check detects such packets
+ * and drops them. It consists of two checks, one that verifies the packet
+ * prior to output VLAN tag rewrite and one that verifies the packet after
+ * VLAN tag rewrite.
+ *
+ */
+typedef struct
+{
+
+ /* Next nodes for features and output interfaces */
+ l2_output_next_nodes_st next_nodes;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_efp_filter_main_t;
+
+
+typedef struct
+{
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u8 raw[12]; /* raw data (vlans) */
+ u32 sw_if_index;
+} l2_efp_filter_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_efp_filter_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_efp_filter_trace_t *t = va_arg (*args, l2_efp_filter_trace_t *);
+
+ s = format (s, "l2-output-vtr: sw_if_index %d dst %U src %U data "
+ "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src,
+ t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4],
+ t->raw[5], t->raw[6], t->raw[7], t->raw[8], t->raw[9],
+ t->raw[10], t->raw[11]);
+ return s;
+}
+
+l2_efp_filter_main_t l2_efp_filter_main;
+
+static vlib_node_registration_t l2_efp_filter_node;
+
+#define foreach_l2_efp_filter_error \
+_(L2_EFP_FILTER, "L2 EFP filter packets") \
+_(DROP, "L2 EFP filter post-rewrite drops")
+
+typedef enum
+{
+#define _(sym,str) L2_EFP_FILTER_ERROR_##sym,
+ foreach_l2_efp_filter_error
+#undef _
+ L2_EFP_FILTER_N_ERROR,
+} l2_efp_filter_error_t;
+
+static char *l2_efp_filter_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_efp_filter_error
+#undef _
+};
+
+typedef enum
+{
+ L2_EFP_FILTER_NEXT_DROP,
+ L2_EFP_FILTER_N_NEXT,
+} l2_efp_filter_next_t;
+
+
+/**
+ * Extract fields from the packet that will be used in interface
+ * classification.
+ */
+static_always_inline void
+extract_keys (vnet_main_t * vnet_main,
+ u32 sw_if_index0,
+ vlib_buffer_t * b0,
+ u32 * port_sw_if_index0,
+ u16 * first_ethertype0,
+ u16 * outer_id0, u16 * inner_id0, u32 * match_flags0)
+{
+ ethernet_header_t *e0;
+ ethernet_vlan_header_t *h0;
+ u32 tag_len;
+ u32 tag_num;
+
+ *port_sw_if_index0 =
+ vnet_get_sup_sw_interface (vnet_main, sw_if_index0)->sw_if_index;
+
+ e0 = vlib_buffer_get_current (b0);
+ h0 = (ethernet_vlan_header_t *) (e0 + 1);
+
+ *first_ethertype0 = clib_net_to_host_u16 (e0->type);
+ *outer_id0 = clib_net_to_host_u16 (h0[0].priority_cfi_and_id);
+ *inner_id0 = clib_net_to_host_u16 (h0[1].priority_cfi_and_id);
+
+ tag_len = vnet_buffer (b0)->l2.l2_len - sizeof (ethernet_header_t);
+ tag_num = tag_len / sizeof (ethernet_vlan_header_t);
+ *match_flags0 = eth_create_valid_subint_match_flags (tag_num);
+}
+
+/*
+ * EFP filtering is a basic switch feature which prevents an interface from
+ * transmitting a packet that doesn't match the interface's ingress match
+ * criteria. The check has two parts, one performed before egress vlan tag
+ * rewrite and one after.
+ *
+ * The pre-rewrite check insures the packet matches what an ingress packet looks
+ * like after going through the interface's ingress tag rewrite operation. Only
+ * pushed tags are compared. So:
+ * - if the ingress vlan tag rewrite pushes no tags (or is not enabled),
+ * any packet passes the filter
+ * - if the ingress vlan tag rewrite pushes one tag,
+ * the packet must have at least one tag, and the outer tag must match the pushed tag
+ * - if the ingress vlan tag rewrite pushes two tags,
+ * the packet must have at least two tags, and the outer two tags must match the pushed tags
+ *
+ * The pre-rewrite check is performed in the l2-output node.
+ *
+ * The post-rewrite check insures the packet matches what an ingress packet looks
+ * like before going through the interface's ingress tag rewrite operation. It verifies
+ * that such a packet arriving on the wire at this port would be classified as arriving
+ * an input interface equal to the packet's output interface. This can be done by running
+ * the output packet's vlan tags and output port through the interface classification,
+ * and checking if the resulting interface matches the output interface.
+ *
+ * The post-rewrite check is performed here.
+ */
+
+static uword
+l2_efp_filter_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_efp_filter_next_t next_index;
+ l2_efp_filter_main_t *msm = &l2_efp_filter_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_efp_filter_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+ u32 cached_sw_if_index = ~0;
+ u32 cached_next_index = ~0;
+
+ /* invalidate cache to begin with */
+ cached_sw_if_index = ~0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 6 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 feature_bitmap0, feature_bitmap1;
+ u16 first_ethertype0, first_ethertype1;
+ u16 outer_id0, inner_id0, outer_id1, inner_id1;
+ u32 match_flags0, match_flags1;
+ u32 port_sw_if_index0, subint_sw_if_index0, port_sw_if_index1,
+ subint_sw_if_index1;
+ vnet_hw_interface_t *hi0, *hi1;
+ main_intf_t *main_intf0, *main_intf1;
+ vlan_intf_t *vlan_intf0, *vlan_intf1;
+ qinq_intf_t *qinq_intf0, *qinq_intf1;
+ u32 is_l20, is_l21;
+ __attribute__ ((unused)) u32 matched0, matched1;
+ u8 error0, error1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3, *p4, *p5;
+ __attribute__ ((unused)) u32 sw_if_index2, sw_if_index3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+
+ /*
+ * Prefetch the input config for the N+1 loop iteration
+ * This depends on the buffer header above
+ */
+ sw_if_index2 = vnet_buffer (p2)->sw_if_index[VLIB_TX];
+ sw_if_index3 = vnet_buffer (p3)->sw_if_index[VLIB_TX];
+ /*
+ * $$$ TODO
+ * CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index2), CLIB_CACHE_LINE_BYTES, LOAD);
+ * CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index3), CLIB_CACHE_LINE_BYTES, LOAD);
+ */
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* TX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+
+ /* process 2 packets */
+ em->counters[node_counter_base_index +
+ L2_EFP_FILTER_ERROR_L2_EFP_FILTER] += 2;
+
+ /* Remove ourself from the feature bitmap */
+ feature_bitmap0 =
+ vnet_buffer (b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_EFP_FILTER;
+ feature_bitmap1 =
+ vnet_buffer (b1)->l2.feature_bitmap & ~L2OUTPUT_FEAT_EFP_FILTER;
+
+ /* Determine next node */
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2_efp_filter_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b0, sw_if_index0, feature_bitmap0, &next0);
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2_efp_filter_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b1, sw_if_index1, feature_bitmap1, &next1);
+
+ /* perform the efp filter check on two packets */
+
+ extract_keys (msm->vnet_main,
+ sw_if_index0,
+ b0,
+ &port_sw_if_index0,
+ &first_ethertype0,
+ &outer_id0, &inner_id0, &match_flags0);
+
+ extract_keys (msm->vnet_main,
+ sw_if_index1,
+ b1,
+ &port_sw_if_index1,
+ &first_ethertype1,
+ &outer_id1, &inner_id1, &match_flags1);
+
+ eth_vlan_table_lookups (&ethernet_main,
+ msm->vnet_main,
+ port_sw_if_index0,
+ first_ethertype0,
+ outer_id0,
+ inner_id0,
+ &hi0,
+ &main_intf0, &vlan_intf0, &qinq_intf0);
+
+ eth_vlan_table_lookups (&ethernet_main,
+ msm->vnet_main,
+ port_sw_if_index1,
+ first_ethertype1,
+ outer_id1,
+ inner_id1,
+ &hi1,
+ &main_intf1, &vlan_intf1, &qinq_intf1);
+
+ matched0 = eth_identify_subint (hi0,
+ b0,
+ match_flags0,
+ main_intf0,
+ vlan_intf0,
+ qinq_intf0,
+ &subint_sw_if_index0,
+ &error0, &is_l20);
+
+ matched1 = eth_identify_subint (hi1,
+ b1,
+ match_flags1,
+ main_intf1,
+ vlan_intf1,
+ qinq_intf1,
+ &subint_sw_if_index1,
+ &error1, &is_l21);
+
+ if (PREDICT_FALSE (sw_if_index0 != subint_sw_if_index0))
+ {
+ /* Drop packet */
+ next0 = L2_EFP_FILTER_NEXT_DROP;
+ b0->error = node->errors[L2_EFP_FILTER_ERROR_DROP];
+ }
+
+ if (PREDICT_FALSE (sw_if_index1 != subint_sw_if_index1))
+ {
+ /* Drop packet */
+ next1 = L2_EFP_FILTER_NEXT_DROP;
+ b1->error = node->errors[L2_EFP_FILTER_ERROR_DROP];
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ l2_efp_filter_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *h1 = vlib_buffer_get_current (b1);
+ l2_efp_filter_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ clib_memcpy (t->raw, &h1->type, sizeof (t->raw));
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ u32 feature_bitmap0;
+ u16 first_ethertype0;
+ u16 outer_id0, inner_id0;
+ u32 match_flags0;
+ u32 port_sw_if_index0, subint_sw_if_index0;
+ vnet_hw_interface_t *hi0;
+ main_intf_t *main_intf0;
+ vlan_intf_t *vlan_intf0;
+ qinq_intf_t *qinq_intf0;
+ u32 is_l20;
+ __attribute__ ((unused)) u32 matched0;
+ u8 error0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ /* process 1 packet */
+ em->counters[node_counter_base_index +
+ L2_EFP_FILTER_ERROR_L2_EFP_FILTER] += 1;
+
+ /* Remove ourself from the feature bitmap */
+ feature_bitmap0 =
+ vnet_buffer (b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_EFP_FILTER;
+
+ /* Determine next node */
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2_efp_filter_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b0, sw_if_index0, feature_bitmap0, &next0);
+
+ /* perform the efp filter check on one packet */
+
+ extract_keys (msm->vnet_main,
+ sw_if_index0,
+ b0,
+ &port_sw_if_index0,
+ &first_ethertype0,
+ &outer_id0, &inner_id0, &match_flags0);
+
+ eth_vlan_table_lookups (&ethernet_main,
+ msm->vnet_main,
+ port_sw_if_index0,
+ first_ethertype0,
+ outer_id0,
+ inner_id0,
+ &hi0,
+ &main_intf0, &vlan_intf0, &qinq_intf0);
+
+ matched0 = eth_identify_subint (hi0,
+ b0,
+ match_flags0,
+ main_intf0,
+ vlan_intf0,
+ qinq_intf0,
+ &subint_sw_if_index0,
+ &error0, &is_l20);
+
+ if (PREDICT_FALSE (sw_if_index0 != subint_sw_if_index0))
+ {
+ /* Drop packet */
+ next0 = L2_EFP_FILTER_NEXT_DROP;
+ b0->error = node->errors[L2_EFP_FILTER_ERROR_DROP];
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ l2_efp_filter_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_efp_filter_node,static) = {
+ .function = l2_efp_filter_node_fn,
+ .name = "l2-efp-filter",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_efp_filter_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_efp_filter_error_strings),
+ .error_strings = l2_efp_filter_error_strings,
+
+ .n_next_nodes = L2_EFP_FILTER_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_EFP_FILTER_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_efp_filter_node, l2_efp_filter_node_fn)
+ clib_error_t *l2_efp_filter_init (vlib_main_t * vm)
+{
+ l2_efp_filter_main_t *mp = &l2_efp_filter_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_efp_filter_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ mp->next_nodes.feat_next_node_index);
+
+ /* Initialize the output node mapping table */
+ l2output_init_output_node_vec (&mp->next_nodes.output_node_index_vec);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_efp_filter_init);
+
+
+/** Enable/disable the EFP Filter check on the subinterface. */
+void
+l2_efp_filter_configure (vnet_main_t * vnet_main, u32 sw_if_index, u32 enable)
+{
+ /* set the interface flag */
+ l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_EFP_FILTER, enable);
+}
+
+
+/**
+ * Set subinterface egress efp filter enable/disable.
+ * The CLI format is:
+ * set interface l2 efp-filter <interface> [disable]]
+ */
+static clib_error_t *
+int_l2_efp_filter (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* enable/disable the feature */
+ l2_efp_filter_configure (vnm, sw_if_index, enable);
+
+done:
+ return error;
+}
+
+
+/*?
+ * EFP filtering is a basic switch feature which prevents an interface from
+ * transmitting a packet that doesn't match the interface's ingress match
+ * criteria. The check has two parts, one performed before egress vlan tag
+ * rewrite and one after. This command enables or disables the EFP filtering
+ * for a given sub-interface.
+ *
+ * @cliexpar
+ * Example of how to enable a Layer 2 efp-filter on a sub-interface:
+ * @cliexcmd{set interface l2 efp-filter GigabitEthernet0/8/0.200}
+ * Example of how to disable a Layer 2 efp-filter on a sub-interface:
+ * @cliexcmd{set interface l2 efp-filter GigabitEthernet0/8/0.200 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_efp_filter_cli, static) = {
+ .path = "set interface l2 efp-filter",
+ .short_help = "set interface l2 efp-filter <interface> [disable]",
+ .function = int_l2_efp_filter,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_efp_filter.h b/src/vnet/l2/l2_efp_filter.h
new file mode 100644
index 00000000000..f40851df3bd
--- /dev/null
+++ b/src/vnet/l2/l2_efp_filter.h
@@ -0,0 +1,33 @@
+/*
+ * l2_efp_filter.h : layer 2 egress EFP Filter processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef included_vnet_l2_efp_filter_h
+#define included_vnet_l2_efp_filter_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c
new file mode 100644
index 00000000000..d34836e33d0
--- /dev/null
+++ b/src/vnet/l2/l2_fib.c
@@ -0,0 +1,857 @@
+/*
+ * l2_fib.c : layer 2 forwarding table (aka mac table)
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_fib.h>
+#include <vnet/l2/l2_learn.h>
+#include <vnet/l2/l2_bd.h>
+
+#include <vppinfra/bihash_template.c>
+
+/**
+ * @file
+ * @brief Ethernet MAC Address FIB Table Management.
+ *
+ * The MAC Address forwarding table for bridge-domains is called the l2fib.
+ * Entries are added automatically as part of mac learning, but MAC Addresses
+ * entries can also be added manually.
+ *
+ */
+
+typedef struct
+{
+
+ /* hash table */
+ BVT (clib_bihash) mac_table;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2fib_main_t;
+
+l2fib_main_t l2fib_main;
+
+
+/** Format sw_if_index. If the value is ~0, use the text "N/A" */
+u8 *
+format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ u32 sw_if_index = va_arg (*args, u32);
+ if (sw_if_index == ~0)
+ return format (s, "N/A");
+ else
+ return format (s, "%U",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, sw_if_index));
+}
+
+void
+l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key,
+ l2fib_entry_result_t ** l2fe_res)
+{
+ l2fib_main_t *msm = &l2fib_main;
+ BVT (clib_bihash) * h = &msm->mac_table;
+ clib_bihash_bucket_t *b;
+ BVT (clib_bihash_value) * v;
+ l2fib_entry_key_t key;
+ l2fib_entry_result_t result;
+ int i, j, k;
+
+ for (i = 0; i < h->nbuckets; i++)
+ {
+ b = &h->buckets[i];
+ if (b->offset == 0)
+ continue;
+ v = BV (clib_bihash_get_value) (h, b->offset);
+ for (j = 0; j < (1 << b->log2_pages); j++)
+ {
+ for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+ {
+ if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+ continue;
+
+ key.raw = v->kvp[k].key;
+ result.raw = v->kvp[k].value;
+
+ if ((bd_index == ~0) || (bd_index == key.fields.bd_index))
+ {
+ vec_add1 (*l2fe_key, key);
+ vec_add1 (*l2fe_res, result);
+ }
+ }
+ v++;
+ }
+ }
+}
+
+/** Display the contents of the l2fib. */
+static clib_error_t *
+show_l2fib (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ l2fib_main_t *msm = &l2fib_main;
+ l2_bridge_domain_t *bd_config;
+ BVT (clib_bihash) * h = &msm->mac_table;
+ clib_bihash_bucket_t *b;
+ BVT (clib_bihash_value) * v;
+ l2fib_entry_key_t key;
+ l2fib_entry_result_t result;
+ u32 first_entry = 1;
+ u64 total_entries = 0;
+ int i, j, k;
+ u8 verbose = 0;
+ u8 raw = 0;
+ u32 bd_id, bd_index = ~0;
+ u8 now = (u8) (vlib_time_now (vm) / 60);
+ u8 *s = 0;
+
+ if (unformat (input, "raw"))
+ raw = 1;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+ else if (unformat (input, "bd_index %d", &bd_index))
+ verbose = 1;
+ else if (unformat (input, "bd_id %d", &bd_id))
+ {
+ uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p)
+ {
+ verbose = 1;
+ bd_index = p[0];
+ }
+ else
+ {
+ vlib_cli_output (vm, "no such bridge domain id");
+ return 0;
+ }
+ }
+
+ for (i = 0; i < h->nbuckets; i++)
+ {
+ b = &h->buckets[i];
+ if (b->offset == 0)
+ continue;
+ v = BV (clib_bihash_get_value) (h, b->offset);
+ for (j = 0; j < (1 << b->log2_pages); j++)
+ {
+ for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+ {
+ if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+ continue;
+
+ if (verbose && first_entry)
+ {
+ first_entry = 0;
+ vlib_cli_output (vm,
+ "%=19s%=7s%=30s%=7s%=8s%=8s%=5s%=16s",
+ "Mac Address", "BD Idx", "Interface",
+ "Index", "static", "filter", "bvi",
+ "Mac Age (min)");
+ }
+
+ key.raw = v->kvp[k].key;
+ result.raw = v->kvp[k].value;
+
+ if (verbose
+ & ((bd_index >> 31) || (bd_index == key.fields.bd_index)))
+ {
+ bd_config = vec_elt_at_index (l2input_main.bd_configs,
+ key.fields.bd_index);
+
+ if (bd_config->mac_age)
+ {
+ i16 delta = now - result.fields.timestamp;
+ delta += delta < 0 ? 256 : 0;
+ s = format (s, "%d", delta);
+ }
+ else
+ s = format (s, "disabled");
+
+ vlib_cli_output (vm,
+ "%=19U%=7d%=30U%=7d%=8d%=8d%=5d%=16v",
+ format_ethernet_address, key.fields.mac,
+ key.fields.bd_index,
+ format_vnet_sw_if_index_name_with_NA,
+ msm->vnet_main, result.fields.sw_if_index,
+ result.fields.sw_if_index == ~0
+ ? -1 : result.fields.sw_if_index,
+ result.fields.static_mac,
+ result.fields.filter,
+ result.fields.bvi, s);
+ vec_reset_length (s);
+ }
+ total_entries++;
+ }
+ v++;
+ }
+ }
+
+ if (total_entries == 0)
+ vlib_cli_output (vm, "no l2fib entries");
+ else
+ vlib_cli_output (vm, "%lld l2fib entries", total_entries);
+
+ if (raw)
+ vlib_cli_output (vm, "Raw Hash Table:\n%U\n",
+ BV (format_bihash), h, 1 /* verbose */ );
+
+ vec_free (s);
+ return 0;
+}
+
+/*?
+ * This command dispays the MAC Address entries of the L2 FIB table.
+ * Output can be filtered to just get the number of MAC Addresses or display
+ * each MAC Address for all bridge domains or just a single bridge domain.
+ *
+ * @cliexpar
+ * Example of how to display the number of MAC Address entries in the L2
+ * FIB table:
+ * @cliexstart{show l2fib}
+ * 3 l2fib entries
+ * @cliexend
+ * Example of how to display all the MAC Address entries in the L2
+ * FIB table:
+ * @cliexstart{show l2fib verbose}
+ * Mac Address BD Idx Interface Index static filter bvi refresh timestamp
+ * 52:54:00:53:18:33 1 GigabitEthernet0/8/0.200 3 0 0 0 0 0
+ * 52:54:00:53:18:55 1 GigabitEthernet0/8/0.200 3 1 0 0 0 0
+ * 52:54:00:53:18:77 1 N/A -1 1 1 0 0 0
+ * 3 l2fib entries
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_l2fib_cli, static) = {
+ .path = "show l2fib",
+ .short_help = "show l2fib [verbose | bd_id <nn> | bd_index <nn> | raw]",
+ .function = show_l2fib,
+};
+/* *INDENT-ON* */
+
+
+/* Remove all entries from the l2fib */
+void
+l2fib_clear_table (uint keep_static)
+{
+ l2fib_main_t *mp = &l2fib_main;
+
+ if (keep_static)
+ {
+ /* TODO: remove only non-static entries */
+ }
+ else
+ {
+ /* Remove all entries */
+ BV (clib_bihash_free) (&mp->mac_table);
+ BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table",
+ L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE);
+ }
+
+ l2learn_main.global_learn_count = 0;
+}
+
+/** Clear all entries in L2FIB.
+ * @TODO: Later we may want a way to remove only the non-static entries
+ */
+static clib_error_t *
+clear_l2fib (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2fib_clear_table (0);
+ return 0;
+}
+
+/*?
+ * This command clears all the MAC Address entries from the L2 FIB table.
+ *
+ * @cliexpar
+ * Example of how to clear the L2 FIB Table:
+ * @cliexcmd{clear l2fib}
+ * Example to show the L2 FIB Table has been cleared:
+ * @cliexstart{show l2fib verbose}
+ * no l2fib entries
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_l2fib_cli, static) = {
+ .path = "clear l2fib",
+ .short_help = "clear l2fib",
+ .function = clear_l2fib,
+};
+/* *INDENT-ON* */
+
+
+/**
+ * Add an entry to the l2fib.
+ * If the entry already exists then overwrite it
+ */
+void
+l2fib_add_entry (u64 mac,
+ u32 bd_index,
+ u32 sw_if_index, u32 static_mac, u32 filter_mac, u32 bvi_mac)
+{
+ l2fib_entry_key_t key;
+ l2fib_entry_result_t result;
+ __attribute__ ((unused)) u32 bucket_contents;
+ l2fib_main_t *mp = &l2fib_main;
+ BVT (clib_bihash_kv) kv;
+
+ /* set up key */
+ key.raw = l2fib_make_key ((u8 *) & mac, bd_index);
+
+ /* set up result */
+ result.raw = 0; /* clear all fields */
+ result.fields.sw_if_index = sw_if_index;
+ result.fields.static_mac = static_mac;
+ result.fields.filter = filter_mac;
+ result.fields.bvi = bvi_mac;
+
+ kv.key = key.raw;
+ kv.value = result.raw;
+
+ BV (clib_bihash_add_del) (&mp->mac_table, &kv, 1 /* is_add */ );
+
+ /* increment counter if dynamically learned mac */
+ if (result.fields.static_mac)
+ {
+ l2learn_main.global_learn_count++;
+ }
+}
+
+/**
+ * Add an entry to the L2FIB.
+ * The CLI format is:
+ * l2fib add <mac> <bd> <intf> [static] [bvi]
+ * l2fib add <mac> <bd> filter
+ * Note that filter and bvi entries are always static
+ */
+static clib_error_t *
+l2fib_add (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u64 mac;
+ u32 bd_id;
+ u32 bd_index;
+ u32 sw_if_index = ~0;
+ u32 filter_mac = 0;
+ u32 static_mac = 0;
+ u32 bvi_mac = 0;
+ uword *p;
+
+ if (!unformat_user (input, unformat_ethernet_address, &mac))
+ {
+ error = clib_error_return (0, "expected mac address `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expected bridge domain ID `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (!p)
+ {
+ error = clib_error_return (0, "bridge domain ID %d invalid", bd_id);
+ goto done;
+ }
+ bd_index = p[0];
+
+ if (unformat (input, "filter"))
+ {
+ filter_mac = 1;
+ static_mac = 1;
+
+ }
+ else
+ {
+
+ if (!unformat_user
+ (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ if (unformat (input, "static"))
+ {
+ static_mac = 1;
+ }
+ else if (unformat (input, "bvi"))
+ {
+ bvi_mac = 1;
+ static_mac = 1;
+ }
+ }
+
+ l2fib_add_entry (mac, bd_index, sw_if_index, static_mac, filter_mac,
+ bvi_mac);
+
+done:
+ return error;
+}
+
+/*?
+ * This command adds a MAC Address entry to the L2 FIB table
+ * of an existing bridge-domain. The MAC Address can be static
+ * or dynamic. This command also allows a filter to be added,
+ * such that packets with given MAC Addresses (source mac or
+ * destination mac match) are dropped.
+ *
+ * @cliexpar
+ * Example of how to add a dynamic MAC Address entry to the L2 FIB table
+ * of a bridge-domain (where 200 is the bridge-domain-id):
+ * @cliexcmd{l2fib add 52:54:00:53:18:33 200 GigabitEthernet0/8/0.200}
+ * Example of how to add a static MAC Address entry to the L2 FIB table
+ * of a bridge-domain (where 200 is the bridge-domain-id):
+ * @cliexcmd{l2fib add 52:54:00:53:18:55 200 GigabitEthernet0/8/0.200 static}
+ * Example of how to add a filter such that a packet with the given MAC
+ * Address will be dropped in a given bridge-domain (where 200 is the
+ * bridge-domain-id):
+ * @cliexcmd{l2fib add 52:54:00:53:18:77 200 filter}
+ * Example of show command of the provisioned MAC Addresses and filters:
+ * @cliexstart{show l2fib verbose}
+ * Mac Address BD Idx Interface Index static filter bvi refresh timestamp
+ * 52:54:00:53:18:33 1 GigabitEthernet0/8/0.200 3 0 0 0 0 0
+ * 52:54:00:53:18:55 1 GigabitEthernet0/8/0.200 3 1 0 0 0 0
+ * 52:54:00:53:18:77 1 N/A -1 1 1 0 0 0
+ * 3 l2fib entries
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_add_cli, static) = {
+ .path = "l2fib add",
+ .short_help = "l2fib add <mac> <bridge-domain-id> filter | <intf> [static | bvi]",
+ .function = l2fib_add,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+l2fib_test_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ u64 mac, save_mac;
+ u32 bd_index = 0;
+ u32 sw_if_index = 8;
+ u32 filter_mac = 0;
+ u32 bvi_mac = 0;
+ u32 is_add = 0;
+ u32 is_del = 0;
+ u32 is_check = 0;
+ u32 count = 1;
+ int mac_set = 0;
+ int i;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "mac %U", unformat_ethernet_address, &mac))
+ mac_set = 1;
+ else if (unformat (input, "add"))
+ is_add = 1;
+ else if (unformat (input, "del"))
+ is_del = 1;
+ else if (unformat (input, "check"))
+ is_check = 1;
+ else if (unformat (input, "count %d", &count))
+ ;
+ else
+ break;
+ }
+
+ if (mac_set == 0)
+ return clib_error_return (0, "mac not set");
+
+ if (is_add == 0 && is_del == 0 && is_check == 0)
+ return clib_error_return (0,
+ "noop: pick at least one of (add,del,check)");
+
+ save_mac = mac;
+
+ if (is_add)
+ {
+ for (i = 0; i < count; i++)
+ {
+ u64 tmp;
+ l2fib_add_entry (mac, bd_index, sw_if_index, mac,
+ filter_mac, bvi_mac);
+ tmp = clib_net_to_host_u64 (mac);
+ tmp >>= 16;
+ tmp++;
+ tmp <<= 16;
+ mac = clib_host_to_net_u64 (tmp);
+ }
+ }
+
+ if (is_check)
+ {
+ BVT (clib_bihash_kv) kv;
+ l2fib_main_t *mp = &l2fib_main;
+
+ mac = save_mac;
+
+ for (i = 0; i < count; i++)
+ {
+ u64 tmp;
+ kv.key = l2fib_make_key ((u8 *) & mac, bd_index);
+ if (BV (clib_bihash_search) (&mp->mac_table, &kv, &kv))
+ {
+ clib_warning ("key %U AWOL", format_ethernet_address, &mac);
+ break;
+ }
+ tmp = clib_net_to_host_u64 (mac);
+ tmp >>= 16;
+ tmp++;
+ tmp <<= 16;
+ mac = clib_host_to_net_u64 (tmp);
+ }
+ }
+
+ if (is_del)
+ {
+ for (i = 0; i < count; i++)
+ {
+ u64 tmp;
+
+ l2fib_del_entry (mac, bd_index);
+
+ tmp = clib_net_to_host_u64 (mac);
+ tmp >>= 16;
+ tmp++;
+ tmp <<= 16;
+ mac = clib_host_to_net_u64 (tmp);
+ }
+ }
+
+ return error;
+}
+
+/*?
+ * The set of '<em>test l2fib</em>' commands allow the L2 FIB table of the default
+ * bridge domain (bridge-domain-id of 0) to be modified.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to add a set of 4 sequential MAC Address entries to L2
+ * FIB table of the default bridge-domain:
+ * @cliexcmd{test l2fib add mac 52:54:00:53:00:00 count 4}
+ *
+ * Show the set of 4 sequential MAC Address entries that were added:
+ * @cliexstart{show l2fib verbose}
+ * Mac Address BD Idx Interface Index static filter bvi refresh timestamp
+ * 52:54:00:53:00:00 0 GigabitEthernet0/8/0.300 8 0 0 0 0 0
+ * 52:54:00:53:00:01 0 GigabitEthernet0/8/0.300 8 0 0 0 0 0
+ * 52:54:00:53:00:03 0 GigabitEthernet0/8/0.300 8 0 0 0 0 0
+ * 52:54:00:53:00:02 0 GigabitEthernet0/8/0.300 8 0 0 0 0 0
+ * 4 l2fib entries
+ * @cliexend
+ *
+ * Example of how to check that the set of 4 sequential MAC Address
+ * entries were added to L2 FIB table of the default
+ * bridge-domain. Used a count of 5 to produce an error:
+ *
+ * @cliexcmd{test l2fib check mac 52:54:00:53:00:00 count 5}
+ * The output of the check command is in the log files. Log file
+ * location may vary based on your OS and Version:
+ *
+ * <b><em># tail -f /var/log/messages | grep l2fib_test_command_fn</em></b>
+ *
+ * Sep 7 17:15:24 localhost vnet[4952]: l2fib_test_command_fn:446: key 52:54:00:53:00:04 AWOL
+ *
+ * Example of how to delete a set of 4 sequential MAC Address entries
+ * from L2 FIB table of the default bridge-domain:
+ * @cliexcmd{test l2fib del mac 52:54:00:53:00:00 count 4}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_test_command, static) = {
+ .path = "test l2fib",
+ .short_help = "test l2fib [add|del|check] mac <base-addr> count <nn>",
+ .function = l2fib_test_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/**
+ * Delete an entry from the l2fib.
+ * Return 0 if the entry was deleted, or 1 if it was not found
+ */
+u32
+l2fib_del_entry (u64 mac, u32 bd_index)
+{
+
+ l2fib_entry_result_t result;
+ l2fib_main_t *mp = &l2fib_main;
+ BVT (clib_bihash_kv) kv;
+
+ /* set up key */
+ kv.key = l2fib_make_key ((u8 *) & mac, bd_index);
+
+ if (BV (clib_bihash_search) (&mp->mac_table, &kv, &kv))
+ return 1;
+
+ result.raw = kv.value;
+
+ /* decrement counter if dynamically learned mac */
+ if (result.fields.static_mac)
+ {
+ if (l2learn_main.global_learn_count > 0)
+ {
+ l2learn_main.global_learn_count--;
+ }
+ }
+
+ /* Remove entry from hash table */
+ BV (clib_bihash_add_del) (&mp->mac_table, &kv, 0 /* is_add */ );
+ return 0;
+}
+
+/**
+ * Delete an entry from the L2FIB.
+ * The CLI format is:
+ * l2fib del <mac> <bd-id>
+ */
+static clib_error_t *
+l2fib_del (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ bd_main_t *bdm = &bd_main;
+ clib_error_t *error = 0;
+ u64 mac;
+ u32 bd_id;
+ u32 bd_index;
+ uword *p;
+
+ if (!unformat_user (input, unformat_ethernet_address, &mac))
+ {
+ error = clib_error_return (0, "expected mac address `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expected bridge domain ID `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (!p)
+ {
+ error = clib_error_return (0, "bridge domain ID %d invalid", bd_id);
+ goto done;
+ }
+ bd_index = p[0];
+
+ /* Delete the entry */
+ if (l2fib_del_entry (mac, bd_index))
+ {
+ error = clib_error_return (0, "mac entry not found");
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * This command deletes an existing MAC Address entry from the L2 FIB
+ * table of an existing bridge-domain.
+ *
+ * @cliexpar
+ * Example of how to delete a MAC Address entry from the L2 FIB table of a bridge-domain (where 200 is the bridge-domain-id):
+ * @cliexcmd{l2fib del 52:54:00:53:18:33 200}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_del_cli, static) = {
+ .path = "l2fib del",
+ .short_help = "l2fib del <mac> <bridge-domain-id>",
+ .function = l2fib_del,
+};
+/* *INDENT-ON* */
+
+
+BVT (clib_bihash) * get_mac_table (void)
+{
+ l2fib_main_t *mp = &l2fib_main;
+ return &mp->mac_table;
+}
+
+static uword
+l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ uword event_type, *event_data = 0;
+ l2fib_main_t *msm = &l2fib_main;
+ l2_bridge_domain_t *bd_config;
+ BVT (clib_bihash) * h = &msm->mac_table;
+ clib_bihash_bucket_t *b;
+ BVT (clib_bihash_value) * v;
+ l2fib_entry_key_t key;
+ l2fib_entry_result_t result;
+ int i, j, k;
+ bool enabled = 0;
+ f64 start_time, last_run_duration = 0, t;
+ i16 delta;
+
+ while (1)
+ {
+ if (enabled)
+ vlib_process_wait_for_event_or_clock (vm, 60 - last_run_duration);
+ else
+ vlib_process_wait_for_event (vm);
+
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ switch (event_type)
+ {
+ case ~0:
+ break;
+ case L2_MAC_AGE_PROCESS_EVENT_START:
+ enabled = 1;
+ break;
+ case L2_MAC_AGE_PROCESS_EVENT_STOP:
+ enabled = 0;
+ continue;
+ default:
+ ASSERT (0);
+ }
+ last_run_duration = start_time = vlib_time_now (vm);
+ for (i = 0; i < h->nbuckets; i++)
+ {
+ /* Allow no more than 10us without a pause */
+ t = vlib_time_now (vm);
+ if (t > start_time + 10e-6)
+ {
+ vlib_process_suspend (vm, 100e-6); /* suspend for 100 us */
+ start_time = vlib_time_now (vm);
+ }
+
+ if (i < (h->nbuckets - 3))
+ {
+ b = &h->buckets[i + 3];
+ CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
+ b = &h->buckets[i + 1];
+ if (b->offset)
+ {
+ v = BV (clib_bihash_get_value) (h, b->offset);
+ CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+ }
+
+ b = &h->buckets[i];
+ if (b->offset == 0)
+ continue;
+ v = BV (clib_bihash_get_value) (h, b->offset);
+ for (j = 0; j < (1 << b->log2_pages); j++)
+ {
+ for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+ {
+ if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+ continue;
+
+ key.raw = v->kvp[k].key;
+ result.raw = v->kvp[k].value;
+
+ if (result.fields.static_mac)
+ continue;
+
+ bd_config = vec_elt_at_index (l2input_main.bd_configs,
+ key.fields.bd_index);
+
+ if (bd_config->mac_age == 0)
+ continue;
+
+ delta = (u8) (start_time / 60) - result.fields.timestamp;
+ delta += delta < 0 ? 256 : 0;
+
+ if (delta > bd_config->mac_age)
+ {
+ void *p = &key.fields.mac;
+ l2fib_del_entry (*(u64 *) p, key.fields.bd_index);
+ }
+ }
+ v++;
+ }
+ }
+ last_run_duration = vlib_time_now (vm) - last_run_duration;
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2fib_mac_age_scanner_process_node) = {
+ .function = l2fib_mac_age_scanner_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "l2fib-mac-age-scanner-process",
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+l2fib_init (vlib_main_t * vm)
+{
+ l2fib_main_t *mp = &l2fib_main;
+ l2fib_entry_key_t test_key;
+ u8 test_mac[6];
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Create the hash table */
+ BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table",
+ L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE);
+
+ /* verify the key constructor is good, since it is endian-sensitive */
+ memset (test_mac, 0, sizeof (test_mac));
+ test_mac[0] = 0x11;
+ test_key.raw = 0;
+ test_key.raw = l2fib_make_key ((u8 *) & test_mac, 0x1234);
+ ASSERT (test_key.fields.mac[0] == 0x11);
+ ASSERT (test_key.fields.bd_index == 0x1234);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2fib_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h
new file mode 100644
index 00000000000..4a2da59bc01
--- /dev/null
+++ b/src/vnet/l2/l2_fib.h
@@ -0,0 +1,341 @@
+/*
+ * l2_fib.h : layer 2 forwarding table (aka mac table)
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2fib_h
+#define included_l2fib_h
+
+#include <vlib/vlib.h>
+#include <vppinfra/bihash_8_8.h>
+
+/*
+ * The size of the hash table
+ */
+#define L2FIB_NUM_BUCKETS (64 * 1024)
+#define L2FIB_MEMORY_SIZE (256<<20)
+
+/*
+ * The L2fib key is the mac address and bridge domain ID
+ */
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u16 bd_index;
+ u8 mac[6];
+ } fields;
+ struct
+ {
+ u32 w0;
+ u32 w1;
+ } words;
+ u64 raw;
+ };
+} l2fib_entry_key_t;
+
+STATIC_ASSERT_SIZEOF (l2fib_entry_key_t, 8);
+
+/*
+ * The l2fib entry results
+ */
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u32 sw_if_index; /* output sw_if_index (L3 interface if bvi==1) */
+
+ u8 static_mac:1; /* static mac, no dataplane learning */
+ u8 bvi:1; /* mac is for a bridged virtual interface */
+ u8 filter:1; /* drop packets to/from this mac */
+ u8 unused1:5;
+ u8 timestamp; /* timestamp for aging */
+ u16 unused2;
+ } fields;
+ u64 raw;
+ };
+} l2fib_entry_result_t;
+
+STATIC_ASSERT_SIZEOF (l2fib_entry_result_t, 8);
+
+/**
+ * Compute the hash for the given key and return
+ * the corresponding bucket index
+ */
+always_inline u32
+l2fib_compute_hash_bucket (l2fib_entry_key_t * key)
+{
+ u32 result;
+ u32 temp_a;
+ u32 temp_b;
+
+ result = 0xa5a5a5a5; /* some seed */
+ temp_a = key->words.w0;
+ temp_b = key->words.w1;
+ hash_mix32 (temp_a, temp_b, result);
+
+ return result % L2FIB_NUM_BUCKETS;
+}
+
+always_inline u64
+l2fib_make_key (u8 * mac_address, u16 bd_index)
+{
+ u64 temp;
+
+ /*
+ * The mac address in memory is A:B:C:D:E:F
+ * The bd id in register is H:L
+ */
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ /*
+ * Create the in-register key as F:E:D:C:B:A:H:L
+ * In memory the key is L:H:A:B:C:D:E:F
+ */
+ temp = *((u64 *) (mac_address)) << 16;
+ temp = (temp & ~0xffff) | (u64) (bd_index);
+#else
+ /*
+ * Create the in-register key as H:L:A:B:C:D:E:F
+ * In memory the key is H:L:A:B:C:D:E:F
+ */
+ temp = *((u64 *) (mac_address)) >> 16;
+ temp = temp | (((u64) bd_index) << 48);
+#endif
+
+ return temp;
+}
+
+
+
+/**
+ * Lookup the entry for mac and bd_index in the mac table for 1 packet.
+ * Cached_key and cached_result are used as a one-entry cache.
+ * The function reads and updates them as needed.
+ *
+ * mac0 and bd_index0 are the keys. The entry is written to result0.
+ * If the entry was not found, result0 is set to ~0.
+ *
+ * key0 and bucket0 return with the computed key and hash bucket,
+ * convenient if the entry needs to be updated afterward.
+ * If the cached_result was used, bucket0 is set to ~0.
+ */
+
+static_always_inline void
+l2fib_lookup_1 (BVT (clib_bihash) * mac_table,
+ l2fib_entry_key_t * cached_key,
+ l2fib_entry_result_t * cached_result,
+ u8 * mac0,
+ u16 bd_index0,
+ l2fib_entry_key_t * key0,
+ u32 * bucket0, l2fib_entry_result_t * result0)
+{
+ /* set up key */
+ key0->raw = l2fib_make_key (mac0, bd_index0);
+ *bucket0 = ~0;
+
+ if (key0->raw == cached_key->raw)
+ {
+ /* Hit in the one-entry cache */
+ result0->raw = cached_result->raw;
+ }
+ else
+ {
+ /* Do a regular mac table lookup */
+ BVT (clib_bihash_kv) kv;
+
+ kv.key = key0->raw;
+ kv.value = ~0ULL;
+ BV (clib_bihash_search_inline) (mac_table, &kv);
+ result0->raw = kv.value;
+
+ /* Update one-entry cache */
+ cached_key->raw = key0->raw;
+ cached_result->raw = result0->raw;
+ }
+}
+
+
+/**
+ * Lookup the entry for mac and bd_index in the mac table for 2 packets.
+ * The lookups for the two packets are interleaved.
+ *
+ * Cached_key and cached_result are used as a one-entry cache.
+ * The function reads and updates them as needed.
+ *
+ * mac0 and bd_index0 are the keys. The entry is written to result0.
+ * If the entry was not found, result0 is set to ~0. The same
+ * holds for mac1/bd_index1/result1.
+ */
+static_always_inline void
+l2fib_lookup_2 (BVT (clib_bihash) * mac_table,
+ l2fib_entry_key_t * cached_key,
+ l2fib_entry_result_t * cached_result,
+ u8 * mac0,
+ u8 * mac1,
+ u16 bd_index0,
+ u16 bd_index1,
+ l2fib_entry_key_t * key0,
+ l2fib_entry_key_t * key1,
+ u32 * bucket0,
+ u32 * bucket1,
+ l2fib_entry_result_t * result0,
+ l2fib_entry_result_t * result1)
+{
+ /* set up key */
+ key0->raw = l2fib_make_key (mac0, bd_index0);
+ key1->raw = l2fib_make_key (mac1, bd_index1);
+
+ if ((key0->raw == cached_key->raw) && (key1->raw == cached_key->raw))
+ {
+ /* Both hit in the one-entry cache */
+ result0->raw = cached_result->raw;
+ result1->raw = cached_result->raw;
+ *bucket0 = ~0;
+ *bucket1 = ~0;
+
+ }
+ else
+ {
+ BVT (clib_bihash_kv) kv0, kv1;
+
+ /*
+ * Do a regular mac table lookup
+ * Interleave lookups for packet 0 and packet 1
+ */
+ kv0.key = key0->raw;
+ kv1.key = key1->raw;
+ kv0.value = ~0ULL;
+ kv1.value = ~0ULL;
+
+ BV (clib_bihash_search_inline) (mac_table, &kv0);
+ BV (clib_bihash_search_inline) (mac_table, &kv1);
+
+ result0->raw = kv0.value;
+ result1->raw = kv1.value;
+
+ /* Update one-entry cache */
+ cached_key->raw = key1->raw;
+ cached_result->raw = result1->raw;
+ }
+}
+
+static_always_inline void
+l2fib_lookup_4 (BVT (clib_bihash) * mac_table,
+ l2fib_entry_key_t * cached_key,
+ l2fib_entry_result_t * cached_result,
+ u8 * mac0,
+ u8 * mac1,
+ u8 * mac2,
+ u8 * mac3,
+ u16 bd_index0,
+ u16 bd_index1,
+ u16 bd_index2,
+ u16 bd_index3,
+ l2fib_entry_key_t * key0,
+ l2fib_entry_key_t * key1,
+ l2fib_entry_key_t * key2,
+ l2fib_entry_key_t * key3,
+ u32 * bucket0,
+ u32 * bucket1,
+ u32 * bucket2,
+ u32 * bucket3,
+ l2fib_entry_result_t * result0,
+ l2fib_entry_result_t * result1,
+ l2fib_entry_result_t * result2,
+ l2fib_entry_result_t * result3)
+{
+ /* set up key */
+ key0->raw = l2fib_make_key (mac0, bd_index0);
+ key1->raw = l2fib_make_key (mac1, bd_index1);
+ key2->raw = l2fib_make_key (mac2, bd_index2);
+ key3->raw = l2fib_make_key (mac3, bd_index3);
+
+ if ((key0->raw == cached_key->raw) && (key1->raw == cached_key->raw) &&
+ (key2->raw == cached_key->raw) && (key3->raw == cached_key->raw))
+ {
+ /* Both hit in the one-entry cache */
+ result0->raw = cached_result->raw;
+ result1->raw = cached_result->raw;
+ result2->raw = cached_result->raw;
+ result3->raw = cached_result->raw;
+ *bucket0 = ~0;
+ *bucket1 = ~0;
+ *bucket2 = ~0;
+ *bucket3 = ~0;
+
+ }
+ else
+ {
+ BVT (clib_bihash_kv) kv0, kv1, kv2, kv3;
+
+ /*
+ * Do a regular mac table lookup
+ * Interleave lookups for packet 0 and packet 1
+ */
+ kv0.key = key0->raw;
+ kv1.key = key1->raw;
+ kv2.key = key2->raw;
+ kv3.key = key3->raw;
+ kv0.value = ~0ULL;
+ kv1.value = ~0ULL;
+ kv2.value = ~0ULL;
+ kv3.value = ~0ULL;
+
+ BV (clib_bihash_search_inline) (mac_table, &kv0);
+ BV (clib_bihash_search_inline) (mac_table, &kv1);
+ BV (clib_bihash_search_inline) (mac_table, &kv2);
+ BV (clib_bihash_search_inline) (mac_table, &kv3);
+
+ result0->raw = kv0.value;
+ result1->raw = kv1.value;
+ result2->raw = kv2.value;
+ result3->raw = kv3.value;
+
+ /* Update one-entry cache */
+ cached_key->raw = key1->raw;
+ cached_result->raw = result1->raw;
+ }
+}
+
+BVT (clib_bihash) * get_mac_table (void);
+ void
+ l2fib_clear_table (uint keep_static);
+ void
+ l2fib_add_entry (u64 mac,
+ u32 bd_index,
+ u32 sw_if_index,
+ u32 static_mac, u32 drop_mac, u32 bvi_mac);
+u32
+l2fib_del_entry (u64 mac, u32 bd_index);
+
+ void
+ l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key,
+ l2fib_entry_result_t ** l2fe_res);
+
+ u8 *format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c
new file mode 100644
index 00000000000..ed9e5ac2258
--- /dev/null
+++ b/src/vnet/l2/l2_flood.c
@@ -0,0 +1,568 @@
+/*
+ * l2_flood.c : layer 2 flooding
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_bvi.h>
+#include <vnet/replication.h>
+#include <vnet/l2/l2_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+
+
+/**
+ * @file
+ * @brief Ethernet Flooding.
+ *
+ * Flooding uses the packet replication infrastructure to send a copy of the
+ * packet to each member interface. Logically the replication infrastructure
+ * expects two graph nodes: a prep node that initiates replication and sends the
+ * packet to the first destination, and a recycle node that is passed the packet
+ * after it has been transmitted.
+ *
+ * To decrease the amount of code, l2 flooding implements both functions in
+ * the same graph node. This node can tell if is it being called as the "prep"
+ * or "recycle" using replication_is_recycled().
+ */
+
+
+typedef struct
+{
+
+ /* Next nodes for each feature */
+ u32 feat_next_node_index[32];
+
+ /* next node index for the L3 input node of each ethertype */
+ next_by_ethertype_t l3_next;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2flood_main_t;
+
+typedef struct
+{
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+ u16 bd_index;
+} l2flood_trace_t;
+
+
+/* packet trace format function */
+static u8 *
+format_l2flood_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2flood_trace_t *t = va_arg (*args, l2flood_trace_t *);
+
+ s = format (s, "l2-flood: sw_if_index %d dst %U src %U bd_index %d",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src, t->bd_index);
+ return s;
+}
+
+l2flood_main_t l2flood_main;
+
+static vlib_node_registration_t l2flood_node;
+
+#define foreach_l2flood_error \
+_(L2FLOOD, "L2 flood packets") \
+_(REPL_FAIL, "L2 replication failures") \
+_(NO_MEMBERS, "L2 replication complete") \
+_(BVI_BAD_MAC, "BVI L3 mac mismatch") \
+_(BVI_ETHERTYPE, "BVI packet with unhandled ethertype")
+
+typedef enum
+{
+#define _(sym,str) L2FLOOD_ERROR_##sym,
+ foreach_l2flood_error
+#undef _
+ L2FLOOD_N_ERROR,
+} l2flood_error_t;
+
+static char *l2flood_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2flood_error
+#undef _
+};
+
+typedef enum
+{
+ L2FLOOD_NEXT_L2_OUTPUT,
+ L2FLOOD_NEXT_DROP,
+ L2FLOOD_N_NEXT,
+} l2flood_next_t;
+
+/*
+ * Perform flooding on one packet
+ *
+ * Due to the way BVI processing can modify the packet, the BVI interface
+ * (if present) must be processed last in the replication. The member vector
+ * is arranged so that the BVI interface is always the first element.
+ * Flooding walks the vector in reverse.
+ *
+ * BVI processing causes the packet to go to L3 processing. This strips the
+ * L2 header, which is fine because the replication infrastructure restores
+ * it. However L3 processing can trigger larger changes to the packet. For
+ * example, an ARP request could be turned into an ARP reply, an ICMP request
+ * could be turned into an ICMP reply. If BVI processing is not performed
+ * last, the modified packet would be replicated to the remaining members.
+ */
+
+static_always_inline void
+l2flood_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ l2flood_main_t * msm,
+ u64 * counter_base,
+ vlib_buffer_t * b0,
+ u32 * sw_if_index0,
+ l2fib_entry_key_t * key0,
+ u32 * bucket0, l2fib_entry_result_t * result0, u32 * next0)
+{
+ u16 bd_index0;
+ l2_bridge_domain_t *bd_config;
+ l2_flood_member_t *members;
+ i32 current_member; /* signed */
+ replication_context_t *ctx;
+ u8 in_shg = vnet_buffer (b0)->l2.shg;
+
+ if (!replication_is_recycled (b0))
+ {
+
+ /* Do flood "prep node" processing */
+
+ /* Get config for the bridge domain interface */
+ bd_index0 = vnet_buffer (b0)->l2.bd_index;
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index0);
+ members = bd_config->members;
+
+ /* Find first member that passes the reflection and SHG checks */
+ current_member = bd_config->flood_count - 1;
+ while ((current_member >= 0) &&
+ ((members[current_member].sw_if_index == *sw_if_index0) ||
+ (in_shg && members[current_member].shg == in_shg)))
+ {
+ current_member--;
+ }
+
+ if (current_member < 0)
+ {
+ /* No members to flood to */
+ *next0 = L2FLOOD_NEXT_DROP;
+ b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS];
+ return;
+ }
+
+ if ((current_member > 0) &&
+ ((current_member > 1) ||
+ ((members[0].sw_if_index != *sw_if_index0) &&
+ (!in_shg || members[0].shg != in_shg))))
+ {
+ /* If more than one member then initiate replication */
+ ctx =
+ replication_prep (vm, b0, l2flood_node.index, 1 /* l2_packet */ );
+ ctx->feature_replicas = (uword) members;
+ ctx->feature_counter = current_member;
+ }
+
+ }
+ else
+ {
+ vnet_buffer_opaque_t *vnet_buff_op;
+
+ /* Do flood "recycle node" processing */
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL))
+ {
+ (void) replication_recycle (vm, b0, 1 /* is_last */ );
+ *next0 = L2FLOOD_NEXT_DROP;
+ b0->error = node->errors[L2FLOOD_ERROR_REPL_FAIL];
+ return;
+ }
+
+ ctx = replication_get_ctx (b0);
+ replication_clear_recycled (b0);
+
+ members = (l2_flood_member_t *) (intptr_t) ctx->feature_replicas;
+ current_member = (i32) ctx->feature_counter - 1;
+
+ /* Need to update input index from saved packet context */
+ vnet_buff_op = (vnet_buffer_opaque_t *) ctx->vnet_buffer;
+ *sw_if_index0 = vnet_buff_op->sw_if_index[VLIB_RX];
+
+ /* Find next member that passes the reflection and SHG check */
+ while ((current_member >= 0) &&
+ ((members[current_member].sw_if_index == *sw_if_index0) ||
+ (in_shg && members[current_member].shg == in_shg)))
+ {
+ current_member--;
+ }
+
+ if (current_member < 0)
+ {
+ /*
+ * No more members to flood to.
+ * Terminate replication and drop packet.
+ */
+
+ replication_recycle (vm, b0, 1 /* is_last */ );
+
+ *next0 = L2FLOOD_NEXT_DROP;
+ /* Ideally we woudn't bump a counter here, just silently complete */
+ b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS];
+ return;
+ }
+
+ /* Restore packet and context and continue replication */
+ ctx->feature_counter = current_member;
+ replication_recycle (vm, b0, ((current_member == 0) || /*is_last */
+ ((current_member == 1) &&
+ ((members[0].sw_if_index ==
+ *sw_if_index0) || (in_shg
+ && members[0].shg ==
+ in_shg)))));
+ }
+
+ /* Forward packet to the current member */
+ if (PREDICT_FALSE (members[current_member].flags & L2_FLOOD_MEMBER_BVI))
+ {
+ /* Do BVI processing */
+ u32 rc;
+ rc = l2_to_bvi (vm,
+ msm->vnet_main,
+ b0,
+ members[current_member].sw_if_index,
+ &msm->l3_next, next0);
+
+ if (PREDICT_FALSE (rc))
+ {
+ if (rc == TO_BVI_ERR_BAD_MAC)
+ {
+ b0->error = node->errors[L2FLOOD_ERROR_BVI_BAD_MAC];
+ *next0 = L2FLOOD_NEXT_DROP;
+ }
+ else if (rc == TO_BVI_ERR_ETHERTYPE)
+ {
+ b0->error = node->errors[L2FLOOD_ERROR_BVI_ETHERTYPE];
+ *next0 = L2FLOOD_NEXT_DROP;
+ }
+ }
+ }
+ else
+ {
+ /* Do normal L2 forwarding */
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ members[current_member].sw_if_index;
+ *next0 = L2FLOOD_NEXT_L2_OUTPUT;
+
+ }
+
+}
+
+
+static uword
+l2flood_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2flood_next_t next_index;
+ l2flood_main_t *msm = &l2flood_main;
+ vlib_node_t *n = vlib_get_node (vm, l2flood_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 6 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ l2fib_entry_key_t key0, key1;
+ l2fib_entry_result_t result0, result1;
+ u32 bucket0, bucket1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3, *p4, *p5;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ /* Prefetch the buffer header for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+
+ /* Prefetch the replication context for the N+1 loop iteration */
+ /* This depends on the buffer header above */
+ replication_prefetch_ctx (p2);
+ replication_prefetch_ctx (p3);
+
+ /* Prefetch the packet for the N+1 loop iteration */
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ /* process 2 pkts */
+ em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 2;
+
+ l2flood_process (vm, node, msm,
+ &em->counters[node_counter_base_index], b0,
+ &sw_if_index0, &key0, &bucket0, &result0, &next0);
+
+ l2flood_process (vm, node, msm,
+ &em->counters[node_counter_base_index], b1,
+ &sw_if_index1, &key1, &bucket1, &result1, &next1);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2flood_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer (b0)->l2.bd_index;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2flood_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ ethernet_header_t *h1 = vlib_buffer_get_current (b1);
+ t->sw_if_index = sw_if_index1;
+ t->bd_index = vnet_buffer (b1)->l2.bd_index;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ l2fib_entry_key_t key0;
+ l2fib_entry_result_t result0;
+ u32 bucket0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ /* process 1 pkt */
+ em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 1;
+
+ l2flood_process (vm, node, msm,
+ &em->counters[node_counter_base_index], b0,
+ &sw_if_index0, &key0, &bucket0, &result0, &next0);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2flood_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer (b0)->l2.bd_index;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2flood_node,static) = {
+ .function = l2flood_node_fn,
+ .name = "l2-flood",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2flood_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2flood_error_strings),
+ .error_strings = l2flood_error_strings,
+
+ .n_next_nodes = L2FLOOD_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2FLOOD_NEXT_L2_OUTPUT] = "l2-output",
+ [L2FLOOD_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2flood_node, l2flood_node_fn)
+ clib_error_t *l2flood_init (vlib_main_t * vm)
+{
+ l2flood_main_t *mp = &l2flood_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2flood_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ mp->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2flood_init);
+
+
+
+/** Add the L3 input node for this ethertype to the next nodes structure. */
+void
+l2flood_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index)
+{
+ l2flood_main_t *mp = &l2flood_main;
+ u32 next_index;
+
+ next_index = vlib_node_add_next (vm, l2flood_node.index, node_index);
+
+ next_by_ethertype_register (&mp->l3_next, type, next_index);
+}
+
+
+/**
+ * Set subinterface flood enable/disable.
+ * The CLI format is:
+ * set interface l2 flood <interface> [disable]
+ */
+static clib_error_t *
+int_flood (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the interface flag */
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_FLOOD, enable);
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 flooding can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage interfaces. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable flooding:
+ * @cliexcmd{set interface l2 flood GigabitEthernet0/8/0}
+ * Example of how to disable flooding:
+ * @cliexcmd{set interface l2 flood GigabitEthernet0/8/0 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_flood_cli, static) = {
+ .path = "set interface l2 flood",
+ .short_help = "set interface l2 flood <interface> [disable]",
+ .function = int_flood,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_flood.h b/src/vnet/l2/l2_flood.h
new file mode 100644
index 00000000000..acd7c905aaf
--- /dev/null
+++ b/src/vnet/l2/l2_flood.h
@@ -0,0 +1,35 @@
+/*
+ * l2_flood.h : layer 2 flooding
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2flood_h
+#define included_l2flood_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+void
+l2flood_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index);
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_fwd.c b/src/vnet/l2/l2_fwd.c
new file mode 100644
index 00000000000..710a9d9e8c3
--- /dev/null
+++ b/src/vnet/l2/l2_fwd.c
@@ -0,0 +1,544 @@
+/*
+ * l2_fwd.c : layer 2 forwarding using l2fib
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_bvi.h>
+#include <vnet/l2/l2_fwd.h>
+#include <vnet/l2/l2_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/sparse_vec.h>
+
+
+/**
+ * @file
+ * @brief Ethernet Forwarding.
+ *
+ * Code in this file handles forwarding Layer 2 packets. This file calls
+ * the FIB lookup, packet learning and the packet flooding as necessary.
+ * Packet is then sent to the next graph node.
+ */
+
+typedef struct
+{
+
+ /* Hash table */
+ BVT (clib_bihash) * mac_table;
+
+ /* next node index for the L3 input node of each ethertype */
+ next_by_ethertype_t l3_next;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2fwd_main_t;
+
+typedef struct
+{
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+ u16 bd_index;
+} l2fwd_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2fwd_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2fwd_trace_t *t = va_arg (*args, l2fwd_trace_t *);
+
+ s = format (s, "l2-fwd: sw_if_index %d dst %U src %U bd_index %d",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src, t->bd_index);
+ return s;
+}
+
+l2fwd_main_t l2fwd_main;
+
+static vlib_node_registration_t l2fwd_node;
+
+#define foreach_l2fwd_error \
+_(L2FWD, "L2 forward packets") \
+_(FLOOD, "L2 forward misses") \
+_(HIT, "L2 forward hits") \
+_(BVI_BAD_MAC, "BVI L3 MAC mismatch") \
+_(BVI_ETHERTYPE, "BVI packet with unhandled ethertype") \
+_(FILTER_DROP, "Filter Mac Drop") \
+_(REFLECT_DROP, "Reflection Drop")
+
+typedef enum
+{
+#define _(sym,str) L2FWD_ERROR_##sym,
+ foreach_l2fwd_error
+#undef _
+ L2FWD_N_ERROR,
+} l2fwd_error_t;
+
+static char *l2fwd_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2fwd_error
+#undef _
+};
+
+typedef enum
+{
+ L2FWD_NEXT_L2_OUTPUT,
+ L2FWD_NEXT_FLOOD,
+ L2FWD_NEXT_DROP,
+ L2FWD_N_NEXT,
+} l2fwd_next_t;
+
+/** Forward one packet based on the mac table lookup result. */
+
+static_always_inline void
+l2fwd_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ l2fwd_main_t * msm,
+ vlib_error_main_t * em,
+ vlib_buffer_t * b0,
+ u32 sw_if_index0, l2fib_entry_result_t * result0, u32 * next0)
+{
+ if (PREDICT_FALSE (result0->raw == ~0))
+ {
+ /*
+ * lookup miss, so flood
+ * TODO:replicate packet to each intf in bridge-domain
+ * For now just drop
+ */
+ if (vnet_buffer (b0)->l2.feature_bitmap & L2INPUT_FEAT_UU_FLOOD)
+ {
+ *next0 = L2FWD_NEXT_FLOOD;
+ }
+ else
+ {
+ /* Flooding is disabled */
+ b0->error = node->errors[L2FWD_ERROR_FLOOD];
+ *next0 = L2FWD_NEXT_DROP;
+ }
+
+ }
+ else
+ {
+
+ /* lookup hit, forward packet */
+#ifdef COUNTERS
+ em->counters[node_counter_base_index + L2FWD_ERROR_HIT] += 1;
+#endif
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = result0->fields.sw_if_index;
+ *next0 = L2FWD_NEXT_L2_OUTPUT;
+
+ /* perform reflection check */
+ if (PREDICT_FALSE (sw_if_index0 == result0->fields.sw_if_index))
+ {
+ b0->error = node->errors[L2FWD_ERROR_REFLECT_DROP];
+ *next0 = L2FWD_NEXT_DROP;
+
+ /* perform filter check */
+ }
+ else if (PREDICT_FALSE (result0->fields.filter))
+ {
+ b0->error = node->errors[L2FWD_ERROR_FILTER_DROP];
+ *next0 = L2FWD_NEXT_DROP;
+
+ /* perform BVI check */
+ }
+ else if (PREDICT_FALSE (result0->fields.bvi))
+ {
+ u32 rc;
+ rc = l2_to_bvi (vm,
+ msm->vnet_main,
+ b0,
+ vnet_buffer (b0)->sw_if_index[VLIB_TX],
+ &msm->l3_next, next0);
+
+ if (PREDICT_FALSE (rc))
+ {
+ if (rc == TO_BVI_ERR_BAD_MAC)
+ {
+ b0->error = node->errors[L2FWD_ERROR_BVI_BAD_MAC];
+ *next0 = L2FWD_NEXT_DROP;
+ }
+ else if (rc == TO_BVI_ERR_ETHERTYPE)
+ {
+ b0->error = node->errors[L2FWD_ERROR_BVI_ETHERTYPE];
+ *next0 = L2FWD_NEXT_DROP;
+ }
+ }
+ }
+ }
+}
+
+
+static uword
+l2fwd_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2fwd_next_t next_index;
+ l2fwd_main_t *msm = &l2fwd_main;
+ vlib_node_t *n = vlib_get_node (vm, l2fwd_node.index);
+ CLIB_UNUSED (u32 node_counter_base_index) = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+ l2fib_entry_key_t cached_key;
+ l2fib_entry_result_t cached_result;
+
+ /* Clear the one-entry cache in case mac table was updated */
+ cached_key.raw = ~0;
+ cached_result.raw = ~0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
+ ethernet_header_t *h0, *h1, *h2, *h3;
+ l2fib_entry_key_t key0, key1, key2, key3;
+ l2fib_entry_result_t result0, result1, result2, result3;
+ u32 bucket0, bucket1, bucket2, bucket3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
+ sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+ h2 = vlib_buffer_get_current (b2);
+ h3 = vlib_buffer_get_current (b3);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2fwd_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer (b0)->l2.bd_index;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2fwd_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->bd_index = vnet_buffer (b1)->l2.bd_index;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ }
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2fwd_trace_t *t =
+ vlib_add_trace (vm, node, b2, sizeof (*t));
+ t->sw_if_index = sw_if_index2;
+ t->bd_index = vnet_buffer (b2)->l2.bd_index;
+ clib_memcpy (t->src, h2->src_address, 6);
+ clib_memcpy (t->dst, h2->dst_address, 6);
+ }
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2fwd_trace_t *t =
+ vlib_add_trace (vm, node, b3, sizeof (*t));
+ t->sw_if_index = sw_if_index3;
+ t->bd_index = vnet_buffer (b3)->l2.bd_index;
+ clib_memcpy (t->src, h3->src_address, 6);
+ clib_memcpy (t->dst, h3->dst_address, 6);
+ }
+ }
+
+ /* process 2 pkts */
+#ifdef COUNTERS
+ em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 4;
+#endif
+ /* *INDENT-OFF* */
+ l2fib_lookup_4 (msm->mac_table, &cached_key, &cached_result,
+ h0->dst_address, h1->dst_address,
+ h2->dst_address, h3->dst_address,
+ vnet_buffer (b0)->l2.bd_index,
+ vnet_buffer (b1)->l2.bd_index,
+ vnet_buffer (b2)->l2.bd_index,
+ vnet_buffer (b3)->l2.bd_index,
+ &key0, /* not used */
+ &key1, /* not used */
+ &key2, /* not used */
+ &key3, /* not used */
+ &bucket0, /* not used */
+ &bucket1, /* not used */
+ &bucket2, /* not used */
+ &bucket3, /* not used */
+ &result0,
+ &result1,
+ &result2,
+ &result3);
+ /* *INDENT-ON* */
+ l2fwd_process (vm, node, msm, em, b0, sw_if_index0, &result0,
+ &next0);
+ l2fwd_process (vm, node, msm, em, b1, sw_if_index1, &result1,
+ &next1);
+ l2fwd_process (vm, node, msm, em, b2, sw_if_index2, &result2,
+ &next2);
+ l2fwd_process (vm, node, msm, em, b3, sw_if_index3, &result3,
+ &next3);
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t *h0;
+ l2fib_entry_key_t key0;
+ l2fib_entry_result_t result0;
+ u32 bucket0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2fwd_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer (b0)->l2.bd_index;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+
+ /* process 1 pkt */
+#ifdef COUNTERS
+ em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 1;
+#endif
+ l2fib_lookup_1 (msm->mac_table, &cached_key, &cached_result, h0->dst_address, vnet_buffer (b0)->l2.bd_index, &key0, /* not used */
+ &bucket0, /* not used */
+ &result0);
+ l2fwd_process (vm, node, msm, em, b0, sw_if_index0, &result0,
+ &next0);
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2fwd_node,static) = {
+ .function = l2fwd_node_fn,
+ .name = "l2-fwd",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2fwd_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2fwd_error_strings),
+ .error_strings = l2fwd_error_strings,
+
+ .n_next_nodes = L2FWD_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2FWD_NEXT_L2_OUTPUT] = "l2-output",
+ [L2FWD_NEXT_FLOOD] = "l2-flood",
+ [L2FWD_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2fwd_node, l2fwd_node_fn)
+ clib_error_t *l2fwd_init (vlib_main_t * vm)
+{
+ l2fwd_main_t *mp = &l2fwd_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* init the hash table ptr */
+ mp->mac_table = get_mac_table ();
+
+ /* Initialize the next nodes for each ethertype */
+ next_by_ethertype_init (&mp->l3_next);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2fwd_init);
+
+
+/** Add the L3 input node for this ethertype to the next nodes structure. */
+void
+l2fwd_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index)
+{
+ l2fwd_main_t *mp = &l2fwd_main;
+ u32 next_index;
+
+ next_index = vlib_node_add_next (vm, l2fwd_node.index, node_index);
+
+ next_by_ethertype_register (&mp->l3_next, type, next_index);
+}
+
+
+/**
+ * Set subinterface forward enable/disable.
+ * The CLI format is:
+ * set interface l2 forward <interface> [disable]
+ */
+static clib_error_t *
+int_fwd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the interface flag */
+ if (l2input_intf_config (sw_if_index)->xconnect)
+ {
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_XCONNECT, enable);
+ }
+ else
+ {
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_FWD, enable);
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 unicast forwarding can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage interfaces. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable fowarding:
+ * @cliexcmd{set interface l2 forward GigabitEthernet0/8/0}
+ * Example of how to disable fowarding:
+ * @cliexcmd{set interface l2 forward GigabitEthernet0/8/0 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_fwd_cli, static) = {
+ .path = "set interface l2 forward",
+ .short_help = "set interface l2 forward <interface> [disable]",
+ .function = int_fwd,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_fwd.h b/src/vnet/l2/l2_fwd.h
new file mode 100644
index 00000000000..3968732dbc2
--- /dev/null
+++ b/src/vnet/l2/l2_fwd.h
@@ -0,0 +1,36 @@
+/*
+ * l2_fwd.c : layer 2 forwarding using l2fib
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2fwd_h
+#define included_l2fwd_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+
+void
+l2fwd_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index);
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c
new file mode 100644
index 00000000000..a104ec9eebb
--- /dev/null
+++ b/src/vnet/l2/l2_input.c
@@ -0,0 +1,1116 @@
+/*
+ * l2_input.c : layer 2 input packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_bvi.h>
+#include <vnet/l2/l2_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+/**
+ * @file
+ * @brief Interface Input Mode (Layer 2 Cross-Connect or Bridge / Layer 3).
+ *
+ * This file contains the CLI Commands that modify the input mode of an
+ * interface. For interfaces in a Layer 2 cross-connect, all packets
+ * received on one interface will be transmitted to the other. For
+ * interfaces in a bridge-domain, packets will be forwarded to other
+ * interfaces in the same bridge-domain based on destination mac address.
+ * For interfaces in Layer 3 mode, the packets will be routed.
+ */
+
+/* Feature graph node names */
+static char *l2input_feat_names[] = {
+#define _(sym,name) name,
+ foreach_l2input_feat
+#undef _
+};
+
+char **
+l2input_get_feat_names (void)
+{
+ return l2input_feat_names;
+}
+
+
+typedef struct
+{
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u32 next_index;
+ u32 sw_if_index;
+} l2input_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2input_trace_t *t = va_arg (*args, l2input_trace_t *);
+
+ s = format (s, "l2-input: sw_if_index %d dst %U src %U",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src);
+ return s;
+}
+
+l2input_main_t l2input_main;
+
+#define foreach_l2input_error \
+_(L2INPUT, "L2 input packets") \
+_(DROP, "L2 input drops")
+
+typedef enum
+{
+#define _(sym,str) L2INPUT_ERROR_##sym,
+ foreach_l2input_error
+#undef _
+ L2INPUT_N_ERROR,
+} l2input_error_t;
+
+static char *l2input_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2input_error
+#undef _
+};
+
+typedef enum
+{ /* */
+ L2INPUT_NEXT_LEARN,
+ L2INPUT_NEXT_FWD,
+ L2INPUT_NEXT_DROP,
+ L2INPUT_N_NEXT,
+} l2input_next_t;
+
+
+static_always_inline void
+classify_and_dispatch (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 cpu_index,
+ l2input_main_t * msm, vlib_buffer_t * b0, u32 * next0)
+{
+ /*
+ * Load L2 input feature struct
+ * Load bridge domain struct
+ * Parse ethernet header to determine unicast/mcast/broadcast
+ * take L2 input stat
+ * classify packet as IP/UDP/TCP, control, other
+ * mask feature bitmap
+ * go to first node in bitmap
+ * Later: optimize VTM
+ *
+ * For L2XC,
+ * set tx sw-if-handle
+ */
+
+ u16 ethertype;
+ u8 protocol;
+ l2_input_config_t *config;
+ l2_bridge_domain_t *bd_config;
+ u16 bd_index0;
+ u32 feature_bitmap;
+ u32 feat_mask;
+ ethernet_header_t *h0;
+ u8 *l3h0;
+ u32 sw_if_index0;
+
+#define get_u16(addr) ( *((u16 *)(addr)) )
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+ l3h0 = (u8 *) h0 + vnet_buffer (b0)->l2.l2_len;
+
+ ethertype = clib_net_to_host_u16 (get_u16 (l3h0 - 2));
+ feat_mask = ~0;
+
+ /* Get config for the input interface */
+ config = vec_elt_at_index (msm->configs, sw_if_index0);
+
+ /* Save split horizon group */
+ vnet_buffer (b0)->l2.shg = config->shg;
+
+ /* determine layer2 kind for stat and mask */
+ if (PREDICT_FALSE (ethernet_address_cast (h0->dst_address)))
+ {
+ protocol = ((ip6_header_t *) l3h0)->protocol;
+
+ /* Disable bridge forwarding (flooding will execute instead if not xconnect) */
+ feat_mask &= ~(L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD);
+
+ /* Disable ARP-term for non-ARP and non-ICMP6 packet */
+ if (ethertype != ETHERNET_TYPE_ARP &&
+ (ethertype != ETHERNET_TYPE_IP6 || protocol != IP_PROTOCOL_ICMP6))
+ feat_mask &= ~(L2INPUT_FEAT_ARP_TERM);
+ }
+ else
+ {
+ /*
+ * Check for from-BVI processing - set SHG of unicast packets from BVI
+ * to 0 so it is not dropped for VXLAN tunnels or other ports with the
+ * same SHG as that of the BVI.
+ */
+ if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] ==
+ L2INPUT_BVI))
+ vnet_buffer (b0)->l2.shg = 0;
+ }
+
+
+ if (config->xconnect)
+ {
+ /* Set the output interface */
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = config->output_sw_if_index;
+ }
+ else
+ {
+ /* Do bridge-domain processing */
+ bd_index0 = config->bd_index;
+ /* save BD ID for next feature graph nodes */
+ vnet_buffer (b0)->l2.bd_index = bd_index0;
+
+ /* Get config for the bridge domain interface */
+ bd_config = vec_elt_at_index (msm->bd_configs, bd_index0);
+
+ /*
+ * Process bridge domain feature enables.
+ * To perform learning/flooding/forwarding, the corresponding bit
+ * must be enabled in both the input interface config and in the
+ * bridge domain config. In the bd_bitmap, bits for features other
+ * than learning/flooding/forwarding should always be set.
+ */
+ feat_mask = feat_mask & bd_config->feature_bitmap;
+ }
+
+ /* mask out features from bitmap using packet type and bd config */
+ feature_bitmap = config->feature_bitmap & feat_mask;
+
+ /* save for next feature graph nodes */
+ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap;
+
+ /* Determine the next node */
+ *next0 = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
+ feature_bitmap);
+}
+
+
+static uword
+l2input_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2input_next_t next_index;
+ l2input_main_t *msm = &l2input_main;
+ u32 cpu_index = os_get_cpu_number ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+
+ /*
+ * Don't bother prefetching the bridge-domain config (which
+ * depends on the input config above). Only a small number of
+ * bridge domains are expected. Plus the structure is small
+ * and several fit in a cache line.
+ */
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
+ sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ l2input_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *h1 = vlib_buffer_get_current (b1);
+ l2input_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ }
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *h2 = vlib_buffer_get_current (b2);
+ l2input_trace_t *t =
+ vlib_add_trace (vm, node, b2, sizeof (*t));
+ t->sw_if_index = sw_if_index2;
+ clib_memcpy (t->src, h2->src_address, 6);
+ clib_memcpy (t->dst, h2->dst_address, 6);
+ }
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *h3 = vlib_buffer_get_current (b3);
+ l2input_trace_t *t =
+ vlib_add_trace (vm, node, b3, sizeof (*t));
+ t->sw_if_index = sw_if_index3;
+ clib_memcpy (t->src, h3->src_address, 6);
+ clib_memcpy (t->dst, h3->dst_address, 6);
+ }
+ }
+
+ vlib_node_increment_counter (vm, l2input_node.index,
+ L2INPUT_ERROR_L2INPUT, 4);
+
+ classify_and_dispatch (vm, node, cpu_index, msm, b0, &next0);
+ classify_and_dispatch (vm, node, cpu_index, msm, b1, &next1);
+ classify_and_dispatch (vm, node, cpu_index, msm, b2, &next2);
+ classify_and_dispatch (vm, node, cpu_index, msm, b3, &next3);
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ l2input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+
+ vlib_node_increment_counter (vm, l2input_node.index,
+ L2INPUT_ERROR_L2INPUT, 1);
+
+ classify_and_dispatch (vm, node, cpu_index, msm, b0, &next0);
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2input_node) = {
+ .function = l2input_node_fn,
+ .name = "l2-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2input_trace,
+ .format_buffer = format_ethernet_header_with_length,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2input_error_strings),
+ .error_strings = l2input_error_strings,
+
+ .n_next_nodes = L2INPUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2INPUT_NEXT_LEARN] = "l2-learn",
+ [L2INPUT_NEXT_FWD] = "l2-fwd",
+ [L2INPUT_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2input_node, l2input_node_fn)
+ clib_error_t *l2input_init (vlib_main_t * vm)
+{
+ l2input_main_t *mp = &l2input_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Get packets RX'd from L2 interfaces */
+ ethernet_register_l2_input (vm, l2input_node.index);
+
+ /* Create the config vector */
+ vec_validate (mp->configs, 100);
+ /* create 100 sw interface entries and zero them */
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2input_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ mp->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2input_init);
+
+
+/** Get a pointer to the config for the given interface. */
+l2_input_config_t *
+l2input_intf_config (u32 sw_if_index)
+{
+ l2input_main_t *mp = &l2input_main;
+
+ vec_validate (mp->configs, sw_if_index);
+ return vec_elt_at_index (mp->configs, sw_if_index);
+}
+
+/** Enable (or disable) the feature in the bitmap for the given interface. */
+u32
+l2input_intf_bitmap_enable (u32 sw_if_index, u32 feature_bitmap, u32 enable)
+{
+ l2input_main_t *mp = &l2input_main;
+ l2_input_config_t *config;
+
+ vec_validate (mp->configs, sw_if_index);
+ config = vec_elt_at_index (mp->configs, sw_if_index);
+
+ if (enable)
+ {
+ config->feature_bitmap |= feature_bitmap;
+ }
+ else
+ {
+ config->feature_bitmap &= ~feature_bitmap;
+ }
+
+ return config->feature_bitmap;
+}
+
+u32
+l2input_set_bridge_features (u32 bd_index, u32 feat_mask, u32 feat_value)
+{
+ l2_bridge_domain_t *bd_config;
+ vec_validate (l2input_main.bd_configs, bd_index);
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+ bd_validate (bd_config);
+ bd_config->feature_bitmap =
+ (bd_config->feature_bitmap & ~feat_mask) | feat_value;
+ return bd_config->feature_bitmap;
+}
+
+/**
+ * Set the subinterface to run in l2 or l3 mode.
+ * For L3 mode, just the sw_if_index is specified.
+ * For bridged mode, the bd id and bvi flag are also specified.
+ * For xconnect mode, the peer sw_if_index is also specified.
+ * Return 0 if ok, or non-0 if there was an error.
+ */
+
+u32
+set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, u32 mode, u32 sw_if_index, u32 bd_index, /* for bridged interface */
+ u32 bvi, /* the bridged interface is the BVI */
+ u32 shg, /* the bridged interface's split horizon group */
+ u32 xc_sw_if_index) /* peer interface for xconnect */
+{
+ l2input_main_t *mp = &l2input_main;
+ l2output_main_t *l2om = &l2output_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hi;
+ l2_output_config_t *out_config;
+ l2_input_config_t *config;
+ l2_bridge_domain_t *bd_config;
+ l2_flood_member_t member;
+ u64 mac;
+ i32 l2_if_adjust = 0;
+ u32 slot;
+
+ hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+ config = l2input_intf_config (sw_if_index);
+
+ if (config->bridge)
+ {
+ /* Interface is already in bridge mode. Undo the existing config. */
+ bd_config = vec_elt_at_index (mp->bd_configs, config->bd_index);
+
+ /* remove interface from flood vector */
+ bd_remove_member (bd_config, sw_if_index);
+
+ /* undo any BVI-related config */
+ if (bd_config->bvi_sw_if_index == sw_if_index)
+ {
+ bd_config->bvi_sw_if_index = ~0;
+ config->bvi = 0;
+
+ /* delete the l2fib entry for the bvi interface */
+ mac = *((u64 *) hi->hw_address);
+ l2fib_del_entry (mac, config->bd_index);
+
+ /* Make loop output node send packet back to ethernet-input node */
+ slot =
+ vlib_node_add_named_next_with_slot (vm, hi->tx_node_index,
+ "ethernet-input",
+ VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+ ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+ }
+ l2_if_adjust--;
+ }
+ else if (config->xconnect)
+ {
+ l2_if_adjust--;
+ }
+
+ /*
+ * Directs the l2 output path to work out the interface
+ * output next-arc itself. Needed when recycling a sw_if_index.
+ */
+ vec_validate_init_empty (l2om->next_nodes.output_node_index_vec,
+ sw_if_index, ~0);
+ l2om->next_nodes.output_node_index_vec[sw_if_index] = ~0;
+
+ /* Initialize the l2-input configuration for the interface */
+ if (mode == MODE_L3)
+ {
+ /* Set L2 config to BD index 0 so that if any packet accidentally
+ * came in on L2 path, it will be dropped in BD 0 */
+ config->xconnect = 0;
+ config->bridge = 0;
+ config->shg = 0;
+ config->bd_index = 0;
+ config->feature_bitmap = L2INPUT_FEAT_DROP;
+
+ /* Make sure any L2-output packet to this interface now in L3 mode is
+ * dropped. This may happen if L2 FIB MAC entry is stale */
+ l2om->next_nodes.output_node_index_vec[sw_if_index] =
+ L2OUTPUT_NEXT_BAD_INTF;
+ }
+ else if (mode == MODE_L2_CLASSIFY)
+ {
+ config->xconnect = 1;
+ config->bridge = 0;
+ config->output_sw_if_index = xc_sw_if_index;
+
+ /* Make sure last-chance drop is configured */
+ config->feature_bitmap |=
+ L2INPUT_FEAT_DROP | L2INPUT_FEAT_INPUT_CLASSIFY;
+
+ /* Make sure bridging features are disabled */
+ config->feature_bitmap &=
+ ~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD);
+ shg = 0; /* not used in xconnect */
+
+ /* Insure all packets go to ethernet-input */
+ ethernet_set_rx_redirect (vnet_main, hi, 1);
+ }
+ else
+ {
+
+ if (mode == MODE_L2_BRIDGE)
+ {
+ /*
+ * Remove a check that the interface must be an Ethernet.
+ * Specifically so we can bridge to L3 tunnel interfaces.
+ * Here's the check:
+ * if (hi->hw_class_index != ethernet_hw_interface_class.index)
+ *
+ */
+ if (!hi)
+ return MODE_ERROR_ETH; /* non-ethernet */
+
+ config->xconnect = 0;
+ config->bridge = 1;
+ config->bd_index = bd_index;
+
+ /*
+ * Enable forwarding, flooding, learning and ARP termination by default
+ * (note that ARP term is disabled on BD feature bitmap by default)
+ */
+ config->feature_bitmap |= L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD |
+ L2INPUT_FEAT_FLOOD | L2INPUT_FEAT_LEARN | L2INPUT_FEAT_ARP_TERM;
+
+ /* Make sure last-chance drop is configured */
+ config->feature_bitmap |= L2INPUT_FEAT_DROP;
+
+ /* Make sure xconnect is disabled */
+ config->feature_bitmap &= ~L2INPUT_FEAT_XCONNECT;
+
+ /* Set up bridge domain */
+ vec_validate (mp->bd_configs, bd_index);
+ bd_config = vec_elt_at_index (mp->bd_configs, bd_index);
+ bd_validate (bd_config);
+
+ /* TODO: think: add l2fib entry even for non-bvi interface? */
+
+ /* Do BVI interface initializations */
+ if (bvi)
+ {
+ /* ensure BD has no bvi interface (or replace that one with this??) */
+ if (bd_config->bvi_sw_if_index != ~0)
+ {
+ return MODE_ERROR_BVI_DEF; /* bd already has a bvi interface */
+ }
+ bd_config->bvi_sw_if_index = sw_if_index;
+ config->bvi = 1;
+
+ /* create the l2fib entry for the bvi interface */
+ mac = *((u64 *) hi->hw_address);
+ l2fib_add_entry (mac, bd_index, sw_if_index, 1, 0, 1); /* static + bvi */
+
+ /* Disable learning by default. no use since l2fib entry is static. */
+ config->feature_bitmap &= ~L2INPUT_FEAT_LEARN;
+
+ /* Make loop output node send packet to l2-input node */
+ slot =
+ vlib_node_add_named_next_with_slot (vm, hi->tx_node_index,
+ "l2-input",
+ VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+ ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+ }
+
+ /* Add interface to bridge-domain flood vector */
+ member.sw_if_index = sw_if_index;
+ member.flags = bvi ? L2_FLOOD_MEMBER_BVI : L2_FLOOD_MEMBER_NORMAL;
+ member.shg = shg;
+ bd_add_member (bd_config, &member);
+
+ }
+ else
+ {
+ config->xconnect = 1;
+ config->bridge = 0;
+ config->output_sw_if_index = xc_sw_if_index;
+
+ /* Make sure last-chance drop is configured */
+ config->feature_bitmap |= L2INPUT_FEAT_DROP;
+
+ /* Make sure bridging features are disabled */
+ config->feature_bitmap &=
+ ~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD);
+
+ config->feature_bitmap |= L2INPUT_FEAT_XCONNECT;
+ shg = 0; /* not used in xconnect */
+ }
+
+ /* set up split-horizon group */
+ config->shg = shg;
+ out_config = l2output_intf_config (sw_if_index);
+ out_config->shg = shg;
+
+ /*
+ * Test: remove this when non-IP features can be configured.
+ * Enable a non-IP feature to test IP feature masking
+ * config->feature_bitmap |= L2INPUT_FEAT_CTRL_PKT;
+ */
+
+ l2_if_adjust++;
+ }
+
+ /* Adjust count of L2 interfaces */
+ hi->l2_if_count += l2_if_adjust;
+
+ if (hi->hw_class_index == ethernet_hw_interface_class.index)
+ {
+ if ((hi->l2_if_count == 1) && (l2_if_adjust == 1))
+ {
+ /* Just added first L2 interface on this port */
+
+ /* Set promiscuous mode on the l2 interface */
+ ethernet_set_flags (vnet_main, hi->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
+
+ /* ensure all packets go to ethernet-input */
+ ethernet_set_rx_redirect (vnet_main, hi, 1);
+
+ }
+ else if ((hi->l2_if_count == 0) && (l2_if_adjust == -1))
+ {
+ /* Just removed only L2 subinterface on this port */
+
+ /* Disable promiscuous mode on the l2 interface */
+ ethernet_set_flags (vnet_main, hi->hw_if_index, 0);
+
+ /* Allow ip packets to go directly to ip4-input etc */
+ ethernet_set_rx_redirect (vnet_main, hi, 0);
+ }
+ }
+
+ /* Set up the L2/L3 flag in the interface parsing tables */
+ ethernet_sw_interface_set_l2_mode (vnm, sw_if_index, (mode != MODE_L3));
+
+ return 0;
+}
+
+/**
+ * Set subinterface in bridging mode with a bridge-domain ID.
+ * The CLI format is:
+ * set interface l2 bridge <interface> <bd> [bvi] [split-horizon-group]
+ */
+static clib_error_t *
+int_l2_bridge (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 bd_index, bd_id;
+ u32 sw_if_index;
+ u32 bvi;
+ u32 rc;
+ u32 shg;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expected bridge domain ID `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ bd_index = bd_find_or_add_bd_index (&bd_main, bd_id);
+
+ /* optional bvi */
+ bvi = unformat (input, "bvi");
+
+ /* optional split horizon group */
+ shg = 0;
+ (void) unformat (input, "%d", &shg);
+
+ /* set the interface mode */
+ if ((rc =
+ set_int_l2_mode (vm, vnm, MODE_L2_BRIDGE, sw_if_index, bd_index, bvi,
+ shg, 0)))
+ {
+ if (rc == MODE_ERROR_ETH)
+ {
+ error = clib_error_return (0, "bridged interface must be ethernet",
+ format_unformat_error, input);
+ }
+ else if (rc == MODE_ERROR_BVI_DEF)
+ {
+ error =
+ clib_error_return (0, "bridge-domain already has a bvi interface",
+ format_unformat_error, input);
+ }
+ else
+ {
+ error = clib_error_return (0, "invalid configuration for interface",
+ format_unformat_error, input);
+ }
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Use this command put an interface into Layer 2 bridge domain. If a
+ * bridge-domain with the provided bridge-domain-id does not exist, it
+ * will be created. Interfaces in a bridge-domain forward packets to
+ * other interfaces in the same bridge-domain based on destination mac
+ * address. To remove an interface from a the Layer 2 bridge domain,
+ * put the interface in a different mode, for example Layer 3 mode.
+ *
+ * Optionally, an interface can be added to a Layer 2 bridge-domain as
+ * a Bridged Virtual Interface (bvi). Only one interface in a Layer 2
+ * bridge-domain can be a bvi.
+ *
+ * Optionally, a split-horizon group can also be specified. This defaults
+ * to 0 if not specified.
+ *
+ * @cliexpar
+ * Example of how to configure a Layer 2 bridge-domain with three
+ * interfaces (where 200 is the bridge-domain-id):
+ * @cliexcmd{set interface l2 bridge GigabitEthernet0/8/0.200 200}
+ * This interface is added a BVI interface:
+ * @cliexcmd{set interface l2 bridge GigabitEthernet0/9/0.200 200 bvi}
+ * This interface also has a split-horizon group of 1 specified:
+ * @cliexcmd{set interface l2 bridge GigabitEthernet0/a/0.200 200 1}
+ * Example of how to remove an interface from a Layer2 bridge-domain:
+ * @cliexcmd{set interface l3 GigabitEthernet0/a/0.200}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_bridge_cli, static) = {
+ .path = "set interface l2 bridge",
+ .short_help = "set interface l2 bridge <interface> <bridge-domain-id> [bvi] [shg]",
+ .function = int_l2_bridge,
+};
+/* *INDENT-ON* */
+
+/**
+ * Set subinterface in xconnect mode with another interface.
+ * The CLI format is:
+ * set interface l2 xconnect <interface> <peer interface>
+ */
+static clib_error_t *
+int_l2_xc (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 xc_sw_if_index;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat_user
+ (input, unformat_vnet_sw_interface, vnm, &xc_sw_if_index))
+ {
+ error = clib_error_return (0, "unknown peer interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ /* set the interface mode */
+ if (set_int_l2_mode
+ (vm, vnm, MODE_L2_XC, sw_if_index, 0, 0, 0, xc_sw_if_index))
+ {
+ error = clib_error_return (0, "invalid configuration for interface",
+ format_unformat_error, input);
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Use this command put an interface into Layer 2 cross-connect mode.
+ * Both interfaces must be in this mode for bi-directioal traffic. All
+ * packets received on one interface will be transmitted to the other.
+ * To remove the Layer 2 cross-connect, put the interface in a different
+ * mode, for example Layer 3 mode.
+ *
+ * @cliexpar
+ * Example of how to configure a Layer2 cross-connect between two interfaces:
+ * @cliexcmd{set interface l2 xconnect GigabitEthernet0/8/0.300 GigabitEthernet0/9/0.300}
+ * @cliexcmd{set interface l2 xconnect GigabitEthernet0/9/0.300 GigabitEthernet0/8/0.300}
+ * Example of how to remove a Layer2 cross-connect:
+ * @cliexcmd{set interface l3 GigabitEthernet0/8/0.300}
+ * @cliexcmd{set interface l3 GigabitEthernet0/9/0.300}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_xc_cli, static) = {
+ .path = "set interface l2 xconnect",
+ .short_help = "set interface l2 xconnect <interface> <peer interface>",
+ .function = int_l2_xc,
+};
+/* *INDENT-ON* */
+
+/**
+ * Set subinterface in L3 mode.
+ * The CLI format is:
+ * set interface l3 <interface>
+ */
+static clib_error_t *
+int_l3 (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ /* set the interface mode */
+ if (set_int_l2_mode (vm, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0))
+ {
+ error = clib_error_return (0, "invalid configuration for interface",
+ format_unformat_error, input);
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Modify the packet processing mode of the interface to Layer 3, which
+ * implies packets will be routed. This is the default mode of an interface.
+ * Use this command to remove an interface from a Layer 2 cross-connect or a
+ * Layer 2 bridge.
+ *
+ * @cliexpar
+ * Example of how to set the mode of an interface to Layer 3:
+ * @cliexcmd{set interface l3 GigabitEthernet0/8/0.200}
+?*/
+/* *INDENT-OFF* */
+ VLIB_CLI_COMMAND (int_l3_cli, static) = {
+ .path = "set interface l3",
+ .short_help = "set interface l3 <interface>",
+ .function = int_l3,
+};
+/* *INDENT-ON* */
+
+/**
+ * Show interface mode.
+ * The CLI format is:
+ * show mode [<if-name1> <if-name2> ...]
+ */
+static clib_error_t *
+show_int_mode (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ char *mode;
+ u8 *args;
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_sw_interface_t *si, *sis = 0;
+ l2input_main_t *mp = &l2input_main;
+ l2_input_config_t *config;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ u32 sw_if_index;
+
+ /* See if user wants to show specific interface */
+ if (unformat
+ (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ si = pool_elt_at_index (im->sw_interfaces, sw_if_index);
+ vec_add1 (sis, si[0]);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ }
+
+ if (vec_len (sis) == 0) /* Get all interfaces */
+ {
+ /* Gather interfaces. */
+ sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
+ _vec_len (sis) = 0;
+ /* *INDENT-OFF* */
+ pool_foreach (si, im->sw_interfaces, ({ vec_add1 (sis, si[0]); }));
+ /* *INDENT-ON* */
+ }
+
+ vec_foreach (si, sis)
+ {
+ vec_validate (mp->configs, si->sw_if_index);
+ config = vec_elt_at_index (mp->configs, si->sw_if_index);
+ if (config->bridge)
+ {
+ u32 bd_id;
+ mode = "l2 bridge";
+ bd_id = l2input_main.bd_configs[config->bd_index].bd_id;
+
+ args = format (0, "bd_id %d%s%d", bd_id,
+ config->bvi ? " bvi shg " : " shg ", config->shg);
+ }
+ else if (config->xconnect)
+ {
+ mode = "l2 xconnect";
+ args = format (0, "%U",
+ format_vnet_sw_if_index_name,
+ vnm, config->output_sw_if_index);
+ }
+ else
+ {
+ mode = "l3";
+ args = format (0, " ");
+ }
+ vlib_cli_output (vm, "%s %U %v\n",
+ mode,
+ format_vnet_sw_if_index_name,
+ vnm, si->sw_if_index, args);
+ vec_free (args);
+ }
+
+done:
+ vec_free (sis);
+
+ return error;
+}
+
+/*?
+ * Show the packet processing mode (Layer2 xcross-onnect, Layer 2 bridge,
+ * Layer 3 routed) of all interfaces and sub-interfaces, or limit the
+ * output to just the provided list of interfaces and sub-interfaces.
+ * The output shows the mode, the interface, and if the interface is
+ * a member of a bridge, the bridge-domain-id and the split horizen group (shg).
+ *
+ * @cliexpar
+ * Example of displaying the mode of all interfaces:
+ * @cliexstart{show mode}
+ * l3 local0
+ * l3 GigabitEthernet0/8/0
+ * l3 GigabitEthernet0/9/0
+ * l3 GigabitEthernet0/a/0
+ * l2 bridge GigabitEthernet0/8/0.200 bd_id 200 shg 0
+ * l2 bridge GigabitEthernet0/9/0.200 bd_id 200 shg 0
+ * l2 bridge GigabitEthernet0/a/0.200 bd_id 200 shg 0
+ * l2 xconnect GigabitEthernet0/8/0.300 GigabitEthernet0/9/0.300
+ * l2 xconnect GigabitEthernet0/9/0.300 GigabitEthernet0/8/0.300
+ * @cliexend
+ * Example of displaying the mode of a seleted list of interfaces:
+ * @cliexstart{show mode GigabitEthernet0/8/0 GigabitEthernet0/8/0.200}
+ * l3 GigabitEthernet0/8/0
+ * l2 bridge GigabitEthernet0/8/0.200 bd_id 200 shg 0
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_l2_mode, static) = {
+ .path = "show mode",
+ .short_help = "show mode [<if-name1> <if-name2> ...]",
+ .function = show_int_mode,
+};
+/* *INDENT-ON* */
+
+#define foreach_l2_init_function \
+_(feat_bitmap_drop_init) \
+_(l2fib_init) \
+_(l2_input_classify_init) \
+_(l2bd_init) \
+_(l2fwd_init) \
+_(l2_inacl_init) \
+_(l2input_init) \
+_(l2_vtr_init) \
+_(l2_invtr_init) \
+_(l2_efp_filter_init) \
+_(l2learn_init) \
+_(l2flood_init) \
+_(l2_outacl_init) \
+_(l2output_init) \
+_(l2_patch_init) \
+_(l2_xcrw_init)
+
+clib_error_t *
+l2_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+
+#define _(a) do { \
+ if ((error = vlib_call_init_function (vm, a))) return error; } \
+while (0);
+ foreach_l2_init_function;
+#undef _
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h
new file mode 100644
index 00000000000..f3fada6a7d3
--- /dev/null
+++ b/src/vnet/l2/l2_input.h
@@ -0,0 +1,266 @@
+/*
+ * l2_input.h : layer 2 input packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_input_h
+#define included_vnet_l2_input_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip.h>
+
+/* Per-subinterface L2 feature configuration */
+
+typedef struct
+{
+
+ union
+ {
+ u16 bd_index; /* bridge domain id */
+ u32 output_sw_if_index; /* for xconnect */
+ };
+
+ /* Interface mode. If both are 0, this interface is in L3 mode */
+ u8 xconnect;
+ u8 bridge;
+
+ /* this is the bvi interface for the bridge-domain */
+ u8 bvi;
+
+ /* config for which input features are configured on this interface */
+ u32 feature_bitmap;
+
+ /* some of these flags are also in the feature bitmap */
+ u8 learn_enable;
+ u8 fwd_enable;
+ u8 flood_enable;
+
+ /* split horizon group */
+ u8 shg;
+
+} l2_input_config_t;
+
+
+typedef struct
+{
+
+ /* Next nodes for the feature bitmap */
+ u32 feat_next_node_index[32];
+
+ /* config vector indexed by sw_if_index */
+ l2_input_config_t *configs;
+
+ /* bridge domain config vector indexed by bd_index */
+ l2_bridge_domain_t *bd_configs;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2input_main_t;
+
+extern l2input_main_t l2input_main;
+
+extern vlib_node_registration_t l2input_node;
+
+static_always_inline l2_bridge_domain_t *
+l2input_bd_config_from_index (l2input_main_t * l2im, u32 bd_index)
+{
+ l2_bridge_domain_t *bd_config;
+
+ bd_config = vec_elt_at_index (l2im->bd_configs, bd_index);
+ return bd_is_valid (bd_config) ? bd_config : NULL;
+}
+
+/* L2 input indication packet is from BVI, using -2 */
+#define L2INPUT_BVI ((u32) (~0-1))
+
+/* L2 input features */
+
+/* Mappings from feature ID to graph node name */
+#define foreach_l2input_feat \
+ _(DROP, "feature-bitmap-drop") \
+ _(XCONNECT, "l2-output") \
+ _(FLOOD, "l2-flood") \
+ _(ARP_TERM, "arp-term-l2bd") \
+ _(UU_FLOOD, "l2-flood") \
+ _(FWD, "l2-fwd") \
+ _(RW, "l2-rw") \
+ _(LEARN, "l2-learn") \
+ _(VTR, "l2-input-vtr") \
+ _(VPATH, "vpath-input-l2") \
+ _(ACL, "l2-input-acl") \
+ _(POLICER_CLAS, "l2-policer-classify") \
+ _(INPUT_CLASSIFY, "l2-input-classify")
+
+/* Feature bitmap positions */
+typedef enum
+{
+#define _(sym,str) L2INPUT_FEAT_##sym##_BIT,
+ foreach_l2input_feat
+#undef _
+ L2INPUT_N_FEAT,
+} l2input_feat_t;
+
+/* Feature bit masks */
+typedef enum
+{
+#define _(sym,str) L2INPUT_FEAT_##sym = (1<<L2INPUT_FEAT_##sym##_BIT),
+ foreach_l2input_feat
+#undef _
+} l2input_feat_masks_t;
+
+/** Return an array of strings containing graph node names of each feature */
+char **l2input_get_feat_names (void);
+
+
+static_always_inline u8
+bd_feature_flood (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD) ==
+ L2INPUT_FEAT_FLOOD);
+}
+
+static_always_inline u8
+bd_feature_uu_flood (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD) ==
+ L2INPUT_FEAT_UU_FLOOD);
+}
+
+static_always_inline u8
+bd_feature_forward (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_FWD) == L2INPUT_FEAT_FWD);
+}
+
+static_always_inline u8
+bd_feature_learn (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_LEARN) ==
+ L2INPUT_FEAT_LEARN);
+}
+
+static_always_inline u8
+bd_feature_arp_term (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM) ==
+ L2INPUT_FEAT_ARP_TERM);
+}
+
+/** Masks for eliminating features that do not apply to a packet */
+
+/** Get a pointer to the config for the given interface */
+l2_input_config_t *l2input_intf_config (u32 sw_if_index);
+
+/* Enable (or disable) the feature in the bitmap for the given interface */
+u32 l2input_intf_bitmap_enable (u32 sw_if_index,
+ u32 feature_bitmap, u32 enable);
+
+/* Sets modifies flags from a bridge domain */
+u32 l2input_set_bridge_features (u32 bd_index, u32 feat_mask, u32 feat_value);
+
+
+#define MODE_L3 0
+#define MODE_L2_BRIDGE 1
+#define MODE_L2_XC 2
+#define MODE_L2_CLASSIFY 3
+
+#define MODE_ERROR_ETH 1
+#define MODE_ERROR_BVI_DEF 2
+
+u32 set_int_l2_mode (vlib_main_t * vm,
+ vnet_main_t * vnet_main,
+ u32 mode,
+ u32 sw_if_index,
+ u32 bd_index, u32 bvi, u32 shg, u32 xc_sw_if_index);
+
+static inline void
+vnet_update_l2_len (vlib_buffer_t * b)
+{
+ ethernet_header_t *eth;
+ u16 ethertype;
+ u8 vlan_count = 0;
+
+ /* point at currrent l2 hdr */
+ eth = vlib_buffer_get_current (b);
+
+ /*
+ * l2-output pays no attention to this
+ * but the tag push/pop code on an l2 subif needs it.
+ *
+ * Determine l2 header len, check for up to 2 vlans
+ */
+ vnet_buffer (b)->l2.l2_len = sizeof (ethernet_header_t);
+ ethertype = clib_net_to_host_u16 (eth->type);
+ if (ethernet_frame_is_tagged (ethertype))
+ {
+ ethernet_vlan_header_t *vlan;
+ vnet_buffer (b)->l2.l2_len += sizeof (*vlan);
+ vlan_count = 1;
+ vlan = (void *) (eth + 1);
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ vnet_buffer (b)->l2.l2_len += sizeof (*vlan);
+ vlan_count = 2;
+ }
+ }
+ ethernet_buffer_set_vlan_count (b, vlan_count);
+}
+
+/*
+ * Compute flow hash of an ethernet packet, use 5-tuple hash if L3 packet
+ * is ip4 or ip6. Otherwise hash on smac/dmac/etype.
+ * The vlib buffer current pointer is expected to be at ethernet header
+ * and vnet l2.l2_len is exppected to be setup already.
+ */
+static inline u32
+vnet_l2_compute_flow_hash (vlib_buffer_t * b)
+{
+ ethernet_header_t *eh = vlib_buffer_get_current (b);
+ u8 *l3h = (u8 *) eh + vnet_buffer (b)->l2.l2_len;
+ u16 ethertype = clib_net_to_host_u16 (*(u16 *) (l3h - 2));
+
+ if (ethertype == ETHERNET_TYPE_IP4)
+ return ip4_compute_flow_hash ((ip4_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
+ else if (ethertype == ETHERNET_TYPE_IP6)
+ return ip6_compute_flow_hash ((ip6_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
+ else
+ {
+ u32 a, b, c;
+ u32 *ap = (u32 *) & eh->dst_address[2];
+ u32 *bp = (u32 *) & eh->src_address[2];
+ a = *ap;
+ b = *bp;
+ c = ethertype;
+ hash_v3_mix32 (a, b, c);
+ hash_v3_finalize32 (a, b, c);
+ return c;
+ }
+}
+
+#endif
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input_acl.c b/src/vnet/l2/l2_input_acl.c
new file mode 100644
index 00000000000..104fcd15b85
--- /dev/null
+++ b/src/vnet/l2/l2_input_acl.c
@@ -0,0 +1,434 @@
+/*
+ * l2_input_acl.c : layer 2 input acl processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+
+typedef struct
+{
+
+ /* Next nodes for each feature */
+ u32 feat_next_node_index[32];
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_inacl_main_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 next_index;
+ u32 table_index;
+ u32 offset;
+} l2_inacl_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_inacl_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_inacl_trace_t *t = va_arg (*args, l2_inacl_trace_t *);
+
+ s = format (s, "INACL: sw_if_index %d, next_index %d, table %d, offset %d",
+ t->sw_if_index, t->next_index, t->table_index, t->offset);
+ return s;
+}
+
+l2_inacl_main_t l2_inacl_main;
+
+static vlib_node_registration_t l2_inacl_node;
+
+#define foreach_l2_inacl_error \
+_(NONE, "valid input ACL packets") \
+_(MISS, "input ACL misses") \
+_(HIT, "input ACL hits") \
+_(CHAIN_HIT, "input ACL hits after chain walk") \
+_(TABLE_MISS, "input ACL table-miss drops") \
+_(SESSION_DENY, "input ACL session deny drops")
+
+
+typedef enum
+{
+#define _(sym,str) L2_INACL_ERROR_##sym,
+ foreach_l2_inacl_error
+#undef _
+ L2_INACL_N_ERROR,
+} l2_inacl_error_t;
+
+static char *l2_inacl_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_inacl_error
+#undef _
+};
+
+static uword
+l2_inacl_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ acl_next_index_t next_index;
+ l2_inacl_main_t *msm = &l2_inacl_main;
+ input_acl_main_t *am = &input_acl_main;
+ vnet_classify_main_t *vcm = am->vnet_classify_main;
+ input_acl_table_id_t tid = INPUT_ACL_TABLE_L2;
+ f64 now = vlib_time_now (vm);
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ /* First pass: compute hashes */
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ u32 bi0, bi1;
+ u8 *h0, *h1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 table_index0, table_index1;
+ vnet_classify_table_t *t0, *t1;
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t *p1, *p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ table_index1 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index1];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ vnet_buffer (b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);
+
+ if (t1->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h1 = (void *) vlib_buffer_get_current (b1) + t1->current_data_offset;
+ else
+ h1 = b1->data;
+
+ vnet_buffer (b1)->l2_classify.hash =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+
+ vnet_classify_prefetch_bucket (t1, vnet_buffer (b1)->l2_classify.hash);
+
+ vnet_buffer (b0)->l2_classify.table_index = table_index0;
+
+ vnet_buffer (b1)->l2_classify.table_index = table_index1;
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0;
+ u8 *h0;
+ u32 sw_if_index0;
+ u32 table_index0;
+ vnet_classify_table_t *t0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ vnet_buffer (b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_buffer (b0)->l2_classify.table_index = table_index0;
+ vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);
+
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = ACL_NEXT_INDEX_DENY;
+ u32 table_index0;
+ vnet_classify_table_t *t0;
+ vnet_classify_entry_t *e0;
+ u64 hash0;
+ u8 *h0;
+ u8 error0;
+
+ /* Stride 3 seems to work best */
+ if (PREDICT_TRUE (n_left_from > 3))
+ {
+ vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
+ vnet_classify_table_t *tp1;
+ u32 table_index1;
+ u64 phash1;
+
+ table_index1 = vnet_buffer (p1)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index1 != ~0))
+ {
+ tp1 = pool_elt_at_index (vcm->tables, table_index1);
+ phash1 = vnet_buffer (p1)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp1, phash1);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ table_index0 = vnet_buffer (b0)->l2_classify.table_index;
+ e0 = 0;
+ t0 = 0;
+
+ /* Feature bitmap update */
+ vnet_buffer (b0)->l2.feature_bitmap &= ~L2INPUT_FEAT_ACL;
+
+ vnet_buffer (b0)->l2_classify.opaque_index = ~0;
+ /* Determine the next node */
+ next0 = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
+ vnet_buffer (b0)->
+ l2.feature_bitmap);
+
+ if (PREDICT_TRUE (table_index0 != ~0))
+ {
+ hash0 = vnet_buffer (b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 =
+ (void *) vlib_buffer_get_current (b0) +
+ t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+
+ next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT) ?
+ e0->next_index : next0;
+
+ hits++;
+
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ L2_INACL_ERROR_SESSION_DENY : L2_INACL_ERROR_NONE;
+ b0->error = node->errors[error0];
+ }
+ else
+ {
+ while (1)
+ {
+ if (PREDICT_TRUE (t0->next_table_index != ~0))
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 =
+ (t0->miss_next_index <
+ ACL_NEXT_INDEX_N_NEXT) ? t0->miss_next_index :
+ next0;
+
+ misses++;
+
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ L2_INACL_ERROR_TABLE_MISS : L2_INACL_ERROR_NONE;
+ b0->error = node->errors[error0];
+ break;
+ }
+
+ if (t0->current_data_flag ==
+ CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 =
+ (void *) vlib_buffer_get_current (b0) +
+ t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry
+ (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT) ?
+ e0->next_index : next0;
+ hits++;
+ chain_hits++;
+
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ L2_INACL_ERROR_SESSION_DENY : L2_INACL_ERROR_NONE;
+ b0->error = node->errors[error0];
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_inacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->next_index = next0;
+ t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->offset = (t0 && e0) ? vnet_classify_get_offset (t0, e0) : ~0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INACL_ERROR_MISS, misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INACL_ERROR_HIT, hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INACL_ERROR_CHAIN_HIT, chain_hits);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_inacl_node,static) = {
+ .function = l2_inacl_node_fn,
+ .name = "l2-input-acl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_inacl_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_inacl_error_strings),
+ .error_strings = l2_inacl_error_strings,
+
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_inacl_node, l2_inacl_node_fn)
+ clib_error_t *l2_inacl_init (vlib_main_t * vm)
+{
+ l2_inacl_main_t *mp = &l2_inacl_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_inacl_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ mp->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_inacl_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input_classify.c b/src/vnet/l2/l2_input_classify.c
new file mode 100644
index 00000000000..497df192f39
--- /dev/null
+++ b/src/vnet/l2/l2_input_classify.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * l2_classify.c
+ */
+
+#include <vnet/l2/l2_classify.h>
+#include <vnet/api_errno.h>
+
+/**
+ * @file
+ * @brief L2 input classifier.
+ *
+ * @sa @ref vnet/vnet/classify/vnet_classify.c
+ * @sa @ref vnet/vnet/classify/vnet_classify.h
+ */
+
+/**
+ * @brief l2_input_classifier packet trace record.
+ */
+typedef struct
+{
+ /** interface handle for the ith packet */
+ u32 sw_if_index;
+ /** graph arc index selected for this packet */
+ u32 next_index;
+ /** classifier table which provided the final result */
+ u32 table_index;
+ /** offset in classifier heap of the corresponding session */
+ u32 session_offset;
+} l2_input_classify_trace_t;
+
+/**
+ * @brief vlib node runtime.
+ */
+typedef struct
+{
+ /** use-case independent main object pointer */
+ vnet_classify_main_t *vcm;
+ /** l2 input classifier main object pointer */
+ l2_input_classify_main_t *l2cm;
+} l2_input_classify_runtime_t;
+
+/** Packet trace format function. */
+static u8 *
+format_l2_input_classify_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_input_classify_trace_t *t = va_arg (*args, l2_input_classify_trace_t *);
+
+ s = format (s, "l2-classify: sw_if_index %d, table %d, offset %x, next %d",
+ t->sw_if_index, t->table_index, t->session_offset,
+ t->next_index);
+ return s;
+}
+
+/** l2 input classifier main data structure. */
+l2_input_classify_main_t l2_input_classify_main;
+
+vlib_node_registration_t l2_input_classify_node;
+
+#define foreach_l2_input_classify_error \
+_(MISS, "Classify misses") \
+_(HIT, "Classify hits") \
+_(CHAIN_HIT, "Classify hits after chain walk") \
+_(DROP, "L2 Classify Drops")
+
+typedef enum
+{
+#define _(sym,str) L2_INPUT_CLASSIFY_ERROR_##sym,
+ foreach_l2_input_classify_error
+#undef _
+ L2_INPUT_CLASSIFY_N_ERROR,
+} l2_input_classify_error_t;
+
+static char *l2_input_classify_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_input_classify_error
+#undef _
+};
+
+/**
+ * @brief l2 input classifier node.
+ * @node l2-input-classify
+ *
+ * This is the l2 input classifier dispatch node
+ *
+ * @param vm vlib_main_t corresponding to the current thread.
+ * @param node vlib_node_runtime_t data for this node.
+ * @param frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @par Graph mechanics: buffer metadata, next index usage
+ *
+ * @em Uses:
+ * - <code>(l2_input_classify_runtime_t *)
+ * rt->classify_table_index_by_sw_if_index</code>
+ * - Head of the per-interface, per-protocol classifier table chain
+ * for a specific interface.
+ * - @c ~0 => send pkts to the next feature in the L2 feature chain.
+ * - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
+ * - Indicates the @c sw_if_index value of the interface that the
+ * packet was received on.
+ * - <code>vnet_buffer(b0)->l2.feature_bitmap</code>
+ * - Used to steer packets across l2 features enabled on the interface
+ * - <code>(vnet_classify_entry_t) e0->next_index</code>
+ * - Used to steer traffic when the classifier hits on a session
+ * - <code>(vnet_classify_entry_t) e0->advance</code>
+ * - Signed quantity applied via <code>vlib_buffer_advance</code>
+ * when the classifier hits on a session
+ * - <code>(vnet_classify_table_t) t0->miss_next_index</code>
+ * - Used to steer traffic when the classifier misses
+ *
+ * @em Sets:
+ * - <code>vnet_buffer (b0)->l2_classify.table_index</code>
+ * - Classifier table index of the first classifier table in
+ * the classifier table chain
+ * - <code>vnet_buffer (b0)->l2_classify.hash</code>
+ * - Bounded-index extensible hash corresponding to the
+ * masked fields in the current packet
+ * - <code>vnet_buffer (b0)->l2.feature_bitmap</code>
+ * - Used to steer packets across l2 features enabled on the interface
+ * - <code>vnet_buffer (b0)->l2_classify.opaque_index</code>
+ * - Copied from the classifier session object upon classifier hit
+ *
+ * @em Counters:
+ * - <code>L2_INPUT_CLASSIFY_ERROR_MISS</code> Classifier misses
+ * - <code>L2_INPUT_CLASSIFY_ERROR_HIT</code> Classifier hits
+ * - <code>L2_INPUT_CLASSIFY_ERROR_CHAIN_HIT</code>
+ * Classifier hits in other than the first table
+ */
+
+static uword
+l2_input_classify_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_input_classify_next_t next_index;
+ l2_input_classify_main_t *cm = &l2_input_classify_main;
+ vnet_classify_main_t *vcm = cm->vnet_classify_main;
+ l2_input_classify_runtime_t *rt =
+ (l2_input_classify_runtime_t *) node->runtime_data;
+ u32 feature_bitmap;
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+ f64 now;
+ u32 n_next_nodes;
+
+ n_next_nodes = node->n_next_nodes;
+
+ now = vlib_time_now (vm);
+
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ /* First pass: compute hash */
+
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ u32 bi0, bi1;
+ ethernet_header_t *h0, *h1;
+ u32 sw_if_index0, sw_if_index1;
+ u16 type0, type1;
+ int type_index0, type_index1;
+ vnet_classify_table_t *t0, *t1;
+ u32 table_index0, table_index1;
+ u64 hash0, hash1;
+
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t *p1, *p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+ h1 = vlib_buffer_get_current (b1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ vnet_buffer (b0)->l2_classify.table_index = ~0;
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ vnet_buffer (b1)->l2_classify.table_index = ~0;
+
+ /* Select classifier table based on ethertype */
+ type0 = clib_net_to_host_u16 (h0->type);
+ type1 = clib_net_to_host_u16 (h1->type);
+
+ type_index0 = (type0 == ETHERNET_TYPE_IP4)
+ ? L2_INPUT_CLASSIFY_TABLE_IP4 : L2_INPUT_CLASSIFY_TABLE_OTHER;
+ type_index0 = (type0 == ETHERNET_TYPE_IP6)
+ ? L2_INPUT_CLASSIFY_TABLE_IP6 : type_index0;
+
+ type_index1 = (type1 == ETHERNET_TYPE_IP4)
+ ? L2_INPUT_CLASSIFY_TABLE_IP4 : L2_INPUT_CLASSIFY_TABLE_OTHER;
+ type_index1 = (type1 == ETHERNET_TYPE_IP6)
+ ? L2_INPUT_CLASSIFY_TABLE_IP6 : type_index1;
+
+ vnet_buffer (b0)->l2_classify.table_index =
+ table_index0 =
+ rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index0][sw_if_index0];
+
+ if (table_index0 != ~0)
+ {
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ vnet_buffer (b0)->l2_classify.hash = hash0 =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+ vnet_classify_prefetch_bucket (t0, hash0);
+ }
+
+ vnet_buffer (b1)->l2_classify.table_index =
+ table_index1 =
+ rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index1][sw_if_index1];
+
+ if (table_index1 != ~0)
+ {
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ vnet_buffer (b1)->l2_classify.hash = hash1 =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+ vnet_classify_prefetch_bucket (t1, hash1);
+ }
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0;
+ ethernet_header_t *h0;
+ u32 sw_if_index0;
+ u16 type0;
+ u32 type_index0;
+ vnet_classify_table_t *t0;
+ u32 table_index0;
+ u64 hash0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ vnet_buffer (b0)->l2_classify.table_index = ~0;
+
+ /* Select classifier table based on ethertype */
+ type0 = clib_net_to_host_u16 (h0->type);
+
+ type_index0 = (type0 == ETHERNET_TYPE_IP4)
+ ? L2_INPUT_CLASSIFY_TABLE_IP4 : L2_INPUT_CLASSIFY_TABLE_OTHER;
+ type_index0 = (type0 == ETHERNET_TYPE_IP6)
+ ? L2_INPUT_CLASSIFY_TABLE_IP6 : type_index0;
+
+ vnet_buffer (b0)->l2_classify.table_index =
+ table_index0 = rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index0][sw_if_index0];
+
+ if (table_index0 != ~0)
+ {
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ vnet_buffer (b0)->l2_classify.hash = hash0 =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+ vnet_classify_prefetch_bucket (t0, hash0);
+ }
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = ~0; /* next l2 input feature, please... */
+ ethernet_header_t *h0;
+ u32 table_index0;
+ u64 hash0;
+ vnet_classify_table_t *t0;
+ vnet_classify_entry_t *e0;
+
+ if (PREDICT_TRUE (n_left_from > 2))
+ {
+ vlib_buffer_t *p2 = vlib_get_buffer (vm, from[2]);
+ u64 phash2;
+ u32 table_index2;
+ vnet_classify_table_t *tp2;
+
+ /*
+ * Prefetch table entry two ahead. Buffer / data
+ * were prefetched above...
+ */
+ table_index2 = vnet_buffer (p2)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index2 != ~0))
+ {
+ tp2 = pool_elt_at_index (vcm->tables, table_index2);
+ phash2 = vnet_buffer (p2)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp2, phash2);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+ table_index0 = vnet_buffer (b0)->l2_classify.table_index;
+ e0 = 0;
+ vnet_buffer (b0)->l2_classify.opaque_index = ~0;
+
+ /* Remove ourself from the feature bitmap */
+ feature_bitmap = vnet_buffer (b0)->l2.feature_bitmap
+ & ~L2INPUT_FEAT_INPUT_CLASSIFY;
+
+ /* save for next feature graph nodes */
+ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap;
+
+ if (PREDICT_TRUE (table_index0 != ~0))
+ {
+ hash0 = vnet_buffer (b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ }
+ else
+ {
+ while (1)
+ {
+ if (t0->next_table_index != ~0)
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 = (t0->miss_next_index < n_next_nodes) ?
+ t0->miss_next_index : next0;
+ misses++;
+ break;
+ }
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 =
+ vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ chain_hits++;
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE (next0 == 0))
+ b0->error = node->errors[L2_INPUT_CLASSIFY_ERROR_DROP];
+
+ if (PREDICT_TRUE (next0 == ~0))
+ {
+ // Determine the next node
+ next0 =
+ feat_bitmap_get_next_node_index (cm->feat_next_node_index,
+ feature_bitmap);
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_input_classify_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->table_index = table_index0;
+ t->next_index = next0;
+ t->session_offset = e0 ? vnet_classify_get_offset (t0, e0) : 0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INPUT_CLASSIFY_ERROR_MISS, misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INPUT_CLASSIFY_ERROR_HIT, hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INPUT_CLASSIFY_ERROR_CHAIN_HIT, chain_hits);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_input_classify_node) = {
+ .function = l2_input_classify_node_fn,
+ .name = "l2-input-classify",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_input_classify_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_input_classify_error_strings),
+ .error_strings = l2_input_classify_error_strings,
+
+ .runtime_data_bytes = sizeof (l2_input_classify_runtime_t),
+
+ .n_next_nodes = L2_INPUT_CLASSIFY_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_INPUT_CLASSIFY_NEXT_DROP] = "error-drop",
+ [L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT] = "ethernet-input-not-l2",
+ [L2_INPUT_CLASSIFY_NEXT_IP4_INPUT] = "ip4-input",
+ [L2_INPUT_CLASSIFY_NEXT_IP6_INPUT] = "ip6-input",
+ [L2_INPUT_CLASSIFY_NEXT_LI] = "li-hit",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_input_classify_node,
+ l2_input_classify_node_fn);
+
+/** l2 input classsifier feature initialization. */
+clib_error_t *
+l2_input_classify_init (vlib_main_t * vm)
+{
+ l2_input_classify_main_t *cm = &l2_input_classify_main;
+ l2_input_classify_runtime_t *rt;
+
+ rt = vlib_node_get_runtime_data (vm, l2_input_classify_node.index);
+
+ cm->vlib_main = vm;
+ cm->vnet_main = vnet_get_main ();
+ cm->vnet_classify_main = &vnet_classify_main;
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_input_classify_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ cm->feat_next_node_index);
+ rt->l2cm = cm;
+ rt->vcm = cm->vnet_classify_main;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_input_classify_init);
+
+
+/** Enable/disable l2 input classification on a specific interface. */
+void
+vnet_l2_input_classify_enable_disable (u32 sw_if_index, int enable_disable)
+{
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_INPUT_CLASSIFY,
+ (u32) enable_disable);
+}
+
+/** @brief Set l2 per-protocol, per-interface input classification tables.
+ *
+ * @param sw_if_index interface handle
+ * @param ip4_table_index ip4 classification table index, or ~0
+ * @param ip6_table_index ip6 classification table index, or ~0
+ * @param other_table_index non-ip4, non-ip6 classification table index,
+ * or ~0
+ * @returns 0 on success, VNET_API_ERROR_NO_SUCH_TABLE, TABLE2, TABLE3
+ * if the indicated (non-~0) table does not exist.
+ */
+
+int
+vnet_l2_input_classify_set_tables (u32 sw_if_index,
+ u32 ip4_table_index,
+ u32 ip6_table_index, u32 other_table_index)
+{
+ l2_input_classify_main_t *cm = &l2_input_classify_main;
+ vnet_classify_main_t *vcm = cm->vnet_classify_main;
+
+ /* Assume that we've validated sw_if_index in the API layer */
+
+ if (ip4_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, ip4_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+
+ if (ip6_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, ip6_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE2;
+
+ if (other_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, other_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE3;
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4],
+ sw_if_index);
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6],
+ sw_if_index);
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER],
+ sw_if_index);
+
+ cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4]
+ [sw_if_index] = ip4_table_index;
+
+ cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6]
+ [sw_if_index] = ip6_table_index;
+
+ cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER]
+ [sw_if_index] = other_table_index;
+
+ return 0;
+}
+
+static clib_error_t *
+int_l2_input_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ~0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u32 other_table_index = ~0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ ;
+ else if (unformat (input, "ip4-table %d", &ip4_table_index))
+ ;
+ else if (unformat (input, "ip6-table %d", &ip6_table_index))
+ ;
+ else if (unformat (input, "other-table %d", &other_table_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "interface must be specified");
+
+
+ if (ip4_table_index == ~0 && ip6_table_index == ~0
+ && other_table_index == ~0)
+ {
+ vlib_cli_output (vm, "L2 classification disabled");
+ vnet_l2_input_classify_enable_disable (sw_if_index, 0 /* enable */ );
+ return 0;
+ }
+
+ rv = vnet_l2_input_classify_set_tables (sw_if_index, ip4_table_index,
+ ip6_table_index, other_table_index);
+ switch (rv)
+ {
+ case 0:
+ vnet_l2_input_classify_enable_disable (sw_if_index, 1 /* enable */ );
+ break;
+
+ default:
+ return clib_error_return (0, "vnet_l2_input_classify_set_tables: %d",
+ rv);
+ break;
+ }
+
+ return 0;
+}
+
+/*?
+ * Configure l2 input classification.
+ *
+ * @cliexpar
+ * @cliexstart{set interface l2 input classify intfc <interface-name> [ip4-table <index>] [ip6-table <index>] [other-table <index>]}
+ * @cliexend
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_input_classify_cli, static) = {
+ .path = "set interface l2 input classify",
+ .short_help =
+ "set interface l2 input classify intfc <interface-name> [ip4-table <n>]\n"
+ " [ip6-table <n>] [other-table <n>]",
+ .function = int_l2_input_classify_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input_vtr.c b/src/vnet/l2/l2_input_vtr.c
new file mode 100644
index 00000000000..60a39631e87
--- /dev/null
+++ b/src/vnet/l2/l2_input_vtr.c
@@ -0,0 +1,401 @@
+/*
+ * l2_input_vtr.c : layer 2 input vlan tag rewrite processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/l2/l2_input_vtr.h>
+#include <vnet/l2/l2_output.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/cache.h>
+
+
+typedef struct
+{
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u8 raw[12]; /* raw data (vlans) */
+ u32 sw_if_index;
+} l2_invtr_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_invtr_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_invtr_trace_t *t = va_arg (*args, l2_invtr_trace_t *);
+
+ s = format (s, "l2-input-vtr: sw_if_index %d dst %U src %U data "
+ "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src,
+ t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4],
+ t->raw[5], t->raw[6], t->raw[7], t->raw[8], t->raw[9],
+ t->raw[10], t->raw[11]);
+ return s;
+}
+
+l2_invtr_main_t l2_invtr_main;
+
+static vlib_node_registration_t l2_invtr_node;
+
+#define foreach_l2_invtr_error \
+_(L2_INVTR, "L2 inverter packets") \
+_(DROP, "L2 input tag rewrite drops")
+
+typedef enum
+{
+#define _(sym,str) L2_INVTR_ERROR_##sym,
+ foreach_l2_invtr_error
+#undef _
+ L2_INVTR_N_ERROR,
+} l2_invtr_error_t;
+
+static char *l2_invtr_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_invtr_error
+#undef _
+};
+
+typedef enum
+{
+ L2_INVTR_NEXT_DROP,
+ L2_INVTR_N_NEXT,
+} l2_invtr_next_t;
+
+
+static uword
+l2_invtr_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_invtr_next_t next_index;
+ l2_invtr_main_t *msm = &l2_invtr_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 6 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 feature_bitmap0, feature_bitmap1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3, *p4, *p5;
+ u32 sw_if_index2, sw_if_index3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+
+ /*
+ * Prefetch the input config for the N+1 loop iteration
+ * This depends on the buffer header above
+ */
+ sw_if_index2 = vnet_buffer (p2)->sw_if_index[VLIB_RX];
+ sw_if_index3 = vnet_buffer (p3)->sw_if_index[VLIB_RX];
+ CLIB_PREFETCH (vec_elt_at_index
+ (l2output_main.configs, sw_if_index2),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (vec_elt_at_index
+ (l2output_main.configs, sw_if_index3),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ /* process 2 packets */
+
+ /* Remove ourself from the feature bitmap */
+ feature_bitmap0 =
+ vnet_buffer (b0)->l2.feature_bitmap & ~L2INPUT_FEAT_VTR;
+ feature_bitmap1 =
+ vnet_buffer (b1)->l2.feature_bitmap & ~L2INPUT_FEAT_VTR;
+
+ /* save for next feature graph nodes */
+ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap0;
+ vnet_buffer (b1)->l2.feature_bitmap = feature_bitmap1;
+
+ /* Determine the next node */
+ next0 = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
+ feature_bitmap0);
+ next1 = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
+ feature_bitmap1);
+
+ l2_output_config_t *config0;
+ l2_output_config_t *config1;
+ config0 = vec_elt_at_index (l2output_main.configs, sw_if_index0);
+ config1 = vec_elt_at_index (l2output_main.configs, sw_if_index1);
+
+ if (PREDICT_FALSE (config0->out_vtr_flag))
+ {
+ if (config0->output_vtr.push_and_pop_bytes)
+ {
+ /* perform the tag rewrite on two packets */
+ if (l2_vtr_process
+ (b0,
+ &(vec_elt_at_index
+ (l2output_main.configs, sw_if_index0)->input_vtr)))
+ {
+ /* Drop packet */
+ next0 = L2_INVTR_NEXT_DROP;
+ b0->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ }
+ else if (config0->output_pbb_vtr.push_and_pop_bytes)
+ {
+ if (l2_pbb_process (b0, &(config0->input_pbb_vtr)))
+ {
+ /* Drop packet */
+ next0 = L2_INVTR_NEXT_DROP;
+ b0->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ }
+ }
+ if (PREDICT_FALSE (config1->out_vtr_flag))
+ {
+ if (config1->output_vtr.push_and_pop_bytes)
+ {
+ if (l2_vtr_process
+ (b1,
+ &(vec_elt_at_index
+ (l2output_main.configs, sw_if_index1)->input_vtr)))
+ {
+ /* Drop packet */
+ next1 = L2_INVTR_NEXT_DROP;
+ b1->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ }
+ else if (config1->output_pbb_vtr.push_and_pop_bytes)
+ {
+ if (l2_pbb_process (b1, &(config1->input_pbb_vtr)))
+ {
+ /* Drop packet */
+ next1 = L2_INVTR_NEXT_DROP;
+ b1->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ }
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_invtr_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_invtr_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ ethernet_header_t *h1 = vlib_buffer_get_current (b1);
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ clib_memcpy (t->raw, &h1->type, sizeof (t->raw));
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ u32 feature_bitmap0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ /* process 1 packet */
+
+ /* Remove ourself from the feature bitmap */
+ feature_bitmap0 =
+ vnet_buffer (b0)->l2.feature_bitmap & ~L2INPUT_FEAT_VTR;
+
+ /* save for next feature graph nodes */
+ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap0;
+
+ /* Determine the next node */
+ next0 = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
+ feature_bitmap0);
+
+ l2_output_config_t *config0;
+ config0 = vec_elt_at_index (l2output_main.configs, sw_if_index0);
+
+ if (PREDICT_FALSE (config0->out_vtr_flag))
+ {
+ if (config0->output_vtr.push_and_pop_bytes)
+ {
+ /* perform the tag rewrite on one packet */
+ if (l2_vtr_process
+ (b0,
+ &(vec_elt_at_index
+ (l2output_main.configs, sw_if_index0)->input_vtr)))
+ {
+ /* Drop packet */
+ next0 = L2_INVTR_NEXT_DROP;
+ b0->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ }
+ else if (config0->output_pbb_vtr.push_and_pop_bytes)
+ {
+ if (l2_pbb_process (b0, &(config0->input_pbb_vtr)))
+ {
+ /* Drop packet */
+ next0 = L2_INVTR_NEXT_DROP;
+ b0->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ }
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_invtr_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_invtr_node,static) = {
+ .function = l2_invtr_node_fn,
+ .name = "l2-input-vtr",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_invtr_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_invtr_error_strings),
+ .error_strings = l2_invtr_error_strings,
+
+ .n_next_nodes = L2_INVTR_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_INVTR_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_invtr_node, l2_invtr_node_fn)
+ clib_error_t *l2_invtr_init (vlib_main_t * vm)
+{
+ l2_invtr_main_t *mp = &l2_invtr_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_invtr_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ mp->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_invtr_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input_vtr.h b/src/vnet/l2/l2_input_vtr.h
new file mode 100644
index 00000000000..f248669e550
--- /dev/null
+++ b/src/vnet/l2/l2_input_vtr.h
@@ -0,0 +1,54 @@
+/*
+ * l2_input_vtr.h : layer 2 input vlan tag rewrite processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_input_vtr_h
+#define included_vnet_l2_input_vtr_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+
+
+typedef struct
+{
+
+ /*
+ * The input vtr data is located in l2_output_config_t because
+ * the same config data is used for the egress EFP Filter check.
+ */
+
+ /* Next nodes for each feature */
+ u32 feat_next_node_index[32];
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_invtr_main_t;
+
+extern l2_invtr_main_t l2_invtr_main;
+
+#endif /* included_vnet_l2_input_vtr_h */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c
new file mode 100644
index 00000000000..7f19f936d70
--- /dev/null
+++ b/src/vnet/l2/l2_learn.c
@@ -0,0 +1,597 @@
+/*
+ * l2_learn.c : layer 2 learning using l2fib
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_fib.h>
+#include <vnet/l2/l2_learn.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+
+/**
+ * @file
+ * @brief Ethernet Bridge Learning.
+ *
+ * Populate the mac table with entries mapping the packet's source mac + bridge
+ * domain ID to the input sw_if_index.
+ *
+ * Note that learning and forwarding are separate graph nodes. This means that
+ * for a set of packets, all learning is performed first, then all nodes are
+ * forwarded. The forwarding is done based on the end-state of the mac table,
+ * instead of the state after each packet. Thus the forwarding results could
+ * differ in certain cases (mac move tests), but this not expected to cause
+ * problems in real-world networks. It is much simpler to separate learning
+ * and forwarding into separate nodes.
+ */
+
+
+typedef struct
+{
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+ u16 bd_index;
+} l2learn_trace_t;
+
+
+/* packet trace format function */
+static u8 *
+format_l2learn_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2learn_trace_t *t = va_arg (*args, l2learn_trace_t *);
+
+ s = format (s, "l2-learn: sw_if_index %d dst %U src %U bd_index %d",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src, t->bd_index);
+ return s;
+}
+
+static vlib_node_registration_t l2learn_node;
+
+#define foreach_l2learn_error \
+_(L2LEARN, "L2 learn packets") \
+_(MISS, "L2 learn misses") \
+_(MAC_MOVE, "L2 mac moves") \
+_(MAC_MOVE_VIOLATE, "L2 mac move violations") \
+_(LIMIT, "L2 not learned due to limit") \
+_(HIT, "L2 learn hits") \
+_(FILTER_DROP, "L2 filter mac drops")
+
+typedef enum
+{
+#define _(sym,str) L2LEARN_ERROR_##sym,
+ foreach_l2learn_error
+#undef _
+ L2LEARN_N_ERROR,
+} l2learn_error_t;
+
+static char *l2learn_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2learn_error
+#undef _
+};
+
+typedef enum
+{
+ L2LEARN_NEXT_L2FWD,
+ L2LEARN_NEXT_DROP,
+ L2LEARN_N_NEXT,
+} l2learn_next_t;
+
+
+/** Perform learning on one packet based on the mac table lookup result. */
+
+static_always_inline void
+l2learn_process (vlib_node_runtime_t * node,
+ l2learn_main_t * msm,
+ u64 * counter_base,
+ vlib_buffer_t * b0,
+ u32 sw_if_index0,
+ l2fib_entry_key_t * key0,
+ l2fib_entry_key_t * cached_key,
+ u32 * bucket0,
+ l2fib_entry_result_t * result0, u32 * next0, u8 timestamp)
+{
+ u32 feature_bitmap;
+
+ /* Set up the default next node (typically L2FWD) */
+
+ /* Remove ourself from the feature bitmap */
+ feature_bitmap = vnet_buffer (b0)->l2.feature_bitmap & ~L2INPUT_FEAT_LEARN;
+
+ /* Save for next feature graph nodes */
+ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap;
+
+ /* Determine the next node */
+ *next0 = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
+ feature_bitmap);
+
+ /* Check mac table lookup result */
+
+ if (PREDICT_TRUE (result0->fields.sw_if_index == sw_if_index0))
+ {
+ /*
+ * The entry was in the table, and the sw_if_index matched, the normal case
+ */
+ counter_base[L2LEARN_ERROR_HIT] += 1;
+ if (PREDICT_FALSE (result0->fields.timestamp != timestamp))
+ result0->fields.timestamp = timestamp;
+
+ }
+ else if (result0->raw == ~0)
+ {
+
+ /* The entry was not in table, so add it */
+
+ counter_base[L2LEARN_ERROR_MISS] += 1;
+
+ if (msm->global_learn_count == msm->global_learn_limit)
+ {
+ /*
+ * Global limit reached. Do not learn the mac but forward the packet.
+ * In the future, limits could also be per-interface or bridge-domain.
+ */
+ counter_base[L2LEARN_ERROR_LIMIT] += 1;
+ goto done;
+
+ }
+ else
+ {
+ BVT (clib_bihash_kv) kv;
+ /* It is ok to learn */
+
+ result0->raw = 0; /* clear all fields */
+ result0->fields.sw_if_index = sw_if_index0;
+ result0->fields.timestamp = timestamp;
+ kv.key = key0->raw;
+ kv.value = result0->raw;
+
+ BV (clib_bihash_add_del) (msm->mac_table, &kv, 1 /* is_add */ );
+
+ cached_key->raw = ~0; /* invalidate the cache */
+ msm->global_learn_count++;
+ }
+
+ }
+ else
+ {
+
+ /* The entry was in the table, but with the wrong sw_if_index mapping (mac move) */
+ counter_base[L2LEARN_ERROR_MAC_MOVE] += 1;
+
+ if (result0->fields.static_mac)
+ {
+ /*
+ * Don't overwrite a static mac
+ * TODO: Check violation policy. For now drop the packet
+ */
+ b0->error = node->errors[L2LEARN_ERROR_MAC_MOVE_VIOLATE];
+ *next0 = L2LEARN_NEXT_DROP;
+ }
+ else
+ {
+ /*
+ * Update the entry
+ * TODO: may want to rate limit mac moves
+ * TODO: check global/bridge domain/interface learn limits
+ */
+ BVT (clib_bihash_kv) kv;
+
+ result0->raw = 0; /* clear all fields */
+ result0->fields.sw_if_index = sw_if_index0;
+ result0->fields.timestamp = timestamp;
+
+ kv.key = key0->raw;
+ kv.value = result0->raw;
+
+ cached_key->raw = ~0; /* invalidate the cache */
+
+ BV (clib_bihash_add_del) (msm->mac_table, &kv, 1 /* is_add */ );
+ }
+ }
+
+ if (result0->fields.filter)
+ {
+ /* drop packet because lookup matched a filter mac entry */
+
+ if (*next0 != L2LEARN_NEXT_DROP)
+ {
+ /* if we're not already dropping the packet, do it now */
+ b0->error = node->errors[L2LEARN_ERROR_FILTER_DROP];
+ *next0 = L2LEARN_NEXT_DROP;
+ }
+ }
+
+done:
+ return;
+}
+
+
+static uword
+l2learn_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2learn_next_t next_index;
+ l2learn_main_t *msm = &l2learn_main;
+ vlib_node_t *n = vlib_get_node (vm, l2learn_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+ l2fib_entry_key_t cached_key;
+ l2fib_entry_result_t cached_result;
+ u8 timestamp = (u8) (vlib_time_now (vm) / 60);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ /* Clear the one-entry cache in case mac table was updated */
+ cached_key.raw = ~0;
+ cached_result.raw = ~0; /* warning be gone */
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
+ ethernet_header_t *h0, *h1, *h2, *h3;
+ l2fib_entry_key_t key0, key1, key2, key3;
+ l2fib_entry_result_t result0, result1, result2, result3;
+ u32 bucket0, bucket1, bucket2, bucket3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
+ sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
+
+ /* Process 4 x pkts */
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+ h2 = vlib_buffer_get_current (b2);
+ h3 = vlib_buffer_get_current (b3);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2learn_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer (b0)->l2.bd_index;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2learn_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->bd_index = vnet_buffer (b1)->l2.bd_index;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ }
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2learn_trace_t *t =
+ vlib_add_trace (vm, node, b2, sizeof (*t));
+ t->sw_if_index = sw_if_index2;
+ t->bd_index = vnet_buffer (b2)->l2.bd_index;
+ clib_memcpy (t->src, h2->src_address, 6);
+ clib_memcpy (t->dst, h2->dst_address, 6);
+ }
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2learn_trace_t *t =
+ vlib_add_trace (vm, node, b3, sizeof (*t));
+ t->sw_if_index = sw_if_index3;
+ t->bd_index = vnet_buffer (b3)->l2.bd_index;
+ clib_memcpy (t->src, h3->src_address, 6);
+ clib_memcpy (t->dst, h3->dst_address, 6);
+ }
+ }
+
+ /* process 4 pkts */
+ vlib_node_increment_counter (vm, l2learn_node.index,
+ L2LEARN_ERROR_L2LEARN, 4);
+
+ l2fib_lookup_4 (msm->mac_table, &cached_key, &cached_result,
+ h0->src_address,
+ h1->src_address,
+ h2->src_address,
+ h3->src_address,
+ vnet_buffer (b0)->l2.bd_index,
+ vnet_buffer (b1)->l2.bd_index,
+ vnet_buffer (b2)->l2.bd_index,
+ vnet_buffer (b3)->l2.bd_index,
+ &key0, &key1, &key2, &key3,
+ &bucket0, &bucket1, &bucket2, &bucket3,
+ &result0, &result1, &result2, &result3);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b0, sw_if_index0, &key0, &cached_key,
+ &bucket0, &result0, &next0, timestamp);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b1, sw_if_index1, &key1, &cached_key,
+ &bucket1, &result1, &next1, timestamp);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b2, sw_if_index2, &key2, &cached_key,
+ &bucket2, &result2, &next2, timestamp);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b3, sw_if_index3, &key3, &cached_key,
+ &bucket3, &result3, &next3, timestamp);
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t *h0;
+ l2fib_entry_key_t key0;
+ l2fib_entry_result_t result0;
+ u32 bucket0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2learn_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer (b0)->l2.bd_index;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+
+ /* process 1 pkt */
+ vlib_node_increment_counter (vm, l2learn_node.index,
+ L2LEARN_ERROR_L2LEARN, 1);
+
+
+ l2fib_lookup_1 (msm->mac_table, &cached_key, &cached_result,
+ h0->src_address, vnet_buffer (b0)->l2.bd_index,
+ &key0, &bucket0, &result0);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b0, sw_if_index0, &key0, &cached_key,
+ &bucket0, &result0, &next0, timestamp);
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2learn_node,static) = {
+ .function = l2learn_node_fn,
+ .name = "l2-learn",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2learn_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2learn_error_strings),
+ .error_strings = l2learn_error_strings,
+
+ .n_next_nodes = L2LEARN_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2LEARN_NEXT_DROP] = "error-drop",
+ [L2LEARN_NEXT_L2FWD] = "l2-fwd",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2learn_node, l2learn_node_fn)
+ clib_error_t *l2learn_init (vlib_main_t * vm)
+{
+ l2learn_main_t *mp = &l2learn_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2learn_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ mp->feat_next_node_index);
+
+ /* init the hash table ptr */
+ mp->mac_table = get_mac_table ();
+
+ /*
+ * Set the default number of dynamically learned macs to the number
+ * of buckets.
+ */
+ mp->global_learn_limit = L2FIB_NUM_BUCKETS * 16;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2learn_init);
+
+
+/**
+ * Set subinterface learn enable/disable.
+ * The CLI format is:
+ * set interface l2 learn <interface> [disable]
+ */
+static clib_error_t *
+int_learn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the interface flag */
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_LEARN, enable);
+
+done:
+ return error;
+}
+
+/*?
+ * Layer 2 learning can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage interfaces. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable learning:
+ * @cliexcmd{set interface l2 learn GigabitEthernet0/8/0}
+ * Example of how to disable learning:
+ * @cliexcmd{set interface l2 learn GigabitEthernet0/8/0 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_learn_cli, static) = {
+ .path = "set interface l2 learn",
+ .short_help = "set interface l2 learn <interface> [disable]",
+ .function = int_learn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+l2learn_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ l2learn_main_t *mp = &l2learn_main;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "limit %d", &mp->global_learn_limit))
+ ;
+
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (l2learn_config, "l2learn");
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_learn.h b/src/vnet/l2/l2_learn.h
new file mode 100644
index 00000000000..5bb1130b015
--- /dev/null
+++ b/src/vnet/l2/l2_learn.h
@@ -0,0 +1,64 @@
+/*
+ * l2_learn.c : layer 2 learning using l2fib
+ *
+ * Copyright (c) 2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2learn_h
+#define included_l2learn_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+
+typedef struct
+{
+
+ /* Hash table */
+ BVT (clib_bihash) * mac_table;
+
+ /* number of dynamically learned mac entries */
+ u32 global_learn_count;
+
+ /* maximum number of dynamically learned mac entries */
+ u32 global_learn_limit;
+
+ /* Next nodes for each feature */
+ u32 feat_next_node_index[32];
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2learn_main_t;
+
+
+l2learn_main_t l2learn_main;
+
+extern vlib_node_registration_t l2fib_mac_age_scanner_process_node;
+
+enum
+{
+ L2_MAC_AGE_PROCESS_EVENT_START = 1,
+ L2_MAC_AGE_PROCESS_EVENT_STOP = 2,
+} l2_mac_age_process_event_t;
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c
new file mode 100644
index 00000000000..953fcb0222a
--- /dev/null
+++ b/src/vnet/l2/l2_output.c
@@ -0,0 +1,708 @@
+/*
+ * l2_output.c : layer 2 output packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_output.h>
+
+
+/* Feature graph node names */
+static char *l2output_feat_names[] = {
+#define _(sym,name) name,
+ foreach_l2output_feat
+#undef _
+};
+
+char **
+l2output_get_feat_names (void)
+{
+ return l2output_feat_names;
+}
+
+l2output_main_t l2output_main;
+
+typedef struct
+{
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+} l2output_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2output_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2output_trace_t *t = va_arg (*args, l2output_trace_t *);
+
+ s = format (s, "l2-output: sw_if_index %d dst %U src %U",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src);
+ return s;
+}
+
+
+static char *l2output_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2output_error
+#undef _
+};
+
+/**
+ * Check for split horizon violations.
+ * Return 0 if split horizon check passes, otherwise return non-zero.
+ * Packets should not be transmitted out an interface with the same
+ * split-horizon group as the input interface, except if the @c shg is 0
+ * in which case the check always passes.
+ */
+static_always_inline u32
+split_horizon_violation (u8 shg1, u8 shg2)
+{
+ if (PREDICT_TRUE (shg1 == 0))
+ {
+ return 0;
+ }
+ else
+ {
+ return shg1 == shg2;
+ }
+}
+
+static_always_inline void
+l2output_vtr (vlib_node_runtime_t * node, l2_output_config_t * config,
+ u32 feature_bitmap, vlib_buffer_t * b, u32 * next)
+{
+ if (PREDICT_FALSE (config->out_vtr_flag))
+ {
+ /* Perform pre-vtr EFP filter check if configured */
+ if (config->output_vtr.push_and_pop_bytes)
+ {
+ /*
+ * Perform output vlan tag rewrite and the pre-vtr EFP filter check.
+ * The EFP Filter only needs to be run if there is an output VTR
+ * configured. The flag for the post-vtr EFP Filter node is used
+ * to trigger the pre-vtr check as well.
+ */
+ u32 failed1 = (feature_bitmap & L2OUTPUT_FEAT_EFP_FILTER)
+ && (l2_efp_filter_process (b, &(config->input_vtr)));
+ u32 failed2 = l2_vtr_process (b, &(config->output_vtr));
+
+ if (PREDICT_FALSE (failed1 | failed2))
+ {
+ *next = L2OUTPUT_NEXT_DROP;
+ if (failed2)
+ {
+ b->error = node->errors[L2OUTPUT_ERROR_VTR_DROP];
+ }
+ if (failed1)
+ {
+ b->error = node->errors[L2OUTPUT_ERROR_EFP_DROP];
+ }
+ }
+ }
+ // perform the PBB rewrite
+ else if (config->output_pbb_vtr.push_and_pop_bytes)
+ {
+ u32 failed = l2_pbb_process (b, &(config->output_pbb_vtr));
+ if (PREDICT_FALSE (failed))
+ {
+ *next = L2OUTPUT_NEXT_DROP;
+ b->error = node->errors[L2OUTPUT_ERROR_VTR_DROP];
+ }
+ }
+ }
+}
+
+
+static vlib_node_registration_t l2output_node;
+
+static uword
+l2output_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2output_next_t next_index;
+ l2output_main_t *msm = &l2output_main;
+ u32 cached_sw_if_index;
+ u32 cached_next_index;
+
+ /* Invalidate cache */
+ cached_sw_if_index = ~0;
+ cached_next_index = ~0; /* warning be gone */
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
+ ethernet_header_t *h0, *h1, *h2, *h3;
+ l2_output_config_t *config0, *config1, *config2, *config3;
+ u32 feature_bitmap0, feature_bitmap1;
+ u32 feature_bitmap2, feature_bitmap3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ /* TX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_TX];
+ sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+ h2 = vlib_buffer_get_current (b2);
+ h3 = vlib_buffer_get_current (b3);
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ }
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b2, sizeof (*t));
+ t->sw_if_index = sw_if_index2;
+ clib_memcpy (t->src, h2->src_address, 6);
+ clib_memcpy (t->dst, h2->dst_address, 6);
+ }
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b3, sizeof (*t));
+ t->sw_if_index = sw_if_index3;
+ clib_memcpy (t->src, h3->src_address, 6);
+ clib_memcpy (t->dst, h3->dst_address, 6);
+ }
+ }
+
+ vlib_node_increment_counter (vm, l2output_node.index,
+ L2OUTPUT_ERROR_L2OUTPUT, 4);
+
+ /* Get config for the output interface */
+ config0 = vec_elt_at_index (msm->configs, sw_if_index0);
+ config1 = vec_elt_at_index (msm->configs, sw_if_index1);
+ config2 = vec_elt_at_index (msm->configs, sw_if_index2);
+ config3 = vec_elt_at_index (msm->configs, sw_if_index3);
+
+ /*
+ * Get features from the config
+ * TODO: mask out any non-applicable features
+ */
+ feature_bitmap0 = config0->feature_bitmap;
+ feature_bitmap1 = config1->feature_bitmap;
+ feature_bitmap2 = config2->feature_bitmap;
+ feature_bitmap3 = config3->feature_bitmap;
+
+ /* Determine next node */
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2output_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b0, sw_if_index0, feature_bitmap0, &next0);
+
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2output_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b1, sw_if_index1, feature_bitmap1, &next1);
+
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2output_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b2, sw_if_index2, feature_bitmap2, &next2);
+
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2output_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b3, sw_if_index3, feature_bitmap3, &next3);
+
+ l2output_vtr (node, config0, feature_bitmap0, b0, &next0);
+ l2output_vtr (node, config1, feature_bitmap1, b1, &next1);
+ l2output_vtr (node, config2, feature_bitmap2, b2, &next2);
+ l2output_vtr (node, config3, feature_bitmap3, b3, &next3);
+
+ /*
+ * Perform the split horizon check
+ * The check can only fail for non-zero shg's
+ */
+ if (PREDICT_FALSE (config0->shg + config1->shg +
+ config2->shg + config3->shg))
+ {
+ /* one of the checks might fail, check both */
+ if (split_horizon_violation
+ (config0->shg, vnet_buffer (b0)->l2.shg))
+ {
+ next0 = L2OUTPUT_NEXT_DROP;
+ b0->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ if (split_horizon_violation
+ (config1->shg, vnet_buffer (b1)->l2.shg))
+ {
+ next1 = L2OUTPUT_NEXT_DROP;
+ b1->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ if (split_horizon_violation
+ (config2->shg, vnet_buffer (b2)->l2.shg))
+ {
+ next2 = L2OUTPUT_NEXT_DROP;
+ b2->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ if (split_horizon_violation
+ (config3->shg, vnet_buffer (b3)->l2.shg))
+ {
+ next3 = L2OUTPUT_NEXT_DROP;
+ b3->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t *h0;
+ l2_output_config_t *config0;
+ u32 feature_bitmap0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ h0 = vlib_buffer_get_current (b0);
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+
+ vlib_node_increment_counter (vm, l2output_node.index,
+ L2OUTPUT_ERROR_L2OUTPUT, 1);
+
+ /* Get config for the output interface */
+ config0 = vec_elt_at_index (msm->configs, sw_if_index0);
+
+ /*
+ * Get features from the config
+ * TODO: mask out any non-applicable features
+ */
+ feature_bitmap0 = config0->feature_bitmap;
+
+ /* Determine next node */
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2output_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b0, sw_if_index0, feature_bitmap0, &next0);
+
+ l2output_vtr (node, config0, feature_bitmap0, b0, &next0);
+
+ /* Perform the split horizon check */
+ if (PREDICT_FALSE
+ (split_horizon_violation
+ (config0->shg, vnet_buffer (b0)->l2.shg)))
+ {
+ next0 = L2OUTPUT_NEXT_DROP;
+ b0->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2output_node,static) = {
+ .function = l2output_node_fn,
+ .name = "l2-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2output_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2output_error_strings),
+ .error_strings = l2output_error_strings,
+
+ .n_next_nodes = L2OUTPUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2OUTPUT_NEXT_DROP] = "error-drop",
+ [L2OUTPUT_NEXT_BAD_INTF] = "l2-output-bad-intf",
+ },
+};
+/* *INDENT-ON* */
+
+
+#define foreach_l2output_bad_intf_error \
+_(DROP, "L2 output to interface not in L2 mode or deleted")
+
+static char *l2output_bad_intf_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2output_bad_intf_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) L2OUTPUT_BAD_INTF_ERROR_##sym,
+ foreach_l2output_bad_intf_error
+#undef _
+ L2OUTPUT_BAD_INTF_N_ERROR,
+} l2output_bad_intf_error_t;
+
+
+/**
+ * Output node for interfaces/tunnels which was in L2 mode but were changed
+ * to L3 mode or possibly deleted thereafter. On changing forwarding mode
+ * of any tunnel/interface from L2 to L3, its entry in l2_output_main table
+ * next_nodes.output_node_index_vec[sw_if_index] MUST be set to the value of
+ * L2OUTPUT_NEXT_BAD_INTF. Thus, if there are stale entries in the L2FIB for
+ * this sw_if_index, l2-output will send packets for this sw_if_index to the
+ * l2-output-bad-intf node which just setup the proper drop reason before
+ * sending packets to the error-drop node to drop the packet. Then, stale L2FIB
+ * entries for delted tunnels won't cause possible packet or memory corrpution.
+ */
+static vlib_node_registration_t l2output_bad_intf_node;
+
+static uword
+l2output_bad_intf_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2output_next_t next_index = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b0->error = node->errors[L2OUTPUT_BAD_INTF_ERROR_DROP];
+ b1->error = node->errors[L2OUTPUT_BAD_INTF_ERROR_DROP];
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+ b0->error = node->errors[L2OUTPUT_BAD_INTF_ERROR_DROP];
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2output_bad_intf_node,static) = {
+ .function = l2output_bad_intf_node_fn,
+ .name = "l2-output-bad-intf",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2output_bad_intf_error_strings),
+ .error_strings = l2output_bad_intf_error_strings,
+
+ .n_next_nodes = 1,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2output_node, l2output_node_fn)
+ clib_error_t *l2output_init (vlib_main_t * vm)
+{
+ l2output_main_t *mp = &l2output_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Create the config vector */
+ vec_validate (mp->configs, 100);
+ /* Until we hook up the CLI config, just create 100 sw interface entries and zero them */
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2output_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ mp->next_nodes.feat_next_node_index);
+
+ /* Initialize the output node mapping table */
+ l2output_init_output_node_vec (&mp->next_nodes.output_node_index_vec);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2output_init);
+
+typedef struct
+{
+ u32 node_index;
+ u32 sw_if_index;
+} output_node_mapping_rpc_args_t;
+
+static void output_node_rpc_callback (output_node_mapping_rpc_args_t * a);
+
+static void
+output_node_mapping_send_rpc (u32 node_index, u32 sw_if_index)
+{
+ output_node_mapping_rpc_args_t args;
+ void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
+
+ args.node_index = node_index;
+ args.sw_if_index = sw_if_index;
+
+ vl_api_rpc_call_main_thread (output_node_rpc_callback,
+ (u8 *) & args, sizeof (args));
+}
+
+
+/** Create a mapping in the next node mapping table for the given sw_if_index. */
+u32
+l2output_create_output_node_mapping (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 node_index, /* index of current node */
+ u32 * output_node_index_vec,
+ u32 sw_if_index)
+{
+
+ u32 next; /* index of next graph node */
+ vnet_hw_interface_t *hw0;
+ u32 *node;
+
+ hw0 = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+
+ uword cpu_number;
+
+ cpu_number = os_get_cpu_number ();
+
+ if (cpu_number)
+ {
+ u32 oldflags;
+
+ oldflags = __sync_fetch_and_or (&hw0->flags,
+ VNET_HW_INTERFACE_FLAG_L2OUTPUT_MAPPED);
+
+ if ((oldflags & VNET_HW_INTERFACE_FLAG_L2OUTPUT_MAPPED))
+ return L2OUTPUT_NEXT_DROP;
+
+ output_node_mapping_send_rpc (node_index, sw_if_index);
+ return L2OUTPUT_NEXT_DROP;
+ }
+
+ /* dynamically create graph node arc */
+ next = vlib_node_add_next (vlib_main, node_index, hw0->output_node_index);
+
+ /* Initialize vector with the mapping */
+
+ node = vec_elt_at_index (output_node_index_vec, sw_if_index);
+ *node = next;
+
+ /* reset mapping bit, includes memory barrier */
+ __sync_fetch_and_and (&hw0->flags, ~VNET_HW_INTERFACE_FLAG_L2OUTPUT_MAPPED);
+
+ return next;
+}
+
+void
+output_node_rpc_callback (output_node_mapping_rpc_args_t * a)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ l2output_main_t *mp = &l2output_main;
+
+ (void) l2output_create_output_node_mapping
+ (vm, vnm, a->node_index, mp->next_nodes.output_node_index_vec,
+ a->sw_if_index);
+}
+
+/* Get a pointer to the config for the given interface */
+l2_output_config_t *
+l2output_intf_config (u32 sw_if_index)
+{
+ l2output_main_t *mp = &l2output_main;
+
+ vec_validate (mp->configs, sw_if_index);
+ return vec_elt_at_index (mp->configs, sw_if_index);
+}
+
+/** Enable (or disable) the feature in the bitmap for the given interface. */
+void
+l2output_intf_bitmap_enable (u32 sw_if_index, u32 feature_bitmap, u32 enable)
+{
+ l2output_main_t *mp = &l2output_main;
+ l2_output_config_t *config;
+
+ vec_validate (mp->configs, sw_if_index);
+ config = vec_elt_at_index (mp->configs, sw_if_index);
+
+ if (enable)
+ {
+ config->feature_bitmap |= feature_bitmap;
+ }
+ else
+ {
+ config->feature_bitmap &= ~feature_bitmap;
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_output.h b/src/vnet/l2/l2_output.h
new file mode 100644
index 00000000000..c683b1ade73
--- /dev/null
+++ b/src/vnet/l2/l2_output.h
@@ -0,0 +1,285 @@
+/*
+ * l2_output.h : layer 2 output packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_output_h
+#define included_vnet_l2_output_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+
+
+/* The L2 output feature configuration, a per-interface struct */
+typedef struct
+{
+
+ u32 feature_bitmap;
+
+ /*
+ * vlan tag rewrite for ingress and egress
+ * ingress vtr is located here because the same config data is used for
+ * the egress EFP filter check
+ */
+ vtr_config_t input_vtr;
+ vtr_config_t output_vtr;
+ ptr_config_t input_pbb_vtr;
+ ptr_config_t output_pbb_vtr;
+
+ /* some of these flags may get integrated into the feature bitmap */
+ u8 fwd_enable;
+ u8 flood_enable;
+
+ /* split horizon group */
+ u8 shg;
+
+ /* flag for output vtr operation */
+ u8 out_vtr_flag;
+
+} l2_output_config_t;
+
+
+/*
+ * The set of next nodes for features and interface output.
+ * Each output feature node should include this.
+ */
+typedef struct
+{
+ /*
+ * vector of output next node index, indexed by sw_if_index.
+ * used when all output features have been executed and the
+ * next nodes are the interface output nodes.
+ */
+ u32 *output_node_index_vec;
+
+ /*
+ * array of next node index for each output feature, indexed
+ * by l2output_feat_t. Used to determine next feature node.
+ */
+ u32 feat_next_node_index[32];
+
+} l2_output_next_nodes_st;
+
+
+typedef struct
+{
+ /* Next nodes for features and output interfaces */
+ l2_output_next_nodes_st next_nodes;
+
+ /* config vector indexed by sw_if_index */
+ l2_output_config_t *configs;
+
+ /* Convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2output_main_t;
+
+l2output_main_t l2output_main;
+
+/* L2 output features */
+
+/* Mappings from feature ID to graph node name */
+#define foreach_l2output_feat \
+ _(SPAN, "feature-bitmap-drop") \
+ _(CFM, "feature-bitmap-drop") \
+ _(QOS, "feature-bitmap-drop") \
+ _(ACL, "l2-output-acl") \
+ _(L2PT, "feature-bitmap-drop") \
+ _(EFP_FILTER, "l2-efp-filter") \
+ _(IPIW, "feature-bitmap-drop") \
+ _(STP_BLOCKED, "feature-bitmap-drop") \
+ _(LINESTATUS_DOWN, "feature-bitmap-drop") \
+ _(OUTPUT_CLASSIFY, "l2-output-classify") \
+ _(XCRW, "l2-xcrw")
+
+/* Feature bitmap positions */
+typedef enum
+{
+#define _(sym,str) L2OUTPUT_FEAT_##sym##_BIT,
+ foreach_l2output_feat
+#undef _
+ L2OUTPUT_N_FEAT,
+} l2output_feat_t;
+
+/* Feature bit masks */
+typedef enum
+{
+#define _(sym,str) L2OUTPUT_FEAT_##sym = (1<<L2OUTPUT_FEAT_##sym##_BIT),
+ foreach_l2output_feat
+#undef _
+} l2output_feat_masks_t;
+
+#define foreach_l2output_error \
+_(L2OUTPUT, "L2 output packets") \
+_(EFP_DROP, "L2 EFP filter pre-rewrite drops") \
+_(VTR_DROP, "L2 output tag rewrite drops") \
+_(SHG_DROP, "L2 split horizon drops") \
+_(DROP, "L2 output drops") \
+_(MAPPING_DROP, "L2 Output interface mapping in progress")
+
+typedef enum
+{
+ L2OUTPUT_NEXT_DROP,
+ L2OUTPUT_NEXT_BAD_INTF,
+ L2OUTPUT_N_NEXT,
+} l2output_next_t;
+
+typedef enum
+{
+#define _(sym,str) L2OUTPUT_ERROR_##sym,
+ foreach_l2output_error
+#undef _
+ L2OUTPUT_N_ERROR,
+} l2output_error_t;
+
+/* Return an array of strings containing graph node names of each feature */
+char **l2output_get_feat_names (void);
+
+
+/**
+ * The next set of functions is for use by output feature graph nodes.
+ * When the last bit has been cleared from the output feature bitmap,
+ * the next node is the output graph node for the TX sw_if_index.
+ * These functions help the feature nodes get that node index.
+ */
+
+/* Create a mapping to the output graph node for the given sw_if_index */
+u32 l2output_create_output_node_mapping (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 node_index, /* index of current node */
+ u32 * output_node_index_vec,
+ u32 sw_if_index);
+
+/* Initialize the next node mapping table */
+always_inline void
+l2output_init_output_node_vec (u32 ** output_node_index_vec)
+{
+
+ /*
+ * Size it at 100 sw_if_indexes initially
+ * Uninitialized mappings are set to ~0
+ */
+ vec_validate_init_empty (*output_node_index_vec, 100, ~0);
+}
+
+
+/**
+ * Get a mapping from the output node mapping table,
+ * creating the entry if necessary.
+ */
+always_inline u32
+l2output_get_output_node (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 node_index, /* index of current node */
+ u32 sw_if_index, u32 ** output_node_index_vec) /* may be updated */
+{
+ u32 next; /* index of next graph node */
+
+ /* Insure the vector is big enough */
+ vec_validate_init_empty (*output_node_index_vec, sw_if_index, ~0);
+
+ /* Get the mapping for the sw_if_index */
+ next = vec_elt (*output_node_index_vec, sw_if_index);
+
+ if (next == ~0)
+ {
+ /* Mapping doesn't exist so create it */
+ next = l2output_create_output_node_mapping (vlib_main,
+ vnet_main,
+ node_index,
+ *output_node_index_vec,
+ sw_if_index);
+ }
+
+ return next;
+}
+
+
+/** Determine the next L2 node based on the output feature bitmap */
+always_inline void
+l2_output_dispatch (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ vlib_node_runtime_t * node,
+ u32 node_index,
+ u32 * cached_sw_if_index,
+ u32 * cached_next_index,
+ l2_output_next_nodes_st * next_nodes,
+ vlib_buffer_t * b0,
+ u32 sw_if_index, u32 feature_bitmap, u32 * next0)
+{
+ if (feature_bitmap)
+ {
+ /* There are some features to execute */
+
+ /* Save bitmap for the next feature graph nodes */
+ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap;
+
+ /* Determine the next node */
+ *next0 =
+ feat_bitmap_get_next_node_index (next_nodes->feat_next_node_index,
+ feature_bitmap);
+ }
+ else
+ {
+ /*
+ * There are no features. Send packet to TX node for sw_if_index0
+ * This is a little tricky in that the output interface next node indexes
+ * are not precomputed at init time.
+ */
+
+ if (sw_if_index == *cached_sw_if_index)
+ {
+ /* We hit in the one-entry cache. Use it. */
+ *next0 = *cached_next_index;
+ }
+ else
+ {
+ /* Look up the output TX node */
+ *next0 = l2output_get_output_node (vlib_main,
+ vnet_main,
+ node_index,
+ sw_if_index,
+ &next_nodes->output_node_index_vec);
+
+ if (*next0 == L2OUTPUT_NEXT_DROP)
+ {
+ vnet_hw_interface_t *hw0;
+ hw0 = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+
+ if (hw0->flags & VNET_HW_INTERFACE_FLAG_L2OUTPUT_MAPPED)
+ b0->error = node->errors[L2OUTPUT_ERROR_MAPPING_DROP];
+ }
+
+ /* Update the one-entry cache */
+ *cached_sw_if_index = sw_if_index;
+ *cached_next_index = *next0;
+ }
+ }
+}
+
+/** Get a pointer to the config for the given interface */
+l2_output_config_t *l2output_intf_config (u32 sw_if_index);
+
+/** Enable (or disable) the feature in the bitmap for the given interface */
+void l2output_intf_bitmap_enable (u32 sw_if_index,
+ u32 feature_bitmap, u32 enable);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_output_acl.c b/src/vnet/l2/l2_output_acl.c
new file mode 100644
index 00000000000..94a4d66b48f
--- /dev/null
+++ b/src/vnet/l2/l2_output_acl.c
@@ -0,0 +1,358 @@
+/*
+ * l2_output_acl.c : layer 2 output acl processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_output.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+
+typedef struct
+{
+ /* Next nodes for features and output interfaces */
+ l2_output_next_nodes_st next_nodes;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_outacl_main_t;
+
+
+
+typedef struct
+{
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u32 next_index;
+ u32 sw_if_index;
+} l2_outacl_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_outacl_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_outacl_trace_t *t = va_arg (*args, l2_outacl_trace_t *);
+
+ s = format (s, "l2-output-acl: sw_if_index %d dst %U src %U",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src);
+ return s;
+}
+
+l2_outacl_main_t l2_outacl_main;
+
+static vlib_node_registration_t l2_outacl_node;
+
+#define foreach_l2_outacl_error \
+_(L2_OUTACL, "L2 output ACL packets") \
+_(DROP, "L2 output drops")
+
+typedef enum
+{
+#define _(sym,str) L2_OUTACL_ERROR_##sym,
+ foreach_l2_outacl_error
+#undef _
+ L2_OUTACL_N_ERROR,
+} l2_outacl_error_t;
+
+static char *l2_outacl_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_outacl_error
+#undef _
+};
+
+typedef enum
+{
+ L2_OUTACL_NEXT_DROP,
+ L2_OUTACL_N_NEXT,
+} l2_outacl_next_t;
+
+
+
+static uword
+l2_outacl_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_outacl_next_t next_index;
+ l2_outacl_main_t *msm = &l2_outacl_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_outacl_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+ u32 cached_sw_if_index = (u32) ~ 0;
+ u32 cached_next_index = (u32) ~ 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (0 && n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ethernet_header_t *h0, *h1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* TX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_outacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_outacl_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ }
+ }
+
+ em->counters[node_counter_base_index + L2_OUTACL_ERROR_L2_OUTACL] +=
+ 2;
+
+ /* add core loop code here */
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t *h0;
+ u32 feature_bitmap0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_outacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ }
+
+ em->counters[node_counter_base_index + L2_OUTACL_ERROR_L2_OUTACL] +=
+ 1;
+
+ /*
+ * L2_OUTACL code
+ * Dummy for now, just go to next feature node
+ */
+
+
+ /* Remove ourself from the feature bitmap */
+ feature_bitmap0 =
+ vnet_buffer (b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_ACL;
+
+ /* Determine next node */
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2_outacl_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b0, sw_if_index0, feature_bitmap0, &next0);
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_outacl_node,static) = {
+ .function = l2_outacl_node_fn,
+ .name = "l2-output-acl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_outacl_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_outacl_error_strings),
+ .error_strings = l2_outacl_error_strings,
+
+ .n_next_nodes = L2_OUTACL_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_OUTACL_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_outacl_node, l2_outacl_node_fn)
+ clib_error_t *l2_outacl_init (vlib_main_t * vm)
+{
+ l2_outacl_main_t *mp = &l2_outacl_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_outacl_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ mp->next_nodes.feat_next_node_index);
+
+ /* Initialize the output node mapping table */
+ l2output_init_output_node_vec (&mp->next_nodes.output_node_index_vec);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_outacl_init);
+
+#if 0
+/** @todo maybe someone will add output ACL's in the future.
+ * Set subinterface outacl enable/disable.
+ * The CLI format is:
+ * set interface acl output <interface> [disable]
+ */
+static clib_error_t *
+int_l2_outacl (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the interface flag */
+ l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_ACL, enable);
+
+done:
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_outacl_cli, static) = {
+ .path = "set interface acl output",
+ .short_help = "set interface acl output <interface> [disable]",
+ .function = int_l2_outacl,
+};
+/* *INDENT-ON* */
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_output_classify.c b/src/vnet/l2/l2_output_classify.c
new file mode 100644
index 00000000000..27d5eb39514
--- /dev/null
+++ b/src/vnet/l2/l2_output_classify.c
@@ -0,0 +1,657 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/l2/l2_classify.h>
+#include <vnet/api_errno.h>
+
+/**
+ * @file
+ * @brief Layer 2 Output Classifier.
+ *
+ * @sa @ref vnet/vnet/classify/vnet_classify.c
+ * @sa @ref vnet/vnet/classify/vnet_classify.h
+ */
+
+typedef struct
+{
+ /** interface handle for the ith packet */
+ u32 sw_if_index;
+ /** graph arc index selected for this packet */
+ u32 next_index;
+ /** classifier table which provided the final result */
+ u32 table_index;
+ /** offset in classifier heap of the corresponding session */
+ u32 session_offset;
+} l2_output_classify_trace_t;
+
+typedef struct
+{
+ /** use-case independent main object pointer */
+ vnet_classify_main_t *vcm;
+ /** l2 input classifier main object pointer */
+ l2_output_classify_main_t *l2cm;
+} l2_output_classify_runtime_t;
+
+/** Packet trace format function. */
+static u8 *
+format_l2_output_classify_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_output_classify_trace_t *t =
+ va_arg (*args, l2_output_classify_trace_t *);
+
+ s = format (s, "l2-classify: sw_if_index %d, table %d, offset %x, next %d",
+ t->sw_if_index, t->table_index, t->session_offset,
+ t->next_index);
+ return s;
+}
+
+/** l2 output classifier main data structure. */
+l2_output_classify_main_t l2_output_classify_main;
+
+vlib_node_registration_t l2_output_classify_node;
+
+#define foreach_l2_output_classify_error \
+_(MISS, "Classify misses") \
+_(HIT, "Classify hits") \
+_(CHAIN_HIT, "Classify hits after chain walk") \
+_(DROP, "L2 Classify Drops")
+
+typedef enum
+{
+#define _(sym,str) L2_OUTPUT_CLASSIFY_ERROR_##sym,
+ foreach_l2_output_classify_error
+#undef _
+ L2_OUTPUT_CLASSIFY_N_ERROR,
+} l2_output_classify_error_t;
+
+static char *l2_output_classify_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_output_classify_error
+#undef _
+};
+
+/**
+ * @brief l2 output classifier node.
+ * @node l2-output-classify
+ *
+ * This is the l2 output classifier dispatch node
+ *
+ * @param vm vlib_main_t corresponding to the current thread.
+ * @param node vlib_node_runtime_t data for this node.
+ * @param frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @par Graph mechanics: buffer metadata, next index usage
+ *
+ * @em Uses:
+ * - <code>(l2_output_classify_runtime_t *)
+ * rt->classify_table_index_by_sw_if_index</code>
+ * Head of the per-interface, perprotocol classifier table chain
+ * for a specific interface. ~0 => send pkts to the next
+ * feature in the L2 feature chain.
+ * - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
+ * - Indicates the @c sw_if_index value of the interface that the
+ * packet was received on.
+ * - <code>vnet_buffer (b0)->l2.feature_bitmap</code>
+ * - Used to steer packets across l2 features enabled on the interface
+ * - <code>(vnet_classify_entry_t) e0->next_index</code>
+ * - Used to steer traffic when the classifier hits on a session
+ * - <code>(vnet_classify_entry_t) e0->advance</code>
+ * - Signed quantity applied via <code>vlib_buffer_advance</code>
+ * when the classifier hits on a session
+ * - <code>(vnet_classify_table_t) t0->miss_next_index</code>
+ * - Used to steer traffic when the classifier misses
+ *
+ * @em Sets:
+ * - <code>vnet_buffer (b0)->l2_classify.table_index</code>
+ * - Classifier table index of the first classifier table in
+ * the classifier table chain
+ * - <code>vnet_buffer (b0)->l2_classify.hash</code>
+ * - Bounded-index extensible hash corresponding to the
+ * masked fields in the current packet
+ * - <code>vnet_buffer (b0)->l2.feature_bitmap</code>
+ * - Used to steer packets across l2 features enabled on the interface
+ * - <code>vnet_buffer (b0)->l2_classify.opaque_index</code>
+ * - Copied from the classifier session object upon classifier hit
+ *
+ * @em Counters:
+ * - <code>L2_OUTPUT_CLASSIFY_ERROR_MISS</code> Classifier misses
+ * - <code>L2_OUTPUT_CLASSIFY_ERROR_HIT</code> Classifier hits
+ * - <code>L2_OUTPUT_CLASSIFY_ERROR_CHAIN_HIT</code>
+ * Classifier hits in other than the first table
+ */
+
+static uword
+l2_output_classify_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_output_classify_next_t next_index;
+ l2_output_classify_main_t *cm = &l2_output_classify_main;
+ vnet_classify_main_t *vcm = cm->vnet_classify_main;
+ l2_output_classify_runtime_t *rt =
+ (l2_output_classify_runtime_t *) node->runtime_data;
+ u32 feature_bitmap0;
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+ f64 now;
+ u32 n_next_nodes;
+ u32 cached_sw_if_index = (u32) ~ 0;
+ u32 cached_next_index = (u32) ~ 0;
+ u32 sw_if_index0;
+
+ n_next_nodes = node->n_next_nodes;
+
+ now = vlib_time_now (vm);
+
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ /* First pass: compute hash */
+
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ u32 bi0, bi1;
+ ethernet_header_t *h0, *h1;
+ u32 sw_if_index0, sw_if_index1;
+ u16 type0, type1;
+ int type_index0, type_index1;
+ vnet_classify_table_t *t0, *t1;
+ u32 table_index0, table_index1;
+ u64 hash0, hash1;
+
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t *p1, *p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+ h1 = vlib_buffer_get_current (b1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ vnet_buffer (b0)->l2_classify.table_index = ~0;
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ vnet_buffer (b1)->l2_classify.table_index = ~0;
+
+ /* Select classifier table based on ethertype */
+ type0 = clib_net_to_host_u16 (h0->type);
+ type1 = clib_net_to_host_u16 (h1->type);
+
+ type_index0 = (type0 == ETHERNET_TYPE_IP4)
+ ? L2_OUTPUT_CLASSIFY_TABLE_IP4 : L2_OUTPUT_CLASSIFY_TABLE_OTHER;
+ type_index0 = (type0 == ETHERNET_TYPE_IP6)
+ ? L2_OUTPUT_CLASSIFY_TABLE_IP6 : type_index0;
+
+ type_index1 = (type1 == ETHERNET_TYPE_IP4)
+ ? L2_OUTPUT_CLASSIFY_TABLE_IP4 : L2_OUTPUT_CLASSIFY_TABLE_OTHER;
+ type_index1 = (type1 == ETHERNET_TYPE_IP6)
+ ? L2_OUTPUT_CLASSIFY_TABLE_IP6 : type_index1;
+
+ vnet_buffer (b0)->l2_classify.table_index =
+ table_index0 =
+ rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index0][sw_if_index0];
+
+ if (table_index0 != ~0)
+ {
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ vnet_buffer (b0)->l2_classify.hash = hash0 =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+ vnet_classify_prefetch_bucket (t0, hash0);
+ }
+
+ vnet_buffer (b1)->l2_classify.table_index =
+ table_index1 =
+ rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index1][sw_if_index1];
+
+ if (table_index1 != ~0)
+ {
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ vnet_buffer (b1)->l2_classify.hash = hash1 =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+ vnet_classify_prefetch_bucket (t1, hash1);
+ }
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0;
+ ethernet_header_t *h0;
+ u16 type0;
+ u32 type_index0;
+ vnet_classify_table_t *t0;
+ u32 table_index0;
+ u64 hash0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ vnet_buffer (b0)->l2_classify.table_index = ~0;
+
+ /* Select classifier table based on ethertype */
+ type0 = clib_net_to_host_u16 (h0->type);
+
+ type_index0 = (type0 == ETHERNET_TYPE_IP4)
+ ? L2_OUTPUT_CLASSIFY_TABLE_IP4 : L2_OUTPUT_CLASSIFY_TABLE_OTHER;
+ type_index0 = (type0 == ETHERNET_TYPE_IP6)
+ ? L2_OUTPUT_CLASSIFY_TABLE_IP6 : type_index0;
+
+ vnet_buffer (b0)->l2_classify.table_index =
+ table_index0 = rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index0][sw_if_index0];
+
+ if (table_index0 != ~0)
+ {
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ vnet_buffer (b0)->l2_classify.hash = hash0 =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+ vnet_classify_prefetch_bucket (t0, hash0);
+ }
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = ~0;
+ ethernet_header_t *h0;
+ u32 table_index0;
+ u64 hash0;
+ vnet_classify_table_t *t0;
+ vnet_classify_entry_t *e0;
+
+ if (PREDICT_TRUE (n_left_from > 2))
+ {
+ vlib_buffer_t *p2 = vlib_get_buffer (vm, from[2]);
+ u64 phash2;
+ u32 table_index2;
+ vnet_classify_table_t *tp2;
+
+ /*
+ * Prefetch table entry two ahead. Buffer / data
+ * were prefetched above...
+ */
+ table_index2 = vnet_buffer (p2)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index2 != ~0))
+ {
+ tp2 = pool_elt_at_index (vcm->tables, table_index2);
+ phash2 = vnet_buffer (p2)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp2, phash2);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+ table_index0 = vnet_buffer (b0)->l2_classify.table_index;
+ e0 = 0;
+ vnet_buffer (b0)->l2_classify.opaque_index = ~0;
+ /* Remove ourself from the feature bitmap */
+ feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap
+ & ~L2OUTPUT_FEAT_OUTPUT_CLASSIFY;
+
+ /* save for next feature graph nodes */
+ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap0;
+
+ if (PREDICT_TRUE (table_index0 != ~0))
+ {
+ hash0 = vnet_buffer (b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ }
+ else
+ {
+ while (1)
+ {
+ if (t0->next_table_index != ~0)
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 = (t0->miss_next_index < n_next_nodes) ?
+ t0->miss_next_index : next0;
+ misses++;
+ break;
+ }
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 =
+ vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ chain_hits++;
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE (next0 == 0))
+ b0->error = node->errors[L2_OUTPUT_CLASSIFY_ERROR_DROP];
+
+ if (PREDICT_FALSE (next0 == ~0))
+ {
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ /* Determine next node */
+ l2_output_dispatch (cm->vlib_main,
+ cm->vnet_main,
+ node,
+ l2_output_classify_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &cm->next_nodes,
+ b0, sw_if_index0, feature_bitmap0, &next0);
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_output_classify_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ t->table_index = table_index0;
+ t->next_index = next0;
+ t->session_offset = e0 ? vnet_classify_get_offset (t0, e0) : 0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_OUTPUT_CLASSIFY_ERROR_MISS, misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_OUTPUT_CLASSIFY_ERROR_HIT, hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_OUTPUT_CLASSIFY_ERROR_CHAIN_HIT,
+ chain_hits);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_output_classify_node) = {
+ .function = l2_output_classify_node_fn,
+ .name = "l2-output-classify",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_output_classify_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_output_classify_error_strings),
+ .error_strings = l2_output_classify_error_strings,
+
+ .runtime_data_bytes = sizeof (l2_output_classify_runtime_t),
+
+ .n_next_nodes = L2_OUTPUT_CLASSIFY_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_OUTPUT_CLASSIFY_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_output_classify_node,
+ l2_output_classify_node_fn);
+
+/** l2 output classsifier feature initialization. */
+clib_error_t *
+l2_output_classify_init (vlib_main_t * vm)
+{
+ l2_output_classify_main_t *cm = &l2_output_classify_main;
+ l2_output_classify_runtime_t *rt;
+
+ rt = vlib_node_get_runtime_data (vm, l2_output_classify_node.index);
+
+ cm->vlib_main = vm;
+ cm->vnet_main = vnet_get_main ();
+ cm->vnet_classify_main = &vnet_classify_main;
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_output_classify_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ cm->feat_next_node_index);
+ rt->l2cm = cm;
+ rt->vcm = cm->vnet_classify_main;
+
+ /* Initialize the output node mapping table */
+ l2output_init_output_node_vec (&cm->next_nodes.output_node_index_vec);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_output_classify_init);
+
+/** Enable/disable l2 input classification on a specific interface. */
+void
+vnet_l2_output_classify_enable_disable (u32 sw_if_index, int enable_disable)
+{
+
+ l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_OUTPUT_CLASSIFY,
+ (u32) enable_disable);
+}
+
+/** @brief Set l2 per-protocol, per-interface output classification tables.
+ *
+ * @param sw_if_index interface handle
+ * @param ip4_table_index ip4 classification table index, or ~0
+ * @param ip6_table_index ip6 classification table index, or ~0
+ * @param other_table_index non-ip4, non-ip6 classification table index,
+ * or ~0
+ * @returns 0 on success, VNET_API_ERROR_NO_SUCH_TABLE, TABLE2, TABLE3
+ * if the indicated (non-~0) table does not exist.
+ */
+
+int
+vnet_l2_output_classify_set_tables (u32 sw_if_index,
+ u32 ip4_table_index,
+ u32 ip6_table_index,
+ u32 other_table_index)
+{
+ l2_output_classify_main_t *cm = &l2_output_classify_main;
+ vnet_classify_main_t *vcm = cm->vnet_classify_main;
+
+ /* Assume that we've validated sw_if_index in the API layer */
+
+ if (ip4_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, ip4_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+
+ if (ip6_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, ip6_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE2;
+
+ if (other_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, other_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE3;
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP4],
+ sw_if_index);
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP6],
+ sw_if_index);
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_OTHER],
+ sw_if_index);
+
+ cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP4]
+ [sw_if_index] = ip4_table_index;
+
+ cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP6]
+ [sw_if_index] = ip6_table_index;
+
+ cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_OTHER]
+ [sw_if_index] = other_table_index;
+
+ return 0;
+}
+
+static clib_error_t *
+int_l2_output_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ~0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u32 other_table_index = ~0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ ;
+ else if (unformat (input, "ip4-table %d", &ip4_table_index))
+ ;
+ else if (unformat (input, "ip6-table %d", &ip6_table_index))
+ ;
+ else if (unformat (input, "other-table %d", &other_table_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "interface must be specified");
+
+
+ if (ip4_table_index == ~0 && ip6_table_index == ~0
+ && other_table_index == ~0)
+ {
+ vlib_cli_output (vm, "L2 classification disabled");
+ vnet_l2_output_classify_enable_disable (sw_if_index, 0 /* enable */ );
+ return 0;
+ }
+
+ rv = vnet_l2_output_classify_set_tables (sw_if_index, ip4_table_index,
+ ip6_table_index,
+ other_table_index);
+ switch (rv)
+ {
+ case 0:
+ vnet_l2_output_classify_enable_disable (sw_if_index, 1 /* enable */ );
+ break;
+
+ default:
+ return clib_error_return (0, "vnet_l2_output_classify_set_tables: %d",
+ rv);
+ break;
+ }
+
+ return 0;
+}
+
+/*?
+ * Configure Layer 2 output classification.
+ *
+ * @cliexpar
+ * @cliexstart{set interface l2 output classify intfc <interface-name> [ip4-table <index>] [ip6-table <index>] [other-table <index>]}
+ * @cliexend
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_output_classify_cli, static) = {
+ .path = "set interface l2 output classify",
+ .short_help =
+ "set interface l2 output classify intfc <<interface-name>> [ip4-table <n>]\n"
+ " [ip6-table <n>] [other-table <n>]",
+ .function = int_l2_output_classify_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c
new file mode 100644
index 00000000000..5e4691f45c7
--- /dev/null
+++ b/src/vnet/l2/l2_patch.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/feature/feature.h>
+#include <vppinfra/error.h>
+
+typedef struct
+{
+ /* vector of dispositions, indexed by rx_sw_if_index */
+ u32 *tx_next_by_rx_sw_if_index;
+ u32 *tx_sw_if_index_by_rx_sw_if_index;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_patch_main_t;
+
+typedef struct
+{
+ u32 rx_sw_if_index;
+ u32 tx_sw_if_index;
+} l2_patch_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_patch_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_patch_trace_t *t = va_arg (*args, l2_patch_trace_t *);
+
+ s = format (s, "L2_PATCH: rx %d tx %d", t->rx_sw_if_index,
+ t->tx_sw_if_index);
+ return s;
+}
+
+l2_patch_main_t l2_patch_main;
+
+static vlib_node_registration_t l2_patch_node;
+
+#define foreach_l2_patch_error \
+_(PATCHED, "L2 patch packets") \
+_(DROPPED, "L2 patch misconfigured drops")
+
+typedef enum
+{
+#define _(sym,str) L2_PATCH_ERROR_##sym,
+ foreach_l2_patch_error
+#undef _
+ L2_PATCH_N_ERROR,
+} l2_patch_error_t;
+
+static char *l2_patch_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_patch_error
+#undef _
+};
+
+typedef enum
+{
+ L2_PATCH_NEXT_DROP,
+ L2_PATCH_N_NEXT,
+} l2_patch_next_t;
+
+static uword
+l2_patch_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_patch_next_t next_index;
+ l2_patch_main_t *l2pm = &l2_patch_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_patch_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ /* So stupid / simple, we don't need to prefetch data */
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0);
+ ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0);
+ ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index1] != ~0);
+ ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1] != ~0);
+
+ next0 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index0];
+ next1 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index1];
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0];
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_patch_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->rx_sw_if_index = sw_if_index0;
+ t->tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0];
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_patch_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->rx_sw_if_index = sw_if_index1;
+ t->tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1];
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0);
+ ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0);
+
+ next0 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index0];
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0];
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_patch_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->rx_sw_if_index = sw_if_index0;
+ t->tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0];
+ }
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ em->counters[node_counter_base_index + L2_PATCH_ERROR_PATCHED] +=
+ frame->n_vectors;
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_patch_node, static) = {
+ .function = l2_patch_node_fn,
+ .name = "l2-patch",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_patch_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_patch_error_strings),
+ .error_strings = l2_patch_error_strings,
+
+ .n_next_nodes = L2_PATCH_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_PATCH_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_patch_node, l2_patch_node_fn)
+ int vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index,
+ int is_add)
+{
+ l2_patch_main_t *l2pm = &l2_patch_main;
+ vnet_hw_interface_t *rxhi, *txhi;
+ u32 tx_next_index;
+
+ /*
+ * We assume that the API msg handler has used 2x VALIDATE_SW_IF_INDEX
+ * macros...
+ */
+
+ rxhi = vnet_get_sup_hw_interface (l2pm->vnet_main, rx_sw_if_index);
+
+ /* Make sure caller didn't pass a vlan subif, etc. */
+ if (rxhi->sw_if_index != rx_sw_if_index)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ txhi = vnet_get_sup_hw_interface (l2pm->vnet_main, tx_sw_if_index);
+ if (txhi->sw_if_index != tx_sw_if_index)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX_2;
+
+ if (is_add)
+ {
+ tx_next_index = vlib_node_add_next (l2pm->vlib_main,
+ l2_patch_node.index,
+ txhi->output_node_index);
+
+ vec_validate_init_empty (l2pm->tx_next_by_rx_sw_if_index,
+ rx_sw_if_index, ~0);
+
+ l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = tx_next_index;
+ vec_validate_init_empty (l2pm->tx_sw_if_index_by_rx_sw_if_index,
+ rx_sw_if_index, ~0);
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index]
+ = txhi->sw_if_index;
+
+ ethernet_set_flags (l2pm->vnet_main, rxhi->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
+
+ vnet_feature_enable_disable ("device-input", "l2-patch",
+ rxhi->hw_if_index, 1, 0, 0);
+ }
+ else
+ {
+ ethernet_set_flags (l2pm->vnet_main, rxhi->hw_if_index,
+ 0 /* disable promiscuous mode */ );
+
+ vnet_feature_enable_disable ("device-input", "l2-patch",
+ rxhi->hw_if_index, 0, 0, 0);
+ if (vec_len (l2pm->tx_next_by_rx_sw_if_index) > rx_sw_if_index)
+ {
+ l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = ~0;
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index] = ~0;
+ }
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+test_patch_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2_patch_main_t *l2pm = &l2_patch_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 rx_sw_if_index, tx_sw_if_index;
+ int rv;
+ int rx_set = 0;
+ int tx_set = 0;
+ int is_add = 1;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "rx %U", unformat_vnet_sw_interface,
+ l2pm->vnet_main, &rx_sw_if_index))
+ rx_set = 1;
+ else if (unformat (line_input, "tx %U", unformat_vnet_sw_interface,
+ l2pm->vnet_main, &tx_sw_if_index))
+ tx_set = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (rx_set == 0)
+ return clib_error_return (0, "rx interface not set");
+
+ if (tx_set == 0)
+ return clib_error_return (0, "tx interface not set");
+
+ rv = vnet_l2_patch_add_del (rx_sw_if_index, tx_sw_if_index, is_add);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return (0, "rx interface not a physical port");
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX_2:
+ return clib_error_return (0, "tx interface not a physical port");
+
+ default:
+ return clib_error_return
+ (0, "WARNING: vnet_l2_patch_add_del returned %d", rv);
+ }
+
+ return 0;
+}
+
+/*?
+ * Create or delete a Layer 2 patch.
+ *
+ * @cliexpar
+ * @cliexstart{test l2patch rx <intfc> tx <intfc> [del]}
+ * @cliexend
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_patch_command, static) = {
+ .path = "test l2patch",
+ .short_help = "test l2patch rx <intfc> tx <intfc> [del]",
+ .function = test_patch_command_fn,
+};
+/* *INDENT-ON* */
+
+/** Display the contents of the l2patch table. */
+static clib_error_t *
+show_l2patch (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2_patch_main_t *l2pm = &l2_patch_main;
+ u32 rx_sw_if_index;
+ u32 no_entries = 1;
+
+ ASSERT (vec_len (l2pm->tx_next_by_rx_sw_if_index) ==
+ vec_len (l2pm->tx_sw_if_index_by_rx_sw_if_index));
+
+ for (rx_sw_if_index = 0;
+ rx_sw_if_index < vec_len (l2pm->tx_sw_if_index_by_rx_sw_if_index);
+ rx_sw_if_index++)
+ {
+ u32 tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index];
+ if (tx_sw_if_index != ~0)
+ {
+ no_entries = 0;
+ vlib_cli_output (vm, "%26U -> %U",
+ format_vnet_sw_if_index_name,
+ l2pm->vnet_main, rx_sw_if_index,
+ format_vnet_sw_if_index_name,
+ l2pm->vnet_main, tx_sw_if_index);
+ }
+ }
+
+ if (no_entries)
+ vlib_cli_output (vm, "no l2patch entries");
+
+ return 0;
+}
+
+/*?
+ * Show Layer 2 patch entries.
+ *
+ * @cliexpar
+ * @cliexstart{show l2patch}
+ * @cliexend
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_l2patch_cli, static) = {
+ .path = "show l2patch",
+ .short_help = "Show l2 interface cross-connect entries",
+ .function = show_l2patch,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+l2_patch_init (vlib_main_t * vm)
+{
+ l2_patch_main_t *mp = &l2_patch_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_patch_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_rw.c b/src/vnet/l2/l2_rw.c
new file mode 100644
index 00000000000..c54509d048b
--- /dev/null
+++ b/src/vnet/l2/l2_rw.c
@@ -0,0 +1,719 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_rw.h>
+
+/**
+ * @file
+ * @brief Layer 2 Rewrite.
+ *
+ * Layer 2-Rewrite node uses classify tables to match packets. Then, using
+ * the provisioned mask and value, modfies the packet header.
+ */
+
+
+l2_rw_main_t l2_rw_main;
+
+vlib_node_registration_t l2_rw_node;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 classify_table_index;
+ u32 rewrite_entry_index;
+} l2_rw_trace_t;
+
+static u8 *
+format_l2_rw_entry (u8 * s, va_list * args)
+{
+ l2_rw_entry_t *e = va_arg (*args, l2_rw_entry_t *);
+ l2_rw_main_t *rw = &l2_rw_main;
+ s = format (s, "%d - mask:%U value:%U\n",
+ e - rw->entries,
+ format_hex_bytes, e->mask,
+ e->rewrite_n_vectors * sizeof (u32x4), format_hex_bytes,
+ e->value, e->rewrite_n_vectors * sizeof (u32x4));
+ s =
+ format (s, " hits:%d skip_bytes:%d", e->hit_count,
+ e->skip_n_vectors * sizeof (u32x4));
+ return s;
+}
+
+static u8 *
+format_l2_rw_config (u8 * s, va_list * args)
+{
+ l2_rw_config_t *c = va_arg (*args, l2_rw_config_t *);
+ return format (s, "table-index:%d miss-index:%d",
+ c->table_index, c->miss_index);
+}
+
+/* packet trace format function */
+static u8 *
+format_l2_rw_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_rw_trace_t *t = va_arg (*args, l2_rw_trace_t *);
+ return format (s, "l2-rw: sw_if_index %d, table %d, entry %d",
+ t->sw_if_index, t->classify_table_index,
+ t->rewrite_entry_index);
+}
+
+always_inline l2_rw_config_t *
+l2_rw_get_config (u32 sw_if_index)
+{
+ l2_rw_main_t *rw = &l2_rw_main;
+ if (PREDICT_FALSE (!clib_bitmap_get (rw->configs_bitmap, sw_if_index)))
+ {
+ vec_validate (rw->configs, sw_if_index);
+ rw->configs[sw_if_index].table_index = ~0;
+ rw->configs[sw_if_index].miss_index = ~0;
+ rw->configs_bitmap =
+ clib_bitmap_set (rw->configs_bitmap, sw_if_index, 1);
+ }
+ return &rw->configs[sw_if_index];
+}
+
+static_always_inline void
+l2_rw_rewrite (l2_rw_entry_t * rwe, u8 * h)
+{
+ if (U32X4_ALIGNED (h))
+ {
+ u32x4 *d = ((u32x4 *) h) + rwe->skip_n_vectors;
+ switch (rwe->rewrite_n_vectors)
+ {
+ case 5:
+ d[4] = (d[4] & ~rwe->mask[4]) | rwe->value[4];
+ /* FALLTHROUGH */
+ case 4:
+ d[3] = (d[3] & ~rwe->mask[3]) | rwe->value[3];
+ /* FALLTHROUGH */
+ case 3:
+ d[2] = (d[2] & ~rwe->mask[2]) | rwe->value[2];
+ /* FALLTHROUGH */
+ case 2:
+ d[1] = (d[1] & ~rwe->mask[1]) | rwe->value[1];
+ /* FALLTHROUGH */
+ case 1:
+ d[0] = (d[0] & ~rwe->mask[0]) | rwe->value[0];
+ break;
+ default:
+ abort ();
+ }
+ }
+ else
+ {
+ u64 *d = ((u64 *) h) + rwe->skip_n_vectors * 2;
+ switch (rwe->rewrite_n_vectors)
+ {
+ case 5:
+ d[8] =
+ (d[8] & ~(((u64 *) rwe->mask)[8])) | (((u64 *) rwe->value)[8]);
+ d[9] =
+ (d[9] & ~(((u64 *) rwe->mask)[9])) | (((u64 *) rwe->value)[9]);
+ /* FALLTHROUGH */
+ case 4:
+ d[6] =
+ (d[6] & ~(((u64 *) rwe->mask)[6])) | (((u64 *) rwe->value)[6]);
+ d[7] =
+ (d[7] & ~(((u64 *) rwe->mask)[7])) | (((u64 *) rwe->value)[7]);
+ /* FALLTHROUGH */
+ case 3:
+ d[4] =
+ (d[4] & ~(((u64 *) rwe->mask)[4])) | (((u64 *) rwe->value)[4]);
+ d[5] =
+ (d[5] & ~(((u64 *) rwe->mask)[5])) | (((u64 *) rwe->value)[5]);
+ /* FALLTHROUGH */
+ case 2:
+ d[2] =
+ (d[2] & ~(((u64 *) rwe->mask)[2])) | (((u64 *) rwe->value)[2]);
+ d[3] =
+ (d[3] & ~(((u64 *) rwe->mask)[3])) | (((u64 *) rwe->value)[3]);
+ /* FALLTHROUGH */
+ case 1:
+ d[0] =
+ (d[0] & ~(((u64 *) rwe->mask)[0])) | (((u64 *) rwe->value)[0]);
+ d[1] =
+ (d[1] & ~(((u64 *) rwe->mask)[1])) | (((u64 *) rwe->value)[1]);
+ break;
+ default:
+ abort ();
+ }
+ }
+}
+
+static uword
+l2_rw_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ l2_rw_main_t *rw = &l2_rw_main;
+ u32 n_left_from, *from, *to_next, next_index;
+ vnet_classify_main_t *vcm = &vnet_classify_main;
+ f64 now = vlib_time_now (vlib_get_main ());
+ u32 prefetch_size = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, next0, sw_if_index0, feature_bitmap0, rwe_index0;
+ u32 bi1, next1, sw_if_index1, feature_bitmap1, rwe_index1;
+ vlib_buffer_t *b0, *b1;
+ ethernet_header_t *h0, *h1;
+ l2_rw_config_t *config0, *config1;
+ u64 hash0, hash1;
+ vnet_classify_table_t *t0, *t1;
+ vnet_classify_entry_t *e0, *e1;
+ l2_rw_entry_t *rwe0, *rwe1;
+
+ {
+ vlib_buffer_t *p2, *p3;
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ CLIB_PREFETCH (vlib_buffer_get_current (p2), prefetch_size, LOAD);
+ CLIB_PREFETCH (vlib_buffer_get_current (p3), prefetch_size, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ config0 = l2_rw_get_config (sw_if_index0); /*TODO: check sw_if_index0 value */
+ config1 = l2_rw_get_config (sw_if_index1); /*TODO: check sw_if_index0 value */
+ t0 = pool_elt_at_index (vcm->tables, config0->table_index);
+ t1 = pool_elt_at_index (vcm->tables, config1->table_index);
+ prefetch_size =
+ (t1->skip_n_vectors + t1->match_n_vectors) * sizeof (u32x4);
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ hash1 = vnet_classify_hash_packet (t1, (u8 *) h1);
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ e1 = vnet_classify_find_entry (t1, (u8 *) h1, hash1, now);
+
+ while (!e0 && (t0->next_table_index != ~0))
+ {
+ t0 = pool_elt_at_index (vcm->tables, t0->next_table_index);
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ }
+
+ while (!e1 && (t1->next_table_index != ~0))
+ {
+ t1 = pool_elt_at_index (vcm->tables, t1->next_table_index);
+ hash1 = vnet_classify_hash_packet (t1, (u8 *) h1);
+ e1 = vnet_classify_find_entry (t1, (u8 *) h1, hash1, now);
+ }
+
+ rwe_index0 = e0 ? e0->opaque_index : config0->miss_index;
+ rwe_index1 = e1 ? e1->opaque_index : config1->miss_index;
+
+ if (rwe_index0 != ~0)
+ {
+ rwe0 = pool_elt_at_index (rw->entries, rwe_index0);
+ l2_rw_rewrite (rwe0, (u8 *) h0);
+ }
+ if (rwe_index1 != ~0)
+ {
+ rwe1 = pool_elt_at_index (rw->entries, rwe_index1);
+ l2_rw_rewrite (rwe1, (u8 *) h1);
+ }
+
+ if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_rw_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->classify_table_index = config0->table_index;
+ t->rewrite_entry_index = rwe_index0;
+ }
+
+ if (PREDICT_FALSE ((b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_rw_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->classify_table_index = config1->table_index;
+ t->rewrite_entry_index = rwe_index1;
+ }
+
+ /* Update feature bitmap and get next feature index */
+ feature_bitmap0 =
+ vnet_buffer (b0)->l2.feature_bitmap & ~L2INPUT_FEAT_RW;
+ feature_bitmap1 =
+ vnet_buffer (b1)->l2.feature_bitmap & ~L2INPUT_FEAT_RW;
+ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap0;
+ vnet_buffer (b1)->l2.feature_bitmap = feature_bitmap1;
+ next0 = feat_bitmap_get_next_node_index (rw->feat_next_node_index,
+ feature_bitmap0);
+ next1 = feat_bitmap_get_next_node_index (rw->feat_next_node_index,
+ feature_bitmap1);
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, next0, sw_if_index0, feature_bitmap0, rwe_index0;
+ vlib_buffer_t *b0;
+ ethernet_header_t *h0;
+ l2_rw_config_t *config0;
+ u64 hash0;
+ vnet_classify_table_t *t0;
+ vnet_classify_entry_t *e0;
+ l2_rw_entry_t *rwe0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ config0 = l2_rw_get_config (sw_if_index0); /*TODO: check sw_if_index0 value */
+ t0 = pool_elt_at_index (vcm->tables, config0->table_index);
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+
+ while (!e0 && (t0->next_table_index != ~0))
+ {
+ t0 = pool_elt_at_index (vcm->tables, t0->next_table_index);
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ }
+
+ rwe_index0 = e0 ? e0->opaque_index : config0->miss_index;
+
+ if (rwe_index0 != ~0)
+ {
+ rwe0 = pool_elt_at_index (rw->entries, rwe_index0);
+ l2_rw_rewrite (rwe0, (u8 *) h0);
+ }
+
+ if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_rw_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->classify_table_index = config0->table_index;
+ t->rewrite_entry_index = rwe_index0;
+ }
+
+ /* Update feature bitmap and get next feature index */
+ feature_bitmap0 =
+ vnet_buffer (b0)->l2.feature_bitmap & ~L2INPUT_FEAT_RW;
+ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap0;
+ next0 = feat_bitmap_get_next_node_index (rw->feat_next_node_index,
+ feature_bitmap0);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+int
+l2_rw_mod_entry (u32 * index,
+ u8 * mask, u8 * value, u32 len, u32 skip, u8 is_del)
+{
+ l2_rw_main_t *rw = &l2_rw_main;
+ l2_rw_entry_t *e = 0;
+ if (*index != ~0)
+ {
+ if (pool_is_free_index (rw->entries, *index))
+ {
+ return -1;
+ }
+ e = pool_elt_at_index (rw->entries, *index);
+ }
+ else
+ {
+ pool_get (rw->entries, e);
+ *index = e - rw->entries;
+ }
+
+ if (!e)
+ return -1;
+
+ if (is_del)
+ {
+ pool_put (rw->entries, e);
+ return 0;
+ }
+
+ e->skip_n_vectors = skip / sizeof (u32x4);
+ skip -= e->skip_n_vectors * sizeof (u32x4);
+ e->rewrite_n_vectors = (skip + len - 1) / sizeof (u32x4) + 1;
+ vec_alloc_aligned (e->mask, e->rewrite_n_vectors, sizeof (u32x4));
+ memset (e->mask, 0, e->rewrite_n_vectors * sizeof (u32x4));
+ vec_alloc_aligned (e->value, e->rewrite_n_vectors, sizeof (u32x4));
+ memset (e->value, 0, e->rewrite_n_vectors * sizeof (u32x4));
+
+ clib_memcpy (((u8 *) e->value) + skip, value, len);
+ clib_memcpy (((u8 *) e->mask) + skip, mask, len);
+
+ int i;
+ for (i = 0; i < e->rewrite_n_vectors; i++)
+ {
+ e->value[i] &= e->mask[i];
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+l2_rw_entry_cli_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u32 index = ~0;
+ u8 *mask = 0;
+ u8 *value = 0;
+ u32 skip = 0;
+ u8 del = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "index %d", &index))
+ ;
+ else if (unformat (input, "mask %U", unformat_hex_string, &mask))
+ ;
+ else if (unformat (input, "value %U", unformat_hex_string, &value))
+ ;
+ else if (unformat (input, "skip %d", &skip))
+ ;
+ else if (unformat (input, "del"))
+ del = 1;
+ else
+ break;
+ }
+
+ if (!mask || !value)
+ return clib_error_return (0, "Unspecified mask or value");
+
+ if (vec_len (mask) != vec_len (value))
+ return clib_error_return (0, "Mask and value lengths must be identical");
+
+ int ret;
+ if ((ret =
+ l2_rw_mod_entry (&index, mask, value, vec_len (mask), skip, del)))
+ return clib_error_return (0, "Could not add entry");
+
+ return 0;
+}
+
+/*?
+ * Layer 2-Rewrite node uses classify tables to match packets. Then, using
+ * the provisioned mask and value, modfies the packet header.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_entry_cli, static) = {
+ .path = "l2 rewrite entry",
+ .short_help =
+ "l2 rewrite entry [index <index>] [mask <hex-mask>] [value <hex-value>] [skip <n_bytes>] [del]",
+ .function = l2_rw_entry_cli_fn,
+};
+/* *INDENT-ON* */
+
+int
+l2_rw_interface_set_table (u32 sw_if_index, u32 table_index, u32 miss_index)
+{
+ l2_rw_config_t *c = l2_rw_get_config (sw_if_index);
+ l2_rw_main_t *rw = &l2_rw_main;
+
+ c->table_index = table_index;
+ c->miss_index = miss_index;
+ u32 feature_bitmap = (table_index == ~0) ? 0 : L2INPUT_FEAT_RW;
+
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_RW, feature_bitmap);
+
+ if (c->table_index == ~0)
+ clib_bitmap_set (rw->configs_bitmap, sw_if_index, 0);
+
+ return 0;
+}
+
+static clib_error_t *
+l2_rw_interface_cli_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 table_index = ~0;
+ u32 sw_if_index = ~0;
+ u32 miss_index = ~0;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index);
+ }
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %d", &table_index))
+ ;
+ else if (unformat (input, "miss-index %d", &miss_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0,
+ "You must specify an interface 'iface <interface>'",
+ format_unformat_error, input);
+ int ret;
+ if ((ret =
+ l2_rw_interface_set_table (sw_if_index, table_index, miss_index)))
+ return clib_error_return (0, "l2_rw_interface_set_table returned %d",
+ ret);
+
+ return 0;
+}
+
+/*?
+ * Layer 2-Rewrite node uses classify tables to match packets. Then, using
+ * the provisioned mask and value, modfies the packet header.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_interface_cli, static) = {
+ .path = "set interface l2 rewrite",
+ .short_help =
+ "set interface l2 rewrite <interface> [table <table index>] [miss-index <entry-index>]",
+ .function = l2_rw_interface_cli_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+l2_rw_show_interfaces_cli_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ l2_rw_main_t *rw = &l2_rw_main;
+ if (clib_bitmap_count_set_bits (rw->configs_bitmap) == 0)
+ vlib_cli_output (vm, "No interface is currently using l2 rewrite\n");
+
+ uword i;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach(i, rw->configs_bitmap, {
+ vlib_cli_output (vm, "sw_if_index:%d %U\n", i, format_l2_rw_config, &rw->configs[i]);
+ });
+ /* *INDENT-ON* */
+ return 0;
+}
+
+/*?
+ * Layer 2-Rewrite node uses classify tables to match packets. Then, using
+ * the provisioned mask and value, modfies the packet header.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_show_interfaces_cli, static) = {
+ .path = "show l2 rewrite interfaces",
+ .short_help =
+ "show l2 rewrite interfaces",
+ .function = l2_rw_show_interfaces_cli_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+l2_rw_show_entries_cli_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2_rw_main_t *rw = &l2_rw_main;
+ l2_rw_entry_t *e;
+ if (pool_elts (rw->entries) == 0)
+ vlib_cli_output (vm, "No entries\n");
+
+ /* *INDENT-OFF* */
+ pool_foreach(e, rw->entries, {
+ vlib_cli_output (vm, "%U\n", format_l2_rw_entry, e);
+ });
+ /* *INDENT-ON* */
+ return 0;
+}
+
+/*?
+ * Layer 2-Rewrite node uses classify tables to match packets. Then, using
+ * the provisioned mask and value, modfies the packet header.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_show_entries_cli, static) = {
+ .path = "show l2 rewrite entries",
+ .short_help =
+ "show l2 rewrite entries",
+ .function = l2_rw_show_entries_cli_fn,
+};
+/* *INDENT-ON* */
+
+int
+l2_rw_enable_disable (u32 bridge_domain, u8 disable)
+{
+ u32 mask = L2INPUT_FEAT_RW;
+ l2input_set_bridge_features (bridge_domain, mask, disable ? 0 : mask);
+ return 0;
+}
+
+static clib_error_t *
+l2_rw_set_cli_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u32 bridge_domain;
+ u8 disable = 0;
+
+ if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT ||
+ !unformat (input, "%d", &bridge_domain))
+ {
+ return clib_error_return (0, "You must specify a bridge domain");
+ }
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT &&
+ unformat (input, "disable"))
+ {
+ disable = 1;
+ }
+
+ if (l2_rw_enable_disable (bridge_domain, disable))
+ return clib_error_return (0, "Could not enable or disable rewrite");
+
+ return 0;
+}
+
+/*?
+ * Layer 2-Rewrite node uses classify tables to match packets. Then, using
+ * the provisioned mask and value, modfies the packet header.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_set_cli, static) = {
+ .path = "set bridge-domain rewrite",
+ .short_help =
+ "set bridge-domain rewrite <bridge-domain> [disable]",
+ .function = l2_rw_set_cli_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+l2_rw_init (vlib_main_t * vm)
+{
+ l2_rw_main_t *rw = &l2_rw_main;
+ rw->configs = 0;
+ rw->entries = 0;
+ clib_bitmap_alloc (rw->configs_bitmap, 1);
+ feat_bitmap_init_next_nodes (vm,
+ l2_rw_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ rw->feat_next_node_index);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_rw_init);
+
+enum
+{
+ L2_RW_NEXT_DROP,
+ L2_RW_N_NEXT,
+};
+
+#define foreach_l2_rw_error \
+_(UNKNOWN, "Unknown error")
+
+typedef enum
+{
+#define _(sym,str) L2_RW_ERROR_##sym,
+ foreach_l2_rw_error
+#undef _
+ L2_RW_N_ERROR,
+} l2_rw_error_t;
+
+static char *l2_rw_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_rw_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_rw_node) = {
+ .function = l2_rw_node_fn,
+ .name = "l2-rw",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_rw_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(l2_rw_error_strings),
+ .error_strings = l2_rw_error_strings,
+ .runtime_data_bytes = 0,
+ .n_next_nodes = L2_RW_N_NEXT,
+ .next_nodes = { [L2_RW_NEXT_DROP] = "error-drop"},
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_rw_node, l2_rw_node_fn)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_rw.h b/src/vnet/l2/l2_rw.h
new file mode 100644
index 00000000000..49aa25fb601
--- /dev/null
+++ b/src/vnet/l2/l2_rw.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * l2_rw is based on vnet classifier and provides a way
+ * to modify packets matching a given table.
+ *
+ * Tables must be created using vnet's classify features.
+ * Entries contained within these tables must have their
+ * opaque index set to the rewrite entry created with l2_rw_mod_entry.
+ */
+
+#ifndef L2_RW_H_
+#define L2_RW_H_
+
+#include <vnet/l2/l2_input.h>
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct _l2_rw_entry {
+ u16 skip_n_vectors;
+ u16 rewrite_n_vectors;
+ u64 hit_count;
+ u32x4 *mask;
+ u32x4 *value;
+}) l2_rw_entry_t;
+/* *INDENT-ON* */
+
+/* l2_rw configuration for one interface */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct _l2_rw_config {
+ u32 table_index; /* Which classify table to use */
+ u32 miss_index; /* Rewrite entry to use if table does not match */
+}) l2_rw_config_t;
+/* *INDENT-ON* */
+
+typedef struct
+{
+ /* Next feature node indexes */
+ u32 feat_next_node_index[32];
+
+ /* A pool of entries */
+ l2_rw_entry_t *entries;
+
+ /* Config vector indexed by sw_if_index */
+ l2_rw_config_t *configs;
+ uword *configs_bitmap;
+} l2_rw_main_t;
+
+extern l2_rw_main_t l2_rw_main;
+
+/*
+ * Specifies which classify table and miss_index should be used
+ * with the given interface.
+ * Use special values ~0 in order to un-set table_index
+ * or miss_index.
+ * l2_rw feature is automatically enabled for the interface
+ * when table_index or miss_index is not ~0.
+ * returns 0 on success and something else on error.
+ */
+int l2_rw_interface_set_table (u32 sw_if_index,
+ u32 table_index, u32 miss_index);
+
+/*
+ * Creates, modifies or delete a rewrite entry.
+ * If *index != ~0, modifies an existing entry (or simply
+ * deletes it if is_del is set).
+ * If *index == ~0, creates a new entry and the created
+ * entry index is stored in *index (Does nothing if is_del
+ * is set).
+ * returns 0 on success and something else on error.
+ */
+int l2_rw_mod_entry (u32 * index,
+ u8 * mask, u8 * value, u32 len, u32 skip, u8 is_del);
+
+#endif /* L2_FW_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_vtr.c b/src/vnet/l2/l2_vtr.c
new file mode 100644
index 00000000000..95a4f15700a
--- /dev/null
+++ b/src/vnet/l2/l2_vtr.c
@@ -0,0 +1,770 @@
+/*
+ * l2_vtr.c : layer 2 vlan tag rewrite configuration
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/l2/l2_input_vtr.h>
+#include <vnet/l2/l2_output.h>
+
+#include <vppinfra/error.h>
+#include <vlib/cli.h>
+
+/**
+ * @file
+ * @brief Ethernet VLAN Tag Rewrite.
+ *
+ * VLAN tag rewrite provides the ability to change the VLAN tags on a packet.
+ * Existing tags can be popped, new tags can be pushed, and existing tags can
+ * be swapped with new tags. The rewrite feature is attached to a subinterface
+ * as input and output operations. The input operation is explicitly configured.
+ * The output operation is the symmetric opposite and is automatically derived
+ * from the input operation.
+ */
+
+/** Just a placeholder; ensures file is not eliminated by linker. */
+clib_error_t *
+l2_vtr_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_vtr_init);
+
+u32
+l2pbb_configure (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main, u32 sw_if_index, u32 vtr_op,
+ u8 * b_dmac, u8 * b_smac,
+ u16 b_vlanid, u32 i_sid, u16 vlan_outer_tag)
+{
+ u32 error = 0;
+ u32 enable = 0;
+
+ l2_output_config_t *config = 0;
+ vnet_hw_interface_t *hi;
+ hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+
+ if (!hi)
+ {
+ error = VNET_API_ERROR_INVALID_INTERFACE;
+ goto done;
+ }
+
+ // Config for this interface should be already initialized
+ ptr_config_t *in_config;
+ ptr_config_t *out_config;
+ config = vec_elt_at_index (l2output_main.configs, sw_if_index);
+ in_config = &(config->input_pbb_vtr);
+ out_config = &(config->output_pbb_vtr);
+
+ in_config->pop_bytes = 0;
+ in_config->push_bytes = 0;
+ out_config->pop_bytes = 0;
+ out_config->push_bytes = 0;
+ enable = (vtr_op != L2_VTR_DISABLED);
+
+ if (!enable)
+ goto done;
+
+ if (vtr_op == L2_VTR_POP_2)
+ {
+ in_config->pop_bytes = sizeof (ethernet_pbb_header_packed_t);
+ }
+ else if (vtr_op == L2_VTR_PUSH_2)
+ {
+ clib_memcpy (in_config->macs_tags.b_dst_address, b_dmac,
+ sizeof (in_config->macs_tags.b_dst_address));
+ clib_memcpy (in_config->macs_tags.b_src_address, b_smac,
+ sizeof (in_config->macs_tags.b_src_address));
+ in_config->macs_tags.b_type =
+ clib_net_to_host_u16 (ETHERNET_TYPE_DOT1AD);
+ in_config->macs_tags.priority_dei_id =
+ clib_net_to_host_u16 (b_vlanid & 0xFFF);
+ in_config->macs_tags.i_type =
+ clib_net_to_host_u16 (ETHERNET_TYPE_DOT1AH);
+ in_config->macs_tags.priority_dei_uca_res_sid =
+ clib_net_to_host_u32 (i_sid & 0xFFFFF);
+ in_config->push_bytes = sizeof (ethernet_pbb_header_packed_t);
+ }
+ else if (vtr_op == L2_VTR_TRANSLATE_2_2)
+ {
+ /* TODO after PoC */
+ }
+
+ /*
+ * Construct the output tag-rewrite config
+ *
+ * The push/pop values are always reversed
+ */
+ out_config->raw_data = in_config->raw_data;
+ out_config->pop_bytes = in_config->push_bytes;
+ out_config->push_bytes = in_config->pop_bytes;
+
+done:
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_VTR, enable);
+ if (config)
+ config->out_vtr_flag = (u8) enable;
+
+ /* output vtr enable is checked explicitly in l2_output */
+ return error;
+}
+
+/**
+ * Configure vtag tag rewrite on the given interface.
+ * Return 1 if there is an error, 0 if ok
+ */
+u32
+l2vtr_configure (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 sw_if_index, u32 vtr_op, u32 push_dot1q, /* ethertype of first pushed tag is dot1q/dot1ad */
+ u32 vtr_tag1, /* first pushed tag */
+ u32 vtr_tag2) /* second pushed tag */
+{
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ u32 hw_no_tags;
+ u32 error = 0;
+ l2_output_config_t *config;
+ vtr_config_t *in_config;
+ vtr_config_t *out_config;
+ u32 enable;
+ u32 push_inner_et;
+ u32 push_outer_et;
+ u32 cfg_tags;
+
+ hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+ if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
+ {
+ error = VNET_API_ERROR_INVALID_INTERFACE; /* non-ethernet interface */
+ goto done;
+ }
+
+ /* Init the config for this interface */
+ vec_validate (l2output_main.configs, sw_if_index);
+ config = vec_elt_at_index (l2output_main.configs, sw_if_index);
+ in_config = &(config->input_vtr);
+ out_config = &(config->output_vtr);
+ in_config->raw_tags = 0;
+ out_config->raw_tags = 0;
+
+ /* Get the configured tags for the interface */
+ si = vnet_get_sw_interface (vnet_main, sw_if_index);
+ hw_no_tags = (si->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+
+ /* Construct the input tag-rewrite config */
+
+ push_outer_et =
+ clib_net_to_host_u16 (push_dot1q ? ETHERNET_TYPE_VLAN :
+ ETHERNET_TYPE_DOT1AD);
+ push_inner_et = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN);
+ vtr_tag1 = clib_net_to_host_u16 (vtr_tag1);
+ vtr_tag2 = clib_net_to_host_u16 (vtr_tag2);
+
+ /* Determine number of vlan tags with explictly configured values */
+ cfg_tags = 0;
+ if (hw_no_tags || si->sub.eth.flags.no_tags)
+ {
+ cfg_tags = 0;
+ }
+ else if (si->sub.eth.flags.one_tag)
+ {
+ cfg_tags = 1;
+ if (si->sub.eth.flags.outer_vlan_id_any)
+ {
+ cfg_tags = 0;
+ }
+ }
+ else if (si->sub.eth.flags.two_tags)
+ {
+ cfg_tags = 2;
+ if (si->sub.eth.flags.inner_vlan_id_any)
+ {
+ cfg_tags = 1;
+ }
+ if (si->sub.eth.flags.outer_vlan_id_any)
+ {
+ cfg_tags = 0;
+ }
+ }
+
+ switch (vtr_op)
+ {
+ case L2_VTR_DISABLED:
+ in_config->push_and_pop_bytes = 0;
+ break;
+
+ case L2_VTR_POP_1:
+ if (cfg_tags < 1)
+ {
+ /* Need one or two tags */
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT;
+ goto done;
+ }
+ in_config->pop_bytes = 4;
+ in_config->push_bytes = 0;
+ break;
+
+ case L2_VTR_POP_2:
+ if (cfg_tags < 2)
+ {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; /* Need two tags */
+ goto done;
+ }
+ in_config->pop_bytes = 8;
+ in_config->push_bytes = 0;
+
+ out_config->push_bytes = in_config->pop_bytes;
+ out_config->pop_bytes = in_config->push_bytes;
+ break;
+
+ case L2_VTR_PUSH_1:
+ in_config->pop_bytes = 0;
+ in_config->push_bytes = 4;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[1].type = push_outer_et;
+ break;
+
+ case L2_VTR_PUSH_2:
+ in_config->pop_bytes = 0;
+ in_config->push_bytes = 8;
+ in_config->tags[0].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[0].type = push_outer_et;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag2;
+ in_config->tags[1].type = push_inner_et;
+ break;
+
+ case L2_VTR_TRANSLATE_1_1:
+ if (cfg_tags < 1)
+ {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; /* Need one or two tags */
+ goto done;
+ }
+ in_config->pop_bytes = 4;
+ in_config->push_bytes = 4;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[1].type = push_outer_et;
+ break;
+
+ case L2_VTR_TRANSLATE_1_2:
+ if (cfg_tags < 1)
+ {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; /* Need one or two tags */
+ goto done;
+ }
+ in_config->pop_bytes = 4;
+ in_config->push_bytes = 8;
+ in_config->tags[0].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[0].type = push_outer_et;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag2;
+ in_config->tags[1].type = push_inner_et;
+ break;
+
+ case L2_VTR_TRANSLATE_2_1:
+ if (cfg_tags < 2)
+ {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; /* Need two tags */
+ goto done;
+ }
+ in_config->pop_bytes = 8;
+ in_config->push_bytes = 4;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[1].type = push_outer_et;
+ break;
+
+ case L2_VTR_TRANSLATE_2_2:
+ if (cfg_tags < 2)
+ {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; /* Need two tags */
+ goto done;
+ }
+ in_config->pop_bytes = 8;
+ in_config->push_bytes = 8;
+ in_config->tags[0].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[0].type = push_outer_et;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag2;
+ in_config->tags[1].type = push_inner_et;
+ break;
+ }
+
+ /*
+ * Construct the output tag-rewrite config
+ *
+ * The push/pop values are always reversed
+ */
+ out_config->push_bytes = in_config->pop_bytes;
+ out_config->pop_bytes = in_config->push_bytes;
+
+ /* Any pushed tags are derived from the subinterface config */
+ push_outer_et =
+ clib_net_to_host_u16 (si->sub.eth.flags.dot1ad ? ETHERNET_TYPE_DOT1AD :
+ ETHERNET_TYPE_VLAN);
+ push_inner_et = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN);
+ vtr_tag1 = clib_net_to_host_u16 (si->sub.eth.outer_vlan_id);
+ vtr_tag2 = clib_net_to_host_u16 (si->sub.eth.inner_vlan_id);
+
+ if (out_config->push_bytes == 4)
+ {
+ out_config->tags[1].priority_cfi_and_id = vtr_tag1;
+ out_config->tags[1].type = push_outer_et;
+ }
+ else if (out_config->push_bytes == 8)
+ {
+ out_config->tags[0].priority_cfi_and_id = vtr_tag1;
+ out_config->tags[0].type = push_outer_et;
+ out_config->tags[1].priority_cfi_and_id = vtr_tag2;
+ out_config->tags[1].type = push_inner_et;
+ }
+
+ /* set the interface enable flags */
+ enable = (vtr_op != L2_VTR_DISABLED);
+ config->out_vtr_flag = (u8) enable;
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_VTR, enable);
+ /* output vtr enable is checked explicitly in l2_output */
+
+done:
+ return error;
+}
+
+/**
+ * Get vtag tag rewrite on the given interface.
+ * Return 1 if there is an error, 0 if ok
+ */
+u32
+l2vtr_get (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 sw_if_index, u32 * vtr_op, u32 * push_dot1q, /* ethertype of first pushed tag is dot1q/dot1ad */
+ u32 * vtr_tag1, /* first pushed tag */
+ u32 * vtr_tag2) /* second pushed tag */
+{
+ vnet_hw_interface_t *hi;
+ u32 error = 0;
+ vtr_config_t *in_config;
+
+ if (!vtr_op || !push_dot1q || !vtr_tag1 || !vtr_tag2)
+ {
+ clib_warning ("invalid arguments");
+ error = VNET_API_ERROR_INVALID_ARGUMENT;
+ goto done;
+ }
+
+ *vtr_op = L2_VTR_DISABLED;
+ *vtr_tag1 = 0;
+ *vtr_tag2 = 0;
+ *push_dot1q = 0;
+
+ hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+ if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
+ {
+ /* non-ethernet interface */
+ goto done;
+ }
+
+ if (sw_if_index >= vec_len (l2output_main.configs))
+ {
+ /* no specific config (return disabled) */
+ goto done;
+ }
+
+ /* Get the config for this interface */
+ in_config =
+ &(vec_elt_at_index (l2output_main.configs, sw_if_index)->input_vtr);
+
+ /* DISABLED */
+ if (in_config->push_and_pop_bytes == 0)
+ {
+ goto done;
+ }
+
+ /* find out vtr_op */
+ switch (in_config->pop_bytes)
+ {
+ case 0:
+ switch (in_config->push_bytes)
+ {
+ case 0:
+ /* DISABLED */
+ goto done;
+ case 4:
+ *vtr_op = L2_VTR_PUSH_1;
+ *vtr_tag1 =
+ clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q =
+ (ETHERNET_TYPE_VLAN ==
+ clib_host_to_net_u16 (in_config->tags[1].type));
+ break;
+ case 8:
+ *vtr_op = L2_VTR_PUSH_2;
+ *vtr_tag1 =
+ clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id);
+ *vtr_tag2 =
+ clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q =
+ (ETHERNET_TYPE_VLAN ==
+ clib_host_to_net_u16 (in_config->tags[0].type));
+ break;
+ default:
+ clib_warning ("invalid push_bytes count: %d",
+ in_config->push_bytes);
+ error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+ goto done;
+ }
+ break;
+
+ case 4:
+ switch (in_config->push_bytes)
+ {
+ case 0:
+ *vtr_op = L2_VTR_POP_1;
+ break;
+ case 4:
+ *vtr_op = L2_VTR_TRANSLATE_1_1;
+ *vtr_tag1 =
+ clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q =
+ (ETHERNET_TYPE_VLAN ==
+ clib_host_to_net_u16 (in_config->tags[1].type));
+ break;
+ case 8:
+ *vtr_op = L2_VTR_TRANSLATE_1_2;
+ *vtr_tag1 =
+ clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id);
+ *vtr_tag2 =
+ clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q =
+ (ETHERNET_TYPE_VLAN ==
+ clib_host_to_net_u16 (in_config->tags[0].type));
+ break;
+ default:
+ clib_warning ("invalid push_bytes count: %d",
+ in_config->push_bytes);
+ error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+ goto done;
+ }
+ break;
+
+ case 8:
+ switch (in_config->push_bytes)
+ {
+ case 0:
+ *vtr_op = L2_VTR_POP_2;
+ break;
+ case 4:
+ *vtr_op = L2_VTR_TRANSLATE_2_1;
+ *vtr_tag1 =
+ clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q =
+ (ETHERNET_TYPE_VLAN ==
+ clib_host_to_net_u16 (in_config->tags[1].type));
+ break;
+ case 8:
+ *vtr_op = L2_VTR_TRANSLATE_2_2;
+ *vtr_tag1 =
+ clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id);
+ *vtr_tag2 =
+ clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q =
+ (ETHERNET_TYPE_VLAN ==
+ clib_host_to_net_u16 (in_config->tags[0].type));
+ break;
+ default:
+ clib_warning ("invalid push_bytes count: %d",
+ in_config->push_bytes);
+ error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+ goto done;
+ }
+ break;
+
+ default:
+ clib_warning ("invalid pop_bytes count: %d", in_config->pop_bytes);
+ error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/**
+ * Set subinterface vtr enable/disable.
+ * The CLI format is:
+ * set interface l2 tag-rewrite <interface> [disable | pop 1 | pop 2 | push {dot1q|dot1ad} <tag> [<tag>]]
+ *
+ * "push" can also be replaced by "translate-{1|2}-{1|2}"
+ */
+static clib_error_t *
+int_l2_vtr (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 vtr_op;
+ u32 push_dot1q = 0;
+ u32 tag1 = 0, tag2 = 0;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ vtr_op = L2_VTR_DISABLED;
+
+ if (unformat (input, "disable"))
+ {
+ vtr_op = L2_VTR_DISABLED;
+ }
+ else if (unformat (input, "pop 1"))
+ {
+ vtr_op = L2_VTR_POP_1;
+ }
+ else if (unformat (input, "pop 2"))
+ {
+ vtr_op = L2_VTR_POP_2;
+
+ }
+ else if (unformat (input, "push dot1q %d %d", &tag1, &tag2))
+ {
+ vtr_op = L2_VTR_PUSH_2;
+ push_dot1q = 1;
+ }
+ else if (unformat (input, "push dot1ad %d %d", &tag1, &tag2))
+ {
+ vtr_op = L2_VTR_PUSH_2;
+
+ }
+ else if (unformat (input, "push dot1q %d", &tag1))
+ {
+ vtr_op = L2_VTR_PUSH_1;
+ push_dot1q = 1;
+ }
+ else if (unformat (input, "push dot1ad %d", &tag1))
+ {
+ vtr_op = L2_VTR_PUSH_1;
+
+ }
+ else if (unformat (input, "translate 1-1 dot1q %d", &tag1))
+ {
+ vtr_op = L2_VTR_TRANSLATE_1_1;
+ push_dot1q = 1;
+ }
+ else if (unformat (input, "translate 1-1 dot1ad %d", &tag1))
+ {
+ vtr_op = L2_VTR_TRANSLATE_1_1;
+
+ }
+ else if (unformat (input, "translate 2-1 dot1q %d", &tag1))
+ {
+ vtr_op = L2_VTR_TRANSLATE_2_1;
+ push_dot1q = 1;
+ }
+ else if (unformat (input, "translate 2-1 dot1ad %d", &tag1))
+ {
+ vtr_op = L2_VTR_TRANSLATE_2_1;
+
+ }
+ else if (unformat (input, "translate 2-2 dot1q %d %d", &tag1, &tag2))
+ {
+ vtr_op = L2_VTR_TRANSLATE_2_2;
+ push_dot1q = 1;
+ }
+ else if (unformat (input, "translate 2-2 dot1ad %d %d", &tag1, &tag2))
+ {
+ vtr_op = L2_VTR_TRANSLATE_2_2;
+
+ }
+ else if (unformat (input, "translate 1-2 dot1q %d %d", &tag1, &tag2))
+ {
+ vtr_op = L2_VTR_TRANSLATE_1_2;
+ push_dot1q = 1;
+ }
+ else if (unformat (input, "translate 1-2 dot1ad %d %d", &tag1, &tag2))
+ {
+ vtr_op = L2_VTR_TRANSLATE_1_2;
+
+ }
+ else
+ {
+ error =
+ clib_error_return (0,
+ "expecting [disable | pop 1 | pop 2 | push {dot1q|dot1ah} <tag> [<tag>]\n"
+ " | translate {1|2}-{1|2} {dot1q|dot1ah} <tag> [<tag>]] but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (l2vtr_configure (vm, vnm, sw_if_index, vtr_op, push_dot1q, tag1, tag2))
+ {
+ error =
+ clib_error_return (0,
+ "vlan tag rewrite is not compatible with interface");
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * VLAN tag rewrite provides the ability to change the VLAN tags on a packet.
+ * Existing tags can be popped, new tags can be pushed, and existing tags can
+ * be swapped with new tags. The rewrite feature is attached to a subinterface
+ * as input and output operations. The input operation is explicitly configured.
+ * The output operation is the symmetric opposite and is automatically derived
+ * from the input operation.
+ *
+ * <b>POP:</b> For pop operations, the subinterface encapsulation (the vlan
+ * tags specified when it was created) must have at least the number of popped
+ * tags. e.g. the \"pop 2\" operation would be rejected on a single-vlan interface.
+ * The output tag-rewrite operation for pops is to push the specified number of
+ * vlan tags onto the packet. The pushed tag values are the ones in the
+ * subinterface encapsulation.
+ *
+ * <b>PUSH:</b> For push operations, the ethertype is also specified. The
+ * output tag-rewrite operation for pushes is to pop the same number of tags
+ * off the packet. If the packet doesn't have enough tags it is dropped.
+ *
+ *
+ * @cliexpar
+ * @parblock
+ * By default a subinterface has no tag-rewrite. To return a subinterface to
+ * this state use:
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 disable}
+ *
+ * To pop vlan tags off packets received from a subinterface, use:
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 pop 1}
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 pop 2}
+ *
+ * To push one or two vlan tags onto packets received from an interface, use:
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 push dot1q 100}
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 push dot1ad 100 150}
+ *
+ * Tags can also be translated, which is basically a combination of a pop and push.
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 translate 1-1 dot1ad 100}
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 translate 2-2 dot1ad 100 150}
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 translate 1-2 dot1q 100}
+ * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 translate 2-1 dot1q 100 150}
+ *
+ * To display the VLAN Tag settings, show the associate bridge-domain:
+ * @cliexstart{show bridge-domain 200 detail}
+ * ID Index Learning U-Forwrd UU-Flood Flooding ARP-Term BVI-Intf
+ * 200 1 on on on on off N/A
+ *
+ * Interface Index SHG BVI VLAN-Tag-Rewrite
+ * GigabitEthernet0/8/0.200 5 0 - trans-1-1 dot1ad 100
+ * GigabitEthernet0/9/0.200 4 0 - none
+ * GigabitEthernet0/a/0.200 6 0 - none
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_vtr_cli, static) = {
+ .path = "set interface l2 tag-rewrite",
+ .short_help = "set interface l2 tag-rewrite <interface> [disable | pop {1|2} | push {dot1q|dot1ad} <tag> <tag>]",
+ .function = int_l2_vtr,
+};
+/* *INDENT-ON* */
+
+/**
+ * Set subinterface pbb vtr enable/disable.
+ * The CLI format is:
+ * set interface l2 pbb-tag-rewrite <interface> [disable | pop | push | translate_pbb_stag <outer_tag> dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]]
+ */
+static clib_error_t *
+int_l2_pbb_vtr (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index, tmp;
+ u32 vtr_op = L2_VTR_DISABLED;
+ u32 outer_tag = 0;
+ u8 dmac[6];
+ u8 smac[6];
+ u8 dmac_set = 0, smac_set = 0;
+ u16 b_vlanid = 0;
+ u32 s_id = ~0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user
+ (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else if (unformat (input, "disable"))
+ vtr_op = L2_VTR_DISABLED;
+ else if (vtr_op == L2_VTR_DISABLED && unformat (input, "pop"))
+ vtr_op = L2_VTR_POP_2;
+ else if (vtr_op == L2_VTR_DISABLED && unformat (input, "push"))
+ vtr_op = L2_VTR_PUSH_2;
+ else if (vtr_op == L2_VTR_DISABLED
+ && unformat (input, "translate_pbb_stag %d", &outer_tag))
+ vtr_op = L2_VTR_TRANSLATE_2_1;
+ else if (unformat (input, "dmac %U", unformat_ethernet_address, dmac))
+ dmac_set = 1;
+ else if (unformat (input, "smac %U", unformat_ethernet_address, smac))
+ smac_set = 1;
+ else if (unformat (input, "b_vlanid %d", &tmp))
+ b_vlanid = tmp;
+ else if (unformat (input, "s_id %d", &s_id))
+ ;
+ else
+ {
+ error = clib_error_return (0,
+ "expecting [disable | pop | push | translate_pbb_stag <outer_tag>\n"
+ "dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]]");
+ goto done;
+ }
+ }
+
+ if ((vtr_op == L2_VTR_PUSH_2 || vtr_op == L2_VTR_TRANSLATE_2_1)
+ && (!dmac_set || !smac_set || s_id == ~0))
+ {
+ error = clib_error_return (0,
+ "expecting dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]");
+ goto done;
+ }
+
+ if (l2pbb_configure
+ (vm, vnm, sw_if_index, vtr_op, dmac, smac, b_vlanid, s_id, outer_tag))
+ {
+ error =
+ clib_error_return (0,
+ "pbb tag rewrite is not compatible with interface");
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_pbb_vtr_cli, static) = {
+ .path = "set interface l2 pbb-tag-rewrite",
+ .short_help = "set interface l2 pbb-tag-rewrite <interface> [disable | pop | push | translate_pbb_stag <outer_tag> dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]]",
+ .function = int_l2_pbb_vtr,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_vtr.h b/src/vnet/l2/l2_vtr.h
new file mode 100644
index 00000000000..893b2272b04
--- /dev/null
+++ b/src/vnet/l2/l2_vtr.h
@@ -0,0 +1,270 @@
+/*
+ * l2_vtr.h : layer 2 vlan tag rewrite processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_vtr_h
+#define included_vnet_l2_vtr_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/l2/l2_vtr.h>
+
+/* VTR config options for API and CLI support */
+typedef enum
+{
+ L2_VTR_DISABLED,
+ L2_VTR_PUSH_1,
+ L2_VTR_PUSH_2,
+ L2_VTR_POP_1,
+ L2_VTR_POP_2,
+ L2_VTR_TRANSLATE_1_1,
+ L2_VTR_TRANSLATE_1_2,
+ L2_VTR_TRANSLATE_2_1,
+ L2_VTR_TRANSLATE_2_2
+} l2_vtr_op_t;
+
+/**
+ * Per-interface vlan tag rewrite configuration
+ * There will be one instance of this struct for each sw_if_index
+ * for both input vtr and output vtr
+ */
+typedef struct
+{
+ union
+ {
+ /*
+ * Up to two vlan tags to push.
+ * if there is only one vlan tag to push, it is in tags[1].
+ */
+ ethernet_vlan_header_tv_t tags[2];
+ u64 raw_tags;
+ };
+
+ union
+ {
+ struct
+ {
+ u8 push_bytes; /* number of bytes to push for up to 2 vlans (0,4,8) */
+ u8 pop_bytes; /* number of bytes to pop for up to 2 vlans (0,4,8) */
+ };
+ u16 push_and_pop_bytes; /* if 0 then the feature is disabled */
+ };
+} vtr_config_t;
+
+
+/**
+ * Perform the configured tag rewrite on the packet.
+ * Return 0 if ok, 1 if packet should be dropped (e.g. tried to pop
+ * too many tags)
+ */
+always_inline u32
+l2_vtr_process (vlib_buffer_t * b0, vtr_config_t * config)
+{
+ u64 temp_8;
+ u32 temp_4;
+ u8 *eth;
+
+ eth = vlib_buffer_get_current (b0);
+
+ /* copy the 12B dmac and smac to a temporary location */
+ temp_8 = *((u64 *) eth);
+ temp_4 = *((u32 *) (eth + 8));
+
+ /* adjust for popped tags */
+ eth += config->pop_bytes;
+
+ /* if not enough tags to pop then drop packet */
+ if (PREDICT_FALSE ((vnet_buffer (b0)->l2.l2_len - 12) < config->pop_bytes))
+ {
+ return 1;
+ }
+
+ /* copy the 2 new tags to the start of the packet */
+ *((u64 *) (eth + 12 - 8)) = config->raw_tags;
+
+ /* TODO: set cos bits */
+
+ /* adjust for pushed tags: */
+ eth -= config->push_bytes;
+
+ /* copy the 12 dmac and smac back to the packet */
+ *((u64 *) eth) = temp_8;
+ *((u32 *) (eth + 8)) = temp_4;
+
+ /* Update l2_len */
+ vnet_buffer (b0)->l2.l2_len +=
+ (word) config->push_bytes - (word) config->pop_bytes;
+
+ /* Update vlan tag count */
+ ethernet_buffer_adjust_vlan_count_by_bytes (b0,
+ (word) config->push_bytes -
+ (word) config->pop_bytes);
+
+ /* Update packet len */
+ vlib_buffer_advance (b0,
+ (word) config->pop_bytes - (word) config->push_bytes);
+
+ return 0;
+}
+
+
+/*
+ * Perform the egress pre-vlan tag rewrite EFP Filter check.
+ * The post-vlan tag rewrite check is a separate graph node.
+ *
+ * This check insures that a packet being output to an interface
+ * (before output vtr is performed) has vlan tags that match those
+ * on a packet received from that interface (after vtr has been performed).
+ * This means verifying that any tags pushed by input vtr are present
+ * on the packet.
+ *
+ * Return 0 if ok, 1 if packet should be dropped.
+ * This function should be passed the input vtr config for the interface.
+ */
+always_inline u8
+l2_efp_filter_process (vlib_buffer_t * b0, vtr_config_t * in_config)
+{
+ u8 *eth;
+ u64 packet_tags;
+ u64 tag_mask;
+
+ eth = vlib_buffer_get_current (b0);
+
+ /*
+ * If there are 2 tags pushed, they must match config->tags[0] and
+ * config->tags[1].
+ * If there is one tag pushed, it must match config->tag[1].
+ * If there are 0 tags pushed, the check passes.
+ */
+
+ /* mask for two vlan id and ethertypes, no cos bits */
+ tag_mask = clib_net_to_host_u64 (0xFFFF0FFFFFFF0FFF);
+ /* mask for one vlan id and ethertype, no cos bits */
+ tag_mask =
+ (in_config->push_bytes ==
+ 4) ? clib_net_to_host_u64 (0xFFFF0FFF) : tag_mask;
+ /* mask for always match */
+ tag_mask = (in_config->push_bytes == 0) ? 0 : tag_mask;
+
+ /*
+ * Read 8B from the packet, getting the proper set of vlan tags
+ * For 0 push bytes, the address doesn't matter since the mask
+ * clears the data to 0.
+ */
+ packet_tags = *((u64 *) (eth + 4 + in_config->push_bytes));
+
+ /* Check if the packet tags match the configured tags */
+ return (packet_tags & tag_mask) != in_config->raw_tags;
+}
+
+typedef struct
+{
+ union
+ {
+ ethernet_pbb_header_t macs_tags;
+ struct
+ {
+ u64 data1;
+ u64 data2;
+ u16 data3;
+ u32 data4;
+ } raw_data;
+ };
+ union
+ {
+ struct
+ {
+ u8 push_bytes; /* number of bytes to push pbb tags */
+ u8 pop_bytes; /* number of bytes to pop pbb tags */
+ };
+ u16 push_and_pop_bytes; /* if 0 then the feature is disabled */
+ };
+} ptr_config_t;
+
+always_inline u32
+l2_pbb_process (vlib_buffer_t * b0, ptr_config_t * config)
+{
+ u8 *eth = vlib_buffer_get_current (b0);
+
+ if (config->pop_bytes > 0)
+ {
+ ethernet_pbb_header_packed_t *ph = (ethernet_pbb_header_packed_t *) eth;
+
+ // drop packet without PBB header or with wrong I-tag or B-tag
+ if (clib_net_to_host_u16 (ph->priority_dei_id) !=
+ clib_net_to_host_u16 (config->macs_tags.priority_dei_id)
+ || clib_net_to_host_u32 (ph->priority_dei_uca_res_sid) !=
+ clib_net_to_host_u32 (config->macs_tags.priority_dei_uca_res_sid))
+ return 1;
+
+ eth += config->pop_bytes;
+ }
+
+ if (config->push_bytes > 0)
+ {
+ eth -= config->push_bytes;
+ // copy the B-DA (6B), B-SA (6B), B-TAG (4B), I-TAG (6B)
+ *((u64 *) eth) = config->raw_data.data1;
+ *((u64 *) (eth + 8)) = config->raw_data.data2;
+ *((u16 *) (eth + 16)) = config->raw_data.data3;
+ *((u32 *) (eth + 18)) = config->raw_data.data4;
+ }
+
+ /* Update l2_len */
+ vnet_buffer (b0)->l2.l2_len +=
+ (word) config->push_bytes - (word) config->pop_bytes;
+ /* Update packet len */
+ vlib_buffer_advance (b0,
+ (word) config->pop_bytes - (word) config->push_bytes);
+
+ return 0;
+}
+
+u32 l2pbb_configure (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main, u32 sw_if_index, u32 vtr_op,
+ u8 * b_dmac, u8 * b_smac,
+ u16 b_vlanid, u32 i_sid, u16 vlan_outer_tag);
+
+/**
+ * Configure vtag tag rewrite on the given interface.
+ * Return 1 if there is an error, 0 if ok
+ */
+u32 l2vtr_configure (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 sw_if_index,
+ u32 vtr_op, u32 push_dot1q, u32 vtr_tag1, u32 vtr_tag2);
+
+/**
+ * Get vtag tag rewrite on the given interface.
+ * Return 1 if there is an error, 0 if ok
+ */
+u32 l2vtr_get (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 sw_if_index,
+ u32 * vtr_op,
+ u32 * push_dot1q, u32 * vtr_tag1, u32 * vtr_tag2);
+
+#endif /* included_vnet_l2_vtr_h */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_xcrw.c b/src/vnet/l2/l2_xcrw.c
new file mode 100644
index 00000000000..70610a853d3
--- /dev/null
+++ b/src/vnet/l2/l2_xcrw.c
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/l2/l2_xcrw.h>
+
+/**
+ * @file
+ * General L2 / L3 cross-connect, used to set up
+ * "L2 interface <--> your-favorite-tunnel-encap" tunnels.
+ *
+ * We set up a typical L2 cross-connect or (future) bridge
+ * to hook L2 interface(s) up to the L3 stack in arbitrary ways.
+ *
+ * Each l2_xcrw adjacency specifies 3 things:
+ *
+ * 1. The next graph node (presumably in the L3 stack) to
+ * process the (L2 -> L3) packet
+ *
+ * 2. A new value for vnet_buffer(b)->sw_if_index[VLIB_TX]
+ * (i.e. a lookup FIB index),
+ *
+ * 3. A rewrite string to apply.
+ *
+ * Example: to cross-connect an L2 interface or (future) bridge
+ * to an mpls-o-gre tunnel, set up the L2 rewrite string as shown in
+ * mpls_gre_rewrite, and use "mpls-post-rewrite" to fix the
+ * GRE IP header checksum and length fields.
+ */
+
+typedef struct
+{
+ u32 next_index;
+ u32 tx_fib_index;
+} l2_xcrw_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_xcrw_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_xcrw_trace_t *t = va_arg (*args, l2_xcrw_trace_t *);
+
+ s = format (s, "L2_XCRW: next index %d tx_fib_index %d",
+ t->next_index, t->tx_fib_index);
+ return s;
+}
+
+l2_xcrw_main_t l2_xcrw_main;
+
+static vlib_node_registration_t l2_xcrw_node;
+
+static char *l2_xcrw_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_xcrw_error
+#undef _
+};
+
+static uword
+l2_xcrw_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_xcrw_next_t next_index;
+ l2_xcrw_main_t *xcm = &l2_xcrw_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_xcrw_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ l2_xcrw_adjacency_t *adj0, *adj1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ adj0 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index0);
+ adj1 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index1);
+
+ next0 = adj0->rewrite_header.next_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ adj0->rewrite_header.sw_if_index;
+
+ next1 = adj1->rewrite_header.next_index;
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] =
+ adj1->rewrite_header.sw_if_index;
+
+ em->counters[node_counter_base_index + next1]++;
+
+ if (PREDICT_TRUE (next0 > 0))
+ {
+ u8 *h0 = vlib_buffer_get_current (b0);
+ vnet_rewrite_one_header (adj0[0], h0,
+ adj0->rewrite_header.data_bytes);
+ vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+ em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++;
+ }
+
+ if (PREDICT_TRUE (next1 > 0))
+ {
+ u8 *h1 = vlib_buffer_get_current (b1);
+ vnet_rewrite_one_header (adj1[0], h1,
+ adj1->rewrite_header.data_bytes);
+ vlib_buffer_advance (b1, -adj1->rewrite_header.data_bytes);
+ em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++;
+ }
+
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_xcrw_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->tx_fib_index = adj0->rewrite_header.sw_if_index;
+ }
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_xcrw_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->next_index = next1;
+ t->tx_fib_index = adj1->rewrite_header.sw_if_index;
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ l2_xcrw_adjacency_t *adj0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ adj0 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index0);
+
+ next0 = adj0->rewrite_header.next_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ adj0->rewrite_header.sw_if_index;
+
+ if (PREDICT_TRUE (next0 > 0))
+ {
+ u8 *h0 = vlib_buffer_get_current (b0);
+ vnet_rewrite_one_header (adj0[0], h0,
+ adj0->rewrite_header.data_bytes);
+ vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+ em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++;
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_xcrw_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->tx_fib_index = adj0->rewrite_header.sw_if_index;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_xcrw_node, static) = {
+ .function = l2_xcrw_node_fn,
+ .name = "l2-xcrw",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_xcrw_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_xcrw_error_strings),
+ .error_strings = l2_xcrw_error_strings,
+
+ .n_next_nodes = L2_XCRW_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_XCRW_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_xcrw_node, l2_xcrw_node_fn)
+ clib_error_t *l2_xcrw_init (vlib_main_t * vm)
+{
+ l2_xcrw_main_t *mp = &l2_xcrw_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = &vnet_main;
+ mp->tunnel_index_by_l2_sw_if_index = hash_create (0, sizeof (uword));
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_xcrw_init);
+
+static uword
+dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+static u8 *
+format_xcrw_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "xcrw%d", dev_instance);
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (xcrw_device_class,static) = {
+ .name = "Xcrw",
+ .format_device_name = format_xcrw_name,
+ .tx_function = dummy_interface_tx,
+};
+/* *INDENT-ON* */
+
+/* Create a sham tunnel interface and return its sw_if_index */
+static u32
+create_xcrw_interface (vlib_main_t * vm)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ static u32 instance;
+ u8 address[6];
+ u32 hw_if_index;
+ vnet_hw_interface_t *hi;
+ u32 sw_if_index;
+
+ /* mac address doesn't really matter */
+ memset (address, 0, sizeof (address));
+ address[2] = 0x12;
+
+ /* can returns error iff phy != 0 */
+ (void) ethernet_register_interface
+ (vnm, xcrw_device_class.index, instance++, address, &hw_if_index,
+ /* flag change */ 0);
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ sw_if_index = hi->sw_if_index;
+ vnet_sw_interface_set_flags (vnm, sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ /* Output to the sham tunnel invokes the encap node */
+ hi->output_node_index = l2_xcrw_node.index;
+
+ return sw_if_index;
+}
+
+int
+vnet_configure_l2_xcrw (vlib_main_t * vm, vnet_main_t * vnm,
+ u32 l2_sw_if_index, u32 tx_fib_index,
+ u8 * rewrite, u32 next_node_index, int is_add)
+{
+ l2_xcrw_main_t *xcm = &l2_xcrw_main;
+ l2_xcrw_adjacency_t *a;
+ l2_xcrw_tunnel_t *t;
+ uword *p;
+
+ if (is_add)
+ {
+
+ pool_get (xcm->tunnels, t);
+
+ /* No interface allocated? Do it. Otherwise, set admin up */
+ if (t->tunnel_sw_if_index == 0)
+ t->tunnel_sw_if_index = create_xcrw_interface (vm);
+ else
+ vnet_sw_interface_set_flags (vnm, t->tunnel_sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ t->l2_sw_if_index = l2_sw_if_index;
+
+ vec_validate (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+
+ a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+ memset (a, 0, sizeof (*a));
+
+ a->rewrite_header.sw_if_index = tx_fib_index;
+
+ /*
+ * Add or find a dynamic disposition for the successor node,
+ * e.g. so we can ship pkts to mpls_post_rewrite...
+ */
+ a->rewrite_header.next_index =
+ vlib_node_add_next (vm, l2_xcrw_node.index, next_node_index);
+
+ if (vec_len (rewrite))
+ vnet_rewrite_set_data (a[0], rewrite, vec_len (rewrite));
+
+ set_int_l2_mode (vm, vnm, MODE_L2_XC, t->l2_sw_if_index, 0, 0, 0,
+ t->tunnel_sw_if_index);
+ hash_set (xcm->tunnel_index_by_l2_sw_if_index,
+ t->l2_sw_if_index, t - xcm->tunnels);
+ return 0;
+ }
+ else
+ {
+ p = hash_get (xcm->tunnel_index_by_l2_sw_if_index, l2_sw_if_index);
+ if (p == 0)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ t = pool_elt_at_index (xcm->tunnels, p[0]);
+
+ a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+ /* Reset adj to drop traffic */
+ memset (a, 0, sizeof (*a));
+
+ set_int_l2_mode (vm, vnm, MODE_L3, t->l2_sw_if_index, 0, 0, 0, 0);
+
+ vnet_sw_interface_set_flags (vnm, t->tunnel_sw_if_index, 0 /* down */ );
+
+ hash_unset (xcm->tunnel_index_by_l2_sw_if_index, l2_sw_if_index);
+ pool_put (xcm->tunnels, t);
+ }
+ return 0;
+}
+
+
+static clib_error_t *
+set_l2_xcrw_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ int is_add = 1;
+ int is_ipv6 = 0; /* for fib id -> fib index mapping */
+ u32 tx_fib_id = ~0;
+ u32 tx_fib_index = ~0;
+ u32 next_node_index = ~0;
+ u32 l2_sw_if_index;
+ u8 *rw = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ int rv;
+
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ if (!unformat (line_input, "%U",
+ unformat_vnet_sw_interface, vnm, &l2_sw_if_index))
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "next %U",
+ unformat_vlib_node, vm, &next_node_index))
+ ;
+ else if (unformat (line_input, "tx-fib-id %d", &tx_fib_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "ipv6"))
+ is_ipv6 = 1;
+ else if (unformat (line_input, "rw %U", unformat_hex_string, &rw));
+ else
+ break;
+ }
+
+ if (next_node_index == ~0)
+ return clib_error_return (0, "next node not specified");
+
+ if (tx_fib_id != ~0)
+ {
+ uword *p;
+
+ if (is_ipv6)
+ p = hash_get (ip6_main.fib_index_by_table_id, tx_fib_id);
+ else
+ p = hash_get (ip4_main.fib_index_by_table_id, tx_fib_id);
+
+ if (p == 0)
+ return clib_error_return (0, "nonexistent tx_fib_id %d", tx_fib_id);
+
+ tx_fib_index = p[0];
+ }
+
+ rv = vnet_configure_l2_xcrw (vm, vnm, l2_sw_if_index, tx_fib_index,
+ rw, next_node_index, is_add);
+
+ switch (rv)
+ {
+
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return (0, "%U not cross-connected",
+ format_vnet_sw_if_index_name,
+ vnm, l2_sw_if_index);
+ default:
+ return clib_error_return (0, "vnet_configure_l2_xcrw returned %d", rv);
+ }
+
+ vec_free (rw);
+
+ return 0;
+}
+
+/*?
+ * Add or delete a Layer 2 to Layer 3 rewrite cross-connect. This is
+ * used to hook Layer 2 interface(s) up to the Layer 3 stack in
+ * arbitrary ways. For example, cross-connect an L2 interface or
+ * (future) bridge to an mpls-o-gre tunnel. Set up the L2 rewrite
+ * string as shown in mpls_gre_rewrite, and use \"mpls-post-rewrite\"
+ * to fix the GRE IP header checksum and length fields.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = {
+ .path = "set interface l2 xcrw",
+ .short_help =
+ "set interface l2 xcrw <interface> next <node-name>\n"
+ " [del] [tx-fib-id <id>] [ipv6] rw <hex-bytes>",
+ .function = set_l2_xcrw_command_fn,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_l2xcrw (u8 * s, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ l2_xcrw_tunnel_t *t = va_arg (*args, l2_xcrw_tunnel_t *);
+ l2_xcrw_main_t *xcm = &l2_xcrw_main;
+ vlib_main_t *vm = vlib_get_main ();
+ l2_xcrw_adjacency_t *a;
+ u8 *rewrite_string;
+
+ if (t == 0)
+ {
+ s = format (s, "%-25s%s", "L2 interface", "Tunnel Details");
+ return s;
+ }
+
+ s = format (s, "%-25U %U ",
+ format_vnet_sw_if_index_name, vnm, t->l2_sw_if_index,
+ format_vnet_sw_if_index_name, vnm, t->tunnel_sw_if_index);
+
+ a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+
+ s = format (s, "next %U ",
+ format_vlib_next_node_name, vm, l2_xcrw_node.index,
+ a->rewrite_header.next_index);
+
+ if (a->rewrite_header.sw_if_index != ~0)
+ s = format (s, "tx fib index %d ", a->rewrite_header.sw_if_index);
+
+ if (a->rewrite_header.data_bytes)
+ {
+ rewrite_string = (u8 *) (a + 1);
+ rewrite_string -= a->rewrite_header.data_bytes;
+ s = format (s, "rewrite data: %U ",
+ format_hex_bytes, rewrite_string,
+ a->rewrite_header.data_bytes);
+ }
+
+ s = format (s, "\n");
+
+ return s;
+}
+
+
+static clib_error_t *
+show_l2xcrw_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ l2_xcrw_main_t *xcm = &l2_xcrw_main;
+ l2_xcrw_tunnel_t *t;
+
+ if (pool_elts (xcm->tunnels) == 0)
+ {
+ vlib_cli_output (vm, "No L2 / L3 rewrite cross-connects configured");
+ return 0;
+ }
+
+ vlib_cli_output (vm, "%U", format_l2xcrw, 0, 0);
+
+ /* *INDENT-OFF* */
+ pool_foreach (t, xcm->tunnels,
+ ({
+ vlib_cli_output (vm, "%U", format_l2xcrw, vnm, t);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/*?
+ * Display a Layer 2 to Layer 3 rewrite cross-connect. This is used to
+ * hook Layer 2 interface(s) up to the Layer 3 stack in arbitrary ways.
+ *
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_l2xcrw_command, static) = {
+ .path = "show l2xcrw",
+ .short_help = "show l2xcrw",
+ .function = show_l2xcrw_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_xcrw.h b/src/vnet/l2/l2_xcrw.h
new file mode 100644
index 00000000000..ca80aae9929
--- /dev/null
+++ b/src/vnet/l2/l2_xcrw.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_l2_xcrw_h__
+#define __included_l2_xcrw_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/api_errno.h>
+#include <vnet/ethernet/ethernet.h>
+
+typedef struct
+{
+ /*
+ * Let: rewrite_header.sw_if_index = tx_fib_index or ~0.
+ * rewrite_header.next_index = L2_XCRW_NEXT_XXX
+ */
+ vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE);
+} l2_xcrw_adjacency_t;
+
+typedef struct
+{
+ /* L2 interface */
+ u32 l2_sw_if_index;
+
+ /* Tunnel interface */
+ u32 tunnel_sw_if_index; /* This field remains set in freed pool elts */
+
+} l2_xcrw_tunnel_t;
+
+typedef struct
+{
+ u32 cached_next_index;
+
+ /* Vector of cross-connect rewrites */
+ l2_xcrw_adjacency_t *adj_by_sw_if_index;
+
+ /* Pool of xcrw tunnels */
+ l2_xcrw_tunnel_t *tunnels;
+
+ /* Tunnel index by tunnel sw_if_index */
+ uword *tunnel_index_by_l2_sw_if_index;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_xcrw_main_t;
+
+typedef enum
+{
+ L2_XCRW_NEXT_DROP,
+ L2_XCRW_N_NEXT,
+} l2_xcrw_next_t;
+
+#define foreach_l2_xcrw_error \
+_(DROP, "Packets dropped") \
+_(FWD, "Packets forwarded")
+
+typedef enum
+{
+#define _(sym,str) L2_XCRW_ERROR_##sym,
+ foreach_l2_xcrw_error
+#undef _
+ L2_XCRW_N_ERROR,
+} l2_xcrw_error_t;
+
+#endif /* __included_l2_xcrw_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */