Diffstat (limited to 'vnet/vnet/l2')
-rw-r--r--  vnet/vnet/l2/feat_bitmap.c     166
-rw-r--r--  vnet/vnet/l2/feat_bitmap.h      80
-rw-r--r--  vnet/vnet/l2/l2_bd.c           695
-rw-r--r--  vnet/vnet/l2/l2_bd.h           120
-rw-r--r--  vnet/vnet/l2/l2_bvi.c           35
-rw-r--r--  vnet/vnet/l2/l2_bvi.h          122
-rw-r--r--  vnet/vnet/l2/l2_classify.c     551
-rw-r--r--  vnet/vnet/l2/l2_classify.h      76
-rw-r--r--  vnet/vnet/l2/l2_efp_filter.c   572
-rw-r--r--  vnet/vnet/l2/l2_efp_filter.h    28
-rw-r--r--  vnet/vnet/l2/l2_fib.c          567
-rw-r--r--  vnet/vnet/l2/l2_fib.h          226
-rw-r--r--  vnet/vnet/l2/l2_flood.c        520
-rw-r--r--  vnet/vnet/l2/l2_flood.h         28
-rw-r--r--  vnet/vnet/l2/l2_fwd.c          446
-rw-r--r--  vnet/vnet/l2/l2_fwd.h           29
-rw-r--r--  vnet/vnet/l2/l2_input.c        963
-rw-r--r--  vnet/vnet/l2/l2_input.h        279
-rw-r--r--  vnet/vnet/l2/l2_input_acl.c    427
-rw-r--r--  vnet/vnet/l2/l2_input_vtr.c    314
-rw-r--r--  vnet/vnet/l2/l2_input_vtr.h     43
-rw-r--r--  vnet/vnet/l2/l2_learn.c        504
-rw-r--r--  vnet/vnet/l2/l2_learn.h         47
-rw-r--r--  vnet/vnet/l2/l2_output.c       541
-rw-r--r--  vnet/vnet/l2/l2_output.h       219
-rw-r--r--  vnet/vnet/l2/l2_output_acl.c   335
-rw-r--r--  vnet/vnet/l2/l2_patch.c        432
-rw-r--r--  vnet/vnet/l2/l2_vtr.c          448
-rw-r--r--  vnet/vnet/l2/l2_vtr.h          167
-rw-r--r--  vnet/vnet/l2/l2_xcrw.c         559
-rw-r--r--  vnet/vnet/l2/l2_xcrw.h          78
31 files changed, 9617 insertions, 0 deletions
diff --git a/vnet/vnet/l2/feat_bitmap.c b/vnet/vnet/l2/feat_bitmap.c
new file mode 100644
index 00000000000..74917cda3ae
--- /dev/null
+++ b/vnet/vnet/l2/feat_bitmap.c
@@ -0,0 +1,166 @@
+/*
+ * feat_bitmap.c: bitmap for managing feature invocation
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+
+// Drop node for feature bitmaps
+// For features that just drop the packet, or are not yet implemented.
+// Initial feature dispatch nodes don't need to set b0->error for a
+// possible drop because that is done here.
+// The next node is always error-drop.
+
+
+static vlib_node_registration_t feat_bitmap_drop_node;
+
+#define foreach_feat_bitmap_drop_error \
+_(NO_FWD, "L2 feature forwarding disabled") \
+_(NYI, "L2 feature not implemented")
+
+typedef enum {
+#define _(sym,str) FEAT_BITMAP_DROP_ERROR_##sym,
+ foreach_feat_bitmap_drop_error
+#undef _
+ FEAT_BITMAP_DROP_N_ERROR,
+} feat_bitmap_drop_error_t;
+
+static char * feat_bitmap_drop_error_strings[] = {
+#define _(sym,string) string,
+ foreach_feat_bitmap_drop_error
+#undef _
+};
+
+typedef enum {
+ FEAT_BITMAP_DROP_NEXT_DROP,
+ FEAT_BITMAP_DROP_N_NEXT,
+} feat_bitmap_drop_next_t;
+
+typedef struct {
+ u32 feature_bitmap;
+} feat_bitmap_drop_trace_t;
+
+/* packet trace format function */
+static u8 * format_feat_bitmap_drop_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ feat_bitmap_drop_trace_t * t = va_arg (*args, feat_bitmap_drop_trace_t *);
+
+ s = format (s, "feat_bitmap_drop: feature bitmap 0x%08x", t->feature_bitmap);
+ return s;
+}
+
+static uword
+feat_bitmap_drop_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ feat_bitmap_drop_next_t next_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
+ feat_bitmap_drop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->feature_bitmap = vnet_buffer(b0)->l2.feature_bitmap;
+ }
+
+        if (vnet_buffer(b0)->l2.feature_bitmap == 1) {
+          // Only bit 0 (the last feature) is set: this is the
+          // no-forwarding catch-all
+ b0->error = node->errors[FEAT_BITMAP_DROP_ERROR_NO_FWD];
+ } else {
+ b0->error = node->errors[FEAT_BITMAP_DROP_ERROR_NYI];
+ }
+ next0 = FEAT_BITMAP_DROP_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+clib_error_t *feat_bitmap_drop_init (vlib_main_t *vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (feat_bitmap_drop_init);
+
+VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = {
+ .function = feat_bitmap_drop_node_fn,
+ .name = "feature-bitmap-drop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_feat_bitmap_drop_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(feat_bitmap_drop_error_strings),
+ .error_strings = feat_bitmap_drop_error_strings,
+
+ .n_next_nodes = FEAT_BITMAP_DROP_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [FEAT_BITMAP_DROP_NEXT_DROP] = "error-drop",
+ },
+};
+
+
diff --git a/vnet/vnet/l2/feat_bitmap.h b/vnet/vnet/l2/feat_bitmap.h
new file mode 100644
index 00000000000..7dd36a7712e
--- /dev/null
+++ b/vnet/vnet/l2/feat_bitmap.h
@@ -0,0 +1,80 @@
+/*
+ * feat_bitmap.h: bitmap for managing feature invocation
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_feat_bitmap_h
+#define included_vnet_l2_feat_bitmap_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+/*
+ * The feature bitmap is a way of organizing input and output feature graph nodes.
+ * The set of features to be executed is arranged in a bitmap with one bit per
+ * feature, each bit positioned in the order in which the features should be
+ * executed. Features can be dynamically removed from the set by masking off their
+ * corresponding bits. The bitmap is stored in packet context. Each feature clears
+ * its bit and then calls feat_bitmap_get_next_node_index() to go to the next
+ * graph node.
+ */
+
+
+// 32 features in a u32 bitmap
+#define FEAT_MAX 32
+
+// Initialize the feature next-node indexes of a graph node.
+// Should be called by the init function of each feature graph node.
+always_inline
+void feat_bitmap_init_next_nodes (
+ vlib_main_t * vm,
+ u32 node_index, // the current graph node index
+ u32 num_features, // number of entries in feat_names
+ char ** feat_names, // array of feature graph node names
+ u32 * next_nodes) // array of 32 next indexes to init
+{
+ u32 idx;
+
+ ASSERT(num_features <= FEAT_MAX);
+
+ for (idx=0; idx<num_features; idx++) {
+ if (vlib_get_node_by_name(vm, (u8 *) feat_names[idx])) {
+ next_nodes[idx] =
+ vlib_node_add_named_next(vm, node_index, feat_names[idx]);
+ } else { // Node may be in plugin which is not installed, use drop node
+ next_nodes[idx] =
+ vlib_node_add_named_next(vm, node_index, "feature-bitmap-drop");
+ }
+ }
+
+ // All unassigned bits go to the drop node
+ for (; idx<FEAT_MAX; idx++) {
+ next_nodes[idx] = vlib_node_add_named_next(vm, node_index, "feature-bitmap-drop");
+ }
+}
+
+// Return the graph node index for the feature corresponding to the
+// most significant set bit in the bitmap (features execute from the
+// high-order bit down to the bit 0 catch-all).
+always_inline
+u32 feat_bitmap_get_next_node_index (u32 * next_nodes, u32 bitmap)
+{
+ u32 first_bit;
+
+ count_leading_zeros(first_bit, bitmap);
+ first_bit = uword_bits - 1 - first_bit;
+ return next_nodes[first_bit];
+}
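+
+// Usage sketch (illustrative only, not part of the original submission):
+// a feature graph node would typically clear its own feature bit, save
+// the bitmap for downstream nodes, and dispatch via the table built by
+// feat_bitmap_init_next_nodes(). "my_feat_bit" is a placeholder for one
+// of the caller's L2INPUT_FEAT_* flags.
+always_inline u32
+feat_bitmap_example_dispatch (vlib_buffer_t * b0,
+                              u32 * next_nodes, // from feat_bitmap_init_next_nodes
+                              u32 my_feat_bit)  // placeholder feature bit
+{
+  u32 bitmap;
+
+  // Remove ourself from the feature bitmap and save the result
+  bitmap = vnet_buffer (b0)->l2.feature_bitmap & ~my_feat_bit;
+  vnet_buffer (b0)->l2.feature_bitmap = bitmap;
+
+  // Dispatch on the most significant remaining bit; bit 0 is the
+  // no-forwarding catch-all handled by the feature-bitmap-drop node
+  return feat_bitmap_get_next_node_index (next_nodes, bitmap);
+}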
+
+#endif // included_vnet_l2_feat_bitmap_h
diff --git a/vnet/vnet/l2/l2_bd.c b/vnet/vnet/l2/l2_bd.c
new file mode 100644
index 00000000000..24f96d5749c
--- /dev/null
+++ b/vnet/vnet/l2/l2_bd.c
@@ -0,0 +1,695 @@
+/*
+ * l2_bd.c : layer 2 bridge domain
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vlib/cli.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/format.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/l2/l2_fib.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/vec.h>
+
+bd_main_t bd_main;
+
+// Init bridge domain if not done already
+// For feature bitmap, set all bits except ARP termination
+inline void
+bd_validate (l2_bridge_domain_t * bd_config)
+{
+ if (!bd_is_valid (bd_config)) {
+ bd_config->feature_bitmap = ~L2INPUT_FEAT_ARP_TERM;
+ bd_config->bvi_sw_if_index = ~0;
+ bd_config->members = 0;
+ bd_config->mac_by_ip4 = 0;
+// bd_config->mac_by_ip6 = hash_create_mem (0, sizeof(ip6_address_t),
+// sizeof(uword));
+ }
+}
+
+u32 bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id)
+{
+ uword * p;
+ u32 rv;
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p)
+ return (p[0]);
+
+ rv = clib_bitmap_first_clear (bdm->bd_index_bitmap);
+
+ // mark this index busy
+ bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, rv, 1);
+
+ hash_set (bdm->bd_index_by_bd_id, bd_id, rv);
+
+ vec_validate (l2input_main.bd_configs, rv);
+ l2input_main.bd_configs[rv].bd_id = bd_id;
+
+ return rv;
+}
+
+int bd_delete_bd_index (bd_main_t * bdm, u32 bd_id)
+{
+ uword * p;
+ u32 bd_index;
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p == 0)
+ return -1;
+
+ bd_index = p[0];
+
+ // mark this index clear
+ bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, bd_index, 0);
+ hash_unset (bdm->bd_index_by_bd_id, bd_id);
+
+ l2input_main.bd_configs[bd_index].bd_id = ~0;
+ l2input_main.bd_configs[bd_index].feature_bitmap = 0;
+
+ return 0;
+}
+
+void
+bd_add_member (l2_bridge_domain_t * bd_config,
+ l2_flood_member_t * member)
+{
+ // Add one element to the vector
+
+ // When flooding, the bvi interface (if present) must be the last member
+ // processed due to how BVI processing can change the packet. To enable
+ // this order, we make the bvi interface the first in the vector and
+ // flooding walks the vector in reverse.
+ if ((member->flags == L2_FLOOD_MEMBER_NORMAL) ||
+ (vec_len(bd_config->members) == 0)) {
+ vec_add1 (bd_config->members, *member);
+
+ } else {
+ // Move 0th element to the end
+ vec_add1 (bd_config->members, bd_config->members[0]);
+ bd_config->members[0] = *member;
+ }
+}
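+
+// Worked example (illustrative): adding normal member A, then BVI member
+// B, then normal member C yields the vector [B, A, C]. Flooding walks the
+// vector in reverse (C, A, B), so the BVI member is processed last.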
+
+
+#define BD_REMOVE_ERROR_OK 0
+#define BD_REMOVE_ERROR_NOT_FOUND 1
+
+u32
+bd_remove_member (l2_bridge_domain_t * bd_config,
+ u32 sw_if_index)
+{
+ u32 ix;
+
+ // Find and delete the member
+ vec_foreach_index(ix, bd_config->members) {
+ if (vec_elt(bd_config->members, ix).sw_if_index == sw_if_index) {
+ vec_del1 (bd_config->members, ix);
+ return BD_REMOVE_ERROR_OK;
+ }
+ }
+
+ return BD_REMOVE_ERROR_NOT_FOUND;
+}
+
+
+clib_error_t *l2bd_init (vlib_main_t *vm)
+{
+ bd_main_t *bdm = &bd_main;
+ u32 bd_index;
+ bdm->bd_index_by_bd_id = hash_create (0, sizeof(uword));
+ // create a dummy bd with bd_id of 0 and bd_index of 0 with feature set
+ // to packet drop only. Thus, packets received from any L2 interface with
+ // uninitialized bd_index of 0 can be dropped safely.
+ bd_index = bd_find_or_add_bd_index (bdm, 0);
+ ASSERT (bd_index == 0);
+ l2input_main.bd_configs[0].feature_bitmap = L2INPUT_FEAT_DROP;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2bd_init);
+
+
+// Set the learn/forward/flood flags for the bridge domain
+// Return 0 if ok, non-zero on error.
+u32
+bd_set_flags (vlib_main_t * vm,
+ u32 bd_index,
+ u32 flags,
+ u32 enable) {
+
+ l2_bridge_domain_t * bd_config;
+ u32 feature_bitmap = 0;
+
+ vec_validate (l2input_main.bd_configs, bd_index);
+ bd_config = vec_elt_at_index(l2input_main.bd_configs, bd_index);
+
+ bd_validate (bd_config);
+
+ if (flags & L2_LEARN) {
+ feature_bitmap |= L2INPUT_FEAT_LEARN;
+ }
+ if (flags & L2_FWD) {
+ feature_bitmap |= L2INPUT_FEAT_FWD;
+ }
+ if (flags & L2_FLOOD) {
+ feature_bitmap |= L2INPUT_FEAT_FLOOD;
+ }
+ if (flags & L2_UU_FLOOD) {
+ feature_bitmap |= L2INPUT_FEAT_UU_FLOOD;
+ }
+ if (flags & L2_ARP_TERM) {
+ feature_bitmap |= L2INPUT_FEAT_ARP_TERM;
+ }
+
+ if (enable) {
+ bd_config->feature_bitmap |= feature_bitmap;
+ } else {
+ bd_config->feature_bitmap &= ~feature_bitmap;
+ }
+
+ return 0;
+}
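+
+// Example (illustrative): enable learning and forwarding on a bridge
+// domain in one call:
+//   bd_set_flags (vm, bd_index, L2_LEARN | L2_FWD, 1 /* enable */);
+// The CLI handlers below are thin wrappers around this function, each
+// setting a single flag.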
+
+// set bridge-domain learn enable/disable
+// The CLI format is:
+// set bridge-domain learn <bd_id> [disable]
+static clib_error_t *
+bd_learn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ bd_main_t * bdm = &bd_main;
+ clib_error_t * error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword * p;
+
+ if (! unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ enable = 1;
+ if (unformat (input, "disable")) {
+ enable = 0;
+ }
+
+ // set the bridge domain flag
+ if (bd_set_flags(vm, bd_index, L2_LEARN, enable)) {
+ error = clib_error_return (0, "bridge-domain id %d out of range", bd_index);
+ goto done;
+ }
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (bd_learn_cli, static) = {
+ .path = "set bridge-domain learn",
+ .short_help = "set bridge-domain learn <bridge-domain-id> [disable]",
+ .function = bd_learn,
+};
+
+// set bridge-domain forward enable/disable
+// The CLI format is:
+// set bridge-domain forward <bd_id> [disable]
+static clib_error_t *
+bd_fwd (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ bd_main_t * bdm = &bd_main;
+ clib_error_t * error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword * p;
+
+ if (! unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ enable = 1;
+ if (unformat (input, "disable")) {
+ enable = 0;
+ }
+
+ // set the bridge domain flag
+ if (bd_set_flags(vm, bd_index, L2_FWD, enable)) {
+ error = clib_error_return (0, "bridge-domain id %d out of range", bd_index);
+ goto done;
+ }
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (bd_fwd_cli, static) = {
+ .path = "set bridge-domain forward",
+ .short_help = "set bridge-domain forward <bridge-domain-id> [disable]",
+ .function = bd_fwd,
+};
+
+// set bridge-domain flood enable/disable
+// The CLI format is:
+// set bridge-domain flood <bd_id> [disable]
+static clib_error_t *
+bd_flood (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ bd_main_t * bdm = &bd_main;
+ clib_error_t * error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword * p;
+
+ if (! unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ enable = 1;
+ if (unformat (input, "disable")) {
+ enable = 0;
+ }
+
+ // set the bridge domain flag
+ if (bd_set_flags(vm, bd_index, L2_FLOOD, enable)) {
+ error = clib_error_return (0, "bridge-domain id %d out of range", bd_index);
+ goto done;
+ }
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (bd_flood_cli, static) = {
+ .path = "set bridge-domain flood",
+ .short_help = "set bridge-domain flood <bridge-domain-id> [disable]",
+ .function = bd_flood,
+};
+
+// set bridge-domain unknown-unicast flood enable/disable
+// The CLI format is:
+// set bridge-domain uu-flood <bd_id> [disable]
+static clib_error_t *
+bd_uu_flood (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ bd_main_t * bdm = &bd_main;
+ clib_error_t * error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword * p;
+
+ if (! unformat (input, "%d", &bd_id))
+ {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p == 0)
+ return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ bd_index = p[0];
+
+ enable = 1;
+ if (unformat (input, "disable")) {
+ enable = 0;
+ }
+
+ // set the bridge domain flag
+ if (bd_set_flags(vm, bd_index, L2_UU_FLOOD, enable)) {
+ error = clib_error_return (0, "bridge-domain id %d out of range", bd_index);
+ goto done;
+ }
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (bd_uu_flood_cli, static) = {
+ .path = "set bridge-domain uu-flood",
+ .short_help = "set bridge-domain uu-flood <bridge-domain-id> [disable]",
+ .function = bd_uu_flood,
+};
+
+// set bridge-domain arp term enable/disable
+// The CLI format is:
+// set bridge-domain arp term <bridge-domain-id> [disable]
+static clib_error_t *
+bd_arp_term (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ bd_main_t * bdm = &bd_main;
+ clib_error_t * error = 0;
+ u32 bd_index, bd_id;
+ u32 enable;
+ uword * p;
+
+ if (! unformat (input, "%d", &bd_id)) {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p) bd_index = *p;
+ else return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ enable = 1;
+ if (unformat (input, "disable")) enable = 0;
+
+ // set the bridge domain flag
+ if (bd_set_flags(vm, bd_index, L2_ARP_TERM, enable)) {
+ error = clib_error_return (0, "bridge-domain id %d out of range", bd_index);
+ goto done;
+ }
+
+done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (bd_arp_term_cli, static) = {
+ .path = "set bridge-domain arp term",
+ .short_help = "set bridge-domain arp term <bridge-domain-id> [disable]",
+ .function = bd_arp_term,
+};
+
+
+// The clib hash implementation stores uword entries in the hash table.
+// The hash table mac_by_ip4 is keyed by IP4 address and stores the
+// 6-byte MAC address directly in the hash table entry uword.
+// This only works on a 64-bit processor with an 8-byte uword, which means
+// this code *WILL NOT WORK* on a 32-bit processor with a 4-byte uword.
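+// For example (illustrative, little-endian host): MAC 01:02:03:04:05:06
+// is read as a u64 whose two high-order bytes are indeterminate; clearing
+// those two unused bytes gives the stored uword 0x0000060504030201, keyed
+// by the IP4 address as_u32.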
+u32 bd_add_del_ip_mac(u32 bd_index,
+ u8 *ip_addr,
+ u8 *mac_addr,
+ u8 is_ip6,
+ u8 is_add)
+{
+ l2input_main_t * l2im = &l2input_main;
+ l2_bridge_domain_t * bd_cfg = l2input_bd_config_from_index (l2im, bd_index);
+ u64 new_mac = *(u64 *) mac_addr;
+ u64 * old_mac;
+ u16 * mac16 = (u16 *) &new_mac;
+
+ ASSERT (sizeof(uword) == sizeof(u64)); // make sure uword is 8 bytes
+
+  mac16[3] = 0; // Clear the last 2 unused bytes of the 8-byte MAC address
+ if (is_ip6) {
+ // ip6_address_t ip6_addr = *(ip6_address_t *) ip_addr;
+ return 1; // not yet implemented
+ } else {
+ ip4_address_t ip4_addr = *(ip4_address_t *) ip_addr;
+ old_mac = (u64 *) hash_get (bd_cfg->mac_by_ip4, ip4_addr.as_u32);
+ if (is_add) {
+      if (old_mac && (*old_mac == new_mac)) return 0; // mac entry already exists
+ hash_set (bd_cfg->mac_by_ip4, ip4_addr.as_u32, new_mac);
+ } else {
+      if (old_mac && (*old_mac == new_mac)) { // mac entry matches
+ hash_unset (bd_cfg->mac_by_ip4, ip4_addr.as_u32); // clear entry
+ } else {
+ return 1;
+ }
+ }
+ return 0;
+ }
+}
+
+// set bridge-domain arp entry add/delete
+// The CLI format is:
+// set bridge-domain arp entry <bd-id> <ip-addr> <mac-addr> [del]
+static clib_error_t *
+bd_arp_entry (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ bd_main_t * bdm = &bd_main;
+ clib_error_t * error = 0;
+ u32 bd_index, bd_id;
+ u8 is_add = 1;
+ u8 is_ip6 = 0;
+ u8 ip_addr[16];
+ u8 mac_addr[6];
+ uword * p;
+
+ if (! unformat (input, "%d", &bd_id)) {
+ error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+ if (p) bd_index = *p;
+ else return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ if (unformat (input, "%U", unformat_ip4_address, ip_addr)) {
+ is_ip6 = 0;
+ } else if (unformat (input, "%U", unformat_ip6_address, ip_addr)) {
+ is_ip6 = 1;
+ } else {
+ error = clib_error_return (0, "expecting IP address but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat(input, "%U", unformat_ethernet_address, mac_addr)) {
+ error = clib_error_return (0, "expecting MAC address but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (unformat (input, "del")) {
+ is_add = 0;
+ }
+
+  // Add or delete the IP-MAC entry in the bridge domain
+ if (bd_add_del_ip_mac(bd_index, ip_addr, mac_addr, is_ip6, is_add)) {
+    error = clib_error_return (0, "failed to %s ARP entry for IP %U MAC %U",
+ is_add ? "add" : "del",
+ format_ip4_address, ip_addr,
+ format_ethernet_address, mac_addr);
+ }
+
+done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (bd_arp_entry_cli, static) = {
+ .path = "set bridge-domain arp entry",
+ .short_help = "set bridge-domain arp entry <bd-id> <ip-addr> <mac-addr> [del]",
+ .function = bd_arp_entry,
+};
+
+u8* format_vtr(u8 * s, va_list *args)
+{
+ u32 vtr_op = va_arg (*args, u32);
+ u32 dot1q = va_arg (*args, u32);
+ u32 tag1 = va_arg (*args, u32);
+ u32 tag2 = va_arg (*args, u32);
+ switch (vtr_op) {
+ case L2_VTR_DISABLED:
+ return format (s, "none");
+ case L2_VTR_PUSH_1:
+ return format (s, "push-1 %s %d", dot1q? "dot1q":"dot1ad", tag1);
+ case L2_VTR_PUSH_2:
+ return format (s, "push-2 %s %d %d", dot1q? "dot1q":"dot1ad", tag1, tag2);
+ case L2_VTR_POP_1:
+ return format (s, "pop-1");
+ case L2_VTR_POP_2:
+ return format (s, "pop-2");
+ case L2_VTR_TRANSLATE_1_1:
+ return format (s, "trans-1-1 %s %d", dot1q? "dot1q":"dot1ad", tag1);
+ case L2_VTR_TRANSLATE_1_2:
+ return format (s, "trans-1-2 %s %d %d",dot1q? "dot1q":"dot1ad", tag1, tag2);
+ case L2_VTR_TRANSLATE_2_1:
+ return format (s, "trans-2-1 %s %d", dot1q? "dot1q":"dot1ad", tag1);
+ case L2_VTR_TRANSLATE_2_2:
+ return format (s, "trans-2-2 %s %d %d", dot1q? "dot1q":"dot1ad", tag1, tag2);
+ default:
+ return format (s, "none");
+ }
+}
+
+// show bridge-domain state
+// The CLI format is:
+// show bridge-domain [<bd_id> [detail|int|arp]]
+static clib_error_t *
+bd_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ bd_main_t * bdm = &bd_main;
+ clib_error_t * error = 0;
+ u32 bd_index = ~0;
+ l2_bridge_domain_t * bd_config;
+ u32 start, end;
+ u32 printed;
+ u32 detail = 0;
+ u32 intf = 0;
+ u32 arp = 0;
+ u32 bd_id = ~0;
+ uword * p;
+
+ start = 0;
+ end = vec_len(l2input_main.bd_configs);
+
+ if (unformat (input, "%d", &bd_id)) {
+ if (unformat (input, "detail")) detail = 1;
+ else if (unformat (input, "det")) detail = 1;
+ if (unformat (input, "int")) intf = 1;
+ if (unformat (input, "arp")) arp = 1;
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p) bd_index = *p;
+ else return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+ vec_validate (l2input_main.bd_configs, bd_index);
+ bd_config = vec_elt_at_index(l2input_main.bd_configs, bd_index);
+ if (bd_is_valid (bd_config)) {
+ start = bd_index;
+ end = start + 1;
+ } else {
+ vlib_cli_output (vm, "bridge-domain %d not in use", bd_id);
+ goto done;
+ }
+ }
+
+ // Show all bridge-domains that have been initialized
+
+ printed = 0;
+ for (bd_index=start; bd_index<end; bd_index++) {
+ bd_config = vec_elt_at_index(l2input_main.bd_configs, bd_index);
+ if (bd_is_valid(bd_config)) {
+ if (!printed) {
+ printed = 1;
+ vlib_cli_output (vm, "%=5s %=7s %=10s %=10s %=10s %=10s %=10s %=14s",
+ "ID",
+ "Index",
+ "Learning",
+ "U-Forwrd",
+ "UU-Flood",
+ "Flooding",
+ "ARP-Term",
+ "BVI-Intf");
+ }
+
+ vlib_cli_output (
+ vm, "%=5d %=7d %=10s %=10s %=10s %=10s %=10s %=14U",
+ bd_config->bd_id, bd_index,
+ bd_config->feature_bitmap & L2INPUT_FEAT_LEARN ? "on" : "off",
+ bd_config->feature_bitmap & L2INPUT_FEAT_FWD ? "on" : "off",
+ bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD ? "on" : "off",
+ bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD ? "on" : "off",
+ bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM ? "on" : "off",
+ format_vnet_sw_if_index_name_with_NA, vnm, bd_config->bvi_sw_if_index);
+
+ if (detail || intf) {
+ // Show all member interfaces
+
+ l2_flood_member_t * member;
+ u32 header = 0;
+
+ vec_foreach(member, bd_config->members) {
+ u32 vtr_opr, dot1q, tag1, tag2;
+ if (!header) {
+ header = 1;
+ vlib_cli_output (vm, "\n%=30s%=7s%=5s%=5s%=30s",
+ "Interface", "Index", "SHG", "BVI","VLAN-Tag-Rewrite");
+ }
+ l2vtr_get(vm, vnm, member->sw_if_index, &vtr_opr, &dot1q, &tag1, &tag2);
+ vlib_cli_output (vm, "%=30U%=7d%=5d%=5s%=30U",
+ format_vnet_sw_if_index_name, vnm, member->sw_if_index,
+ member->sw_if_index,
+ member->shg,
+ member->flags & L2_FLOOD_MEMBER_BVI ? "*" : "-",
+ format_vtr, vtr_opr, dot1q, tag1, tag2);
+ }
+ }
+
+ if ((detail || arp) &&
+ (bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM)) {
+ u32 ip4_addr;
+ u64 mac_addr;
+ vlib_cli_output (vm, "\n IP4 to MAC table for ARP Termination");
+ hash_foreach (ip4_addr, mac_addr, bd_config->mac_by_ip4, ({
+ vlib_cli_output (vm, "%=20U => %=20U",
+ format_ip4_address, &ip4_addr,
+ format_ethernet_address, &mac_addr);
+ }));
+ }
+ }
+ }
+
+ if (!printed) {
+ vlib_cli_output (vm, "no bridge-domains in use");
+ }
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (bd_show_cli, static) = {
+ .path = "show bridge-domain",
+ .short_help = "show bridge-domain [bridge-domain-id [detail|int|arp]]",
+ .function = bd_show,
+};
diff --git a/vnet/vnet/l2/l2_bd.h b/vnet/vnet/l2/l2_bd.h
new file mode 100644
index 00000000000..9d29a83b22f
--- /dev/null
+++ b/vnet/vnet/l2/l2_bd.h
@@ -0,0 +1,120 @@
+/*
+ * l2_bd.h : layer 2 bridge domain
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2bd_h
+#define included_l2bd_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+typedef struct {
+ // hash bd_id -> bd_index
+ uword * bd_index_by_bd_id;
+
+ // Busy bd_index bitmap
+ uword * bd_index_bitmap;
+
+ // convenience
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} bd_main_t;
+
+extern bd_main_t bd_main;
+
+// Bridge domain member
+
+#define L2_FLOOD_MEMBER_NORMAL 0
+#define L2_FLOOD_MEMBER_BVI 1
+
+typedef struct {
+ u32 sw_if_index; // the output L2 interface
+ u8 flags; // 0=normal, 1=bvi
+ u8 shg; // split horizon group number
+ u16 spare;
+} l2_flood_member_t;
+
+
+// Per-bridge domain configuration
+
+typedef struct {
+  // Contains bit enables for flooding, learning, and forwarding.
+  // All other feature bits should always be set.
+  u32 feature_bitmap;
+
+ // identity of the bridge-domain's BVI interface
+ // set to ~0 if there is no BVI
+ u32 bvi_sw_if_index;
+
+ // output node index for bvi interface before it was changed to l2-input
+ u32 saved_bvi_output_node_index;
+
+ // bridge domain id, not to be confused with bd_index
+ u32 bd_id;
+
+ // Vector of members in the replication group
+ l2_flood_member_t * members;
+
+ // hash ip4/ip6 -> mac for arp termination
+ uword *mac_by_ip4;
+ uword *mac_by_ip6;
+
+} l2_bridge_domain_t;
+
+// Return 1 if bridge domain has been initialized
+always_inline u32
+bd_is_valid (l2_bridge_domain_t * bd_config)
+{
+ return (bd_config->feature_bitmap != 0);
+}
+
+// Init bridge domain if not done already
+inline void
+bd_validate (l2_bridge_domain_t * bd_config);
+
+
+void
+bd_add_member (l2_bridge_domain_t * bd_config,
+ l2_flood_member_t * member);
+
+u32
+bd_remove_member (l2_bridge_domain_t * bd_config,
+ u32 sw_if_index);
+
+
+#define L2_LEARN (1<<0)
+#define L2_FWD (1<<1)
+#define L2_FLOOD (1<<2)
+#define L2_UU_FLOOD (1<<3)
+#define L2_ARP_TERM (1<<4)
+
+u32
+bd_set_flags (vlib_main_t * vm,
+ u32 bd_index,
+ u32 flags,
+ u32 enable);
+
+u32 bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id);
+int bd_delete_bd_index (bd_main_t * bdm, u32 bd_id);
+
+u32 bd_add_del_ip_mac(u32 bd_index,
+ u8 *ip_addr,
+ u8 *mac_addr,
+ u8 is_ip6,
+ u8 is_add);
+
+#endif
+
diff --git a/vnet/vnet/l2/l2_bvi.c b/vnet/vnet/l2/l2_bvi.c
new file mode 100644
index 00000000000..828e955617b
--- /dev/null
+++ b/vnet/vnet/l2/l2_bvi.c
@@ -0,0 +1,35 @@
+/*
+ * l2_bvi.c : layer 2 Bridged Virtual Interface
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/l2_fwd.h>
+#include <vnet/l2/l2_flood.h>
+#include <vnet/l2/l2_bvi.h>
+
+
+// Call the L2 nodes that need the ethertype mapping
+void
+l2bvi_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type,
+ u32 node_index)
+{
+ l2fwd_register_input_type (vm, type, node_index);
+ l2flood_register_input_type (vm, type, node_index);
+}
+
+
diff --git a/vnet/vnet/l2/l2_bvi.h b/vnet/vnet/l2/l2_bvi.h
new file mode 100644
index 00000000000..ca5673373fb
--- /dev/null
+++ b/vnet/vnet/l2/l2_bvi.h
@@ -0,0 +1,122 @@
+/*
+ * l2_bvi.h : layer 2 Bridged Virtual Interface
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2bvi_h
+#define included_l2bvi_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/sparse_vec.h>
+
+#include <vnet/l2/l2_input.h>
+
+#define TO_BVI_ERR_OK 0
+#define TO_BVI_ERR_TAGGED 1
+#define TO_BVI_ERR_ETHERTYPE 2
+
+// Send a packet from L2 processing to L3 via the BVI interface.
+// Set next0 to the proper L3 input node.
+// Return an error if the packet isn't what we expect.
+
+static_always_inline u32
+l2_to_bvi (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ vlib_buffer_t * b0,
+ u32 bvi_sw_if_index,
+ next_by_ethertype_t * l3_next,
+ u32 * next0)
+{
+ u8 l2_len;
+ u16 ethertype;
+ u8 * l3h;
+
+ // Save L2 header position which may be changed due to packet replication
+ vnet_buffer (b0)->ethernet.start_of_ethernet_header = b0->current_data;
+
+ // Strip L2 header
+ l2_len = vnet_buffer(b0)->l2.l2_len;
+ vlib_buffer_advance (b0, l2_len);
+
+ l3h = vlib_buffer_get_current (b0);
+ ethertype = clib_net_to_host_u16(*(u16 *)(l3h - 2));
+
+ // Set the input interface to be the BVI interface
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = bvi_sw_if_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = ~0;
+
+ // Go to appropriate L3 input node
+ if (ethertype == ETHERNET_TYPE_IP4) {
+ *next0 = l3_next->input_next_ip4;
+ } else if (ethertype == ETHERNET_TYPE_IP6) {
+ *next0 = l3_next->input_next_ip6;
+ } else {
+ // uncommon ethertype, check table
+ u32 i0;
+
+ i0 = sparse_vec_index (l3_next->input_next_by_type, ethertype);
+ *next0 = vec_elt (l3_next->input_next_by_type, i0);
+
+ if (i0 == SPARSE_VEC_INVALID_INDEX) {
+ return TO_BVI_ERR_ETHERTYPE;
+ }
+ }
+
+ // increment BVI RX interface stat
+ vlib_increment_combined_counter
+ (vnet_main->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_main->cpu_index,
+ vnet_buffer(b0)->sw_if_index[VLIB_RX],
+ 1,
+ vlib_buffer_length_in_chain (vlib_main, b0));
+ return TO_BVI_ERR_OK;
+}
+
+
+// Prepare a packet that was sent to the BVI interface for L2 processing.
+
+static_always_inline void
+bvi_to_l2 (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 cpu_index,
+ vlib_buffer_t * b0,
+ u32 bvi_sw_if_index)
+{
+ // Set the input interface to be the BVI interface
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = bvi_sw_if_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = ~0;
+
+  // Update l2_len in the packet, which the L2 path expects
+  // (including the L2 tag push/pop code on output)
+ vnet_update_l2_len(b0);
+
+ // increment BVI TX interface stat
+ vlib_increment_combined_counter
+ (vnet_main->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX,
+ cpu_index,
+ bvi_sw_if_index,
+ 1,
+ vlib_buffer_length_in_chain (vlib_main, b0));
+}
+
+
+void
+l2bvi_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type,
+ u32 node_index);
+#endif
diff --git a/vnet/vnet/l2/l2_classify.c b/vnet/vnet/l2/l2_classify.c
new file mode 100644
index 00000000000..a6c8ebbc1b4
--- /dev/null
+++ b/vnet/vnet/l2/l2_classify.c
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * l2_classify.c
+ */
+
+#include <vnet/l2/l2_classify.h>
+#include <vnet/api_errno.h>
+
+typedef struct {
+ /* per-pkt trace data */
+ u32 sw_if_index;
+ u32 next_index;
+ u32 table_index;
+ u32 session_offset;
+} l2_classify_trace_t;
+
+typedef struct {
+ vnet_classify_main_t * vcm;
+ l2_classify_main_t * l2cm;
+} l2_classify_runtime_t;
+
+/* packet trace format function */
+static u8 * format_l2_classify_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_classify_trace_t * t = va_arg (*args, l2_classify_trace_t *);
+
+ s = format (s, "l2-classify: sw_if_index %d, table %d, offset %x, next %d",
+ t->sw_if_index, t->table_index, t->session_offset, t->next_index);
+ return s;
+}
+
+l2_classify_main_t l2_classify_main;
+
+vlib_node_registration_t l2_classify_node;
+
+#define foreach_l2_classify_error \
+_(MISS, "Classify misses") \
+_(HIT, "Classify hits") \
+_(CHAIN_HIT, "Classify hits after chain walk") \
+_(DROP, "L2 Classify Drops")
+
+typedef enum {
+#define _(sym,str) L2_CLASSIFY_ERROR_##sym,
+ foreach_l2_classify_error
+#undef _
+ L2_CLASSIFY_N_ERROR,
+} l2_classify_error_t;
+
+static char * l2_classify_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_classify_error
+#undef _
+};
+
+static uword
+l2_classify_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ l2_classify_next_t next_index;
+ l2_classify_main_t * cm = &l2_classify_main;
+ vnet_classify_main_t * vcm = cm->vnet_classify_main;
+ l2_classify_runtime_t * rt = (l2_classify_runtime_t *)node->runtime_data;
+ u32 feature_bitmap;
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+ f64 now;
+
+ now = vlib_time_now(vm);
+
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ /* First pass: compute hash */
+
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t * b0, * b1;
+ u32 bi0, bi1;
+ ethernet_header_t * h0, * h1;
+ u32 sw_if_index0, sw_if_index1;
+ u16 type0, type1;
+ int type_index0, type_index1;
+ vnet_classify_table_t * t0, * t1;
+ u32 table_index0, table_index1;
+ u64 hash0, hash1;
+
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t * p1, * p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+ h1 = vlib_buffer_get_current (b1);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ vnet_buffer(b0)->l2_classify.table_index = ~0;
+
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+ vnet_buffer(b1)->l2_classify.table_index = ~0;
+
+ /* Select classifier table based on ethertype */
+ type0 = clib_net_to_host_u16 (h0->type);
+ type1 = clib_net_to_host_u16 (h1->type);
+
+ type_index0 = (type0 == ETHERNET_TYPE_IP4)
+ ? L2_CLASSIFY_TABLE_IP4 : L2_CLASSIFY_TABLE_OTHER;
+ type_index0 = (type0 == ETHERNET_TYPE_IP6)
+ ? L2_CLASSIFY_TABLE_IP6 : type_index0;
+
+ type_index1 = (type1 == ETHERNET_TYPE_IP4)
+ ? L2_CLASSIFY_TABLE_IP4 : L2_CLASSIFY_TABLE_OTHER;
+ type_index1 = (type1 == ETHERNET_TYPE_IP6)
+ ? L2_CLASSIFY_TABLE_IP6 : type_index1;
+
+ vnet_buffer(b0)->l2_classify.table_index =
+ table_index0 =
+ rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index0][sw_if_index0];
+
+ if (table_index0 != ~0)
+ {
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ vnet_buffer(b0)->l2_classify.hash = hash0 =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+ vnet_classify_prefetch_bucket (t0, hash0);
+ }
+
+ vnet_buffer(b1)->l2_classify.table_index =
+ table_index1 =
+ rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index1][sw_if_index1];
+
+ if (table_index1 != ~0)
+ {
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ vnet_buffer(b1)->l2_classify.hash = hash1 =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+ vnet_classify_prefetch_bucket (t1, hash1);
+ }
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t * b0;
+ u32 bi0;
+ ethernet_header_t * h0;
+ u32 sw_if_index0;
+ u16 type0;
+ u32 type_index0;
+ vnet_classify_table_t * t0;
+ u32 table_index0;
+ u64 hash0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ vnet_buffer(b0)->l2_classify.table_index = ~0;
+
+ /* Select classifier table based on ethertype */
+ type0 = clib_net_to_host_u16 (h0->type);
+
+ type_index0 = (type0 == ETHERNET_TYPE_IP4)
+ ? L2_CLASSIFY_TABLE_IP4 : L2_CLASSIFY_TABLE_OTHER;
+ type_index0 = (type0 == ETHERNET_TYPE_IP6)
+ ? L2_CLASSIFY_TABLE_IP6 : type_index0;
+
+ vnet_buffer(b0)->l2_classify.table_index =
+ table_index0 = rt->l2cm->classify_table_index_by_sw_if_index
+ [type_index0][sw_if_index0];
+
+ if (table_index0 != ~0)
+ {
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ vnet_buffer(b0)->l2_classify.hash = hash0 =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+ vnet_classify_prefetch_bucket (t0, hash0);
+ }
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = L2_CLASSIFY_NEXT_ETHERNET_INPUT;
+ ethernet_header_t * h0;
+ u32 table_index0;
+ u64 hash0;
+ vnet_classify_table_t * t0;
+ vnet_classify_entry_t * e0;
+
+ if (PREDICT_TRUE (n_left_from > 2))
+ {
+ vlib_buffer_t * p2 = vlib_get_buffer(vm, from[2]);
+ u64 phash2;
+ u32 table_index2;
+ vnet_classify_table_t * tp2;
+
+ /*
+ * Prefetch table entry two ahead. Buffer / data
+ * were prefetched above...
+ */
+ table_index2 = vnet_buffer(p2)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index2 != ~0))
+ {
+ tp2 = pool_elt_at_index (vcm->tables, table_index2);
+ phash2 = vnet_buffer(p2)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp2, phash2);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current(b0);
+ table_index0 = vnet_buffer(b0)->l2_classify.table_index;
+ e0 = 0;
+
+ if (PREDICT_TRUE(table_index0 != ~0))
+ {
+ hash0 = vnet_buffer(b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0,
+ hash0, now);
+ if (e0)
+ {
+ vnet_buffer(b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < L2_CLASSIFY_N_NEXT)?
+ e0->next_index:next0;
+ hits++;
+ }
+ else
+ {
+ while (1)
+ {
+ if (t0->next_table_index != ~0)
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 = (t0->miss_next_index < L2_CLASSIFY_N_NEXT)?
+ t0->miss_next_index:next0;
+ misses++;
+ break;
+ }
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer(b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < L2_CLASSIFY_N_NEXT)?
+ e0->next_index:next0;
+ hits++;
+ chain_hits++;
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE(next0 == 0))
+ b0->error = node->errors[L2_CLASSIFY_ERROR_DROP];
+
+ if (PREDICT_FALSE (next0 == ~0))
+ {
+
+ // Remove ourself from the feature bitmap
+ feature_bitmap = vnet_buffer(b0)->l2.feature_bitmap
+ & ~L2INPUT_FEAT_CLASSIFY;
+
+ // save for next feature graph nodes
+ vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap;
+
+ // Determine the next node
+ next0 = feat_bitmap_get_next_node_index(cm->feat_next_node_index,
+ feature_bitmap);
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_classify_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ t->table_index = table_index0;
+ t->next_index = next0;
+ t->session_offset = e0 ? vnet_classify_get_offset (t0, e0) : 0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_CLASSIFY_ERROR_MISS,
+ misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_CLASSIFY_ERROR_HIT,
+ hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_CLASSIFY_ERROR_CHAIN_HIT,
+ chain_hits);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (l2_classify_node) = {
+ .function = l2_classify_node_fn,
+ .name = "l2-classify",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_classify_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_classify_error_strings),
+ .error_strings = l2_classify_error_strings,
+
+ .runtime_data_bytes = sizeof (l2_classify_runtime_t),
+
+ .n_next_nodes = L2_CLASSIFY_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_CLASSIFY_NEXT_DROP] = "error-drop",
+ [L2_CLASSIFY_NEXT_ETHERNET_INPUT] = "ethernet-input-not-l2",
+ [L2_CLASSIFY_NEXT_IP4_INPUT] = "ip4-input",
+ [L2_CLASSIFY_NEXT_IP6_INPUT] = "ip6-input",
+ [L2_CLASSIFY_NEXT_LI] = "li-hit",
+ },
+};
+
+clib_error_t *l2_classify_init (vlib_main_t *vm)
+{
+ l2_classify_main_t * cm = &l2_classify_main;
+ l2_classify_runtime_t * rt;
+
+ rt = vlib_node_get_runtime_data (vm, l2_classify_node.index);
+
+ cm->vlib_main = vm;
+ cm->vnet_main = vnet_get_main();
+ cm->vnet_classify_main = &vnet_classify_main;
+
+ // Initialize the feature next-node indexes
+ feat_bitmap_init_next_nodes(vm,
+ l2_classify_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names(),
+ cm->feat_next_node_index);
+ rt->l2cm = cm;
+ rt->vcm = cm->vnet_classify_main;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_classify_init);
+
+
+void vnet_l2_classify_enable_disable (u32 sw_if_index,
+ int enable_disable)
+{
+ vlib_main_t * vm = vlib_get_main();
+ vnet_main_t * vnm = vnet_get_main();
+
+ if (enable_disable)
+ set_int_l2_mode (vm, vnm, MODE_L2_CLASSIFY, sw_if_index,
+ 0, 0, 0, 0);
+ else
+ set_int_l2_mode (vm, vnm, MODE_L3, sw_if_index,
+ 0, 0, 0, 0);
+}
+
+int vnet_l2_classify_set_tables (u32 sw_if_index,
+ u32 ip4_table_index,
+ u32 ip6_table_index,
+ u32 other_table_index)
+{
+ l2_classify_main_t * cm = &l2_classify_main;
+ vnet_classify_main_t * vcm = cm->vnet_classify_main;
+
+ /* Assume that we've validated sw_if_index in the API layer */
+
+ if (ip4_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, ip4_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+
+ if (ip6_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, ip6_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE2;
+
+ if (other_table_index != ~0 &&
+ pool_is_free_index (vcm->tables, other_table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE3;
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_IP4],
+ sw_if_index);
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_IP6],
+ sw_if_index);
+
+ vec_validate
+ (cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_OTHER],
+ sw_if_index);
+
+ cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_IP4]
+ [sw_if_index] = ip4_table_index;
+
+ cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_IP6]
+ [sw_if_index] = ip6_table_index;
+
+ cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_OTHER]
+ [sw_if_index] = other_table_index;
+
+ return 0;
+}
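+
+// Usage sketch (illustrative): bind an existing ip4 classifier table to
+// an interface and turn classification on, mirroring what the CLI handler
+// below does. ip4_table_index would come from a previously created
+// vnet_classify table.
+//
+//   if (vnet_l2_classify_set_tables (sw_if_index,
+//                                    ip4_table_index, ~0, ~0) == 0)
+//     vnet_l2_classify_enable_disable (sw_if_index, 1 /* enable */);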
+
+static clib_error_t *
+int_l2_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ u32 sw_if_index = ~0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u32 other_table_index = ~0;
+ int rv;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ ;
+ else if (unformat (input, "ip4-table %d", &ip4_table_index))
+ ;
+ else if (unformat (input, "ip6-table %d", &ip6_table_index))
+ ;
+ else if (unformat (input, "other-table %d", &other_table_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "interface must be specified");
+
+
+ if (ip4_table_index == ~0 && ip6_table_index == ~0
+ && other_table_index == ~0)
+ {
+ vlib_cli_output (vm, "L2 classification disabled");
+ vnet_l2_classify_enable_disable (sw_if_index, 0 /* enable */);
+ return 0;
+ }
+
+ rv = vnet_l2_classify_set_tables (sw_if_index, ip4_table_index,
+ ip6_table_index, other_table_index);
+ switch(rv)
+ {
+ case 0:
+ vnet_l2_classify_enable_disable (sw_if_index, 1 /* enable */);
+ break;
+
+ default:
+ return clib_error_return (0, "vnet_l2_classify_set_tables: %d",
+ rv);
+ break;
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (int_l2_classify_cli, static) = {
+ .path = "set interface l2 classify",
+ .short_help =
+ "set interface l2 classify intfc <int> [ip4-table <n>]\n"
+ " [ip6-table <n>] [other-table <n>]",
+ .function = int_l2_classify_command_fn,
+};
+
+
diff --git a/vnet/vnet/l2/l2_classify.h b/vnet/vnet/l2/l2_classify.h
new file mode 100644
index 00000000000..55c2fc8b00d
--- /dev/null
+++ b/vnet/vnet/l2/l2_classify.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_vnet_l2_classify_h__
+#define __included_vnet_l2_classify_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+#include <vnet/classify/vnet_classify.h>
+
+typedef enum {
+ L2_CLASSIFY_NEXT_DROP,
+ L2_CLASSIFY_NEXT_ETHERNET_INPUT,
+ L2_CLASSIFY_NEXT_IP4_INPUT,
+ L2_CLASSIFY_NEXT_IP6_INPUT,
+ L2_CLASSIFY_NEXT_LI,
+ L2_CLASSIFY_N_NEXT,
+} l2_classify_next_t;
+
+typedef enum {
+ L2_CLASSIFY_TABLE_IP4,
+ L2_CLASSIFY_TABLE_IP6,
+ L2_CLASSIFY_TABLE_OTHER,
+ L2_CLASSIFY_N_TABLES,
+} l2_classify_table_id_t;
+
+typedef struct {
+
+ // Next nodes for each feature
+ u32 feat_next_node_index[32];
+
+ /* Per-address-family classifier table vectors */
+ u32 * classify_table_index_by_sw_if_index [L2_CLASSIFY_N_TABLES];
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+ vnet_classify_main_t * vnet_classify_main;
+} l2_classify_main_t;
+
+extern l2_classify_main_t l2_classify_main;
+
+extern vlib_node_registration_t l2_classify_node;
+
+void vnet_l2_classify_enable_disable (u32 sw_if_index,
+ int enable_disable);
+
+int vnet_l2_classify_set_tables (u32 sw_if_index, u32 ip4_table_index,
+ u32 ip6_table_index, u32 other_table_index);
+
+#endif /* __included_vnet_l2_classify_h__ */
diff --git a/vnet/vnet/l2/l2_efp_filter.c b/vnet/vnet/l2/l2_efp_filter.c
new file mode 100644
index 00000000000..a8bceca13fe
--- /dev/null
+++ b/vnet/vnet/l2/l2_efp_filter.c
@@ -0,0 +1,572 @@
+/*
+ * l2_efp_filter.c : layer 2 egress EFP Filter processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/cache.h>
+
+typedef struct {
+
+ // Next nodes for features and output interfaces
+ l2_output_next_nodes_st next_nodes;
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} l2_efp_filter_main_t;
+
+
+typedef struct {
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u8 raw[12]; // raw data (vlans)
+ u32 sw_if_index;
+} l2_efp_filter_trace_t;
+
+/* packet trace format function */
+static u8 * format_l2_efp_filter_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_efp_filter_trace_t * t = va_arg (*args, l2_efp_filter_trace_t *);
+
+ s = format (s, "l2-output-vtr: sw_if_index %d dst %U src %U data "
+ "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src,
+ t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4], t->raw[5],
+ t->raw[6], t->raw[7], t->raw[8], t->raw[9], t->raw[10], t->raw[11]);
+ return s;
+}
+
+l2_efp_filter_main_t l2_efp_filter_main;
+
+static vlib_node_registration_t l2_efp_filter_node;
+
+#define foreach_l2_efp_filter_error \
+_(L2_EFP_FILTER, "L2 EFP filter packets") \
+_(DROP, "L2 EFP filter post-rewrite drops")
+
+typedef enum {
+#define _(sym,str) L2_EFP_FILTER_ERROR_##sym,
+ foreach_l2_efp_filter_error
+#undef _
+ L2_EFP_FILTER_N_ERROR,
+} l2_efp_filter_error_t;
+
+static char * l2_efp_filter_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_efp_filter_error
+#undef _
+};
+
+typedef enum {
+ L2_EFP_FILTER_NEXT_DROP,
+ L2_EFP_FILTER_N_NEXT,
+} l2_efp_filter_next_t;
+
+
+// Extract fields from the packet that will be used in interface classification
+static_always_inline void
+extract_keys (vnet_main_t * vnet_main,
+ u32 sw_if_index0,
+ vlib_buffer_t * b0,
+ u32 * port_sw_if_index0,
+ u16 * first_ethertype0,
+ u16 * outer_id0,
+ u16 * inner_id0,
+ u32 * match_flags0)
+{
+ ethernet_header_t * e0;
+ ethernet_vlan_header_t * h0;
+ u32 tag_len;
+ u32 tag_num;
+
+ *port_sw_if_index0 = vnet_get_sup_sw_interface (vnet_main, sw_if_index0)->sw_if_index;
+
+ e0 = vlib_buffer_get_current (b0);
+ h0 = (ethernet_vlan_header_t *)(e0+1);
+
+ *first_ethertype0 = clib_net_to_host_u16(e0->type);
+ *outer_id0 = clib_net_to_host_u16 (h0[0].priority_cfi_and_id);
+ *inner_id0 = clib_net_to_host_u16 (h0[1].priority_cfi_and_id);
+
+ tag_len = vnet_buffer(b0)->l2.l2_len - sizeof(ethernet_header_t);
+ tag_num = tag_len / sizeof(ethernet_vlan_header_t);
+ *match_flags0 = eth_create_valid_subint_match_flags (tag_num);
+}
+
+/*
+ * EFP filtering is a basic switch feature which prevents an interface from
+ * transmitting a packet that doesn't match the interface's ingress match
+ * criteria. The check has two parts, one performed before egress vlan tag
+ * rewrite and one after.
+ *
+ * The pre-rewrite check ensures the packet matches what an ingress packet looks
+ * like after going through the interface's ingress tag rewrite operation. Only
+ * pushed tags are compared. So:
+ * - if the ingress vlan tag rewrite pushes no tags (or is not enabled),
+ * any packet passes the filter
+ * - if the ingress vlan tag rewrite pushes one tag,
+ * the packet must have at least one tag, and the outer tag must match the pushed tag
+ * - if the ingress vlan tag rewrite pushes two tags,
+ * the packet must have at least two tags, and the outer two tags must match the pushed tags
+ *
+ * The pre-rewrite check is performed in the l2-output node.
+ *
+ * The post-rewrite check ensures the packet matches what an ingress packet looks
+ * like before going through the interface's ingress tag rewrite operation. It verifies
+ * that such a packet arriving on the wire at this port would be classified as arriving
+ * on an input interface equal to the packet's output interface. This is done by running
+ * the output packet's vlan tags and output port through the interface classification,
+ * and checking if the resulting interface matches the output interface.
+ *
+ * The post-rewrite check is performed here.
+ */
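+
+/*
+ * Illustrative example (hypothetical configuration): suppose the output
+ * subinterface's ingress vlan tag rewrite pushes one tag, vlan 100. By the
+ * pre-rewrite rules above, a packet sent out this subinterface must carry
+ * at least one tag and its outer tag must be vlan 100. The post-rewrite
+ * check below then re-runs interface classification on the rewritten
+ * packet's tags; if they would classify to a different subinterface of
+ * the same port, the packet is dropped.
+ */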
+
+static uword
+l2_efp_filter_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ l2_efp_filter_next_t next_index;
+ l2_efp_filter_main_t * msm = &l2_efp_filter_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_efp_filter_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t * em = &vm->error_main;
+  u32 cached_sw_if_index = ~0;  /* invalidate the one-entry cache */
+  u32 cached_next_index = ~0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 6 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 feature_bitmap0, feature_bitmap1;
+ u16 first_ethertype0, first_ethertype1;
+ u16 outer_id0, inner_id0, outer_id1, inner_id1;
+ u32 match_flags0, match_flags1;
+ u32 port_sw_if_index0, subint_sw_if_index0, port_sw_if_index1, subint_sw_if_index1;
+ vnet_hw_interface_t * hi0, * hi1;
+ main_intf_t * main_intf0, * main_intf1;
+ vlan_intf_t * vlan_intf0, * vlan_intf1;
+ qinq_intf_t * qinq_intf0, * qinq_intf1;
+ u32 is_l20, is_l21;
+ __attribute__((unused)) u32 matched0, matched1;
+ u8 error0, error1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3, * p4, * p5;
+ __attribute__((unused)) u32 sw_if_index2, sw_if_index3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ // Prefetch the buffer header and packet for the N+2 loop iteration
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+
+ // Prefetch the input config for the N+1 loop iteration
+ // This depends on the buffer header above
+ sw_if_index2 = vnet_buffer(p2)->sw_if_index[VLIB_TX];
+ sw_if_index3 = vnet_buffer(p3)->sw_if_index[VLIB_TX];
+ //TODO CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index2), CLIB_CACHE_LINE_BYTES, LOAD);
+ //TODO CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index3), CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* TX interface handles */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
+
+ // process 2 packets
+ em->counters[node_counter_base_index + L2_EFP_FILTER_ERROR_L2_EFP_FILTER] += 2;
+
+          // Remove this feature bit from the feature bitmap
+ feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_EFP_FILTER;
+ feature_bitmap1 = vnet_buffer(b1)->l2.feature_bitmap & ~L2OUTPUT_FEAT_EFP_FILTER;
+
+ // Determine next node
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2_efp_filter_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b0,
+ sw_if_index0,
+ feature_bitmap0,
+ &next0);
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2_efp_filter_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b1,
+ sw_if_index1,
+ feature_bitmap1,
+ &next1);
+
+ // perform the efp filter check on two packets
+
+ extract_keys (msm->vnet_main,
+ sw_if_index0,
+ b0,
+ &port_sw_if_index0,
+ &first_ethertype0,
+ &outer_id0,
+ &inner_id0,
+ &match_flags0);
+
+ extract_keys (msm->vnet_main,
+ sw_if_index1,
+ b1,
+ &port_sw_if_index1,
+ &first_ethertype1,
+ &outer_id1,
+ &inner_id1,
+ &match_flags1);
+
+ eth_vlan_table_lookups (&ethernet_main,
+ msm->vnet_main,
+ port_sw_if_index0,
+ first_ethertype0,
+ outer_id0,
+ inner_id0,
+ &hi0,
+ &main_intf0,
+ &vlan_intf0,
+ &qinq_intf0);
+
+ eth_vlan_table_lookups (&ethernet_main,
+ msm->vnet_main,
+ port_sw_if_index1,
+ first_ethertype1,
+ outer_id1,
+ inner_id1,
+ &hi1,
+ &main_intf1,
+ &vlan_intf1,
+ &qinq_intf1);
+
+ matched0 = eth_identify_subint (hi0,
+ b0,
+ match_flags0,
+ main_intf0,
+ vlan_intf0,
+ qinq_intf0,
+ &subint_sw_if_index0,
+ &error0,
+ &is_l20);
+
+ matched1 = eth_identify_subint (hi1,
+ b1,
+ match_flags1,
+ main_intf1,
+ vlan_intf1,
+ qinq_intf1,
+ &subint_sw_if_index1,
+ &error1,
+ &is_l21);
+
+ if (PREDICT_FALSE (sw_if_index0 != subint_sw_if_index0)) {
+ // Drop packet
+ next0 = L2_EFP_FILTER_NEXT_DROP;
+ b0->error = node->errors[L2_EFP_FILTER_ERROR_DROP];
+ }
+
+ if (PREDICT_FALSE (sw_if_index1 != subint_sw_if_index1)) {
+ // Drop packet
+ next1 = L2_EFP_FILTER_NEXT_DROP;
+ b1->error = node->errors[L2_EFP_FILTER_ERROR_DROP];
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED) {
+ ethernet_header_t * h0 = vlib_buffer_get_current (b0);
+ l2_efp_filter_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ memcpy(t->raw, &h0->type, sizeof(t->raw));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED) {
+ ethernet_header_t * h1 = vlib_buffer_get_current (b1);
+ l2_efp_filter_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ memcpy(t->src, h1->src_address, 6);
+ memcpy(t->dst, h1->dst_address, 6);
+ memcpy(t->raw, &h1->type, sizeof(t->raw));
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ u32 feature_bitmap0;
+ u16 first_ethertype0;
+ u16 outer_id0, inner_id0;
+ u32 match_flags0;
+ u32 port_sw_if_index0, subint_sw_if_index0;
+ vnet_hw_interface_t * hi0;
+ main_intf_t * main_intf0;
+ vlan_intf_t * vlan_intf0;
+ qinq_intf_t * qinq_intf0;
+ u32 is_l20;
+ __attribute__((unused)) u32 matched0;
+ u8 error0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+
+ // process 1 packet
+ em->counters[node_counter_base_index + L2_EFP_FILTER_ERROR_L2_EFP_FILTER] += 1;
+
+          // Remove this feature bit from the feature bitmap
+ feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_EFP_FILTER;
+
+ // Determine next node
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2_efp_filter_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b0,
+ sw_if_index0,
+ feature_bitmap0,
+ &next0);
+
+ // perform the efp filter check on one packet
+
+ extract_keys (msm->vnet_main,
+ sw_if_index0,
+ b0,
+ &port_sw_if_index0,
+ &first_ethertype0,
+ &outer_id0,
+ &inner_id0,
+ &match_flags0);
+
+ eth_vlan_table_lookups (&ethernet_main,
+ msm->vnet_main,
+ port_sw_if_index0,
+ first_ethertype0,
+ outer_id0,
+ inner_id0,
+ &hi0,
+ &main_intf0,
+ &vlan_intf0,
+ &qinq_intf0);
+
+ matched0 = eth_identify_subint (hi0,
+ b0,
+ match_flags0,
+ main_intf0,
+ vlan_intf0,
+ qinq_intf0,
+ &subint_sw_if_index0,
+ &error0,
+ &is_l20);
+
+ if (PREDICT_FALSE (sw_if_index0 != subint_sw_if_index0)) {
+ // Drop packet
+ next0 = L2_EFP_FILTER_NEXT_DROP;
+ b0->error = node->errors[L2_EFP_FILTER_ERROR_DROP];
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
+ ethernet_header_t * h0 = vlib_buffer_get_current (b0);
+ l2_efp_filter_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ memcpy(t->raw, &h0->type, sizeof(t->raw));
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+VLIB_REGISTER_NODE (l2_efp_filter_node,static) = {
+ .function = l2_efp_filter_node_fn,
+ .name = "l2-efp-filter",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_efp_filter_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_efp_filter_error_strings),
+ .error_strings = l2_efp_filter_error_strings,
+
+ .n_next_nodes = L2_EFP_FILTER_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_EFP_FILTER_NEXT_DROP] = "error-drop",
+ },
+};
+
+clib_error_t *l2_efp_filter_init (vlib_main_t *vm)
+{
+ l2_efp_filter_main_t * mp = &l2_efp_filter_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main();
+
+ // Initialize the feature next-node indexes
+ feat_bitmap_init_next_nodes(vm,
+ l2_efp_filter_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names(),
+ mp->next_nodes.feat_next_node_index);
+
+ // Initialize the output node mapping table
+ l2output_init_output_node_vec(&mp->next_nodes.output_node_index_vec);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_efp_filter_init);
+
+
+// Enable/disable the EFP Filter check on the subinterface
+void l2_efp_filter_configure (vnet_main_t * vnet_main,
+ u32 sw_if_index,
+ u32 enable)
+{
+ // set the interface flag
+ l2output_intf_bitmap_enable(sw_if_index, L2OUTPUT_FEAT_EFP_FILTER, enable);
+}
+
+
+// set subinterface egress efp filter enable/disable
+// The CLI format is:
+//    set interface l2 efp-filter <interface> [disable]
+static clib_error_t *
+int_l2_efp_filter (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable")) {
+ enable = 0;
+ }
+
+ // enable/disable the feature
+ l2_efp_filter_configure (vnm, sw_if_index, enable);
+
+ done:
+ return error;
+}
+
+
+VLIB_CLI_COMMAND (int_l2_efp_filter_cli, static) = {
+ .path = "set interface l2 efp-filter",
+ .short_help = "set interface l2 efp-filter <interface> [disable]",
+ .function = int_l2_efp_filter,
+};
+
diff --git a/vnet/vnet/l2/l2_efp_filter.h b/vnet/vnet/l2/l2_efp_filter.h
new file mode 100644
index 00000000000..f8baf092fa8
--- /dev/null
+++ b/vnet/vnet/l2/l2_efp_filter.h
@@ -0,0 +1,28 @@
+/*
+ * l2_efp_filter.h : layer 2 egress EFP Filter processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef included_vnet_l2_efp_filter_h
+#define included_vnet_l2_efp_filter_h
+
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+
+#endif
+
diff --git a/vnet/vnet/l2/l2_fib.c b/vnet/vnet/l2/l2_fib.c
new file mode 100644
index 00000000000..198ffd281bb
--- /dev/null
+++ b/vnet/vnet/l2/l2_fib.c
@@ -0,0 +1,567 @@
+/*
+ * l2_fib.c : layer 2 forwarding table (aka mac table)
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/l2/l2_fib.h>
+#include <vnet/l2/l2_learn.h>
+#include <vnet/l2/l2_bd.h>
+
+#include <vppinfra/bihash_template.c>
+
+typedef struct {
+
+ /* hash table */
+ BVT(clib_bihash) mac_table;
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} l2fib_main_t;
+
+l2fib_main_t l2fib_main;
+
+
+// Format sw_if_index. If the value is ~0, use the text "N/A"
+u8 * format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args)
+{
+ vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
+ u32 sw_if_index = va_arg (*args, u32);
+ if (sw_if_index == ~0)
+ return format (s, "N/A");
+ else
+ return format (s, "%U",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, sw_if_index));
+}
+
+void l2fib_table_dump (u32 bd_index, l2fib_entry_key_t **l2fe_key,
+ l2fib_entry_result_t **l2fe_res)
+{
+ l2fib_main_t * msm = &l2fib_main;
+ BVT(clib_bihash) * h = &msm->mac_table;
+ clib_bihash_bucket_t * b;
+ BVT(clib_bihash_value) * v;
+ l2fib_entry_key_t key;
+ l2fib_entry_result_t result;
+ int i, j, k;
+
+ for (i = 0; i < h->nbuckets; i++)
+ {
+ b = &h->buckets[i];
+ if (b->offset == 0)
+ continue;
+ v = BV(clib_bihash_get_value) (h, b->offset);
+ for (j = 0; j < (1<<b->log2_pages); j++)
+ {
+ for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+ {
+ if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+ continue;
+
+ key.raw = v->kvp[k].key;
+ result.raw = v->kvp[k].value;
+
+ if ((bd_index == ~0) || (bd_index == key.fields.bd_index))
+ {
+ vec_add1 (*l2fe_key, key);
+ vec_add1 (*l2fe_res, result);
+ }
+ }
+ v++;
+ }
+ }
+}
+
+// Display the contents of the l2fib
+static clib_error_t *
+show_l2fib (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ bd_main_t * bdm = &bd_main;
+ l2fib_main_t * msm = &l2fib_main;
+ BVT(clib_bihash) * h = &msm->mac_table;
+ clib_bihash_bucket_t * b;
+ BVT(clib_bihash_value) * v;
+ l2fib_entry_key_t key;
+ l2fib_entry_result_t result;
+ u32 first_entry = 1;
+ u64 total_entries = 0;
+ int i, j, k;
+ u8 verbose = 0;
+ u8 raw = 0;
+ u32 bd_id, bd_index = ~0;
+
+ if (unformat (input, "raw"))
+ raw = 1;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+ else if (unformat (input, "bd_index %d", &bd_index))
+ verbose = 1;
+ else if (unformat (input, "bd_id %d", &bd_id))
+ {
+ uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (p)
+ {
+ verbose = 1;
+ bd_index = p[0];
+ }
+ else
+ {
+ vlib_cli_output (vm, "no such bridge domain id");
+ return 0;
+ }
+ }
+
+ for (i = 0; i < h->nbuckets; i++)
+ {
+ b = &h->buckets[i];
+ if (b->offset == 0)
+ continue;
+ v = BV(clib_bihash_get_value) (h, b->offset);
+ for (j = 0; j < (1<<b->log2_pages); j++)
+ {
+ for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+ {
+ if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+ continue;
+
+ if (verbose && first_entry)
+ {
+ first_entry=0;
+ vlib_cli_output (vm,
+ "%=19s%=7s%=30s%=7s%=8s%=8s%=5s%=9s%=11s",
+ "Mac Address", "BD Idx", "Interface",
+ "Index", "static", "filter", "bvi",
+ "refresh", "timestamp");
+ }
+
+ key.raw = v->kvp[k].key;
+ result.raw = v->kvp[k].value;
+
+              if (verbose
+                  && ((bd_index == ~0) || (bd_index == key.fields.bd_index)))
+ {
+ vlib_cli_output (vm,
+ "%=19U%=7d%=30U%=7d%=8d%=8d%=5d%=9d%=11X",
+ format_ethernet_address, key.fields.mac,
+ key.fields.bd_index,
+ format_vnet_sw_if_index_name_with_NA,
+ msm->vnet_main, result.fields.sw_if_index,
+ result.fields.sw_if_index == ~0
+ ? -1 : result.fields.sw_if_index,
+ result.fields.static_mac,
+ result.fields.filter,
+ result.fields.bvi,
+ result.fields.refresh,
+ result.fields.timestamp);
+ }
+ total_entries++;
+ }
+ v++;
+ }
+ }
+
+ if (total_entries == 0)
+ vlib_cli_output (vm, "no l2fib entries");
+ else
+ vlib_cli_output (vm, "%lld l2fib entries", total_entries);
+
+ if (raw)
+ vlib_cli_output (vm, "Raw Hash Table:\n%U\n",
+ BV(format_bihash), h, 1 /* verbose */);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_l2fib_cli, static) = {
+ .path = "show l2fib",
+ .short_help = "show l2fib [verbose | bd_id <nn> | bd_index <nn> | raw]",
+ .function = show_l2fib,
+};
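+
+// Example invocations (illustrative; the bd_id is hypothetical):
+//   show l2fib verbose
+//   show l2fib bd_id 13
+//   show l2fib raw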
+
+
+// Remove all entries from the l2fib
+void l2fib_clear_table (uint keep_static)
+{
+ l2fib_main_t * mp = &l2fib_main;
+
+ if (keep_static) {
+ // TODO: remove only non-static entries
+ } else {
+ // Remove all entries
+ BV(clib_bihash_free) (&mp->mac_table);
+ BV(clib_bihash_init) (&mp->mac_table, "l2fib mac table",
+ L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE);
+ }
+
+ l2learn_main.global_learn_count = 0;
+}
+
+// Clear all entries in L2FIB
+// TODO: Later we may want a way to remove only the non-static entries
+static clib_error_t *
+clear_l2fib (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ l2fib_clear_table (0);
+ return 0;
+}
+
+VLIB_CLI_COMMAND (clear_l2fib_cli, static) = {
+ .path = "clear l2fib",
+ .short_help = "Clear l2fib mac forwarding entries",
+ .function = clear_l2fib,
+};
+
+
+// Add an entry to the l2fib.
+// If the entry already exists then overwrite it
+void l2fib_add_entry (u64 mac,
+ u32 bd_index,
+ u32 sw_if_index,
+ u32 static_mac,
+ u32 filter_mac,
+ u32 bvi_mac) {
+ l2fib_entry_key_t key;
+ l2fib_entry_result_t result;
+ __attribute__((unused)) u32 bucket_contents;
+ l2fib_main_t * mp = &l2fib_main;
+ BVT(clib_bihash_kv) kv;
+
+ // set up key
+ key.raw = l2fib_make_key ((u8 *)&mac, bd_index);
+
+ // set up result
+ result.raw = 0; // clear all fields
+ result.fields.sw_if_index = sw_if_index;
+ result.fields.static_mac = static_mac;
+ result.fields.filter = filter_mac;
+ result.fields.bvi = bvi_mac;
+
+ kv.key = key.raw;
+ kv.value = result.raw;
+
+ BV(clib_bihash_add_del) (&mp->mac_table, &kv, 1 /* is_add */);
+
+ // increment counter if dynamically learned mac
+  if (!result.fields.static_mac) {
+ l2learn_main.global_learn_count++;
+ }
+}
+
+// Add an entry to the L2FIB
+// The CLI format is:
+// l2fib add <mac> <bd> <intf> [static] [bvi]
+// l2fib add <mac> <bd> filter
+// Note that filter and bvi entries are always static
+static clib_error_t *
+l2fib_add (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ bd_main_t * bdm = &bd_main;
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u64 mac;
+ u32 bd_id;
+ u32 bd_index;
+ u32 sw_if_index = ~0;
+ u32 filter_mac = 0;
+ u32 static_mac = 0;
+ u32 bvi_mac = 0;
+ uword * p;
+
+ if (! unformat_user (input, unformat_ethernet_address, &mac))
+ {
+ error = clib_error_return (0, "expected mac address `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat (input, "%d", &bd_id)) {
+ error = clib_error_return (0, "expected bridge domain ID `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (!p) {
+ error = clib_error_return (0, "bridge domain ID %d invalid", bd_id);
+ goto done;
+ }
+ bd_index = p[0];
+
+ if (unformat (input, "filter")) {
+ filter_mac = 1;
+ static_mac = 1;
+
+ } else {
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ if (unformat (input, "static")) {
+ static_mac = 1;
+ } else if (unformat (input, "bvi")) {
+ bvi_mac = 1;
+ static_mac = 1;
+ }
+ }
+
+ l2fib_add_entry(mac, bd_index, sw_if_index, static_mac, filter_mac, bvi_mac);
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (l2fib_add_cli, static) = {
+ .path = "l2fib add",
+ .short_help = "Add l2fib mac forwarding entry <mac> <bd-id> filter | <intf> [static | bvi]",
+ .function = l2fib_add,
+};
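+
+// Example invocations (illustrative; mac, bd-id and interface name are
+// hypothetical):
+//   l2fib add 01:02:03:04:05:06 13 GigabitEthernet0/1/0 static
+//   l2fib add 01:02:03:04:05:06 13 filter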
+
+
+static clib_error_t *
+l2fib_test_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t * error = 0;
+ u64 mac, save_mac;
+ u32 bd_index = 0;
+ u32 sw_if_index = 8;
+ u32 filter_mac = 0;
+ u32 bvi_mac = 0;
+ u32 is_add = 0;
+ u32 is_del = 0;
+ u32 is_check = 0;
+ u32 count = 1;
+ int mac_set = 0;
+ int i;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "mac %U", unformat_ethernet_address, &mac))
+ mac_set = 1;
+ else if (unformat (input, "add"))
+ is_add = 1;
+ else if (unformat (input, "del"))
+ is_del = 1;
+ else if (unformat (input, "check"))
+ is_check = 1;
+ else if (unformat (input, "count %d", &count))
+ ;
+ else
+ break;
+ }
+
+ if (mac_set == 0)
+ return clib_error_return (0, "mac not set");
+
+ if (is_add == 0 && is_del == 0 && is_check == 0)
+ return clib_error_return (0, "noop: pick at least one of (add,del,check)");
+
+ save_mac = mac;
+
+ if (is_add)
+ {
+ for (i = 0; i < count; i++)
+ {
+ u64 tmp;
+          l2fib_add_entry(mac, bd_index, sw_if_index, 0 /* static_mac */,
+ filter_mac, bvi_mac);
+ tmp = clib_net_to_host_u64(mac);
+ tmp >>= 16;
+ tmp++;
+ tmp <<= 16;
+ mac = clib_host_to_net_u64 (tmp);
+ }
+ }
+
+ if (is_check)
+ {
+ BVT(clib_bihash_kv) kv;
+ l2fib_main_t * mp = &l2fib_main;
+
+ mac = save_mac;
+
+ for (i = 0; i < count; i++)
+ {
+ u64 tmp;
+ kv.key = l2fib_make_key ((u8 *)&mac, bd_index);
+ if (BV(clib_bihash_search) (&mp->mac_table, &kv, &kv))
+ {
+ clib_warning ("key %U AWOL", format_ethernet_address, &mac);
+ break;
+ }
+ tmp = clib_net_to_host_u64(mac);
+ tmp >>= 16;
+ tmp++;
+ tmp <<= 16;
+ mac = clib_host_to_net_u64 (tmp);
+ }
+ }
+
+ if (is_del)
+ {
+ for (i = 0; i < count; i++)
+ {
+ u64 tmp;
+
+ l2fib_del_entry (mac, bd_index);
+
+ tmp = clib_net_to_host_u64(mac);
+ tmp >>= 16;
+ tmp++;
+ tmp <<= 16;
+ mac = clib_host_to_net_u64 (tmp);
+ }
+ }
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (l2fib_test_command, static) = {
+ .path = "test l2fib",
+    .short_help = "test l2fib [add|del|check] mac <base-addr> count <nn>",
+ .function = l2fib_test_command_fn,
+};
+
+
+// Delete an entry from the l2fib.
+// Return 0 if the entry was deleted, or 1 if it was not found
+u32 l2fib_del_entry (u64 mac,
+ u32 bd_index) {
+
+ l2fib_entry_result_t result;
+ l2fib_main_t * mp = &l2fib_main;
+ BVT(clib_bihash_kv) kv;
+
+ // set up key
+ kv.key = l2fib_make_key ((u8 *)&mac, bd_index);
+
+ if (BV(clib_bihash_search) (&mp->mac_table, &kv, &kv))
+ return 1;
+
+ result.raw = kv.value;
+
+ // decrement counter if dynamically learned mac
+  if (!result.fields.static_mac) {
+ if (l2learn_main.global_learn_count > 0) {
+ l2learn_main.global_learn_count--;
+ }
+ }
+
+ // Remove entry from hash table
+ BV(clib_bihash_add_del) (&mp->mac_table, &kv, 0 /* is_add */);
+ return 0;
+}
+
+// Delete an entry from the L2FIB
+// The CLI format is:
+// l2fib del <mac> <bd-id>
+static clib_error_t *
+l2fib_del (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ bd_main_t * bdm = &bd_main;
+ clib_error_t * error = 0;
+ u64 mac;
+ u32 bd_id;
+ u32 bd_index;
+ uword * p;
+
+ if (! unformat_user (input, unformat_ethernet_address, &mac))
+ {
+ error = clib_error_return (0, "expected mac address `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat (input, "%d", &bd_id)) {
+ error = clib_error_return (0, "expected bridge domain ID `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+ if (!p) {
+ error = clib_error_return (0, "bridge domain ID %d invalid", bd_id);
+ goto done;
+ }
+ bd_index = p[0];
+
+ // Delete the entry
+ if (l2fib_del_entry(mac, bd_index)) {
+ error = clib_error_return (0, "mac entry not found");
+ goto done;
+ }
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (l2fib_del_cli, static) = {
+ .path = "l2fib del",
+ .short_help = "Delete l2fib mac forwarding entry <mac> <bd-id>",
+ .function = l2fib_del,
+};
+
+
+BVT(clib_bihash) *get_mac_table(void) {
+ l2fib_main_t * mp = &l2fib_main;
+ return &mp->mac_table;
+}
+
+clib_error_t *l2fib_init (vlib_main_t *vm)
+{
+ l2fib_main_t * mp = &l2fib_main;
+ l2fib_entry_key_t test_key;
+ u8 test_mac[6];
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main();
+
+ // Create the hash table
+ BV(clib_bihash_init) (&mp->mac_table, "l2fib mac table",
+ L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE);
+
+ // verify the key constructor is good, since it is endian-sensitive
+ test_mac[0] = 0x11;
+ test_key.raw = 0;
+ test_key.raw = l2fib_make_key ((u8 *)&test_mac, 0x1234);
+ ASSERT (test_key.fields.mac[0] == 0x11);
+ ASSERT (test_key.fields.bd_index == 0x1234);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2fib_init);
+
diff --git a/vnet/vnet/l2/l2_fib.h b/vnet/vnet/l2/l2_fib.h
new file mode 100644
index 00000000000..1dcc0200f60
--- /dev/null
+++ b/vnet/vnet/l2/l2_fib.h
@@ -0,0 +1,226 @@
+/*
+ * l2_fib.h : layer 2 forwarding table (aka mac table)
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2fib_h
+#define included_l2fib_h
+
+#include <vlib/vlib.h>
+#include <vppinfra/bihash_8_8.h>
+
+/*
+ * The size of the hash table
+ */
+#define L2FIB_NUM_BUCKETS (64 * 1024)
+#define L2FIB_MEMORY_SIZE (256<<20)
+
+/*
+ * The L2fib key is the mac address and bridge domain ID
+ */
+typedef struct {
+ union {
+ struct {
+ u16 bd_index;
+ u8 mac[6];
+ } fields;
+ struct {
+ u32 w0;
+ u32 w1;
+ } words;
+ u64 raw;
+ };
+} l2fib_entry_key_t;
+
+/*
+ * The l2fib entry results
+ */
+typedef struct {
+ union {
+ struct {
+ u32 sw_if_index; // output sw_if_index (L3 interface if bvi==1)
+
+ u8 static_mac:1; // static mac, no dataplane learning
+ u8 bvi:1; // mac is for a bridged virtual interface
+ u8 filter:1; // drop packets to/from this mac
+ u8 refresh:1; // refresh flag for aging
+ u8 unused1:4;
+ u8 timestamp; // timestamp for aging
+ u16 unused2;
+ } fields;
+ u64 raw;
+ };
+} l2fib_entry_result_t;
+
+
+// Compute the hash for the given key and return the corresponding bucket index
+always_inline
+u32 l2fib_compute_hash_bucket (l2fib_entry_key_t *key) {
+ u32 result;
+ u32 temp_a;
+ u32 temp_b;
+
+ result = 0xa5a5a5a5; // some seed
+ temp_a = key->words.w0;
+ temp_b = key->words.w1;
+ hash_mix32(temp_a, temp_b, result);
+
+ return result % L2FIB_NUM_BUCKETS;
+}
+
+always_inline
+u64 l2fib_make_key (u8 * mac_address, u16 bd_index) {
+ u64 temp;
+
+ // The mac address in memory is A:B:C:D:E:F
+ // The bd id in register is H:L
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+ // Create the in-register key as F:E:D:C:B:A:H:L
+ // In memory the key is L:H:A:B:C:D:E:F
+ temp = *((u64 *)(mac_address - 2));
+ temp = (temp & ~0xffff) | (u64)(bd_index);
+#else
+ // Create the in-register key as H:L:A:B:C:D:E:F
+ // In memory the key is H:L:A:B:C:D:E:F
+ temp = *((u64 *)(mac_address)) >> 16;
+  temp = temp | (((u64) bd_index) << 48);
+#endif
+
+ return temp;
+}
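+
+/*
+ * Worked example (little endian): mac = 01:02:03:04:05:06 and bd_index =
+ * 0x1234. The 8-byte load at (mac_address - 2) picks up two scratch bytes,
+ * which the mask-and-or then replaces with the bd_index, so the key bytes
+ * in memory are 34:12:01:02:03:04:05:06. Reading back through the union,
+ * fields.bd_index is 0x1234 and fields.mac[0] is 0x01, which is exactly
+ * what the ASSERTs in l2fib_init verify.
+ */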
+
+
+
+// Lookup the entry for mac and bd_index in the mac table for 1 packet.
+// Cached_key and cached_result are used as a one-entry cache.
+// The function reads and updates them as needed.
+//
+// mac0 and bd_index0 are the keys. The entry is written to result0.
+// If the entry was not found, result0 is set to ~0.
+//
+// key0 and bucket0 return with the computed key and hash bucket,
+// convenient if the entry needs to be updated afterward.
+// If the cached_result was used, bucket0 is set to ~0.
+
+static_always_inline void
+l2fib_lookup_1 (BVT(clib_bihash) * mac_table,
+ l2fib_entry_key_t * cached_key,
+ l2fib_entry_result_t * cached_result,
+ u8 * mac0,
+ u16 bd_index0,
+ l2fib_entry_key_t * key0,
+ u32 * bucket0,
+ l2fib_entry_result_t *result0)
+{
+ // set up key
+ key0->raw = l2fib_make_key (mac0, bd_index0);
+ *bucket0 = ~0;
+
+ if (key0->raw == cached_key->raw) {
+ // Hit in the one-entry cache
+ result0->raw = cached_result->raw;
+ } else {
+ // Do a regular mac table lookup
+ BVT(clib_bihash_kv) kv;
+
+ kv.key = key0->raw;
+ kv.value = ~0ULL;
+ BV(clib_bihash_search_inline) (mac_table, &kv);
+ result0->raw = kv.value;
+
+ // Update one-entry cache
+ cached_key->raw = key0->raw;
+ cached_result->raw = result0->raw;
+ }
+}
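+
+/*
+ * Typical call from a forwarding node (sketch only; see l2fwd_node_fn in
+ * l2_fwd.c for the real usage):
+ *
+ *   l2fib_entry_key_t cached_key;
+ *   l2fib_entry_result_t cached_result;
+ *   cached_key.raw = ~0;     // invalidate the one-entry cache
+ *   cached_result.raw = ~0;
+ *
+ *   l2fib_lookup_1 (mac_table, &cached_key, &cached_result,
+ *                   h0->dst_address, vnet_buffer(b0)->l2.bd_index,
+ *                   &key0, &bucket0, &result0);
+ *   if (result0.raw == ~0) { } // miss: flood or drop
+ */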
+
+
+// Lookup the entry for mac and bd_index in the mac table for 2 packets.
+// The lookups for the two packets are interleaved.
+//
+// Cached_key and cached_result are used as a one-entry cache.
+// The function reads and updates them as needed.
+//
+// mac0 and bd_index0 are the keys. The entry is written to result0.
+// If the entry was not found, result0 is set to ~0. The same
+// holds for mac1/bd_index1/result1.
+
+static_always_inline void
+l2fib_lookup_2 (BVT(clib_bihash) * mac_table,
+ l2fib_entry_key_t * cached_key,
+ l2fib_entry_result_t * cached_result,
+ u8 * mac0,
+ u8 * mac1,
+ u16 bd_index0,
+ u16 bd_index1,
+ l2fib_entry_key_t * key0,
+ l2fib_entry_key_t * key1,
+ u32 * bucket0,
+ u32 * bucket1,
+ l2fib_entry_result_t *result0,
+ l2fib_entry_result_t *result1)
+{
+ // set up key
+ key0->raw = l2fib_make_key (mac0, bd_index0);
+ key1->raw = l2fib_make_key (mac1, bd_index1);
+
+ if ((key0->raw == cached_key->raw) &&
+ (key1->raw == cached_key->raw)) {
+ // Both hit in the one-entry cache
+ result0->raw = cached_result->raw;
+ result1->raw = cached_result->raw;
+ *bucket0 = ~0;
+ *bucket1 = ~0;
+
+ } else {
+ BVT(clib_bihash_kv) kv0, kv1;
+
+ // Do a regular mac table lookup
+ // Interleave lookups for packet 0 and packet 1
+ kv0.key = key0->raw;
+ kv1.key = key1->raw;
+ kv0.value = ~0ULL;
+ kv1.value = ~0ULL;
+
+ BV(clib_bihash_search_inline) (mac_table, &kv0);
+ BV(clib_bihash_search_inline) (mac_table, &kv1);
+
+ result0->raw = kv0.value;
+ result1->raw = kv1.value;
+
+ // Update one-entry cache
+ cached_key->raw = key1->raw;
+ cached_result->raw = result1->raw;
+ }
+}
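+
+/*
+ * Note on the one-entry cache: on a miss the cache is refreshed with
+ * packet 1's key and result, so a run of packets toward the same mac and
+ * bridge domain performs the bihash searches once and later pairs hit
+ * the cache.
+ */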
+
+
+BVT(clib_bihash) *get_mac_table(void);
+void l2fib_clear_table (uint keep_static);
+void l2fib_add_entry (u64 mac,
+ u32 bd_index,
+ u32 sw_if_index,
+ u32 static_mac,
+                      u32 filter_mac,
+ u32 bvi_mac);
+u32 l2fib_del_entry (u64 mac,
+ u32 bd_index);
+
+void l2fib_table_dump (u32 bd_index, l2fib_entry_key_t **l2fe_key,
+ l2fib_entry_result_t **l2fe_res);
+
+u8 * format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args);
+
+#endif
diff --git a/vnet/vnet/l2/l2_flood.c b/vnet/vnet/l2/l2_flood.c
new file mode 100644
index 00000000000..8a702168715
--- /dev/null
+++ b/vnet/vnet/l2/l2_flood.c
@@ -0,0 +1,520 @@
+/*
+ * l2_flood.c : layer 2 flooding
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_bvi.h>
+#include <vnet/replication.h>
+#include <vnet/l2/l2_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+
+
+/*
+ * Flooding uses the packet replication infrastructure to send a copy of the
+ * packet to each member interface. Logically the replication infrastructure
+ * expects two graph nodes: a prep node that initiates replication and sends the
+ * packet to the first destination, and a recycle node that is passed the packet
+ * after it has been transmitted.
+ *
+ * To decrease the amount of code, l2 flooding implements both functions in
+ * the same graph node. This node can tell whether it is being called as the "prep"
+ * or "recycle" using replication_is_recycled().
+ */
+
+
+typedef struct {
+
+ // Next nodes for each feature
+ u32 feat_next_node_index[32];
+
+ // next node index for the L3 input node of each ethertype
+ next_by_ethertype_t l3_next;
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} l2flood_main_t;
+
+typedef struct {
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+ u16 bd_index;
+} l2flood_trace_t;
+
+
+/* packet trace format function */
+static u8 * format_l2flood_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2flood_trace_t * t = va_arg (*args, l2flood_trace_t *);
+
+ s = format (s, "l2-flood: sw_if_index %d dst %U src %U bd_index %d",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src,
+ t->bd_index);
+ return s;
+}
+
+l2flood_main_t l2flood_main;
+
+static vlib_node_registration_t l2flood_node;
+
+#define foreach_l2flood_error \
+_(L2FLOOD, "L2 flood packets") \
+_(REPL_FAIL, "L2 replication failures") \
+_(NO_MEMBERS, "L2 replication complete") \
+_(BVI_TAGGED, "BVI packet with vlan tag") \
+_(BVI_ETHERTYPE, "BVI packet with unhandled ethertype")
+
+typedef enum {
+#define _(sym,str) L2FLOOD_ERROR_##sym,
+ foreach_l2flood_error
+#undef _
+ L2FLOOD_N_ERROR,
+} l2flood_error_t;
+
+static char * l2flood_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2flood_error
+#undef _
+};
+
+typedef enum {
+ L2FLOOD_NEXT_L2_OUTPUT,
+ L2FLOOD_NEXT_DROP,
+ L2FLOOD_N_NEXT,
+} l2flood_next_t;
+
+/*
+ * Perform flooding on one packet
+ *
+ * Due to the way BVI processing can modify the packet, the BVI interface
+ * (if present) must be processed last in the replication. The member vector
+ * is arranged so that the BVI interface is always the first element.
+ * Flooding walks the vector in reverse.
+ *
+ * BVI processing causes the packet to go to L3 processing. This strips the
+ * L2 header, which is fine because the replication infrastructure restores
+ * it. However L3 processing can trigger larger changes to the packet. For
+ * example, an ARP request could be turned into an ARP reply, an ICMP request
+ * could be turned into an ICMP reply. If BVI processing is not performed
+ * last, the modified packet would be replicated to the remaining members.
+ */
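+
+/*
+ * Illustrative walk (hypothetical bridge domain): members = [BVI, A, B, C]
+ * and a packet received on A. The prep pass starts at the end of the
+ * vector and sends the first copy to C; each recycle pass moves toward
+ * index 0, sending to B (A itself is skipped by the reflection check) and
+ * finally to the BVI, so its L3 processing happens only after every other
+ * member has its copy.
+ */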
+
+static_always_inline void
+l2flood_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ l2flood_main_t * msm,
+ u64 * counter_base,
+ vlib_buffer_t * b0,
+ u32 sw_if_index0,
+ l2fib_entry_key_t * key0,
+ u32 * bucket0,
+ l2fib_entry_result_t * result0,
+ u32 * next0)
+{
+ u16 bd_index0;
+ l2_bridge_domain_t *bd_config;
+ l2_flood_member_t * members;
+ i32 current_member; // signed
+ replication_context_t * ctx;
+ u8 in_shg = vnet_buffer(b0)->l2.shg;
+
+ if (!replication_is_recycled(b0)) {
+
+ // Do flood "prep node" processing
+
+ // Get config for the bridge domain interface
+ bd_index0 = vnet_buffer(b0)->l2.bd_index;
+ bd_config = vec_elt_at_index(l2input_main.bd_configs, bd_index0);
+ members = bd_config->members;
+
+ // Find first member that passes the reflection and SHG checks
+ current_member = vec_len(members) - 1;
+ while ((current_member >= 0) &&
+ ((members[current_member].sw_if_index == sw_if_index0) ||
+ (in_shg && members[current_member].shg == in_shg))) {
+ current_member--;
+ }
+
+ if (current_member < 0) {
+ // No members to flood to
+ *next0 = L2FLOOD_NEXT_DROP;
+ b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS];
+ return;
+ }
+
+ if ((current_member > 0) &&
+ ((current_member > 1) ||
+ ((members[0].sw_if_index != sw_if_index0) &&
+ (!in_shg || members[0].shg != in_shg)))) {
+ // If more than one member then initiate replication
+ ctx = replication_prep (vm, b0, l2flood_node.index, 1 /* l2_packet */);
+ ctx->feature_replicas = (u64) members;
+ ctx->feature_counter = current_member;
+ }
+
+ } else {
+
+ // Do flood "recycle node" processing
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL))
+ {
+ (void)replication_recycle (vm, b0, 1 /* is_last */);
+ *next0 = L2FLOOD_NEXT_DROP;
+ b0->error = node->errors[L2FLOOD_ERROR_REPL_FAIL];
+ return;
+ }
+
+ ctx = replication_get_ctx (b0);
+ replication_clear_recycled (b0);
+
+ members = (l2_flood_member_t *) ctx->feature_replicas;
+ current_member = (i32)ctx->feature_counter - 1;
+
+ // Find next member that passes the reflection and SHG check
+ while ((current_member >= 0) &&
+ ((members[current_member].sw_if_index == sw_if_index0) ||
+ (in_shg && members[current_member].shg == in_shg))) {
+ current_member--;
+ }
+
+ if (current_member < 0) {
+ // No more members to flood to.
+ // Terminate replication and drop packet.
+
+ replication_recycle (vm, b0, 1 /* is_last */);
+
+ *next0 = L2FLOOD_NEXT_DROP;
+      // Ideally we wouldn't bump a counter here, just silently complete
+ b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS];
+ return;
+ }
+
+ // Restore packet and context and continue replication
+ ctx->feature_counter = current_member;
+ replication_recycle (vm, b0,
+ ((current_member == 0) || /*is_last */
+ ((current_member == 1) &&
+ ((members[0].sw_if_index == sw_if_index0) ||
+ (in_shg && members[0].shg == in_shg)))));
+ }
+
+ // Forward packet to the current member
+
+ if (PREDICT_TRUE(members[current_member].flags == L2_FLOOD_MEMBER_NORMAL)) {
+ // Do normal L2 forwarding
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = members[current_member].sw_if_index;
+ *next0 = L2FLOOD_NEXT_L2_OUTPUT;
+
+ } else {
+ // Do BVI processing
+ u32 rc;
+ rc = l2_to_bvi (vm,
+ msm->vnet_main,
+ b0,
+ members[current_member].sw_if_index,
+ &msm->l3_next,
+ next0);
+
+ if (PREDICT_FALSE(rc)) {
+ if (rc == TO_BVI_ERR_TAGGED) {
+ b0->error = node->errors[L2FLOOD_ERROR_BVI_TAGGED];
+ *next0 = L2FLOOD_NEXT_DROP;
+ } else if (rc == TO_BVI_ERR_ETHERTYPE) {
+ b0->error = node->errors[L2FLOOD_ERROR_BVI_ETHERTYPE];
+ *next0 = L2FLOOD_NEXT_DROP;
+ }
+ }
+ }
+
+}
+
+
+static uword
+l2flood_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ l2flood_next_t next_index;
+ l2flood_main_t * msm = &l2flood_main;
+ vlib_node_t *n = vlib_get_node (vm, l2flood_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t * em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 6 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ethernet_header_t * h0, * h1;
+ l2fib_entry_key_t key0, key1;
+ l2fib_entry_result_t result0, result1;
+ u32 bucket0, bucket1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3, * p4, * p5;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ // Prefetch the buffer header for the N+2 loop iteration
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+
+ // Prefetch the replication context for the N+1 loop iteration
+ // This depends on the buffer header above
+ replication_prefetch_ctx (p2);
+ replication_prefetch_ctx (p3);
+
+ // Prefetch the packet for the N+1 loop iteration
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ /* process 2 pkts */
+ em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 2;
+
+ l2flood_process (vm, node, msm, &em->counters[node_counter_base_index],
+ b0, sw_if_index0, &key0, &bucket0, &result0, &next0);
+
+ l2flood_process (vm, node, msm, &em->counters[node_counter_base_index],
+ b1, sw_if_index1, &key1, &bucket1, &result1, &next1);
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2flood_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer(b0)->l2.bd_index;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2flood_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->bd_index = vnet_buffer(b1)->l2.bd_index;
+ memcpy(t->src, h1->src_address, 6);
+ memcpy(t->dst, h1->dst_address, 6);
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t * h0;
+ l2fib_entry_key_t key0;
+ l2fib_entry_result_t result0;
+ u32 bucket0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+
+ /* process 1 pkt */
+ em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 1;
+
+ l2flood_process (vm, node, msm, &em->counters[node_counter_base_index],
+ b0, sw_if_index0, &key0, &bucket0, &result0, &next0);
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2flood_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer(b0)->l2.bd_index;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+VLIB_REGISTER_NODE (l2flood_node,static) = {
+ .function = l2flood_node_fn,
+ .name = "l2-flood",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2flood_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2flood_error_strings),
+ .error_strings = l2flood_error_strings,
+
+ .n_next_nodes = L2FLOOD_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2FLOOD_NEXT_L2_OUTPUT] = "l2-output",
+ [L2FLOOD_NEXT_DROP] = "error-drop",
+ },
+};
+
+clib_error_t *l2flood_init (vlib_main_t *vm)
+{
+ l2flood_main_t * mp = &l2flood_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main();
+
+ // Initialize the feature next-node indexes
+ feat_bitmap_init_next_nodes(vm,
+ l2flood_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names(),
+ mp->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2flood_init);
+
+
+
+// Add the L3 input node for this ethertype to the next nodes structure
+void
+l2flood_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type,
+ u32 node_index)
+{
+ l2flood_main_t * mp = &l2flood_main;
+ u32 next_index;
+
+ next_index = vlib_node_add_next (vm,
+ l2flood_node.index,
+ node_index);
+
+ next_by_ethertype_register (&mp->l3_next, type, next_index);
+}
+
+
+// set subinterface flood enable/disable
+// The CLI format is:
+// set interface l2 flood <interface> [disable]
+static clib_error_t *
+int_flood (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable")) {
+ enable = 0;
+ }
+
+ // set the interface flag
+ l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_FLOOD, enable);
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (int_flood_cli, static) = {
+ .path = "set interface l2 flood",
+ .short_help = "set interface l2 flood <interface> [disable]",
+ .function = int_flood,
+};
diff --git a/vnet/vnet/l2/l2_flood.h b/vnet/vnet/l2/l2_flood.h
new file mode 100644
index 00000000000..3c9273d48d5
--- /dev/null
+++ b/vnet/vnet/l2/l2_flood.h
@@ -0,0 +1,28 @@
+/*
+ * l2_flood.h : layer 2 flooding
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2flood_h
+#define included_l2flood_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+void
+l2flood_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type,
+ u32 node_index);
+#endif
diff --git a/vnet/vnet/l2/l2_fwd.c b/vnet/vnet/l2/l2_fwd.c
new file mode 100644
index 00000000000..089d4008ea8
--- /dev/null
+++ b/vnet/vnet/l2/l2_fwd.c
@@ -0,0 +1,446 @@
+/*
+ * l2_fwd.c : layer 2 forwarding using l2fib
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_bvi.h>
+#include <vnet/l2/l2_fwd.h>
+#include <vnet/l2/l2_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/sparse_vec.h>
+
+
+typedef struct {
+
+ // Hash table
+ BVT(clib_bihash) *mac_table;
+
+ // next node index for the L3 input node of each ethertype
+ next_by_ethertype_t l3_next;
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} l2fwd_main_t;
+
+typedef struct {
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+ u16 bd_index;
+} l2fwd_trace_t;
+
+/* packet trace format function */
+static u8 * format_l2fwd_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2fwd_trace_t * t = va_arg (*args, l2fwd_trace_t *);
+
+ s = format (s, "l2-fwd: sw_if_index %d dst %U src %U bd_index %d",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src,
+ t->bd_index);
+ return s;
+}
+
+l2fwd_main_t l2fwd_main;
+
+static vlib_node_registration_t l2fwd_node;
+
+#define foreach_l2fwd_error \
+_(L2FWD, "L2 forward packets") \
+_(FLOOD, "L2 forward misses") \
+_(HIT, "L2 forward hits") \
+_(BVI_TAGGED, "BVI packet with vlan tag") \
+_(BVI_ETHERTYPE, "BVI packet with unhandled ethertype") \
+_(FILTER_DROP, "Filter Mac Drop") \
+_(REFLECT_DROP, "Reflection Drop")
+
+typedef enum {
+#define _(sym,str) L2FWD_ERROR_##sym,
+ foreach_l2fwd_error
+#undef _
+ L2FWD_N_ERROR,
+} l2fwd_error_t;
+
+static char * l2fwd_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2fwd_error
+#undef _
+};
+
+typedef enum {
+ L2FWD_NEXT_L2_OUTPUT,
+ L2FWD_NEXT_FLOOD,
+ L2FWD_NEXT_DROP,
+ L2FWD_N_NEXT,
+} l2fwd_next_t;
+
+// Forward one packet based on the mac table lookup result
+
+static_always_inline void
+l2fwd_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ l2fwd_main_t * msm,
+ vlib_error_main_t * em,
+ vlib_buffer_t * b0,
+ u32 sw_if_index0,
+ l2fib_entry_result_t * result0,
+ u32 * next0)
+{
+ if (PREDICT_FALSE (result0->raw == ~0)) {
+    // lookup miss: flood the packet if unknown-unicast flooding is
+    // enabled on this interface, otherwise drop it
+ if (vnet_buffer(b0)->l2.feature_bitmap & L2INPUT_FEAT_UU_FLOOD) {
+ *next0 = L2FWD_NEXT_FLOOD;
+ } else {
+ // Flooding is disabled
+ b0->error = node->errors[L2FWD_ERROR_FLOOD];
+ *next0 = L2FWD_NEXT_DROP;
+ }
+
+ } else {
+
+ // lookup hit, forward packet
+#ifdef COUNTERS
+ em->counters[node_counter_base_index + L2FWD_ERROR_HIT] += 1;
+#endif
+
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = result0->fields.sw_if_index;
+ *next0 = L2FWD_NEXT_L2_OUTPUT;
+
+ // perform reflection check
+ if (PREDICT_FALSE (sw_if_index0 == result0->fields.sw_if_index)) {
+ b0->error = node->errors[L2FWD_ERROR_REFLECT_DROP];
+ *next0 = L2FWD_NEXT_DROP;
+
+ // perform filter check
+ } else if (PREDICT_FALSE (result0->fields.filter)) {
+ b0->error = node->errors[L2FWD_ERROR_FILTER_DROP];
+ *next0 = L2FWD_NEXT_DROP;
+
+ // perform BVI check
+ } else if (PREDICT_FALSE (result0->fields.bvi)) {
+ u32 rc;
+ rc = l2_to_bvi (vm,
+ msm->vnet_main,
+ b0,
+ vnet_buffer(b0)->sw_if_index[VLIB_TX],
+ &msm->l3_next,
+ next0);
+
+ if (PREDICT_FALSE(rc)) {
+ if (rc == TO_BVI_ERR_TAGGED) {
+ b0->error = node->errors[L2FWD_ERROR_BVI_TAGGED];
+ *next0 = L2FWD_NEXT_DROP;
+ } else if (rc == TO_BVI_ERR_ETHERTYPE) {
+ b0->error = node->errors[L2FWD_ERROR_BVI_ETHERTYPE];
+ *next0 = L2FWD_NEXT_DROP;
+ }
+ }
+ }
+ }
+}
+
+
+static uword
+l2fwd_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ l2fwd_next_t next_index;
+ l2fwd_main_t * msm = &l2fwd_main;
+ vlib_node_t *n = vlib_get_node (vm, l2fwd_node.index);
+ CLIB_UNUSED(u32 node_counter_base_index) = n->error_heap_index;
+ vlib_error_main_t * em = &vm->error_main;
+ l2fib_entry_key_t cached_key;
+ l2fib_entry_result_t cached_result;
+
+ // Clear the one-entry cache in case mac table was updated
+ cached_key.raw = ~0;
+ cached_result.raw = ~0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ethernet_header_t * h0, * h1;
+ l2fib_entry_key_t key0, key1;
+ l2fib_entry_result_t result0, result1;
+ u32 bucket0, bucket1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2fwd_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer(b0)->l2.bd_index;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2fwd_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->bd_index = vnet_buffer(b1)->l2.bd_index;
+ memcpy(t->src, h1->src_address, 6);
+ memcpy(t->dst, h1->dst_address, 6);
+ }
+ }
+
+ /* process 2 pkts */
+#ifdef COUNTERS
+ em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 2;
+#endif
+ l2fib_lookup_2 (msm->mac_table, &cached_key, &cached_result,
+ h0->dst_address,
+ h1->dst_address,
+ vnet_buffer(b0)->l2.bd_index,
+ vnet_buffer(b1)->l2.bd_index,
+ &key0, // not used
+ &key1, // not used
+ &bucket0, // not used
+ &bucket1, // not used
+ &result0,
+ &result1);
+ l2fwd_process (vm, node, msm, em, b0, sw_if_index0, &result0, &next0);
+ l2fwd_process (vm, node, msm, em, b1, sw_if_index1, &result1, &next1);
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t * h0;
+ l2fib_entry_key_t key0;
+ l2fib_entry_result_t result0;
+ u32 bucket0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
+ l2fwd_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer(b0)->l2.bd_index;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ }
+
+ /* process 1 pkt */
+#ifdef COUNTERS
+ em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 1;
+#endif
+ l2fib_lookup_1 (msm->mac_table, &cached_key, &cached_result,
+ h0->dst_address, vnet_buffer(b0)->l2.bd_index,
+ &key0, // not used
+ &bucket0, // not used
+ &result0);
+ l2fwd_process (vm, node, msm, em, b0, sw_if_index0, &result0, &next0);
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (l2fwd_node,static) = {
+ .function = l2fwd_node_fn,
+ .name = "l2-fwd",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2fwd_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2fwd_error_strings),
+ .error_strings = l2fwd_error_strings,
+
+ .n_next_nodes = L2FWD_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2FWD_NEXT_L2_OUTPUT] = "l2-output",
+ [L2FWD_NEXT_FLOOD] = "l2-flood",
+ [L2FWD_NEXT_DROP] = "error-drop",
+ },
+};
+
+clib_error_t *l2fwd_init (vlib_main_t *vm)
+{
+ l2fwd_main_t * mp = &l2fwd_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main();
+
+ /* init the hash table ptr */
+ mp->mac_table = get_mac_table();
+
+ // Initialize the next nodes for each ethertype
+ next_by_ethertype_init (&mp->l3_next);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2fwd_init);
+
+
+// Add the L3 input node for this ethertype to the next nodes structure
+void
+l2fwd_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type,
+ u32 node_index)
+{
+ l2fwd_main_t * mp = &l2fwd_main;
+ u32 next_index;
+
+ next_index = vlib_node_add_next (vm,
+ l2fwd_node.index,
+ node_index);
+
+ next_by_ethertype_register (&mp->l3_next, type, next_index);
+}
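+
+// Editor's sketch (illustrative, not part of this commit): an L3 protocol
+// module would typically call the registration above from its own init
+// function. "my_proto_input_node" is a hypothetical node registration.
+#if 0
+static clib_error_t *
+my_proto_l2fwd_init (vlib_main_t * vm)
+{
+ extern vlib_node_registration_t my_proto_input_node;
+
+ // Hand IP4 packets destined to a BVI up to our L3 input node
+ l2fwd_register_input_type (vm, ETHERNET_TYPE_IP4,
+ my_proto_input_node.index);
+ return 0;
+}
+VLIB_INIT_FUNCTION (my_proto_l2fwd_init);
+#endif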
+
+
+// set subinterface forward enable/disable
+// The CLI format is:
+// set interface l2 forward <interface> [disable]
+static clib_error_t *
+int_fwd (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable")) {
+ enable = 0;
+ }
+
+ // set the interface flag
+ if (l2input_intf_config(sw_if_index)->xconnect) {
+ l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_XCONNECT, enable);
+ } else {
+ l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_FWD, enable);
+ }
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (int_fwd_cli, static) = {
+ .path = "set interface l2 forward",
+ .short_help = "set interface l2 forward <interface> [disable]",
+ .function = int_fwd,
+};
diff --git a/vnet/vnet/l2/l2_fwd.h b/vnet/vnet/l2/l2_fwd.h
new file mode 100644
index 00000000000..f08717dfdf8
--- /dev/null
+++ b/vnet/vnet/l2/l2_fwd.h
@@ -0,0 +1,29 @@
+/*
+ * l2_fwd.h : layer 2 forwarding using l2fib
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2fwd_h
+#define included_l2fwd_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+
+void
+l2fwd_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type,
+ u32 node_index);
+#endif
diff --git a/vnet/vnet/l2/l2_input.c b/vnet/vnet/l2/l2_input.c
new file mode 100644
index 00000000000..34f8a77184f
--- /dev/null
+++ b/vnet/vnet/l2/l2_input.c
@@ -0,0 +1,963 @@
+/*
+ * l2_input.c : layer 2 input packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_bvi.h>
+#include <vnet/l2/l2_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+extern clib_error_t *
+ethernet_arp_hw_interface_link_up_down (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags);
+
+// Feature graph node names
+static char * l2input_feat_names[] = {
+#define _(sym,name) name,
+ foreach_l2input_feat
+#undef _
+};
+
+char **l2input_get_feat_names(void) {
+ return l2input_feat_names;
+}
+
+
+typedef struct {
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u32 next_index;
+ u32 sw_if_index;
+} l2input_trace_t;
+
+/* packet trace format function */
+static u8 * format_l2input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2input_trace_t * t = va_arg (*args, l2input_trace_t *);
+
+ s = format (s, "l2-input: sw_if_index %d dst %U src %U",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src);
+ return s;
+}
+
+l2input_main_t l2input_main;
+
+static vlib_node_registration_t l2input_node;
+
+#define foreach_l2input_error \
+_(L2INPUT, "L2 input packets") \
+_(DROP, "L2 input drops")
+
+typedef enum {
+#define _(sym,str) L2INPUT_ERROR_##sym,
+ foreach_l2input_error
+#undef _
+ L2INPUT_N_ERROR,
+} l2input_error_t;
+
+static char * l2input_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2input_error
+#undef _
+};
+
+typedef enum {
+ L2INPUT_NEXT_LEARN,
+ L2INPUT_NEXT_FWD,
+ L2INPUT_NEXT_DROP,
+ L2INPUT_N_NEXT,
+} l2input_next_t;
+
+
+static_always_inline void
+classify_and_dispatch (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 cpu_index,
+ l2input_main_t * msm,
+ vlib_buffer_t * b0,
+ u32 *next0)
+{
+ // Main tasks:
+ // - load the L2 input feature struct and the bridge domain struct
+ // - parse the ethernet header to determine unicast/mcast/broadcast
+ // - take the L2 input stat
+ // - classify the packet as IP/UDP/TCP, control, other
+ // - mask the feature bitmap
+ // - go to the first node in the bitmap
+ // - later: optimize VTM
+ //
+ // For L2XC, set the tx sw-if-handle.
+
+ u8 mcast_dmac;
+ __attribute__((unused)) u8 l2bcast;
+ __attribute__((unused)) u8 l2mcast;
+ __attribute__((unused)) u8 l2_stat_kind;
+ u16 ethertype;
+ u8 protocol;
+ l2_input_config_t *config;
+ l2_bridge_domain_t *bd_config;
+ u16 bd_index0;
+ u32 feature_bitmap;
+ u32 feat_mask;
+ ethernet_header_t * h0;
+ u8 * l3h0;
+ u32 sw_if_index0;
+ u8 bvi_flg = 0;
+
+#define get_u32(addr) ( *((u32 *)(addr)) )
+#define get_u16(addr) ( *((u16 *)(addr)) )
+#define STATS_IF_LAYER2_UCAST_INPUT_CNT 0
+#define STATS_IF_LAYER2_MCAST_INPUT_CNT 1
+#define STATS_IF_LAYER2_BCAST_INPUT_CNT 2
+
+ // Check for from-BVI processing
+ // When we come from ethernet-input, TX is ~0
+ if (PREDICT_FALSE (vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0)) {
+ // Set up for a from-bvi packet
+ bvi_to_l2 (vm,
+ msm->vnet_main,
+ cpu_index,
+ b0,
+ vnet_buffer(b0)->sw_if_index[VLIB_TX]);
+ bvi_flg = 1;
+ }
+
+ // The RX interface can be changed by bvi_to_l2()
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+ l3h0 = (u8 *)h0 + vnet_buffer(b0)->l2.l2_len;
+
+ // Determine L3 packet type. Only need to check the common types.
+ // Used to filter out features that don't apply to common packets.
+ ethertype = clib_net_to_host_u16(get_u16(l3h0 - 2));
+ if (ethertype == ETHERNET_TYPE_IP4) {
+ protocol = ((ip4_header_t *)l3h0)->protocol;
+ if ((protocol == IP_PROTOCOL_UDP) ||
+ (protocol == IP_PROTOCOL_TCP)) {
+ feat_mask = IP_UDP_TCP_FEAT_MASK;
+ } else {
+ feat_mask = IP4_FEAT_MASK;
+ }
+ } else if (ethertype == ETHERNET_TYPE_IP6) {
+ protocol = ((ip6_header_t *)l3h0)->protocol;
+ // Don't bother checking for extension headers for now
+ if ((protocol == IP_PROTOCOL_UDP) ||
+ (protocol == IP_PROTOCOL_TCP)) {
+ feat_mask = IP_UDP_TCP_FEAT_MASK;
+ } else {
+ feat_mask = IP6_FEAT_MASK;
+ }
+ } else if (ethertype == ETHERNET_TYPE_MPLS_UNICAST) {
+ feat_mask = MPLS_FEAT_MASK;
+ } else {
+ // allow all features
+ feat_mask = ~0;
+ }
+
+ // determine layer2 kind for stat and mask
+ mcast_dmac = ethernet_address_cast(h0->dst_address);
+ l2bcast = 0;
+ l2mcast = 0;
+ l2_stat_kind = STATS_IF_LAYER2_UCAST_INPUT_CNT;
+ if (PREDICT_FALSE (mcast_dmac)) {
+ u32 *dsthi = (u32 *) &h0->dst_address[0];
+ u32 *dstlo = (u32 *) &h0->dst_address[2];
+
+ // Disable bridge forwarding (flooding will execute instead if not xconnect)
+ feat_mask &= ~(L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD);
+ if (ethertype != ETHERNET_TYPE_ARP) // Disable ARP-term for non-ARP packet
+ feat_mask &= ~(L2INPUT_FEAT_ARP_TERM);
+
+ // dest mac is multicast or broadcast
+ if ((*dstlo == 0xFFFFFFFF) && (*dsthi == 0xFFFFFFFF)) {
+ // dest mac == FF:FF:FF:FF:FF:FF
+ l2_stat_kind = STATS_IF_LAYER2_BCAST_INPUT_CNT;
+ l2bcast=1;
+ } else {
+ l2_stat_kind = STATS_IF_LAYER2_MCAST_INPUT_CNT;
+ l2mcast=1;
+ }
+ }
+ // TODO: take l2 stat
+
+ // Get config for the input interface
+ config = vec_elt_at_index(msm->configs, sw_if_index0);
+
+ // Save the split horizon group; use 0 for a BVI so its packets are never SHG-dropped
+ vnet_buffer(b0)->l2.shg = bvi_flg ? 0 : config->shg;
+
+ if (config->xconnect) {
+ // Set the output interface
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = config->output_sw_if_index;
+
+ } else {
+
+ // Do bridge-domain processing
+ bd_index0 = config->bd_index;
+ // save bd_index for downstream feature graph nodes
+ vnet_buffer(b0)->l2.bd_index = bd_index0;
+
+ // Get the config for the bridge domain
+ bd_config = vec_elt_at_index(msm->bd_configs, bd_index0);
+
+ // Process bridge domain feature enables.
+ // To perform learning/flooding/forwarding, the corresponding bit
+ // must be enabled in both the input interface config and in the
+ // bridge domain config. In the bd_bitmap, bits for features other
+ // than learning/flooding/forwarding should always be set.
+ feat_mask = feat_mask & bd_config->feature_bitmap;
+ }
+
+ // mask out features from bitmap using packet type and bd config
+ feature_bitmap = config->feature_bitmap & feat_mask;
+
+ // save for next feature graph nodes
+ vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap;
+
+ // Determine the next node
+ *next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index,
+ feature_bitmap);
+}
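+
+// Editor's sketch of the dispatch step (simplified; the real helper is
+// feat_bitmap_get_next_node_index() in feat_bitmap.h): the next node is
+// the one for the highest-numbered feature bit still set, so features run
+// in priority order down to the last-chance DROP bit, which is always set.
+#if 0
+static u32
+example_feat_dispatch (u32 * feat_next_node_index, u32 feature_bitmap)
+{
+ // feature_bitmap is never 0 because L2INPUT_FEAT_DROP is always configured
+ u32 top_bit = 31 - __builtin_clz (feature_bitmap); // MSB index
+ return feat_next_node_index[top_bit];
+}
+#endif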
+
+
+static uword
+l2input_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ l2input_next_t next_index;
+ l2input_main_t * msm = &l2input_main;
+ vlib_node_t *n = vlib_get_node (vm, l2input_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t * em = &vm->error_main;
+ u32 cpu_index = os_get_cpu_number();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 6 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3, * p4 , * p5;
+ u32 sw_if_index2, sw_if_index3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ // Prefetch the buffer header and packet for the N+2 loop iteration
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+
+ // Prefetch the input config for the N+1 loop iteration
+ // This depends on the buffer header above
+ sw_if_index2 = vnet_buffer(p2)->sw_if_index[VLIB_RX];
+ sw_if_index3 = vnet_buffer(p3)->sw_if_index[VLIB_RX];
+ CLIB_PREFETCH (&msm->configs[sw_if_index2], CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (&msm->configs[sw_if_index3], CLIB_CACHE_LINE_BYTES, LOAD);
+
+ // Don't bother prefetching the bridge-domain config (which
+ // depends on the input config above). Only a small number of
+ // bridge domains are expected. Plus the structure is small
+ // and several fit in a cache line.
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) {
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED) {
+ ethernet_header_t * h0 = vlib_buffer_get_current (b0);
+ l2input_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED) {
+ ethernet_header_t * h1 = vlib_buffer_get_current (b1);
+ l2input_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ memcpy(t->src, h1->src_address, 6);
+ memcpy(t->dst, h1->dst_address, 6);
+ }
+ }
+
+ em->counters[node_counter_base_index + L2INPUT_ERROR_L2INPUT] += 2;
+
+ classify_and_dispatch (vm,
+ node,
+ cpu_index,
+ msm,
+ b0,
+ &next0);
+
+ classify_and_dispatch (vm,
+ node,
+ cpu_index,
+ msm,
+ b1,
+ &next1);
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
+ ethernet_header_t * h0 = vlib_buffer_get_current (b0);
+ l2input_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ t->sw_if_index = sw_if_index0;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ }
+
+ em->counters[node_counter_base_index + L2INPUT_ERROR_L2INPUT] += 1;
+
+ classify_and_dispatch (vm,
+ node,
+ cpu_index,
+ msm,
+ b0,
+ &next0);
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+VLIB_REGISTER_NODE (l2input_node,static) = {
+ .function = l2input_node_fn,
+ .name = "l2-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2input_error_strings),
+ .error_strings = l2input_error_strings,
+
+ .n_next_nodes = L2INPUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2INPUT_NEXT_LEARN] = "l2-learn",
+ [L2INPUT_NEXT_FWD] = "l2-fwd",
+ [L2INPUT_NEXT_DROP] = "error-drop",
+ },
+};
+
+clib_error_t *l2input_init (vlib_main_t *vm)
+{
+ l2input_main_t * mp = &l2input_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main();
+
+ // Get packets RX'd from L2 interfaces
+ ethernet_register_l2_input (vm, l2input_node.index);
+
+ // Create the config vector
+ vec_validate(mp->configs, 100);
+ // pre-allocate and zero config entries for sw_if_index 0 through 100
+
+ // Initialize the feature next-node indexes
+ feat_bitmap_init_next_nodes(vm,
+ l2input_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names(),
+ mp->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2input_init);
+
+
+// Get a pointer to the config for the given interface
+l2_input_config_t * l2input_intf_config (u32 sw_if_index)
+{
+ l2input_main_t * mp = &l2input_main;
+
+ vec_validate(mp->configs, sw_if_index);
+ return vec_elt_at_index(mp->configs, sw_if_index);
+}
+
+// Enable (or disable) the feature in the bitmap for the given interface
+u32 l2input_intf_bitmap_enable (u32 sw_if_index,
+ u32 feature_bitmap,
+ u32 enable)
+{
+ l2input_main_t * mp = &l2input_main;
+ l2_input_config_t *config;
+
+ vec_validate(mp->configs, sw_if_index);
+ config = vec_elt_at_index(mp->configs, sw_if_index);
+
+ if (enable) {
+ config->feature_bitmap |= feature_bitmap;
+ } else {
+ config->feature_bitmap &= ~feature_bitmap;
+ }
+
+ return config->feature_bitmap;
+}
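+
+// Usage sketch (editor's example): turning MAC learning off on an
+// interface amounts to clearing its LEARN feature bit.
+#if 0
+static void
+example_disable_learning (u32 sw_if_index)
+{
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_LEARN,
+ 0 /* disable */);
+}
+#endif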
+
+
+
+// Set the subinterface to run in L2 or L3 mode.
+// For L3 mode, only the sw_if_index is needed.
+// For bridged mode, the bd_index and bvi flag are also required.
+// For xconnect mode, the peer sw_if_index is also required.
+// Returns 0 on success, non-zero on error.
+
+u32 set_int_l2_mode (vlib_main_t * vm,
+ vnet_main_t * vnet_main,
+ u32 mode,
+ u32 sw_if_index,
+ u32 bd_index, // for bridged interface
+ u32 bvi, // the bridged interface is the BVI
+ u32 shg, // the bridged interface's split horizon group
+ u32 xc_sw_if_index) // peer interface for xconnect
+{
+ l2input_main_t * mp = &l2input_main;
+ vnet_main_t * vnm = vnet_get_main();
+ vnet_hw_interface_t * hi;
+ l2_output_config_t * out_config;
+ l2_input_config_t * config;
+ l2_bridge_domain_t * bd_config;
+ l2_flood_member_t member;
+ u64 mac;
+ i32 l2_if_adjust = 0;
+
+ hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+
+ vec_validate(mp->configs, sw_if_index);
+ config = vec_elt_at_index(mp->configs, sw_if_index);
+
+ if (config->bridge) {
+ // Interface is already in bridge mode. Undo the existing config.
+ bd_config = vec_elt_at_index(mp->bd_configs, config->bd_index);
+
+ // remove interface from flood vector
+ bd_remove_member (bd_config, sw_if_index);
+
+ // undo any BVI-related config
+ if (bd_config->bvi_sw_if_index == sw_if_index) {
+ bd_config->bvi_sw_if_index = ~0;
+ config->bvi = 0;
+
+ // restore output node
+ hi->output_node_index = bd_config->saved_bvi_output_node_index;
+
+ // delete the l2fib entry for the bvi interface
+ mac = *((u64 *)hi->hw_address);
+ l2fib_del_entry (mac, config->bd_index);
+ }
+ l2_if_adjust--;
+ } else if (config->xconnect) {
+ l2_if_adjust--;
+ }
+
+ // Initialize the l2-input configuration for the interface
+ if (mode == MODE_L3) {
+ config->xconnect = 0;
+ config->bridge = 0;
+ config->shg = 0;
+ config->bd_index = 0;
+ config->feature_bitmap = L2INPUT_FEAT_DROP;
+ } else if (mode == MODE_L2_CLASSIFY) {
+ config->xconnect = 1;
+ config->bridge = 0;
+ config->output_sw_if_index = xc_sw_if_index;
+
+ // Make sure last-chance drop is configured
+ config->feature_bitmap |= L2INPUT_FEAT_DROP | L2INPUT_FEAT_CLASSIFY;
+
+ // Make sure bridging features are disabled
+ config->feature_bitmap &=
+ ~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD);
+ shg = 0; // not used in xconnect
+
+ // Ensure all packets go to ethernet-input
+ ethernet_set_rx_redirect (vnet_main, hi, 1);
+ } else {
+
+ if (mode == MODE_L2_BRIDGE) {
+ /*
+ * The check that the interface must be ethernet has been removed,
+ * specifically so we can bridge to L3 tunnel interfaces. The removed
+ * check was:
+ * if (hi->hw_class_index != ethernet_hw_interface_class.index)
+ */
+ if (!hi)
+ return MODE_ERROR_ETH; // non-ethernet
+
+ config->xconnect = 0;
+ config->bridge = 1;
+ config->bd_index = bd_index;
+
+ // Enable forwarding, flooding, learning and ARP termination by default
+ // (note that ARP term is disabled on BD feature bitmap by default)
+ config->feature_bitmap |= L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD |
+ L2INPUT_FEAT_FLOOD | L2INPUT_FEAT_LEARN | L2INPUT_FEAT_ARP_TERM;
+
+ // Make sure last-chance drop is configured
+ config->feature_bitmap |= L2INPUT_FEAT_DROP;
+
+ // Make sure xconnect is disabled
+ config->feature_bitmap &= ~L2INPUT_FEAT_XCONNECT;
+
+ // Set up bridge domain
+ vec_validate(mp->bd_configs, bd_index);
+ bd_config = vec_elt_at_index(mp->bd_configs, bd_index);
+ bd_validate (bd_config);
+
+ // TODO: think: add l2fib entry even for non-bvi interface?
+
+ // Do BVI interface initializations
+ if (bvi) {
+ // ensure the BD does not already have a BVI interface (or should it be replaced with this one?)
+ if (bd_config->bvi_sw_if_index != ~0) {
+ return MODE_ERROR_BVI_DEF; // bd already has a bvi interface
+ }
+ bd_config->bvi_sw_if_index = sw_if_index;
+ config->bvi = 1;
+
+ // make BVI outputs go to l2-input
+ bd_config->saved_bvi_output_node_index = hi->output_node_index;
+ hi->output_node_index = l2input_node.index;
+
+ // create the l2fib entry for the bvi interface
+ mac = *((u64 *)hi->hw_address);
+ l2fib_add_entry (mac, bd_index, sw_if_index, 1, 0, 1); // static + bvi
+
+ // Disable learning by default; it has no effect since the l2fib entry is static.
+ config->feature_bitmap &= ~L2INPUT_FEAT_LEARN;
+
+ // Add BVI to arp_input_next_index_by_hw_if_index table so arp-input
+ // node can send out ARP response via BVI to BD
+ ethernet_arp_hw_interface_link_up_down(vnet_main, hi->hw_if_index, 0);
+
+ }
+
+ // Add interface to bridge-domain flood vector
+ member.sw_if_index = sw_if_index;
+ member.flags = bvi ? L2_FLOOD_MEMBER_BVI : L2_FLOOD_MEMBER_NORMAL;
+ member.shg = shg;
+ bd_add_member (bd_config, &member);
+
+ } else {
+ config->xconnect = 1;
+ config->bridge = 0;
+ config->output_sw_if_index = xc_sw_if_index;
+
+ // Make sure last-chance drop is configured
+ config->feature_bitmap |= L2INPUT_FEAT_DROP;
+
+ // Make sure bridging features are disabled
+ config->feature_bitmap &= ~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD);
+
+ config->feature_bitmap |= L2INPUT_FEAT_XCONNECT;
+ shg = 0; // not used in xconnect
+ }
+
+ // set up split-horizon group
+ config->shg = shg;
+ out_config = l2output_intf_config (sw_if_index);
+ out_config->shg = shg;
+
+ // Test: remove this when non-IP features can be configured.
+ // Enable a non-IP feature to test IP feature masking
+ // config->feature_bitmap |= L2INPUT_FEAT_CTRL_PKT;
+
+ l2_if_adjust++;
+ }
+
+ // Adjust count of L2 interfaces
+ hi->l2_if_count += l2_if_adjust;
+
+ if (hi->hw_class_index == ethernet_hw_interface_class.index) {
+ if ((hi->l2_if_count == 1) && (l2_if_adjust == 1)) {
+ // Just added first L2 interface on this port
+
+ // Set promiscuous mode on the l2 interface
+ ethernet_set_flags (vnet_main, hi->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
+
+ // Ensure all packets go to ethernet-input
+ ethernet_set_rx_redirect (vnet_main, hi, 1);
+
+ } else if ((hi->l2_if_count == 0) && (l2_if_adjust == -1)) {
+ // Just removed only L2 subinterface on this port
+
+ // Disable promiscuous mode on the l2 interface
+ ethernet_set_flags (vnet_main, hi->hw_if_index, 0);
+
+ // Allow ip packets to go directly to ip4-input etc
+ ethernet_set_rx_redirect (vnet_main, hi, 0);
+ }
+ }
+
+ // Set up the L2/L3 flag in the interface parsing tables
+ ethernet_sw_interface_set_l2_mode(vnm, sw_if_index, (mode!=MODE_L3));
+
+ return 0;
+}
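+
+// Usage sketch (editor's example, assuming a valid bd_index of 1):
+// programmatically bridge an interface, not as a BVI, split-horizon group 0.
+#if 0
+static void
+example_bridge_interface (vlib_main_t * vm, u32 sw_if_index)
+{
+ u32 rc = set_int_l2_mode (vm, vnet_get_main(), MODE_L2_BRIDGE,
+ sw_if_index, 1 /* bd_index */, 0 /* bvi */,
+ 0 /* shg */, 0 /* xc_sw_if_index unused */);
+ if (rc)
+ clib_warning ("set_int_l2_mode failed: %d", rc);
+}
+#endif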
+
+// set subinterface in bridging mode with a bridge-domain ID
+// The CLI format is:
+// set interface l2 bridge <interface> <bd> [bvi] [split-horizon-group]
+static clib_error_t *
+int_l2_bridge (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 bd_index, bd_id;
+ u32 sw_if_index;
+ u32 bvi;
+ u32 rc;
+ u32 shg;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (!unformat (input, "%d", &bd_id)) {
+ error = clib_error_return (0, "expected bridge domain ID `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ bd_index = bd_find_or_add_bd_index (&bd_main, bd_id);
+
+ // optional bvi
+ bvi = unformat (input, "bvi");
+
+ // optional split horizon group
+ shg = 0;
+ (void) unformat (input, "%d", &shg);
+
+ // set the interface mode
+ if ((rc = set_int_l2_mode(vm, vnm, MODE_L2_BRIDGE, sw_if_index, bd_index, bvi, shg, 0))) {
+ if (rc == MODE_ERROR_ETH) {
+ error = clib_error_return (0, "bridged interface must be ethernet");
+ } else if (rc == MODE_ERROR_BVI_DEF) {
+ error = clib_error_return (0, "bridge-domain already has a bvi interface");
+ } else {
+ error = clib_error_return (0, "invalid configuration for interface");
+ }
+ goto done;
+ }
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (int_l2_bridge_cli, static) = {
+ .path = "set interface l2 bridge",
+ .short_help = "set interface to L2 bridging mode in <bridge-domain ID> [bvi] [shg]",
+ .function = int_l2_bridge,
+};
+
+// set subinterface in xconnect mode with another interface
+// The CLI format is:
+// set interface l2 xconnect <interface> <peer interface>
+static clib_error_t *
+int_l2_xc (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index;
+ u32 xc_sw_if_index;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &xc_sw_if_index))
+ {
+ error = clib_error_return (0, "unknown peer interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ // set the interface mode
+ if (set_int_l2_mode(vm, vnm, MODE_L2_XC, sw_if_index, 0, 0, 0, xc_sw_if_index)) {
+ error = clib_error_return (0, "invalid configuration for interface",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (int_l2_xc_cli, static) = {
+ .path = "set interface l2 xconnect",
+ .short_help = "set interface to L2 cross-connect mode with <peer interface>",
+ .function = int_l2_xc,
+};
+
+// set subinterface in L3 mode
+// The CLI format is:
+// set interface l3 <interface>
+static clib_error_t *
+int_l3 (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ // set the interface mode
+ if (set_int_l2_mode(vm, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0)) {
+ error = clib_error_return (0, "invalid configuration for interface",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (int_l3_cli, static) = {
+ .path = "set interface l3",
+ .short_help = "set interface to L3 mode",
+ .function = int_l3,
+};
+
+// The CLI format is:
+// show mode [<if-name1> <if-name2> ...]
+static clib_error_t *
+show_int_mode (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ char * mode;
+ u8 * args;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ vnet_sw_interface_t * si, * sis = 0;
+ l2input_main_t * mp = &l2input_main;
+ l2_input_config_t * config;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ u32 sw_if_index;
+
+ /* See if user wants to show specific interface */
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ si = pool_elt_at_index (im->sw_interfaces, sw_if_index);
+ vec_add1 (sis, si[0]);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ }
+
+ if (vec_len (sis) == 0) /* Get all interfaces */
+ {
+ /* Gather interfaces. */
+ sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
+ _vec_len (sis) = 0;
+ pool_foreach (si, im->sw_interfaces, ({ vec_add1 (sis, si[0]); }));
+ }
+
+ vec_foreach (si, sis)
+ {
+ vec_validate(mp->configs, si->sw_if_index);
+ config = vec_elt_at_index(mp->configs, si->sw_if_index);
+ if (config->bridge) {
+ u32 bd_id;
+ mode = "l2 bridge";
+ bd_id = l2input_main.bd_configs[config->bd_index].bd_id;
+
+ args = format (0, "bd_id %d%s%d", bd_id,
+ config->bvi ? " bvi shg " : " shg ", config->shg);
+ } else if (config->xconnect) {
+ mode = "l2 xconnect";
+ args = format (0, "%U",
+ format_vnet_sw_if_index_name,
+ vnm, config->output_sw_if_index);
+ } else {
+ mode = "l3";
+ args = format (0, " ");
+ }
+ vlib_cli_output (vm, "%s %U %v\n",
+ mode,
+ format_vnet_sw_if_index_name,
+ vnm, si->sw_if_index,
+ args);
+ vec_free (args);
+ }
+
+done:
+ vec_free (sis);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (show_l2_mode, static) = {
+ .path = "show mode",
+ .short_help = "show mode [<if-name1> <if-name2> ...]",
+ .function = show_int_mode,
+};
+
+#define foreach_l2_init_function \
+_(feat_bitmap_drop_init) \
+_(l2fib_init) \
+_(l2_classify_init) \
+_(l2bd_init) \
+_(l2fwd_init) \
+_(l2_inacl_init) \
+_(l2input_init) \
+_(l2_vtr_init) \
+_(l2_invtr_init) \
+_(l2_efp_filter_init) \
+_(l2learn_init) \
+_(l2flood_init) \
+_(l2_outacl_init) \
+_(l2output_init) \
+_(l2_patch_init) \
+_(l2_xcrw_init)
+
+clib_error_t *l2_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+
+#define _(a) do { \
+ if ((error = vlib_call_init_function (vm, a))) return error; } \
+while (0);
+ foreach_l2_init_function;
+#undef _
+ return 0;
+}
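+
+// Editor's note: for a single entry, the macro loop above expands to
+//   do {
+//     if ((error = vlib_call_init_function (vm, l2fib_init))) return error;
+//   } while (0);
+// so the init functions run in list order and the first failure aborts l2_init.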
+
+VLIB_INIT_FUNCTION (l2_init);
diff --git a/vnet/vnet/l2/l2_input.h b/vnet/vnet/l2/l2_input.h
new file mode 100644
index 00000000000..e650162b593
--- /dev/null
+++ b/vnet/vnet/l2/l2_input.h
@@ -0,0 +1,279 @@
+/*
+ * l2_input.h : layer 2 input packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_input_h
+#define included_vnet_l2_input_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip.h>
+
+// Per-subinterface L2 feature configuration
+
+typedef struct {
+
+ union {
+ u16 bd_index; // bridge domain index
+ u32 output_sw_if_index; // for xconnect
+ };
+
+ // Interface mode. If both are 0, this interface is in L3 mode
+ u8 xconnect;
+ u8 bridge;
+
+ // this is the bvi interface for the bridge-domain
+ u8 bvi;
+
+ // config for which input features are configured on this interface
+ u32 feature_bitmap;
+
+ // some of these flags are also in the feature bitmap
+ u8 learn_enable;
+ u8 fwd_enable;
+ u8 flood_enable;
+
+ // split horizon group
+ u8 shg;
+
+} l2_input_config_t;
+
+
+typedef struct {
+
+ // Next nodes for the feature bitmap
+ u32 feat_next_node_index[32];
+
+ /* config vector indexed by sw_if_index */
+ l2_input_config_t *configs;
+
+ /* bridge domain config vector indexed by bd_index */
+ l2_bridge_domain_t *bd_configs;
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} l2input_main_t;
+
+extern l2input_main_t l2input_main;
+
+static_always_inline l2_bridge_domain_t *
+l2input_bd_config_from_index (l2input_main_t * l2im, u32 bd_index)
+{
+ l2_bridge_domain_t * bd_config;
+
+ bd_config = vec_elt_at_index (l2im->bd_configs, bd_index);
+ return bd_is_valid (bd_config) ? bd_config : NULL;
+}
+
+// L2 input features
+
+// Mappings from feature ID to graph node name
+#define foreach_l2input_feat \
+ _(DROP, "feature-bitmap-drop") \
+ _(CLASSIFY, "l2-classify") \
+ _(XCONNECT, "l2-output") \
+ _(IPIW, "feature-bitmap-drop") \
+ _(FLOOD, "l2-flood") \
+ _(ARP_TERM, "arp-term-l2bd") \
+ _(UU_FLOOD, "l2-flood") \
+ _(FWD, "l2-fwd") \
+ _(LEARN, "l2-learn") \
+ _(VTR, "l2-input-vtr") \
+ _(VPATH, "vpath-input-l2") \
+ _(CTRL_PKT, "feature-bitmap-drop") \
+ _(L2PT, "feature-bitmap-drop") \
+ _(IGMP_SNOOP, "feature-bitmap-drop") \
+ _(MLD_SNOOP, "feature-bitmap-drop") \
+ _(DHCP_SNOOP, "feature-bitmap-drop") \
+ _(DAI, "feature-bitmap-drop") \
+ _(IPSG, "feature-bitmap-drop") \
+ _(ACL, "l2-input-acl") \
+ _(QOS, "feature-bitmap-drop") \
+ _(CFM, "feature-bitmap-drop") \
+ _(SPAN, "feature-bitmap-drop")
+
+// Feature bitmap positions
+typedef enum {
+#define _(sym,str) L2INPUT_FEAT_##sym##_BIT,
+ foreach_l2input_feat
+#undef _
+ L2INPUT_N_FEAT,
+} l2input_feat_t;
+
+// Feature bit masks
+typedef enum {
+#define _(sym,str) L2INPUT_FEAT_##sym = (1<<L2INPUT_FEAT_##sym##_BIT),
+ foreach_l2input_feat
+#undef _
+} l2input_feat_masks_t;
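+
+// Editor's note: expanding the two enums above for the FWD entry of
+// foreach_l2input_feat yields
+//
+//   L2INPUT_FEAT_FWD_BIT = 7,  // its position in the feature list
+//   L2INPUT_FEAT_FWD = (1 << L2INPUT_FEAT_FWD_BIT),
+//
+// so feature membership is tested and masked with plain bitwise operations.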
+
+// Return an array of strings containing graph node names of each feature
+char **l2input_get_feat_names(void);
+
+
+static_always_inline u8 bd_feature_flood (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD) ==
+ L2INPUT_FEAT_FLOOD);
+}
+
+static_always_inline u8 bd_feature_uu_flood (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD) ==
+ L2INPUT_FEAT_UU_FLOOD);
+}
+
+static_always_inline u8 bd_feature_forward (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_FWD) ==
+ L2INPUT_FEAT_FWD);
+}
+
+static_always_inline u8 bd_feature_learn (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_LEARN) ==
+ L2INPUT_FEAT_LEARN);
+}
+
+static_always_inline u8 bd_feature_arp_term (l2_bridge_domain_t * bd_config)
+{
+ return ((bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM) ==
+ L2INPUT_FEAT_ARP_TERM);
+}
+
+// Masks for eliminating features that do not apply to a packet
+
+#define IP4_FEAT_MASK ~(L2INPUT_FEAT_CTRL_PKT | \
+ L2INPUT_FEAT_MLD_SNOOP | \
+ L2INPUT_FEAT_L2PT | \
+ L2INPUT_FEAT_CFM | \
+ L2INPUT_FEAT_DAI)
+
+#define IP6_FEAT_MASK ~(L2INPUT_FEAT_CTRL_PKT | \
+ L2INPUT_FEAT_IGMP_SNOOP | \
+ L2INPUT_FEAT_L2PT | \
+ L2INPUT_FEAT_CFM | \
+ L2INPUT_FEAT_DAI)
+
+#define IP_UDP_TCP_FEAT_MASK ~(L2INPUT_FEAT_CTRL_PKT | \
+ L2INPUT_FEAT_L2PT | \
+ L2INPUT_FEAT_IGMP_SNOOP | \
+ L2INPUT_FEAT_MLD_SNOOP | \
+ L2INPUT_FEAT_DHCP_SNOOP | \
+ L2INPUT_FEAT_CFM | \
+ L2INPUT_FEAT_DAI)
+
+#define MPLS_FEAT_MASK ~(L2INPUT_FEAT_CTRL_PKT | \
+ L2INPUT_FEAT_L2PT | \
+ L2INPUT_FEAT_IGMP_SNOOP | \
+ L2INPUT_FEAT_MLD_SNOOP | \
+ L2INPUT_FEAT_DHCP_SNOOP | \
+ L2INPUT_FEAT_CFM | \
+ L2INPUT_FEAT_DAI)
+
+
+// Get a pointer to the config for the given interface
+l2_input_config_t * l2input_intf_config (u32 sw_if_index);
+
+// Enable (or disable) the feature in the bitmap for the given interface
+u32 l2input_intf_bitmap_enable (u32 sw_if_index,
+ u32 feature_bitmap,
+ u32 enable);
+
+
+#define MODE_L3 0
+#define MODE_L2_BRIDGE 1
+#define MODE_L2_XC 2
+#define MODE_L2_CLASSIFY 3
+
+#define MODE_ERROR_ETH 1
+#define MODE_ERROR_BVI_DEF 2
+
+u32 set_int_l2_mode (vlib_main_t * vm,
+ vnet_main_t * vnet_main,
+ u32 mode,
+ u32 sw_if_index,
+ u32 bd_index,
+ u32 bvi,
+ u32 shg,
+ u32 xc_sw_if_index);
+
+static inline void
+vnet_update_l2_len (vlib_buffer_t * b)
+{
+ ethernet_header_t * eth;
+ u16 ethertype;
+
+ /* point at current l2 hdr */
+ eth = vlib_buffer_get_current (b);
+
+ /*
+ * l2-output pays no attention to this
+ * but the tag push/pop code on an l2 subif needs it.
+ *
+ * Determine l2 header len, check for up to 2 vlans
+ */
+ vnet_buffer(b)->l2.l2_len = sizeof(ethernet_header_t);
+ ethertype = clib_net_to_host_u16(eth->type);
+ if ((ethertype == ETHERNET_TYPE_VLAN) ||
+ (ethertype == ETHERNET_TYPE_DOT1AD) ||
+ (ethertype == ETHERNET_TYPE_VLAN_9100) ||
+ (ethertype == ETHERNET_TYPE_VLAN_9200)) {
+ ethernet_vlan_header_t * vlan;
+ vnet_buffer(b)->l2.l2_len += sizeof (*vlan);
+ vlan = (void *) (eth+1);
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ if (ethertype == ETHERNET_TYPE_VLAN) {
+ vnet_buffer(b)->l2.l2_len += sizeof (*vlan);
+ }
+ }
+}
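+
+// Worked example (editor's note): an untagged frame yields l2_len 14, a
+// single-tagged frame 18, and a double-tagged frame whose inner tag is
+// 0x8100 yields 22. Only the outer tag may be dot1ad/9100/9200; the inner
+// tag check above accepts 0x8100 only.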
+
+/*
+ * Compute the flow hash of an ethernet packet: use a 5-tuple hash if the
+ * L3 packet is ip4 or ip6, otherwise hash on smac/dmac/etype.
+ * The vlib buffer current pointer is expected to be at the ethernet header
+ * and vnet l2.l2_len is expected to be set up already.
+ */
+static inline u32 vnet_l2_compute_flow_hash (vlib_buffer_t *b)
+{
+ ethernet_header_t * eh = vlib_buffer_get_current(b);
+ u8 * l3h = (u8 *)eh + vnet_buffer(b)->l2.l2_len;
+ u16 ethertype = clib_net_to_host_u16(*(u16 *)(l3h - 2));
+
+ if (ethertype == ETHERNET_TYPE_IP4)
+ return ip4_compute_flow_hash((ip4_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
+ else if (ethertype == ETHERNET_TYPE_IP6)
+ return ip6_compute_flow_hash((ip6_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
+ else
+ {
+ u32 a, b, c;
+ u32 * ap = (u32 *) &eh->dst_address[2];
+ u32 * bp = (u32 *) &eh->src_address[2];
+ a = * ap;
+ b = * bp;
+ c = ethertype;
+ hash_v3_mix32 (a, b, c);
+ hash_v3_finalize32 (a, b, c);
+ return c;
+ }
+}
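+
+// Usage sketch (editor's example): a load-balancing path could pick a
+// member link from the flow hash; "n_links" is a hypothetical link count.
+#if 0
+static u32
+example_pick_link (vlib_buffer_t * b, u32 n_links)
+{
+ return vnet_l2_compute_flow_hash (b) % n_links;
+}
+#endif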
+
+#endif
+
diff --git a/vnet/vnet/l2/l2_input_acl.c b/vnet/vnet/l2/l2_input_acl.c
new file mode 100644
index 00000000000..77fa8944e9f
--- /dev/null
+++ b/vnet/vnet/l2/l2_input_acl.c
@@ -0,0 +1,427 @@
+/*
+ * l2_input_acl.c : layer 2 input acl processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+
+typedef struct {
+
+ // Next nodes for each feature
+ u32 feat_next_node_index[32];
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} l2_inacl_main_t;
+
+typedef struct {
+ u32 sw_if_index;
+ u32 next_index;
+ u32 table_index;
+ u32 offset;
+} l2_inacl_trace_t;
+
+/* packet trace format function */
+static u8 * format_l2_inacl_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_inacl_trace_t * t = va_arg (*args, l2_inacl_trace_t *);
+
+ s = format (s, "INACL: sw_if_index %d, next_index %d, table %d, offset %d",
+ t->sw_if_index, t->next_index, t->table_index, t->offset);
+ return s;
+}
+
+l2_inacl_main_t l2_inacl_main;
+
+static vlib_node_registration_t l2_inacl_node;
+
+#define foreach_l2_inacl_error \
+_(NONE, "valid input ACL packets") \
+_(MISS, "input ACL misses") \
+_(HIT, "input ACL hits") \
+_(CHAIN_HIT, "input ACL hits after chain walk") \
+_(TABLE_MISS, "input ACL table-miss drops") \
+_(SESSION_DENY, "input ACL session deny drops")
+
+
+typedef enum {
+#define _(sym,str) L2_INACL_ERROR_##sym,
+ foreach_l2_inacl_error
+#undef _
+ L2_INACL_N_ERROR,
+} l2_inacl_error_t;
+
+static char * l2_inacl_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_inacl_error
+#undef _
+};
+
+static uword
+l2_inacl_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ acl_next_index_t next_index;
+ l2_inacl_main_t * msm = &l2_inacl_main;
+ input_acl_main_t * am = &input_acl_main;
+ vnet_classify_main_t * vcm = am->vnet_classify_main;
+ input_acl_table_id_t tid = INPUT_ACL_TABLE_L2;
+ f64 now = vlib_time_now (vm);
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ /* First pass: compute hashes */
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t * b0, * b1;
+ u32 bi0, bi1;
+ u8 * h0, * h1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 table_index0, table_index1;
+ vnet_classify_table_t * t0, * t1;
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t * p1, * p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+ h1 = b1->data;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 = am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ table_index1 = am->classify_table_index_by_sw_if_index[tid][sw_if_index1];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ vnet_buffer(b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash);
+
+ vnet_buffer(b1)->l2_classify.hash =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+
+ vnet_classify_prefetch_bucket (t1, vnet_buffer(b1)->l2_classify.hash);
+
+ vnet_buffer(b0)->l2_classify.table_index = table_index0;
+
+ vnet_buffer(b1)->l2_classify.table_index = table_index1;
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t * b0;
+ u32 bi0;
+ u8 * h0;
+ u32 sw_if_index0;
+ u32 table_index0;
+ vnet_classify_table_t * t0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 = am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+ vnet_buffer(b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_buffer(b0)->l2_classify.table_index = table_index0;
+ vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash);
+
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = ACL_NEXT_INDEX_DENY;
+ u32 table_index0;
+ vnet_classify_table_t * t0;
+ vnet_classify_entry_t * e0;
+ u64 hash0;
+ u8 * h0;
+ u8 error0;
+
+ /* Stride 3 seems to work best */
+ if (PREDICT_TRUE (n_left_from > 3))
+ {
+ vlib_buffer_t * p1 = vlib_get_buffer(vm, from[3]);
+ vnet_classify_table_t * tp1;
+ u32 table_index1;
+ u64 phash1;
+
+ table_index1 = vnet_buffer(p1)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index1 != ~0))
+ {
+ tp1 = pool_elt_at_index (vcm->tables, table_index1);
+ phash1 = vnet_buffer(p1)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp1, phash1);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+ table_index0 = vnet_buffer(b0)->l2_classify.table_index;
+ e0 = 0;
+ t0 = 0;
+
+ /* Feature bitmap update */
+ vnet_buffer(b0)->l2.feature_bitmap &= ~L2INPUT_FEAT_ACL;
+
+ /* Determine the next node */
+ next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index,
+ vnet_buffer(b0)->l2.feature_bitmap);
+
+ if (PREDICT_TRUE(table_index0 != ~0))
+ {
+ hash0 = vnet_buffer(b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0,
+ now);
+ if (e0)
+ {
+ vlib_buffer_advance (b0, e0->advance);
+
+ next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT)?
+ e0->next_index:next0;
+
+ hits++;
+
+ error0 = (next0 == ACL_NEXT_INDEX_DENY)?
+ L2_INACL_ERROR_SESSION_DENY:L2_INACL_ERROR_NONE;
+ b0->error = node->errors[error0];
+ }
+ else
+ {
+ while (1)
+ {
+ if (PREDICT_TRUE(t0->next_table_index != ~0))
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 = (t0->miss_next_index < ACL_NEXT_INDEX_N_NEXT)?
+ t0->miss_next_index:next0;
+
+ misses++;
+
+ error0 = (next0 == ACL_NEXT_INDEX_DENY)?
+ L2_INACL_ERROR_TABLE_MISS:L2_INACL_ERROR_NONE;
+ b0->error = node->errors[error0];
+ break;
+ }
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry
+ (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT)?
+ e0->next_index:next0;
+ hits++;
+ chain_hits++;
+
+ error0 = (next0 == ACL_NEXT_INDEX_DENY)?
+ L2_INACL_ERROR_SESSION_DENY:L2_INACL_ERROR_NONE;
+ b0->error = node->errors[error0];
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_inacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ t->next_index = next0;
+ t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->offset = e0 ? vnet_classify_get_offset (t0, e0): ~0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INACL_ERROR_MISS,
+ misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INACL_ERROR_HIT,
+ hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_INACL_ERROR_CHAIN_HIT,
+ chain_hits);
+ return frame->n_vectors;
+}
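+
+// Editor's sketch of the chain walk above (simplified; buffer advance,
+// counters and error bookkeeping omitted): each classifier table may name
+// a next table to try on miss, and the walk ends on a session hit or on
+// the final table's miss_next_index.
+#if 0
+static u32
+example_acl_chain_walk (vnet_classify_main_t * vcm,
+ vnet_classify_table_t * t0, u8 * h0, f64 now)
+{
+ vnet_classify_entry_t * e0;
+ u64 hash0;
+
+ while (1)
+ {
+ hash0 = vnet_classify_hash_packet (t0, h0);
+ e0 = vnet_classify_find_entry (t0, h0, hash0, now);
+ if (e0) // session hit: follow its disposition
+ return e0->next_index;
+ if (t0->next_table_index == ~0) // end of chain: apply miss policy
+ return t0->miss_next_index;
+ t0 = pool_elt_at_index (vcm->tables, t0->next_table_index);
+ }
+}
+#endif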
+
+VLIB_REGISTER_NODE (l2_inacl_node,static) = {
+ .function = l2_inacl_node_fn,
+ .name = "l2-input-acl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_inacl_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_inacl_error_strings),
+ .error_strings = l2_inacl_error_strings,
+
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "error-drop",
+ },
+};
+
+clib_error_t *l2_inacl_init (vlib_main_t *vm)
+{
+ l2_inacl_main_t * mp = &l2_inacl_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main();
+
+ // Initialize the feature next-node indexes
+ feat_bitmap_init_next_nodes(vm,
+ l2_inacl_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names(),
+ mp->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_inacl_init);
+
+
+// set subinterface inacl enable/disable
+// The CLI format is:
+// set interface acl input <interface> [disable]
+static clib_error_t *
+int_l2_inacl (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable")) {
+ enable = 0;
+ }
+
+ // set the interface flag
+ l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_ACL, enable);
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (int_l2_inacl_cli, static) = {
+ .path = "set interface acl input",
+ .short_help = "set interface acl input <interface> [disable]",
+ .function = int_l2_inacl,
+};
diff --git a/vnet/vnet/l2/l2_input_vtr.c b/vnet/vnet/l2/l2_input_vtr.c
new file mode 100644
index 00000000000..d07a0287d04
--- /dev/null
+++ b/vnet/vnet/l2/l2_input_vtr.c
@@ -0,0 +1,314 @@
+/*
+ * l2_input_vtr.c : layer 2 input vlan tag rewrite processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/l2/l2_input_vtr.h>
+#include <vnet/l2/l2_output.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/cache.h>
+
+
+typedef struct {
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u8 raw[12]; // raw data (vlans)
+ u32 sw_if_index;
+} l2_invtr_trace_t;
+
+/* packet trace format function */
+static u8 * format_l2_invtr_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_invtr_trace_t * t = va_arg (*args, l2_invtr_trace_t *);
+
+ s = format (s, "l2-input-vtr: sw_if_index %d dst %U src %U data "
+ "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src,
+ t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4], t->raw[5],
+ t->raw[6], t->raw[7], t->raw[8], t->raw[9], t->raw[10], t->raw[11]);
+ return s;
+}
+
+l2_invtr_main_t l2_invtr_main;
+
+static vlib_node_registration_t l2_invtr_node;
+
+#define foreach_l2_invtr_error \
+_(L2_INVTR, "L2 inverter packets") \
+_(DROP, "L2 input tag rewrite drops")
+
+typedef enum {
+#define _(sym,str) L2_INVTR_ERROR_##sym,
+ foreach_l2_invtr_error
+#undef _
+ L2_INVTR_N_ERROR,
+} l2_invtr_error_t;
+
+static char * l2_invtr_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_invtr_error
+#undef _
+};
+
+typedef enum {
+ L2_INVTR_NEXT_DROP,
+ L2_INVTR_N_NEXT,
+} l2_invtr_next_t;
+
+
+static uword
+l2_invtr_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ l2_invtr_next_t next_index;
+ l2_invtr_main_t * msm = &l2_invtr_main;
+ // vlib_node_t *n = vlib_get_node (vm, l2_invtr_node.index);
+ // u32 node_counter_base_index = n->error_heap_index;
+ // vlib_error_main_t * em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 6 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 feature_bitmap0, feature_bitmap1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3, * p4, * p5;
+ u32 sw_if_index2, sw_if_index3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ // Prefetch the buffer header and packet for the N+2 loop iteration
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+
+ // Prefetch the input config for the N+1 loop iteration
+ // This depends on the buffer header above
+ sw_if_index2 = vnet_buffer(p2)->sw_if_index[VLIB_RX];
+ sw_if_index3 = vnet_buffer(p3)->sw_if_index[VLIB_RX];
+ CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index2), CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index3), CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ // process 2 packets
+ // em->counters[node_counter_base_index + L2_INVTR_ERROR_L2_INVTR] += 2;
+
+ // Remove ourselves from the feature bitmap
+ feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2INPUT_FEAT_VTR;
+ feature_bitmap1 = vnet_buffer(b1)->l2.feature_bitmap & ~L2INPUT_FEAT_VTR;
+
+ // save for next feature graph nodes
+ vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap0;
+ vnet_buffer(b1)->l2.feature_bitmap = feature_bitmap1;
+
+ // Determine the next node
+ next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index,
+ feature_bitmap0);
+ next1 = feat_bitmap_get_next_node_index(msm->feat_next_node_index,
+ feature_bitmap1);
+
+ // perform the tag rewrite on two packets
+ if (l2_vtr_process(b0, &(vec_elt_at_index(l2output_main.configs, sw_if_index0)->input_vtr))) {
+ // Drop packet
+ next0 = L2_INVTR_NEXT_DROP;
+ b0->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+ if (l2_vtr_process(b1, &(vec_elt_at_index(l2output_main.configs, sw_if_index1)->input_vtr))) {
+ // Drop packet
+ next1 = L2_INVTR_NEXT_DROP;
+ b1->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED) {
+ l2_invtr_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ ethernet_header_t * h0 = vlib_buffer_get_current (b0);
+ t->sw_if_index = sw_if_index0;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ memcpy(t->raw, &h0->type, sizeof(t->raw));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED) {
+ l2_invtr_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ ethernet_header_t * h1 = vlib_buffer_get_current (b1);
+ t->sw_if_index = sw_if_index1;
+ memcpy(t->src, h1->src_address, 6);
+ memcpy(t->dst, h1->dst_address, 6);
+ memcpy(t->raw, &h1->type, sizeof(t->raw));
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ u32 feature_bitmap0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ // process 1 packet
+ // em->counters[node_counter_base_index + L2_INVTR_ERROR_L2_INVTR] += 1;
+
+ // Remove ourself from the feature bitmap
+ feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2INPUT_FEAT_VTR;
+
+ // save for next feature graph nodes
+ vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap0;
+
+ // Determine the next node
+ next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index,
+ feature_bitmap0);
+
+ // perform the tag rewrite on one packet
+ if (l2_vtr_process(b0, &(vec_elt_at_index(l2output_main.configs, sw_if_index0)->input_vtr))) {
+ // Drop packet
+ next0 = L2_INVTR_NEXT_DROP;
+ b0->error = node->errors[L2_INVTR_ERROR_DROP];
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
+ l2_invtr_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ ethernet_header_t * h0 = vlib_buffer_get_current (b0);
+ t->sw_if_index = sw_if_index0;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ memcpy(t->raw, &h0->type, sizeof(t->raw));
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+VLIB_REGISTER_NODE (l2_invtr_node,static) = {
+ .function = l2_invtr_node_fn,
+ .name = "l2-input-vtr",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_invtr_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_invtr_error_strings),
+ .error_strings = l2_invtr_error_strings,
+
+ .n_next_nodes = L2_INVTR_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_INVTR_NEXT_DROP] = "error-drop",
+ },
+};
+
+clib_error_t *l2_invtr_init (vlib_main_t *vm)
+{
+ l2_invtr_main_t * mp = &l2_invtr_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main();
+
+ // Initialize the feature next-node indexes
+ feat_bitmap_init_next_nodes(vm,
+ l2_invtr_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names(),
+ mp->feat_next_node_index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_invtr_init);
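+
+/*
+ * Illustrative sketch (not part of the node; the bitmap value is
+ * hypothetical): every L2 input feature node uses the dispatch pattern
+ * seen above. With both the VTR and LEARN features enabled, this node
+ * clears its own bit and lets the feat_bitmap helper pick the node that
+ * handles the next remaining feature:
+ *
+ *   u32 bm = L2INPUT_FEAT_VTR | L2INPUT_FEAT_LEARN;
+ *   bm &= ~L2INPUT_FEAT_VTR;                     // remove ourself
+ *   vnet_buffer(b0)->l2.feature_bitmap = bm;     // save for later nodes
+ *   next0 = feat_bitmap_get_next_node_index (feat_next_node_index, bm);
+ *   // only LEARN remains, so next0 is the arc toward l2-learn
+ */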
+
diff --git a/vnet/vnet/l2/l2_input_vtr.h b/vnet/vnet/l2/l2_input_vtr.h
new file mode 100644
index 00000000000..57c8e409dea
--- /dev/null
+++ b/vnet/vnet/l2/l2_input_vtr.h
@@ -0,0 +1,43 @@
+/*
+ * l2_input_vtr.h : layer 2 input vlan tag rewrite processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_input_vtr_h
+#define included_vnet_l2_input_vtr_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+
+
+typedef struct {
+
+ // The input vtr data is located in l2_output_config_t because
+ // the same config data is used for the egress EFP Filter check.
+
+ // Next nodes for each feature
+ u32 feat_next_node_index[32];
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} l2_invtr_main_t;
+
+extern l2_invtr_main_t l2_invtr_main;
+
+#endif // included_vnet_l2_input_vtr_h
+
diff --git a/vnet/vnet/l2/l2_learn.c b/vnet/vnet/l2/l2_learn.c
new file mode 100644
index 00000000000..29315bedc98
--- /dev/null
+++ b/vnet/vnet/l2/l2_learn.c
@@ -0,0 +1,504 @@
+/*
+ * l2_learn.c : layer 2 learning using l2fib
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_fib.h>
+#include <vnet/l2/l2_learn.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+
+/*
+ * Ethernet bridge learning
+ *
+ * Populate the mac table with entries mapping the packet's source mac + bridge
+ * domain ID to the input sw_if_index.
+ *
+ * Note that learning and forwarding are separate graph nodes. This means that
+ * for a set of packets, all learning is performed first, then all packets are
+ * forwarded. The forwarding is based on the end-state of the mac table,
+ * instead of the state after each packet. Thus the forwarding results could
+ * differ in certain cases (mac move tests), but this is not expected to cause
+ * problems in real-world networks. It is much simpler to separate learning
+ * and forwarding into separate nodes.
+ */
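+
+/*
+ * Illustrative sketch of the learning key, assuming the l2fib_entry_key_t
+ * layout declared in l2_fib.h (field names taken from that header): the
+ * 48-bit source mac and the 16-bit bridge domain index pack into one u64
+ * hash key, so the same mac can be learned independently per bridge domain:
+ *
+ *   l2fib_entry_key_t key;
+ *   key.raw = 0;
+ *   key.fields.bd_index = vnet_buffer(b0)->l2.bd_index;
+ *   memcpy (key.fields.mac, h0->src_address, 6);
+ *   // key.raw is the bihash key; the result value holds the sw_if_index
+ */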
+
+
+typedef struct {
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+ u16 bd_index;
+} l2learn_trace_t;
+
+
+/* packet trace format function */
+static u8 * format_l2learn_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2learn_trace_t * t = va_arg (*args, l2learn_trace_t *);
+
+ s = format (s, "l2-learn: sw_if_index %d dst %U src %U bd_index %d",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src,
+ t->bd_index);
+ return s;
+}
+
+static vlib_node_registration_t l2learn_node;
+
+#define foreach_l2learn_error \
+_(L2LEARN, "L2 learn packets") \
+_(MISS, "L2 learn misses") \
+_(MAC_MOVE, "L2 mac moves") \
+_(MAC_MOVE_VIOLATE, "L2 mac move violations") \
+_(LIMIT, "L2 not learned due to limit") \
+_(HIT, "L2 learn hits") \
+_(FILTER_DROP, "L2 filter mac drops")
+
+typedef enum {
+#define _(sym,str) L2LEARN_ERROR_##sym,
+ foreach_l2learn_error
+#undef _
+ L2LEARN_N_ERROR,
+} l2learn_error_t;
+
+static char * l2learn_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2learn_error
+#undef _
+};
+
+typedef enum {
+ L2LEARN_NEXT_L2FWD,
+ L2LEARN_NEXT_DROP,
+ L2LEARN_N_NEXT,
+} l2learn_next_t;
+
+
+// Perform learning on one packet based on the mac table lookup result
+
+static_always_inline void
+l2learn_process (vlib_node_runtime_t * node,
+ l2learn_main_t * msm,
+ u64 * counter_base,
+ vlib_buffer_t * b0,
+ u32 sw_if_index0,
+ l2fib_entry_key_t * key0,
+ l2fib_entry_key_t * cached_key,
+ u32 * bucket0,
+ l2fib_entry_result_t * result0,
+ u32 * next0)
+{
+ u32 feature_bitmap;
+
+ // Set up the default next node (typically L2FWD)
+
+ // Remove ourself from the feature bitmap
+ feature_bitmap = vnet_buffer(b0)->l2.feature_bitmap & ~L2INPUT_FEAT_LEARN;
+
+ // Save for next feature graph nodes
+ vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap;
+
+ // Determine the next node
+ *next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index,
+ feature_bitmap);
+
+ // Check mac table lookup result
+
+ if (PREDICT_TRUE (result0->fields.sw_if_index == sw_if_index0)) {
+ // The entry was in the table and the sw_if_index matched; this is the normal case
+
+ // TODO: for dataplane learning and aging, do this:
+ // if refresh=0 and not a static mac, set refresh=1
+ counter_base[L2LEARN_ERROR_HIT] += 1;
+
+ } else if (result0->raw == ~0) {
+
+ // The entry was not in table, so add it
+
+ counter_base[L2LEARN_ERROR_MISS] += 1;
+
+ if (msm->global_learn_count == msm->global_learn_limit) {
+ // Global limit reached. Do not learn the mac but forward the packet.
+ // In the future, limits could also be per-interface or bridge-domain.
+ counter_base[L2LEARN_ERROR_LIMIT] += 1;
+ goto done;
+
+ } else {
+ BVT(clib_bihash_kv) kv;
+ // It is ok to learn
+
+ result0->raw = 0; // clear all fields
+ result0->fields.sw_if_index = sw_if_index0;
+ // TODO: set timestamp in entry to clock for dataplane aging
+ kv.key = key0->raw;
+ kv.value = result0->raw;
+
+ BV(clib_bihash_add_del) (msm->mac_table, &kv, 1 /* is_add */);
+
+ cached_key->raw = ~0; // invalidate the cache
+ msm->global_learn_count++;
+ }
+
+ } else {
+
+ // The entry was in the table, but with the wrong sw_if_index mapping (mac move)
+ counter_base[L2LEARN_ERROR_MAC_MOVE] += 1;
+
+ if (result0->fields.static_mac) {
+ // Don't overwrite a static mac
+ // TODO: Check violation policy. For now drop the packet
+ b0->error = node->errors[L2LEARN_ERROR_MAC_MOVE_VIOLATE];
+ *next0 = L2LEARN_NEXT_DROP;
+ } else {
+ // Update the entry
+ // TODO: may want to rate limit mac moves
+ // TODO: check global/bridge domain/interface learn limits
+ BVT(clib_bihash_kv) kv;
+
+ result0->raw = 0; // clear all fields
+ result0->fields.sw_if_index = sw_if_index0;
+
+ kv.key = key0->raw;
+ kv.value = result0->raw;
+
+ cached_key->raw = ~0; // invalidate the cache
+
+ BV(clib_bihash_add_del) (msm->mac_table, &kv, 1 /* is_add */);
+ }
+ }
+
+ if (result0->fields.filter) {
+ // drop packet because lookup matched a filter mac entry
+
+ if (*next0 != L2LEARN_NEXT_DROP) {
+ // if we're not already dropping the packet, do it now
+ b0->error = node->errors[L2LEARN_ERROR_FILTER_DROP];
+ *next0 = L2LEARN_NEXT_DROP;
+ }
+ }
+
+done:
+ return;
+}
+
+
+static uword
+l2learn_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ l2learn_next_t next_index;
+ l2learn_main_t * msm = &l2learn_main;
+ vlib_node_t *n = vlib_get_node (vm, l2learn_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t * em = &vm->error_main;
+ l2fib_entry_key_t cached_key;
+ l2fib_entry_result_t cached_result;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ // Clear the one-entry cache in case mac table was updated
+ cached_key.raw = ~0;
+ cached_result.raw = ~0; /* warning be gone */
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ethernet_header_t * h0, * h1;
+ l2fib_entry_key_t key0, key1;
+ l2fib_entry_result_t result0, result1;
+ u32 bucket0, bucket1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* RX interface handles */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ /* Process 2 x pkts */
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2learn_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer(b0)->l2.bd_index;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2learn_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->bd_index = vnet_buffer(b1)->l2.bd_index;
+ memcpy(t->src, h1->src_address, 6);
+ memcpy(t->dst, h1->dst_address, 6);
+ }
+ }
+
+ /* process 2 pkts */
+ em->counters[node_counter_base_index + L2LEARN_ERROR_L2LEARN] += 2;
+
+ l2fib_lookup_2 (msm->mac_table, &cached_key, &cached_result,
+ h0->src_address,
+ h1->src_address,
+ vnet_buffer(b0)->l2.bd_index,
+ vnet_buffer(b1)->l2.bd_index,
+ &key0,
+ &key1,
+ &bucket0,
+ &bucket1,
+ &result0,
+ &result1);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b0, sw_if_index0, &key0, &cached_key,
+ &bucket0, &result0, &next0);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b1, sw_if_index1, &key1, &cached_key,
+ &bucket1, &result1, &next1);
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t * h0;
+ l2fib_entry_key_t key0;
+ l2fib_entry_result_t result0;
+ u32 bucket0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ h0 = vlib_buffer_get_current (b0);
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
+ l2learn_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->bd_index = vnet_buffer(b0)->l2.bd_index;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ }
+
+ /* process 1 pkt */
+ em->counters[node_counter_base_index + L2LEARN_ERROR_L2LEARN] += 1;
+
+ l2fib_lookup_1 (msm->mac_table, &cached_key, &cached_result,
+ h0->src_address, vnet_buffer(b0)->l2.bd_index,
+ &key0,
+ &bucket0,
+ &result0);
+
+ l2learn_process (node, msm, &em->counters[node_counter_base_index],
+ b0, sw_if_index0, &key0, &cached_key,
+ &bucket0, &result0, &next0);
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+VLIB_REGISTER_NODE (l2learn_node,static) = {
+ .function = l2learn_node_fn,
+ .name = "l2-learn",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2learn_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2learn_error_strings),
+ .error_strings = l2learn_error_strings,
+
+ .n_next_nodes = L2LEARN_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2LEARN_NEXT_DROP] = "error-drop",
+ [L2LEARN_NEXT_L2FWD] = "l2-fwd",
+ },
+};
+
+
+clib_error_t *l2learn_init (vlib_main_t *vm)
+{
+ l2learn_main_t * mp = &l2learn_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main();
+
+ // Initialize the feature next-node indexes
+ feat_bitmap_init_next_nodes(vm,
+ l2learn_node.index,
+ L2INPUT_N_FEAT,
+ l2input_get_feat_names(),
+ mp->feat_next_node_index);
+
+ /* init the hash table ptr */
+ mp->mac_table = get_mac_table();
+
+ // Set the default limit on dynamically learned macs to 16 entries
+ // per hash bucket.
+ mp->global_learn_limit = L2FIB_NUM_BUCKETS * 16;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2learn_init);
+
+
+// set subinterface learn enable/disable
+// The CLI format is:
+// set interface l2 learn <interface> [disable]
+static clib_error_t *
+int_learn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable")) {
+ enable = 0;
+ }
+
+ // set the interface flag
+ l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_LEARN, enable);
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (int_learn_cli, static) = {
+ .path = "set interface l2 learn",
+ .short_help = "set interface l2 learn <interface> [disable]",
+ .function = int_learn,
+};
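+
+// Example (interface name hypothetical):
+//   vpp# set interface l2 learn GigabitEthernet0/1/0
+//   vpp# set interface l2 learn GigabitEthernet0/1/0 disable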
+
+
+static clib_error_t *
+l2learn_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ l2learn_main_t *mp = &l2learn_main;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "limit %d", &mp->global_learn_limit))
+ ;
+
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (l2learn_config, "l2learn");
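+
+// Example startup.conf stanza (limit value illustrative), overriding the
+// default computed in l2learn_init:
+//   l2learn {
+//     limit 65536
+//   }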
+
diff --git a/vnet/vnet/l2/l2_learn.h b/vnet/vnet/l2/l2_learn.h
new file mode 100644
index 00000000000..25674858fc9
--- /dev/null
+++ b/vnet/vnet/l2/l2_learn.h
@@ -0,0 +1,47 @@
+/*
+ * l2_learn.h : layer 2 learning using l2fib
+ *
+ * Copyright (c) 2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2learn_h
+#define included_l2learn_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+
+typedef struct {
+
+ // Hash table
+ BVT(clib_bihash) *mac_table;
+
+ // number of dynamically learned mac entries
+ u32 global_learn_count;
+
+ // maximum number of dynamically learned mac entries
+ u32 global_learn_limit;
+
+ // Next nodes for each feature
+ u32 feat_next_node_index[32];
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} l2learn_main_t;
+
+
+l2learn_main_t l2learn_main;
+
+#endif
diff --git a/vnet/vnet/l2/l2_output.c b/vnet/vnet/l2/l2_output.c
new file mode 100644
index 00000000000..72c3d0374e3
--- /dev/null
+++ b/vnet/vnet/l2/l2_output.c
@@ -0,0 +1,541 @@
+/*
+ * l2_output.c : layer 2 output packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_output.h>
+
+
+// Feature graph node names
+static char * l2output_feat_names[] = {
+#define _(sym,name) name,
+ foreach_l2output_feat
+#undef _
+};
+
+char **l2output_get_feat_names(void) {
+ return l2output_feat_names;
+}
+
+l2output_main_t l2output_main;
+
+typedef struct {
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u32 sw_if_index;
+} l2output_trace_t;
+
+/* packet trace format function */
+static u8 * format_l2output_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2output_trace_t * t = va_arg (*args, l2output_trace_t *);
+
+ s = format (s, "l2-output: sw_if_index %d dst %U src %U",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src);
+ return s;
+}
+
+
+#define foreach_l2output_error \
+_(L2OUTPUT, "L2 output packets") \
+_(EFP_DROP, "L2 EFP filter pre-rewrite drops") \
+_(VTR_DROP, "L2 output tag rewrite drops") \
+_(SHG_DROP, "L2 split horizon drops") \
+_(DROP, "L2 output drops")
+
+typedef enum {
+#define _(sym,str) L2OUTPUT_ERROR_##sym,
+ foreach_l2output_error
+#undef _
+ L2OUTPUT_N_ERROR,
+} l2output_error_t;
+
+static char * l2output_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2output_error
+#undef _
+};
+
+typedef enum {
+ L2OUTPUT_NEXT_DROP,
+ L2OUTPUT_N_NEXT,
+} l2output_next_t;
+
+// Return 0 if split horizon check passes, otherwise return non-zero
+// Packets should not be transmitted out an interface with the same
+// split-horizon group as the input interface, except if the shg is 0
+// in which case the check always passes.
+static_always_inline u32
+split_horizon_violation (u8 shg1, u8 shg2)
+{
+ if (PREDICT_TRUE (shg1 == 0)) {
+ return 0;
+ } else {
+ return shg1 == shg2;
+ }
+}
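+
+// Worked example: an output interface with shg 0 never drops (a packet
+// arriving with shg 5 still passes); output shg 5 vs input shg 3 passes;
+// output shg 5 vs input shg 5 is a violation, since members of the same
+// split-horizon group must not forward to each other.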
+
+
+static uword
+l2output_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ l2output_next_t next_index;
+ l2output_main_t * msm = &l2output_main;
+ vlib_node_t *n = vlib_get_node (vm, l2output_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t * em = &vm->error_main;
+ u32 cached_sw_if_index;
+ u32 cached_next_index;
+
+ /* Invalidate cache */
+ cached_sw_if_index = ~0;
+ cached_next_index = ~0; /* warning be gone */
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 6 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ethernet_header_t * h0, * h1;
+ l2_output_config_t * config0, * config1;
+ u32 feature_bitmap0, feature_bitmap1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3, * p4 , * p5;
+ u32 sw_if_index2, sw_if_index3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ // Prefetch the buffer header for the N+2 loop iteration
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ // Note: no need to prefetch packet data. This node doesn't reference it.
+
+ // Prefetch the input config for the N+1 loop iteration
+ // This depends on the buffer header above
+ sw_if_index2 = vnet_buffer(p2)->sw_if_index[VLIB_TX];
+ sw_if_index3 = vnet_buffer(p3)->sw_if_index[VLIB_TX];
+ CLIB_PREFETCH (&msm->configs[sw_if_index2], CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (&msm->configs[sw_if_index3], CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* TX interface handles */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ memcpy(t->src, h1->src_address, 6);
+ memcpy(t->dst, h1->dst_address, 6);
+ }
+ }
+
+ em->counters[node_counter_base_index + L2OUTPUT_ERROR_L2OUTPUT] += 2;
+
+ // Get config for the output interface
+ config0 = vec_elt_at_index(msm->configs, sw_if_index0);
+ config1 = vec_elt_at_index(msm->configs, sw_if_index1);
+
+ // Get features from the config
+ // TODO: mask out any non-applicable features
+ feature_bitmap0 = config0->feature_bitmap;
+ feature_bitmap1 = config1->feature_bitmap;
+
+ // Determine next node
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2output_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b0,
+ sw_if_index0,
+ feature_bitmap0,
+ &next0);
+
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2output_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b1,
+ sw_if_index1,
+ feature_bitmap1,
+ &next1);
+
+ // Perform output vlan tag rewrite and the pre-vtr EFP filter check.
+ // The EFP Filter only needs to be run if there is an output VTR
+ // configured. The flag for the post-vtr EFP Filter node is used
+ // to trigger the pre-vtr check as well.
+
+ if (PREDICT_FALSE (config0->output_vtr.push_and_pop_bytes)) {
+ // Perform pre-vtr EFP filter check if configured
+ u32 failed1 = (feature_bitmap0 & L2OUTPUT_FEAT_EFP_FILTER) &&
+ (l2_efp_filter_process(b0, &(config0->input_vtr)));
+ u32 failed2 = l2_vtr_process(b0, &(config0->output_vtr));
+
+ if (PREDICT_FALSE (failed1 | failed2)) {
+ next0 = L2OUTPUT_NEXT_DROP;
+ if (failed2) {
+ b0->error = node->errors[L2OUTPUT_ERROR_VTR_DROP];
+ }
+ if (failed1) {
+ b0->error = node->errors[L2OUTPUT_ERROR_EFP_DROP];
+ }
+ }
+ }
+
+ if (PREDICT_FALSE (config1->output_vtr.push_and_pop_bytes)) {
+ // Perform pre-vtr EFP filter check if configured
+ u32 failed1 = (feature_bitmap1 & L2OUTPUT_FEAT_EFP_FILTER) &&
+ (l2_efp_filter_process(b1, &(config1->input_vtr)));
+ u32 failed2 = l2_vtr_process(b1, &(config1->output_vtr));
+
+ if (PREDICT_FALSE (failed1 | failed2)) {
+ next1 = L2OUTPUT_NEXT_DROP;
+ if (failed2) {
+ b1->error = node->errors[L2OUTPUT_ERROR_VTR_DROP];
+ }
+ if (failed1) {
+ b1->error = node->errors[L2OUTPUT_ERROR_EFP_DROP];
+ }
+ }
+ }
+
+ // Perform the split horizon check
+ // The check can only fail for non-zero shg's
+ if (PREDICT_FALSE (config0->shg + config1->shg)) {
+ // one of the checks might fail, check both
+ if (split_horizon_violation (config0->shg, vnet_buffer(b0)->l2.shg)) {
+ next0 = L2OUTPUT_NEXT_DROP;
+ b0->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ if (split_horizon_violation (config1->shg, vnet_buffer(b1)->l2.shg)) {
+ next1 = L2OUTPUT_NEXT_DROP;
+ b1->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t * h0;
+ l2_output_config_t *config0;
+ u32 feature_bitmap0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ h0 = vlib_buffer_get_current (b0);
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ }
+
+ em->counters[node_counter_base_index + L2OUTPUT_ERROR_L2OUTPUT] += 1;
+
+ // Get config for the output interface
+ config0 = vec_elt_at_index(msm->configs, sw_if_index0);
+
+ // Get features from the config
+ // TODO: mask out any non-applicable features
+ feature_bitmap0 = config0->feature_bitmap;
+
+ // Determine next node
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2output_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b0,
+ sw_if_index0,
+ feature_bitmap0,
+ &next0);
+
+ // Perform output vlan tag rewrite and the pre-vtr EFP filter check.
+ // The EFP Filter only needs to be run if there is an output VTR
+ // configured. The flag for the post-vtr EFP Filter node is used
+ // to trigger the pre-vtr check as well.
+
+ if (config0->output_vtr.push_and_pop_bytes) {
+ // Perform pre-vtr EFP filter check if configured
+ u32 failed1 = (feature_bitmap0 & L2OUTPUT_FEAT_EFP_FILTER) &&
+ (l2_efp_filter_process(b0, &(config0->input_vtr)));
+ u32 failed2 = l2_vtr_process(b0, &(config0->output_vtr));
+
+ if (PREDICT_FALSE (failed1 | failed2)) {
+ next0 = L2OUTPUT_NEXT_DROP;
+ if (failed2) {
+ b0->error = node->errors[L2OUTPUT_ERROR_VTR_DROP];
+ }
+ if (failed1) {
+ b0->error = node->errors[L2OUTPUT_ERROR_EFP_DROP];
+ }
+ }
+ }
+
+ // Perform the split horizon check
+ if (PREDICT_FALSE (split_horizon_violation (config0->shg, vnet_buffer(b0)->l2.shg))) {
+ next0 = L2OUTPUT_NEXT_DROP;
+ b0->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+VLIB_REGISTER_NODE (l2output_node) = {
+ .function = l2output_node_fn,
+ .name = "l2-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2output_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2output_error_strings),
+ .error_strings = l2output_error_strings,
+
+ .n_next_nodes = L2OUTPUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2OUTPUT_NEXT_DROP] = "error-drop",
+ },
+};
+
+clib_error_t *l2output_init (vlib_main_t *vm)
+{
+ l2output_main_t * mp = &l2output_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main();
+
+ // Create the config vector. Until the CLI config is hooked up,
+ // just create 100 sw interface entries and zero them.
+ vec_validate(mp->configs, 100);
+
+ // Initialize the feature next-node indexes
+ feat_bitmap_init_next_nodes(vm,
+ l2output_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names(),
+ mp->next_nodes.feat_next_node_index);
+
+ // Initialize the output node mapping table
+ l2output_init_output_node_vec(&mp->next_nodes.output_node_index_vec);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2output_init);
+
+typedef struct {
+ u32 node_index;
+ u32 sw_if_index;
+} output_node_mapping_rpc_args_t;
+
+#if DPDK > 0
+static void output_node_rpc_callback
+( output_node_mapping_rpc_args_t * a);
+
+static void output_node_mapping_send_rpc
+(u32 node_index,
+ u32 sw_if_index)
+{
+ output_node_mapping_rpc_args_t args;
+
+ args.node_index = node_index;
+ args.sw_if_index = sw_if_index;
+
+ vl_api_rpc_call_main_thread (output_node_rpc_callback,
+ (u8 *) &args, sizeof (args));
+}
+#endif
+
+
+// Create a mapping in the next node mapping table for the given sw_if_index
+u32 l2output_create_output_node_mapping (
+ vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 node_index, // index of current node
+ u32 * output_node_index_vec,
+ u32 sw_if_index) {
+
+ u32 next; // index of next graph node
+ vnet_hw_interface_t *hw0;
+ u32 *node;
+#if DPDK > 0
+ uword cpu_number;
+
+ cpu_number = os_get_cpu_number();
+
+ if (cpu_number)
+ {
+ output_node_mapping_send_rpc (node_index, sw_if_index);
+ return 0;
+ }
+#endif
+
+ hw0 = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+
+ // dynamically create graph node arc
+ next = vlib_node_add_next (vlib_main,
+ node_index,
+ hw0->output_node_index);
+
+ // Initialize vector with the mapping
+
+ node = vec_elt_at_index(output_node_index_vec, sw_if_index);
+ *node = next;
+
+ return next;
+}
+
+#if DPDK > 0
+void output_node_rpc_callback (output_node_mapping_rpc_args_t *a)
+{
+ vlib_main_t * vm = vlib_get_main();
+ vnet_main_t * vnm = vnet_get_main();
+ l2output_main_t * mp = &l2output_main;
+
+ (void) l2output_create_output_node_mapping
+ (vm, vnm, a->node_index, mp->next_nodes.output_node_index_vec,
+ a->sw_if_index);
+}
+#endif
+
+// Get a pointer to the config for the given interface
+l2_output_config_t * l2output_intf_config (u32 sw_if_index)
+{
+ l2output_main_t * mp = &l2output_main;
+
+ vec_validate(mp->configs, sw_if_index);
+ return vec_elt_at_index(mp->configs, sw_if_index);
+}
+
+// Enable (or disable) the feature in the bitmap for the given interface
+void l2output_intf_bitmap_enable (u32 sw_if_index,
+ u32 feature_bitmap,
+ u32 enable)
+{
+ l2output_main_t * mp = &l2output_main;
+ l2_output_config_t *config;
+
+ vec_validate(mp->configs, sw_if_index);
+ config = vec_elt_at_index(mp->configs, sw_if_index);
+
+ if (enable) {
+ config->feature_bitmap |= feature_bitmap;
+ } else {
+ config->feature_bitmap &= ~feature_bitmap;
+ }
+}
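+
+// Example: enable the EFP filter feature on an interface by setting its
+// bit in the output feature bitmap:
+//   l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_EFP_FILTER, 1);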
diff --git a/vnet/vnet/l2/l2_output.h b/vnet/vnet/l2/l2_output.h
new file mode 100644
index 00000000000..0d171b82541
--- /dev/null
+++ b/vnet/vnet/l2/l2_output.h
@@ -0,0 +1,219 @@
+/*
+ * l2_output.h : layer 2 output packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_output_h
+#define included_vnet_l2_output_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+
+
+// The L2 output feature configuration, a per-interface struct
+typedef struct {
+
+ u32 feature_bitmap;
+
+ // vlan tag rewrite for ingress and egress
+ // ingress vtr is located here because the same config data is used for
+ // the egress EFP filter check
+ vtr_config_t input_vtr;
+ vtr_config_t output_vtr;
+
+ // some of these flags may get integrated into the feature bitmap
+ u8 fwd_enable;
+ u8 flood_enable;
+
+ // split horizon group
+ u8 shg;
+
+} l2_output_config_t;
+
+
+// The set of next nodes for features and interface output.
+// Each output feature node should include this.
+typedef struct {
+ // vector of output next node index, indexed by sw_if_index.
+ // used when all output features have been executed and the
+ // next nodes are the interface output nodes.
+ u32 * output_node_index_vec;
+
+ // array of next node index for each output feature, indexed
+ // by l2output_feat_t. Used to determine next feature node.
+ u32 feat_next_node_index[32];
+
+} l2_output_next_nodes_st;
+
+
+typedef struct {
+ // Next nodes for features and output interfaces
+ l2_output_next_nodes_st next_nodes;
+
+ /* config vector indexed by sw_if_index */
+ l2_output_config_t *configs;
+
+ /* Convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} l2output_main_t;
+
+l2output_main_t l2output_main;
+vlib_node_registration_t l2output_node;
+
+// L2 output features
+
+// Mappings from feature ID to graph node name
+#define foreach_l2output_feat \
+ _(SPAN, "feature-bitmap-drop") \
+ _(CFM, "feature-bitmap-drop") \
+ _(QOS, "feature-bitmap-drop") \
+ _(ACL, "l2-output-acl") \
+ _(L2PT, "feature-bitmap-drop") \
+ _(EFP_FILTER, "l2-efp-filter") \
+ _(IPIW, "feature-bitmap-drop") \
+ _(STP_BLOCKED, "feature-bitmap-drop") \
+ _(LINESTATUS_DOWN, "feature-bitmap-drop") \
+ _(XCRW, "l2-xcrw")
+
+// Feature bitmap positions
+typedef enum {
+#define _(sym,str) L2OUTPUT_FEAT_##sym##_BIT,
+ foreach_l2output_feat
+#undef _
+ L2OUTPUT_N_FEAT,
+} l2output_feat_t;
+
+// Feature bit masks
+typedef enum {
+#define _(sym,str) L2OUTPUT_FEAT_##sym = (1<<L2OUTPUT_FEAT_##sym##_BIT),
+ foreach_l2output_feat
+#undef _
+} l2output_feat_masks_t;
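+
+// For example, ACL is fourth in the feature list above, so the two enums
+// expand it to:
+//   L2OUTPUT_FEAT_ACL_BIT = 3
+//   L2OUTPUT_FEAT_ACL     = (1 << 3)   /* 0x8 */
+// and l2output_feat_names[3] is "l2-output-acl".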
+
+// Return an array of strings containing graph node names of each feature
+char **l2output_get_feat_names(void);
+
+
+// The next set of functions is for use by output feature graph nodes.
+// When the last bit has been cleared from the output feature bitmap,
+// the next node is the output graph node for the TX sw_if_index.
+// These functions help the feature nodes get that node index.
+
+// Create a mapping to the output graph node for the given sw_if_index
+u32 l2output_create_output_node_mapping (
+ vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 node_index, // index of current node
+ u32 * output_node_index_vec,
+ u32 sw_if_index);
+
+// Initialize the next node mapping table
+always_inline
+void l2output_init_output_node_vec (u32 **output_node_index_vec) {
+
+ // Size it at 100 sw_if_indexes initially
+ // Uninitialized mappings are set to ~0
+ vec_validate_init_empty(*output_node_index_vec, 100, ~0);
+}
+
+
+// Get a mapping from the output node mapping table,
+// creating the entry if necessary.
+always_inline
+u32 l2output_get_output_node (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 node_index, // index of current node
+ u32 sw_if_index,
+ u32 ** output_node_index_vec) // may be updated
+{
+ u32 next; // index of next graph node
+
+ // Ensure the vector is big enough
+ vec_validate_init_empty(*output_node_index_vec, sw_if_index, ~0);
+
+ // Get the mapping for the sw_if_index
+ next = vec_elt(*output_node_index_vec, sw_if_index);
+
+ if (next == ~0) {
+ // Mapping doesn't exist so create it
+ next = l2output_create_output_node_mapping (vlib_main,
+ vnet_main,
+ node_index,
+ *output_node_index_vec,
+ sw_if_index);
+ }
+
+ return next;
+}
+
+
+// Determine the next L2 node based on the output feature bitmap
+always_inline void
+l2_output_dispatch (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ vlib_node_runtime_t * node,
+ u32 node_index,
+ u32 * cached_sw_if_index,
+ u32 * cached_next_index,
+ l2_output_next_nodes_st *next_nodes,
+ vlib_buffer_t * b0,
+ u32 sw_if_index,
+ u32 feature_bitmap,
+ u32 *next0)
+{
+ if (feature_bitmap) {
+ // There are some features to execute
+
+ // Save bitmap for the next feature graph nodes
+ vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap;
+
+ // Determine the next node
+ *next0 = feat_bitmap_get_next_node_index(next_nodes->feat_next_node_index,
+ feature_bitmap);
+ } else {
+ // There are no features. Send packet to TX node for sw_if_index0
+ // This is a little tricky in that the output interface next node indexes
+ // are not precomputed at init time.
+
+ if (sw_if_index == *cached_sw_if_index) {
+ // We hit in the one-entry cache. Use it.
+ *next0 = *cached_next_index;
+ } else {
+ // Look up the output TX node
+ *next0 = l2output_get_output_node(vlib_main,
+ vnet_main,
+ node_index,
+ sw_if_index,
+ &next_nodes->output_node_index_vec);
+
+ // Update the one-entry cache
+ *cached_sw_if_index = sw_if_index;
+ *cached_next_index = *next0;
+ }
+ }
+}
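+
+// Minimal usage sketch for an output feature node (the locals and main
+// struct are illustrative; l2-output-acl below follows exactly this shape):
+//
+//   feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_ACL;
+//   l2_output_dispatch (vm, vnm, node, my_node.index,
+//                       &cached_sw_if_index, &cached_next_index,
+//                       &my_main.next_nodes,
+//                       b0, sw_if_index0, feature_bitmap0, &next0);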
+
+// Get a pointer to the config for the given interface
+l2_output_config_t * l2output_intf_config (u32 sw_if_index);
+
+// Enable (or disable) the feature in the bitmap for the given interface
+void l2output_intf_bitmap_enable (u32 sw_if_index,
+ u32 feature_bitmap,
+ u32 enable);
+
+#endif
diff --git a/vnet/vnet/l2/l2_output_acl.c b/vnet/vnet/l2/l2_output_acl.c
new file mode 100644
index 00000000000..2f6c1dce41f
--- /dev/null
+++ b/vnet/vnet/l2/l2_output_acl.c
@@ -0,0 +1,335 @@
+/*
+ * l2_output_acl.c : layer 2 output acl processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_output.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+
+typedef struct {
+ // Next nodes for features and output interfaces
+ l2_output_next_nodes_st next_nodes;
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} l2_outacl_main_t;
+
+
+
+typedef struct {
+ /* per-pkt trace data */
+ u8 src[6];
+ u8 dst[6];
+ u32 next_index;
+ u32 sw_if_index;
+} l2_outacl_trace_t;
+
+/* packet trace format function */
+static u8 * format_l2_outacl_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_outacl_trace_t * t = va_arg (*args, l2_outacl_trace_t *);
+
+ s = format (s, "l2-output-acl: sw_if_index %d dst %U src %U",
+ t->sw_if_index,
+ format_ethernet_address, t->dst,
+ format_ethernet_address, t->src);
+ return s;
+}
+
+l2_outacl_main_t l2_outacl_main;
+
+static vlib_node_registration_t l2_outacl_node;
+
+#define foreach_l2_outacl_error \
+_(L2_OUTACL, "L2 output ACL packets") \
+_(DROP, "L2 output drops")
+
+typedef enum {
+#define _(sym,str) L2_OUTACL_ERROR_##sym,
+ foreach_l2_outacl_error
+#undef _
+ L2_OUTACL_N_ERROR,
+} l2_outacl_error_t;
+
+static char * l2_outacl_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_outacl_error
+#undef _
+};
+
+typedef enum {
+ L2_OUTACL_NEXT_DROP,
+ L2_OUTACL_N_NEXT,
+} l2_outacl_next_t;
+
+
+
+static uword
+l2_outacl_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ l2_outacl_next_t next_index;
+ l2_outacl_main_t * msm = &l2_outacl_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_outacl_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t * em = &vm->error_main;
+ u32 cached_sw_if_index = (u32)~0;
+ u32 cached_next_index = (u32)~0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors; /* number of packets to process */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ /* get space to enqueue frame to graph node "next_index" */
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ /* This dual loop is disabled (0 &&) until the ACL core loop code below is added */
+ while (0 && n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ethernet_header_t * h0, * h1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* packet headers, used by the trace code below */
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ /* TX interface handles */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_outacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_outacl_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ memcpy(t->src, h1->src_address, 6);
+ memcpy(t->dst, h1->dst_address, 6);
+ }
+ }
+
+ em->counters[node_counter_base_index + L2_OUTACL_ERROR_L2_OUTACL] += 2;
+
+ /* add core loop code here */
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t * h0;
+ u32 feature_bitmap0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
+ l2_outacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ memcpy(t->src, h0->src_address, 6);
+ memcpy(t->dst, h0->dst_address, 6);
+ }
+
+ em->counters[node_counter_base_index + L2_OUTACL_ERROR_L2_OUTACL] += 1;
+
+ // L2_OUTACL code
+ // Dummy for now, just go to next feature node
+
+
+ // Remove ourself from the feature bitmap
+ feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_ACL;
+
+ // Determine next node
+ l2_output_dispatch (msm->vlib_main,
+ msm->vnet_main,
+ node,
+ l2_outacl_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ &msm->next_nodes,
+ b0,
+ sw_if_index0,
+ feature_bitmap0,
+ &next0);
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+VLIB_REGISTER_NODE (l2_outacl_node,static) = {
+ .function = l2_outacl_node_fn,
+ .name = "l2-output-acl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_outacl_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_outacl_error_strings),
+ .error_strings = l2_outacl_error_strings,
+
+ .n_next_nodes = L2_OUTACL_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_OUTACL_NEXT_DROP] = "error-drop",
+ },
+};
+
+clib_error_t *l2_outacl_init (vlib_main_t *vm)
+{
+ l2_outacl_main_t * mp = &l2_outacl_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main();
+
+ // Initialize the feature next-node indexes
+ feat_bitmap_init_next_nodes(vm,
+ l2_outacl_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names(),
+ mp->next_nodes.feat_next_node_index);
+
+ // Initialize the output node mapping table
+ l2output_init_output_node_vec(&mp->next_nodes.output_node_index_vec);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_outacl_init);
+
+// set subinterface outacl enable/disable
+// The CLI format is:
+// set interface acl output <interface> [disable]
+static clib_error_t *
+int_l2_outacl (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable")) {
+ enable = 0;
+ }
+
+ // set the interface flag
+ l2output_intf_bitmap_enable(sw_if_index, L2OUTPUT_FEAT_ACL, enable);
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (int_l2_outacl_cli, static) = {
+ .path = "set interface acl output",
+ .short_help = "set interface acl output <interface> [disable]",
+ .function = int_l2_outacl,
+};
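+
+// Example (interface name hypothetical):
+//   vpp# set interface acl output GigabitEthernet0/1/0
+//   vpp# set interface acl output GigabitEthernet0/1/0 disable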
diff --git a/vnet/vnet/l2/l2_patch.c b/vnet/vnet/l2/l2_patch.c
new file mode 100644
index 00000000000..63be409d3b8
--- /dev/null
+++ b/vnet/vnet/l2/l2_patch.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/error.h>
+
+typedef struct {
+ u32 cached_next_index;
+ u32 cached_rx_sw_if_index;
+
+ /* vector of dispositions, indexed by rx_sw_if_index */
+ u32 *tx_next_by_rx_sw_if_index;
+ u32 *tx_sw_if_index_by_rx_sw_if_index;
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} l2_patch_main_t;
+
+typedef struct {
+ u32 rx_sw_if_index;
+ u32 tx_sw_if_index;
+} l2_patch_trace_t;
+
+/* packet trace format function */
+static u8 * format_l2_patch_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_patch_trace_t * t = va_arg (*args, l2_patch_trace_t *);
+
+ s = format (s, "L2_PATCH: rx %d tx %d", t->rx_sw_if_index,
+ t->tx_sw_if_index);
+ return s;
+}
+
+l2_patch_main_t l2_patch_main;
+
+static vlib_node_registration_t l2_patch_node;
+
+#define foreach_l2_patch_error \
+_(PATCHED, "L2 patch packets") \
+_(DROPPED, "L2 patch misconfigured drops")
+
+typedef enum {
+#define _(sym,str) L2_PATCH_ERROR_##sym,
+ foreach_l2_patch_error
+#undef _
+ L2_PATCH_N_ERROR,
+} l2_patch_error_t;
+
+static char * l2_patch_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_patch_error
+#undef _
+};
+
+typedef enum {
+ L2_PATCH_NEXT_DROP,
+ L2_PATCH_N_NEXT,
+} l2_patch_next_t;
+
+static uword
+l2_patch_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ l2_patch_next_t next_index;
+ l2_patch_main_t * l2pm = &l2_patch_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_patch_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t * em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ /* This node never touches packet data, so there is no need to prefetch it */
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ ASSERT(l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0);
+ ASSERT(l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0);
+ ASSERT(l2pm->tx_next_by_rx_sw_if_index[sw_if_index1] != ~0);
+ ASSERT(l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1] != ~0);
+
+ if (PREDICT_TRUE (sw_if_index0 == l2pm->cached_rx_sw_if_index))
+ next0 = l2pm->cached_next_index;
+ else
+ {
+ next0 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index0];
+ l2pm->cached_rx_sw_if_index = sw_if_index0;
+ l2pm->cached_next_index = next0;
+ }
+
+ if (PREDICT_TRUE (sw_if_index1 == l2pm->cached_rx_sw_if_index))
+ next1 = l2pm->cached_next_index;
+ else
+ next1 = l2pm->tx_next_by_rx_sw_if_index [sw_if_index1];
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_patch_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->rx_sw_if_index = sw_if_index0;
+ t->tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index [sw_if_index0];
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_patch_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->rx_sw_if_index = sw_if_index1;
+ t->tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index [sw_if_index1];
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ ASSERT(l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0);
+ ASSERT(l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0);
+
+ if (PREDICT_TRUE (sw_if_index0 == l2pm->cached_rx_sw_if_index))
+ next0 = l2pm->cached_next_index;
+ else
+ {
+ next0 = l2pm->tx_next_by_rx_sw_if_index [sw_if_index0];
+ l2pm->cached_rx_sw_if_index = sw_if_index0;
+ l2pm->cached_next_index = next0;
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_patch_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->rx_sw_if_index = sw_if_index0;
+ t->tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index [sw_if_index0];
+ }
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ em->counters[node_counter_base_index + L2_PATCH_ERROR_PATCHED] +=
+ frame->n_vectors;
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (l2_patch_node, static) = {
+ .function = l2_patch_node_fn,
+ .name = "l2_patch",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_patch_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_patch_error_strings),
+ .error_strings = l2_patch_error_strings,
+
+ .n_next_nodes = L2_PATCH_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_PATCH_NEXT_DROP] = "error-drop",
+ },
+};
+
+int vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add)
+{
+ l2_patch_main_t * l2pm = &l2_patch_main;
+ vnet_hw_interface_t * rxhi, *txhi;
+ u32 tx_next_index;
+
+ /*
+ * We assume that the API msg handler has used 2x VALIDATE_SW_IF_INDEX
+ * macros...
+ */
+
+ rxhi = vnet_get_sup_hw_interface (l2pm->vnet_main, rx_sw_if_index);
+
+ /* Make sure caller didn't pass a vlan subif, etc. */
+ if (rxhi->sw_if_index != rx_sw_if_index)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ txhi = vnet_get_sup_hw_interface (l2pm->vnet_main, tx_sw_if_index);
+ if (txhi->sw_if_index != tx_sw_if_index)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX_2;
+
+ if (is_add)
+ {
+ tx_next_index = vlib_node_add_next (l2pm->vlib_main,
+ l2_patch_node.index,
+ txhi->output_node_index);
+
+ vec_validate_init_empty (l2pm->tx_next_by_rx_sw_if_index,
+ rx_sw_if_index, ~0);
+
+ l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = tx_next_index;
+ vec_validate_init_empty (l2pm->tx_sw_if_index_by_rx_sw_if_index,
+ rx_sw_if_index, ~0);
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index]
+ = txhi->sw_if_index;
+
+ ethernet_set_flags (l2pm->vnet_main, rxhi->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
+
+ vnet_hw_interface_rx_redirect_to_node (l2pm->vnet_main,
+ rxhi->hw_if_index,
+ l2_patch_node.index);
+ }
+ else
+ {
+ ethernet_set_flags (l2pm->vnet_main, rxhi->hw_if_index,
+ 0 /* disable promiscuous mode */);
+
+ vnet_hw_interface_rx_redirect_to_node (l2pm->vnet_main,
+ rxhi->hw_if_index,
+ ~0 /* disable */);
+ if (vec_len (l2pm->tx_next_by_rx_sw_if_index) > rx_sw_if_index)
+ {
+ l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = ~0;
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index] = ~0;
+ }
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+test_patch_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ l2_patch_main_t * l2pm = &l2_patch_main;
+ unformat_input_t _line_input, * line_input = &_line_input;
+ u32 rx_sw_if_index, tx_sw_if_index;
+ int rv;
+ int rx_set = 0;
+ int tx_set = 0;
+ int is_add = 1;
+
+ /* Get a line of input. */
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "rx %U", unformat_vnet_sw_interface,
+ l2pm->vnet_main, &rx_sw_if_index))
+ rx_set = 1;
+ else if (unformat (line_input, "tx %U", unformat_vnet_sw_interface,
+ l2pm->vnet_main, &tx_sw_if_index))
+ tx_set = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else break;
+ }
+
+ if (rx_set == 0)
+ return clib_error_return (0, "rx interface not set");
+
+ if (tx_set == 0)
+ return clib_error_return (0, "tx interface not set");
+
+ rv = vnet_l2_patch_add_del (rx_sw_if_index, tx_sw_if_index, is_add);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return (0, "rx interface not a physical port");
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX_2:
+ return clib_error_return (0, "tx interface not a physical port");
+
+ default:
+ return clib_error_return
+ (0, "WARNING: vnet_l2_patch_add_del returned %d", rv);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (test_patch_command, static) = {
+ .path = "test l2patch",
+ .short_help =
+ "rx <intfc> tx <intfc> [del]",
+ .function = test_patch_command_fn,
+};
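+
+/*
+ * Example usage (illustrative; interface names will vary by system):
+ *
+ *   test l2patch rx GigabitEthernet2/0/0 tx GigabitEthernet2/0/1
+ *   test l2patch rx GigabitEthernet2/0/0 tx GigabitEthernet2/0/1 del
+ *
+ * The first form patches all packets received on the rx interface
+ * directly to the tx interface; the second removes the patch.
+ */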
+
+// Display the contents of the l2patch table.
+static clib_error_t *
+show_l2patch (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ l2_patch_main_t * l2pm = &l2_patch_main;
+ u32 rx_sw_if_index;
+ u32 no_entries = 1;
+
+ ASSERT(vec_len(l2pm->tx_next_by_rx_sw_if_index) ==
+ vec_len(l2pm->tx_sw_if_index_by_rx_sw_if_index));
+
+ for (rx_sw_if_index = 0;
+ rx_sw_if_index < vec_len (l2pm->tx_sw_if_index_by_rx_sw_if_index);
+ rx_sw_if_index++)
+ {
+ u32 tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index];
+ if (tx_sw_if_index != ~0)
+ {
+ no_entries = 0;
+ vlib_cli_output (vm, "%26U -> %U",
+ format_vnet_sw_if_index_name,
+ l2pm->vnet_main, rx_sw_if_index,
+ format_vnet_sw_if_index_name,
+                           l2pm->vnet_main, tx_sw_if_index);
+ }
+ }
+
+ if (no_entries)
+ vlib_cli_output (vm, "no l2patch entries");
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_l2patch_cli, static) = {
+ .path = "show l2patch",
+ .short_help = "Show l2 interface cross-connect entries",
+ .function = show_l2patch,
+};
+
+clib_error_t *l2_patch_init (vlib_main_t *vm)
+{
+ l2_patch_main_t * mp = &l2_patch_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main();
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_patch_init);
diff --git a/vnet/vnet/l2/l2_vtr.c b/vnet/vnet/l2/l2_vtr.c
new file mode 100644
index 00000000000..a7499041009
--- /dev/null
+++ b/vnet/vnet/l2/l2_vtr.c
@@ -0,0 +1,448 @@
+/*
+ * l2_vtr.c : layer 2 vlan tag rewrite configuration
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/l2/l2_input_vtr.h>
+#include <vnet/l2/l2_output.h>
+
+#include <vppinfra/error.h>
+#include <vlib/cli.h>
+
+
+// Just a placeholder. Also ensures the file is not eliminated by the linker.
+clib_error_t *l2_vtr_init (vlib_main_t *vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION(l2_vtr_init);
+
+
+// Configure vlan tag rewrite on the given interface.
+// Returns 0 on success, or a VNET_API_ERROR code on failure.
+u32 l2vtr_configure (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 sw_if_index,
+ u32 vtr_op,
+ u32 push_dot1q, // ethertype of first pushed tag is dot1q/dot1ad
+ u32 vtr_tag1, // first pushed tag
+ u32 vtr_tag2) // second pushed tag
+{
+ vnet_hw_interface_t * hi;
+ vnet_sw_interface_t * si;
+ u32 hw_no_tags;
+ u32 error = 0;
+ vtr_config_t * in_config;
+ vtr_config_t * out_config;
+ u32 enable;
+ u32 push_inner_et;
+ u32 push_outer_et;
+ u32 cfg_tags;
+
+ hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+ if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index)) {
+ error = VNET_API_ERROR_INVALID_INTERFACE; // non-ethernet interface
+ goto done;
+ }
+
+ // Init the config for this interface
+ vec_validate (l2output_main.configs, sw_if_index);
+ in_config = &(vec_elt_at_index(l2output_main.configs, sw_if_index)->input_vtr);
+ out_config = &(vec_elt_at_index(l2output_main.configs, sw_if_index)->output_vtr);
+ in_config->raw_tags = 0;
+ out_config->raw_tags = 0;
+
+ // Get the configured tags for the interface
+ si = vnet_get_sw_interface (vnet_main, sw_if_index);
+ hw_no_tags = (si->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+
+ // Construct the input tag-rewrite config
+
+ push_outer_et = clib_net_to_host_u16 (push_dot1q ? ETHERNET_TYPE_VLAN : ETHERNET_TYPE_DOT1AD);
+ push_inner_et = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN);
+ vtr_tag1 = clib_net_to_host_u16 (vtr_tag1);
+ vtr_tag2 = clib_net_to_host_u16 (vtr_tag2);
+
+  // Determine number of vlan tags with explicitly configured values
+ cfg_tags = 0;
+ if (hw_no_tags || si->sub.eth.flags.no_tags) {
+ cfg_tags = 0;
+ } else if (si->sub.eth.flags.one_tag) {
+ cfg_tags = 1;
+ if (si->sub.eth.flags.outer_vlan_id_any) {
+ cfg_tags = 0;
+ }
+ } else if (si->sub.eth.flags.two_tags) {
+ cfg_tags = 2;
+ if (si->sub.eth.flags.inner_vlan_id_any) {
+ cfg_tags = 1;
+ }
+ if (si->sub.eth.flags.outer_vlan_id_any) {
+ cfg_tags = 0;
+ }
+ }
+
+ switch (vtr_op) {
+ case L2_VTR_DISABLED:
+ in_config->push_and_pop_bytes = 0;
+ break;
+
+ case L2_VTR_POP_1:
+ if (cfg_tags < 1) {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need one or two tags
+ goto done;
+ }
+ in_config->pop_bytes = 4;
+ in_config->push_bytes = 0;
+ break;
+
+ case L2_VTR_POP_2:
+ if (cfg_tags < 2) {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need two tags
+ goto done;
+ }
+ in_config->pop_bytes = 8;
+ in_config->push_bytes = 0;
+    break;
+
+ case L2_VTR_PUSH_1:
+ in_config->pop_bytes = 0;
+ in_config->push_bytes = 4;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[1].type = push_outer_et;
+ break;
+
+ case L2_VTR_PUSH_2:
+ in_config->pop_bytes = 0;
+ in_config->push_bytes = 8;
+ in_config->tags[0].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[0].type = push_outer_et;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag2;
+ in_config->tags[1].type = push_inner_et;
+ break;
+
+ case L2_VTR_TRANSLATE_1_1:
+ if (cfg_tags < 1) {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need one or two tags
+ goto done;
+ }
+ in_config->pop_bytes = 4;
+ in_config->push_bytes = 4;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[1].type = push_outer_et;
+ break;
+
+ case L2_VTR_TRANSLATE_1_2:
+ if (cfg_tags < 1) {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need one or two tags
+ goto done;
+ }
+ in_config->pop_bytes = 4;
+ in_config->push_bytes = 8;
+ in_config->tags[0].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[0].type = push_outer_et;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag2;
+ in_config->tags[1].type = push_inner_et;
+ break;
+
+ case L2_VTR_TRANSLATE_2_1:
+ if (cfg_tags < 2) {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need two tags
+ goto done;
+ }
+ in_config->pop_bytes = 8;
+ in_config->push_bytes = 4;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[1].type = push_outer_et;
+ break;
+
+ case L2_VTR_TRANSLATE_2_2:
+ if (cfg_tags < 2) {
+ error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need two tags
+ goto done;
+ }
+ in_config->pop_bytes = 8;
+ in_config->push_bytes = 8;
+ in_config->tags[0].priority_cfi_and_id = vtr_tag1;
+ in_config->tags[0].type = push_outer_et;
+ in_config->tags[1].priority_cfi_and_id = vtr_tag2;
+ in_config->tags[1].type = push_inner_et;
+    break;
+
+  default:
+    error = VNET_API_ERROR_INVALID_VALUE; // unrecognized vtr_op
+    goto done;
+  }
+
+ // Construct the output tag-rewrite config
+
+ // The push/pop values are always reversed
+ out_config->push_bytes = in_config->pop_bytes;
+ out_config->pop_bytes = in_config->push_bytes;
+
+ // Any pushed tags are derived from the subinterface config
+ push_outer_et = clib_net_to_host_u16 (si->sub.eth.flags.dot1ad ? ETHERNET_TYPE_DOT1AD : ETHERNET_TYPE_VLAN);
+ push_inner_et = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN);
+ vtr_tag1 = clib_net_to_host_u16 (si->sub.eth.outer_vlan_id);
+ vtr_tag2 = clib_net_to_host_u16 (si->sub.eth.inner_vlan_id);
+
+ if (out_config->push_bytes == 4) {
+ out_config->tags[1].priority_cfi_and_id = vtr_tag1;
+ out_config->tags[1].type = push_outer_et;
+ } else if (out_config->push_bytes == 8) {
+ out_config->tags[0].priority_cfi_and_id = vtr_tag1;
+ out_config->tags[0].type = push_outer_et;
+ out_config->tags[1].priority_cfi_and_id = vtr_tag2;
+ out_config->tags[1].type = push_inner_et;
+ }
+
+ // set the interface enable flags
+ enable = (vtr_op != L2_VTR_DISABLED);
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_VTR, enable);
+ // output vtr enable is checked explicitly in l2_output
+
+ done:
+ return error;
+}
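+
+// For example (illustrative), popping the single configured tag on a
+// subinterface could be requested as:
+//   l2vtr_configure (vm, vnm, sw_if_index, L2_VTR_POP_1, 0, 0, 0);
+// where the push_dot1q and tag arguments are ignored for pop operations.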
+
+// Get the vlan tag rewrite config on the given interface.
+// Returns 0 on success, or a VNET_API_ERROR code on failure.
+u32 l2vtr_get (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 sw_if_index,
+ u32 *vtr_op,
+ u32 *push_dot1q, // ethertype of first pushed tag is dot1q/dot1ad
+ u32 *vtr_tag1, // first pushed tag
+ u32 *vtr_tag2) // second pushed tag
+{
+ vnet_hw_interface_t * hi;
+ u32 error = 0;
+ vtr_config_t * in_config;
+
+ if (!vtr_op || !push_dot1q || !vtr_tag1 || !vtr_tag2) {
+ clib_warning ("invalid arguments");
+ error = VNET_API_ERROR_INVALID_ARGUMENT;
+ goto done;
+ }
+
+ *vtr_op = L2_VTR_DISABLED;
+ *vtr_tag1 = 0;
+ *vtr_tag2 = 0;
+ *push_dot1q = 0;
+
+ hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+ if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index)) {
+ // non-ethernet interface
+ goto done;
+ }
+
+ if (sw_if_index >= vec_len(l2output_main.configs)) {
+ // no specific config (return disabled)
+ goto done;
+ }
+
+ // Get the config for this interface
+ in_config = &(vec_elt_at_index(l2output_main.configs, sw_if_index)->input_vtr);
+
+ // DISABLED
+ if (in_config->push_and_pop_bytes == 0) {
+ goto done;
+ }
+
+ // find out vtr_op
+ switch (in_config->pop_bytes) {
+ case 0:
+ switch (in_config->push_bytes) {
+ case 0:
+ // DISABLED
+ goto done;
+ case 4:
+ *vtr_op = L2_VTR_PUSH_1;
+ *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[1].type));
+ break;
+ case 8:
+ *vtr_op = L2_VTR_PUSH_2;
+ *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id);
+ *vtr_tag2 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[0].type));
+ break;
+ default:
+ clib_warning ("invalid push_bytes count: %d", in_config->push_bytes);
+ error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+ goto done;
+ }
+ break;
+
+ case 4:
+ switch (in_config->push_bytes) {
+ case 0:
+ *vtr_op = L2_VTR_POP_1;
+ break;
+ case 4:
+ *vtr_op = L2_VTR_TRANSLATE_1_1;
+ *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[1].type));
+ break;
+ case 8:
+ *vtr_op = L2_VTR_TRANSLATE_1_2;
+ *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id);
+ *vtr_tag2 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[0].type));
+ break;
+ default:
+ clib_warning ("invalid push_bytes count: %d", in_config->push_bytes);
+ error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+ goto done;
+ }
+ break;
+
+ case 8:
+ switch (in_config->push_bytes) {
+ case 0:
+ *vtr_op = L2_VTR_POP_2;
+ break;
+ case 4:
+ *vtr_op = L2_VTR_TRANSLATE_2_1;
+ *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[1].type));
+ break;
+ case 8:
+ *vtr_op = L2_VTR_TRANSLATE_2_2;
+ *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id);
+ *vtr_tag2 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id);
+ *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[0].type));
+ break;
+ default:
+ clib_warning ("invalid push_bytes count: %d", in_config->push_bytes);
+ error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+ goto done;
+ }
+ break;
+
+ default:
+ clib_warning ("invalid pop_bytes count: %d", in_config->pop_bytes);
+ error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+ goto done;
+ }
+
+ done:
+ return error;
+}
+
+// set subinterface vtr enable/disable
+// The CLI format is:
+// set interface l2 tag-rewrite <interface> [disable | pop 1 | pop 2 | push {dot1q|dot1ad} <tag> [<tag>]]
+// "push" can also be replaced by "translate-{1|2}-{1|2}"
+static clib_error_t *
+int_l2_vtr (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index;
+ u32 vtr_op;
+ u32 push_dot1q = 0;
+ u32 tag1 = 0, tag2 = 0;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ vtr_op = L2_VTR_DISABLED;
+
+ if (unformat (input, "disable")) {
+ vtr_op = L2_VTR_DISABLED;
+ } else if (unformat (input, "pop 1")) {
+ vtr_op = L2_VTR_POP_1;
+ } else if (unformat (input, "pop 2")) {
+ vtr_op = L2_VTR_POP_2;
+
+ } else if (unformat (input, "push dot1q %d %d", &tag1, &tag2)) {
+ vtr_op = L2_VTR_PUSH_2;
+ push_dot1q = 1;
+ } else if (unformat (input, "push dot1ad %d %d", &tag1, &tag2)) {
+ vtr_op = L2_VTR_PUSH_2;
+
+ } else if (unformat (input, "push dot1q %d", &tag1)) {
+ vtr_op = L2_VTR_PUSH_1;
+ push_dot1q = 1;
+ } else if (unformat (input, "push dot1ad %d", &tag1)) {
+ vtr_op = L2_VTR_PUSH_1;
+
+ } else if (unformat (input, "translate 1-1 dot1q %d", &tag1)) {
+ vtr_op = L2_VTR_TRANSLATE_1_1;
+ push_dot1q = 1;
+ } else if (unformat (input, "translate 1-1 dot1ad %d", &tag1)) {
+ vtr_op = L2_VTR_TRANSLATE_1_1;
+
+ } else if (unformat (input, "translate 2-1 dot1q %d", &tag1)) {
+ vtr_op = L2_VTR_TRANSLATE_2_1;
+ push_dot1q = 1;
+ } else if (unformat (input, "translate 2-1 dot1ad %d", &tag1)) {
+ vtr_op = L2_VTR_TRANSLATE_2_1;
+
+ } else if (unformat (input, "translate 2-2 dot1q %d %d", &tag1, &tag2)) {
+ vtr_op = L2_VTR_TRANSLATE_2_2;
+ push_dot1q = 1;
+ } else if (unformat (input, "translate 2-2 dot1ad %d %d", &tag1, &tag2)) {
+ vtr_op = L2_VTR_TRANSLATE_2_2;
+
+ } else if (unformat (input, "translate 1-2 dot1q %d %d", &tag1, &tag2)) {
+ vtr_op = L2_VTR_TRANSLATE_1_2;
+ push_dot1q = 1;
+ } else if (unformat (input, "translate 1-2 dot1ad %d %d", &tag1, &tag2)) {
+ vtr_op = L2_VTR_TRANSLATE_1_2;
+
+ } else {
+    error = clib_error_return (0, "expecting [disable | pop 1 | pop 2 | push {dot1q|dot1ad} <tag> [<tag>]\n"
+                               " | translate {1|2}-{1|2} {dot1q|dot1ad} <tag> [<tag>]] but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (l2vtr_configure (vm,
+ vnm,
+ sw_if_index,
+ vtr_op,
+ push_dot1q,
+ tag1,
+ tag2)) {
+ error = clib_error_return (0, "vlan tag rewrite is not compatible with interface");
+ goto done;
+ }
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (int_l2_vtr_cli, static) = {
+ .path = "set interface l2 tag-rewrite",
+ .short_help = "set interface l2 tag-rewrite <interface> [disable | pop {1|2} | push {dot1q|dot1ad} <tag> <tag>]",
+ .function = int_l2_vtr,
+};
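+
+// Example usage (illustrative; interface and tag values will vary):
+//   set interface l2 tag-rewrite GigabitEthernet2/0/0.100 pop 1
+//   set interface l2 tag-rewrite GigabitEthernet2/0/0.100 translate 1-1 dot1q 200
+//   set interface l2 tag-rewrite GigabitEthernet2/0/0.100 disable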
+
diff --git a/vnet/vnet/l2/l2_vtr.h b/vnet/vnet/l2/l2_vtr.h
new file mode 100644
index 00000000000..aef6c6d255e
--- /dev/null
+++ b/vnet/vnet/l2/l2_vtr.h
@@ -0,0 +1,167 @@
+/*
+ * l2_vtr.h : layer 2 vlan tag rewrite processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_vtr_h
+#define included_vnet_l2_vtr_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/packet.h>
+
+// VTR config options for API and CLI support
+typedef enum {
+ L2_VTR_DISABLED,
+ L2_VTR_PUSH_1,
+ L2_VTR_PUSH_2,
+ L2_VTR_POP_1,
+ L2_VTR_POP_2,
+ L2_VTR_TRANSLATE_1_1,
+ L2_VTR_TRANSLATE_1_2,
+ L2_VTR_TRANSLATE_2_1,
+ L2_VTR_TRANSLATE_2_2
+} l2_vtr_op_t;
+
+// Per-interface vlan tag rewrite configuration
+// There will be one instance of this struct for each sw_if_index
+// for both input vtr and output vtr
+typedef struct {
+ union {
+ // Up to two vlan tags to push.
+ // if there is only one vlan tag to push, it is in tags[1].
+ ethernet_vlan_header_tv_t tags[2];
+ u64 raw_tags;
+ };
+
+ union {
+ struct {
+ u8 push_bytes; // number of bytes to push for up to 2 vlans (0,4,8)
+ u8 pop_bytes; // number of bytes to pop for up to 2 vlans (0,4,8)
+ };
+ u16 push_and_pop_bytes; // if 0 then the feature is disabled
+ };
+} vtr_config_t;
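+
+// For example (illustrative), a "push 2" rewrite of dot1q tags 100 (outer)
+// and 200 (inner) would be described by:
+//   push_bytes = 8, pop_bytes = 0
+//   tags[0] = {type 0x8100, id 100}   (outer)
+//   tags[1] = {type 0x8100, id 200}   (inner)
+// while a single-tag push uses push_bytes = 4 and only tags[1].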
+
+
+// Perform the configured tag rewrite on the packet.
+// Return 0 if ok, 1 if packet should be dropped (e.g. tried to pop too many tags)
+always_inline u32
+l2_vtr_process (vlib_buffer_t * b0,
+ vtr_config_t * config)
+{
+ u64 temp_8;
+ u32 temp_4;
+ u8 * eth;
+
+ eth = vlib_buffer_get_current (b0);
+
+ // copy the 12B dmac and smac to a temporary location
+ temp_8 = *((u64 *)eth);
+ temp_4 = *((u32 *)(eth+8));
+
+ // adjust for popped tags
+ eth += config->pop_bytes;
+
+ // if not enough tags to pop then drop packet
+ if (PREDICT_FALSE ((vnet_buffer(b0)->l2.l2_len - 12) < config->pop_bytes)) {
+ return 1;
+ }
+
+ // copy the 2 new tags to the start of the packet
+ *((u64 *)(eth + 12 - 8)) = config->raw_tags;
+
+ // TODO: set cos bits
+
+ // adjust for pushed tags:
+ eth -= config->push_bytes;
+
+  // copy the 12B dmac and smac back to the packet
+ *((u64 *)eth) = temp_8;
+ *((u32 *)(eth+8)) = temp_4;
+
+ // Update l2_len
+ vnet_buffer(b0)->l2.l2_len += (word)config->push_bytes - (word)config->pop_bytes;
+
+ // Update packet len
+ vlib_buffer_advance(b0, (word)config->pop_bytes - (word)config->push_bytes);
+
+ return 0;
+}
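+
+// Worked example (illustrative): a translate 1-2 config has pop_bytes = 4
+// and push_bytes = 8. Starting from [dmac(6) smac(6) tag(4) etype...],
+// the 12B dmac/smac are saved, eth advances past the popped tag, both
+// tags from raw_tags are written, eth backs up 8B for the pushed tags,
+// and the saved dmac/smac are written back, yielding
+// [dmac(6) smac(6) outer-tag(4) inner-tag(4) etype...] with l2_len
+// increased by 4 and the buffer start advanced by -4.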
+
+
+// Perform the egress pre-vlan tag rewrite EFP Filter check. The post-vlan tag rewrite
+// check is a separate graph node.
+//
+// This check ensures that a packet being output to an interface (before output vtr
+// is performed) has vlan tags that match those on a packet received from that
+// interface (after vtr has been performed).
+// This means verifying that any tags pushed by input vtr are present on the packet.
+//
+// Return 0 if ok, 1 if packet should be dropped.
+// This function should be passed the input vtr config for the interface.
+always_inline u8
+l2_efp_filter_process (vlib_buffer_t * b0,
+ vtr_config_t * in_config)
+{
+ u8 * eth;
+ u64 packet_tags;
+ u64 tag_mask;
+
+ eth = vlib_buffer_get_current (b0);
+
+ // If there are 2 tags pushed, they must match config->tags[0] and config->tags[1].
+// If there is one tag pushed, it must match config->tags[1].
+ // If there are 0 tags pushed, the check passes.
+
+ // mask for two vlan id and ethertypes, no cos bits
+ tag_mask = clib_net_to_host_u64(0xFFFF0FFFFFFF0FFF);
+ // mask for one vlan id and ethertype, no cos bits
+ tag_mask = (in_config->push_bytes == 4) ? clib_net_to_host_u64(0xFFFF0FFF) : tag_mask;
+ // mask for always match
+ tag_mask = (in_config->push_bytes == 0) ? 0 : tag_mask;
+
+ // Read 8B from the packet, getting the proper set of vlan tags
+ // For 0 push bytes, the address doesn't matter since the mask clears the data to 0.
+ packet_tags = *((u64 *)(eth + 4 + in_config->push_bytes));
+
+ // Check if the packet tags match the configured tags
+ return (packet_tags & tag_mask) != in_config->raw_tags;
+}
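+
+// For example (illustrative): if input vtr pushes a single dot1q tag,
+// push_bytes is 4, so the 8B read at eth + 8 covers the last 4 smac bytes
+// plus the packet's outer tag. The mask keeps only that tag's ethertype
+// and 12-bit vlan id, and the result is compared against raw_tags, where
+// a single configured tag occupies tags[1] (the second 4 bytes).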
+
+
+// Configure vlan tag rewrite on the given interface.
+// Returns 0 on success, or a VNET_API_ERROR code on failure.
+u32 l2vtr_configure(vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 sw_if_index,
+ u32 vtr_op,
+ u32 push_dot1q,
+ u32 vtr_tag1,
+ u32 vtr_tag2);
+
+// Get the vlan tag rewrite config on the given interface.
+// Returns 0 on success, or a VNET_API_ERROR code on failure.
+u32 l2vtr_get (vlib_main_t * vlib_main,
+ vnet_main_t * vnet_main,
+ u32 sw_if_index,
+ u32 *vtr_op,
+ u32 *push_dot1q,
+ u32 *vtr_tag1,
+ u32 *vtr_tag2);
+
+#endif // included_vnet_l2_vtr_h
+
diff --git a/vnet/vnet/l2/l2_xcrw.c b/vnet/vnet/l2/l2_xcrw.c
new file mode 100644
index 00000000000..f5fe3ca14e4
--- /dev/null
+++ b/vnet/vnet/l2/l2_xcrw.c
@@ -0,0 +1,559 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/l2/l2_xcrw.h>
+
+/*
+ * General L2 / L3 cross-connect, used to set up
+ * "L2 interface <--> your-favorite-tunnel-encap" tunnels.
+ *
+ * We set up a typical L2 cross-connect or (future) bridge
+ * to hook L2 interface(s) up to the L3 stack in arbitrary ways.
+ *
+ * Each l2_xcrw adjacency specifies 3 things:
+ *
+ * 1. The next graph node (presumably in the L3 stack) to
+ * process the (L2 -> L3) packet
+ *
+ * 2. A new value for vnet_buffer(b)->sw_if_index[VLIB_TX]
+ * (i.e. a lookup FIB index),
+ *
+ * 3. A rewrite string to apply.
+ *
+ * Example: to cross-connect an L2 interface or (future) bridge
+ * to an mpls-o-gre tunnel, set up the L2 rewrite string as shown in
+ * mpls_gre_rewrite, and use "mpls-post-rewrite" to fix the
+ * GRE IP header checksum and length fields.
+ */
+
+typedef struct {
+ u32 next_index;
+ u32 tx_fib_index;
+} l2_xcrw_trace_t;
+
+/* packet trace format function */
+static u8 * format_l2_xcrw_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_xcrw_trace_t * t = va_arg (*args, l2_xcrw_trace_t *);
+
+ s = format (s, "L2_XCRW: next index %d tx_fib_index %d",
+ t->next_index, t->tx_fib_index);
+ return s;
+}
+
+l2_xcrw_main_t l2_xcrw_main;
+
+static vlib_node_registration_t l2_xcrw_node;
+
+static char * l2_xcrw_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_xcrw_error
+#undef _
+};
+
+static uword
+l2_xcrw_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ l2_xcrw_next_t next_index;
+ l2_xcrw_main_t * xcm = &l2_xcrw_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_xcrw_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t * em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ l2_xcrw_adjacency_t * adj0, * adj1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+ adj0 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index0);
+ adj1 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index1);
+
+ next0 = adj0->rewrite_header.next_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] =
+ adj0->rewrite_header.sw_if_index;
+
+ next1 = adj1->rewrite_header.next_index;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] =
+ adj1->rewrite_header.sw_if_index;
+
+ if (PREDICT_TRUE(next0 > 0))
+ {
+ u8 * h0 = vlib_buffer_get_current (b0);
+ vnet_rewrite_one_header (adj0[0], h0,
+ adj0->rewrite_header.data_bytes);
+ vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+ em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++;
+ }
+
+ if (PREDICT_TRUE(next1 > 0))
+ {
+ u8 * h1 = vlib_buffer_get_current (b1);
+ vnet_rewrite_one_header (adj1[0], h1,
+ adj1->rewrite_header.data_bytes);
+ vlib_buffer_advance (b1, -adj1->rewrite_header.data_bytes);
+ em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++;
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+              if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_xcrw_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->tx_fib_index = adj0->rewrite_header.sw_if_index;
+ }
+              if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_xcrw_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->next_index = next1;
+ t->tx_fib_index = adj1->rewrite_header.sw_if_index;
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ l2_xcrw_adjacency_t * adj0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+ adj0 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index0);
+
+ next0 = adj0->rewrite_header.next_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] =
+ adj0->rewrite_header.sw_if_index;
+
+ if (PREDICT_TRUE(next0 > 0))
+ {
+ u8 *h0 = vlib_buffer_get_current (b0);
+ vnet_rewrite_one_header (adj0[0], h0,
+ adj0->rewrite_header.data_bytes);
+ vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+ em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++;
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_xcrw_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->tx_fib_index = adj0->rewrite_header.sw_if_index;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (l2_xcrw_node, static) = {
+ .function = l2_xcrw_node_fn,
+ .name = "l2-xcrw",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_xcrw_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_xcrw_error_strings),
+ .error_strings = l2_xcrw_error_strings,
+
+ .n_next_nodes = L2_XCRW_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_XCRW_NEXT_DROP] = "error-drop",
+ },
+};
+
+clib_error_t *l2_xcrw_init (vlib_main_t *vm)
+{
+ l2_xcrw_main_t * mp = &l2_xcrw_main;
+
+ mp->vlib_main = vm;
+  mp->vnet_main = vnet_get_main();
+ mp->tunnel_index_by_l2_sw_if_index = hash_create (0, sizeof(uword));
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_xcrw_init);
+
+static uword dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+static u8 * format_xcrw_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "xcrw%d", dev_instance);
+}
+
+VNET_DEVICE_CLASS (xcrw_device_class,static) = {
+ .name = "Xcrw",
+ .format_device_name = format_xcrw_name,
+ .tx_function = dummy_interface_tx,
+};
+
+/* Create a sham tunnel interface and return its sw_if_index */
+static u32
+create_xcrw_interface (vlib_main_t * vm)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ static u32 instance;
+ u8 address[6];
+ u32 hw_if_index;
+ vnet_hw_interface_t * hi;
+ u32 sw_if_index;
+
+ /* mac address doesn't really matter */
+ memset (address, 0, sizeof (address));
+ address[2] = 0x12;
+
+  /* can only return an error if phy != 0 */
+ (void) ethernet_register_interface
+ (vnm,
+ xcrw_device_class.index,
+ instance++,
+ address,
+ &hw_if_index,
+ /* flag change */ 0);
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ sw_if_index = hi->sw_if_index;
+ vnet_sw_interface_set_flags (vnm, sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ /* Output to the sham tunnel invokes the encap node */
+ hi->output_node_index = l2_xcrw_node.index;
+
+ return sw_if_index;
+}
+
+int vnet_configure_l2_xcrw (vlib_main_t * vm, vnet_main_t *vnm,
+ u32 l2_sw_if_index, u32 tx_fib_index,
+ u8 * rewrite, u32 next_node_index, int is_add)
+{
+ l2_xcrw_main_t * xcm = &l2_xcrw_main;
+ l2_xcrw_adjacency_t * a;
+ l2_xcrw_tunnel_t * t;
+ uword * p;
+
+ if (is_add)
+ {
+
+ pool_get (xcm->tunnels, t);
+
+ /* No interface allocated? Do it. Otherwise, set admin up */
+ if (t->tunnel_sw_if_index == 0)
+ t->tunnel_sw_if_index = create_xcrw_interface (vm);
+ else
+ vnet_sw_interface_set_flags (vnm, t->tunnel_sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ t->l2_sw_if_index = l2_sw_if_index;
+
+ vec_validate (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+
+ a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+ memset (a, 0, sizeof (*a));
+
+ a->rewrite_header.sw_if_index = tx_fib_index;
+
+ /*
+ * Add or find a dynamic disposition for the successor node,
+ * e.g. so we can ship pkts to mpls_post_rewrite...
+ */
+ a->rewrite_header.next_index =
+ vlib_node_add_next (vm, l2_xcrw_node.index, next_node_index);
+
+ if (vec_len (rewrite))
+ vnet_rewrite_set_data (a[0], rewrite, vec_len(rewrite));
+
+ set_int_l2_mode (vm, vnm, MODE_L2_XC, t->l2_sw_if_index, 0, 0, 0,
+ t->tunnel_sw_if_index);
+ hash_set (xcm->tunnel_index_by_l2_sw_if_index,
+ t->l2_sw_if_index, t - xcm->tunnels);
+ return 0;
+ }
+ else
+ {
+ p = hash_get (xcm->tunnel_index_by_l2_sw_if_index, l2_sw_if_index);
+ if (p == 0)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ t = pool_elt_at_index (xcm->tunnels, p[0]);
+
+ a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+ /* Reset adj to drop traffic */
+ memset (a, 0, sizeof (*a));
+
+ set_int_l2_mode (vm, vnm, MODE_L3, t->l2_sw_if_index, 0, 0, 0, 0);
+
+ vnet_sw_interface_set_flags (vnm, t->tunnel_sw_if_index, 0 /* down */);
+
+ hash_unset (xcm->tunnel_index_by_l2_sw_if_index, l2_sw_if_index);
+ pool_put (xcm->tunnels, t);
+ }
+ return 0;
+}
+
+
+static clib_error_t *
+set_l2_xcrw_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ int is_add = 1;
+ int is_ipv6 = 0; /* for fib id -> fib index mapping */
+ u32 tx_fib_id = ~0;
+ u32 tx_fib_index = ~0;
+ u32 next_node_index = ~0;
+ u32 l2_sw_if_index;
+ u8 * rw = 0;
+ vnet_main_t * vnm = vnet_get_main();
+ int rv;
+
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ if (! unformat (line_input, "%U",
+ unformat_vnet_sw_interface, vnm, &l2_sw_if_index))
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "next %U",
+ unformat_vlib_node, vm, &next_node_index))
+ ;
+ else if (unformat (line_input, "tx-fib-id %d", &tx_fib_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "ipv6"))
+ is_ipv6 = 1;
+ else if (unformat (line_input, "rw %U",
+ unformat_hex_string, &rw));
+ else
+ break;
+ }
+
+ if (next_node_index == ~0)
+ return clib_error_return (0, "next node not specified");
+
+ if (tx_fib_id != ~0)
+ {
+ uword * p;
+
+ if (is_ipv6)
+ p = hash_get (ip6_main.fib_index_by_table_id, tx_fib_id);
+ else
+ p = hash_get (ip4_main.fib_index_by_table_id, tx_fib_id);
+
+ if (p == 0)
+ return clib_error_return (0, "nonexistent tx_fib_id %d",
+ tx_fib_id);
+
+ tx_fib_index = p[0];
+ }
+
+ rv = vnet_configure_l2_xcrw (vm, vnm, l2_sw_if_index, tx_fib_index,
+ rw, next_node_index, is_add);
+
+  /* The rewrite vector is copied into the adjacency by
+   * vnet_configure_l2_xcrw, so free it here; freeing it after the
+   * switch would leak it on the error-return paths. */
+  vec_free (rw);
+
+ switch (rv)
+ {
+
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return (0, "%U not cross-connected",
+ format_vnet_sw_if_index_name,
+ vnm, l2_sw_if_index);
+ default:
+ return clib_error_return (0, "vnet_configure_l2_xcrw returned %d",
+ rv);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = {
+ .path = "set interface l2 xcrw",
+ .short_help =
+ "set int l2 xcrw <interface> next <node-name>\n"
+ " [del] [tx-fib-id <id>] [ipv6] rw <hex-bytes>",
+ .function = set_l2_xcrw_command_fn,
+};
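+
+/*
+ * Example usage (illustrative; the interface name and rewrite bytes are
+ * placeholders, and the next node must exist in the running graph):
+ *
+ *   set int l2 xcrw GigabitEthernet2/0/0 next mpls-post-rewrite
+ *       tx-fib-id 0 rw <hex-bytes>
+ */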
+
+static u8 * format_l2xcrw (u8 * s, va_list * args)
+{
+ vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
+ l2_xcrw_tunnel_t * t = va_arg (*args, l2_xcrw_tunnel_t *);
+ l2_xcrw_main_t * xcm = &l2_xcrw_main;
+ vlib_main_t * vm = vlib_get_main ();
+ l2_xcrw_adjacency_t * a;
+ u8 * rewrite_string;
+
+ if (t == 0)
+ {
+ s = format (s, "%-25s%s", "L2 interface", "Tunnel Details");
+ return s;
+ }
+
+ s = format (s, "%-25U %U ",
+ format_vnet_sw_if_index_name, vnm, t->l2_sw_if_index,
+ format_vnet_sw_if_index_name, vnm, t->tunnel_sw_if_index);
+
+ a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+
+ s = format (s, "next %U ",
+ format_vlib_next_node_name, vm, l2_xcrw_node.index,
+ a->rewrite_header.next_index);
+
+ if (a->rewrite_header.sw_if_index != ~0)
+ s = format (s, "tx fib index %d ", a->rewrite_header.sw_if_index);
+
+ if (a->rewrite_header.data_bytes)
+ {
+ rewrite_string = (u8 *)(a + 1);
+ rewrite_string -= a->rewrite_header.data_bytes;
+ s = format (s, "rewrite data: %U ",
+ format_hex_bytes, rewrite_string,
+ a->rewrite_header.data_bytes);
+ }
+
+ s = format (s, "\n");
+
+ return s;
+}
+
+
+static clib_error_t *
+show_l2xcrw_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ l2_xcrw_main_t * xcm = &l2_xcrw_main;
+ l2_xcrw_tunnel_t * t;
+
+ if (pool_elts (xcm->tunnels) == 0)
+ {
+ vlib_cli_output (vm, "No L2 / L3 rewrite cross-connects configured");
+ return 0;
+ }
+
+ vlib_cli_output (vm, "%U", format_l2xcrw, 0, 0);
+
+ pool_foreach (t, xcm->tunnels,
+ ({
+ vlib_cli_output (vm, "%U", format_l2xcrw, vnm, t);
+ }));
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_l2xcrw_command, static) = {
+ .path = "show l2xcrw",
+ .short_help = "Display L2/L3 rewrite cross-connects",
+ .function = show_l2xcrw_command_fn,
+};
diff --git a/vnet/vnet/l2/l2_xcrw.h b/vnet/vnet/l2/l2_xcrw.h
new file mode 100644
index 00000000000..d32d1e8df5c
--- /dev/null
+++ b/vnet/vnet/l2/l2_xcrw.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_l2_xcrw_h__
+#define __included_l2_xcrw_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/api_errno.h>
+#include <vnet/ethernet/ethernet.h>
+
+typedef struct {
+ /*
+ * Let: rewrite_header.sw_if_index = tx_fib_index or ~0.
+ * rewrite_header.next_index = L2_XCRW_NEXT_XXX
+ */
+ vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE);
+} l2_xcrw_adjacency_t;
+
+typedef struct {
+ /* L2 interface */
+ u32 l2_sw_if_index;
+
+ /* Tunnel interface */
+ u32 tunnel_sw_if_index; /* This field remains set in freed pool elts */
+
+} l2_xcrw_tunnel_t;
+
+typedef struct {
+ u32 cached_next_index;
+
+ /* Vector of cross-connect rewrites */
+ l2_xcrw_adjacency_t * adj_by_sw_if_index;
+
+ /* Pool of xcrw tunnels */
+ l2_xcrw_tunnel_t * tunnels;
+
+ /* Tunnel index by tunnel sw_if_index */
+ uword * tunnel_index_by_l2_sw_if_index;
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} l2_xcrw_main_t;
+
+typedef enum {
+ L2_XCRW_NEXT_DROP,
+ L2_XCRW_N_NEXT,
+} l2_xcrw_next_t;
+
+#define foreach_l2_xcrw_error \
+_(DROP, "Packets dropped") \
+_(FWD, "Packets forwarded")
+
+typedef enum {
+#define _(sym,str) L2_XCRW_ERROR_##sym,
+ foreach_l2_xcrw_error
+#undef _
+ L2_XCRW_N_ERROR,
+} l2_xcrw_error_t;
+
+#endif /* __included_l2_xcrw_h__ */