diff options
author | Ed Warnicke <eaw@cisco.com> | 2015-12-08 15:45:58 -0700 |
---|---|---|
committer | Ed Warnicke <eaw@cisco.com> | 2015-12-08 15:47:27 -0700 |
commit | cb9cadad578297ffd78fa8a33670bdf1ab669e7e (patch) | |
tree | 6ac2be912482cc7849a26f0ab845561c3d7f4e26 /vnet/vnet/l2 | |
parent | fb0815d4ae4bb0fe27bd9313f34b45c8593b907e (diff) |
Initial commit of vpp code.v1.0.0
Change-Id: Ib246f1fbfce93274020ee93ce461e3d8bd8b9f17
Signed-off-by: Ed Warnicke <eaw@cisco.com>
Diffstat (limited to 'vnet/vnet/l2')
31 files changed, 9617 insertions, 0 deletions
diff --git a/vnet/vnet/l2/feat_bitmap.c b/vnet/vnet/l2/feat_bitmap.c new file mode 100644 index 00000000000..74917cda3ae --- /dev/null +++ b/vnet/vnet/l2/feat_bitmap.c @@ -0,0 +1,166 @@ +/* + * feat_bitmap.c: bitmap for managing feature invocation + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + + +// Drop node for feature bitmaps +// For features that just do a drop, or are not yet implemented. +// Initial feature dispatch nodes don't need to set b0->error +// in case of a possible drop because that will be done here. +// The next node is always error-drop. 
+ + +static vlib_node_registration_t feat_bitmap_drop_node; + +#define foreach_feat_bitmap_drop_error \ +_(NO_FWD, "L2 feature forwarding disabled") \ +_(NYI, "L2 feature not implemented") + +typedef enum { +#define _(sym,str) FEAT_BITMAP_DROP_ERROR_##sym, + foreach_feat_bitmap_drop_error +#undef _ + FEAT_BITMAP_DROP_N_ERROR, +} feat_bitmap_drop_error_t; + +static char * feat_bitmap_drop_error_strings[] = { +#define _(sym,string) string, + foreach_feat_bitmap_drop_error +#undef _ +}; + +typedef enum { + FEAT_BITMAP_DROP_NEXT_DROP, + FEAT_BITMAP_DROP_N_NEXT, +} feat_bitmap_drop_next_t; + +typedef struct { + u32 feature_bitmap; +} feat_bitmap_drop_trace_t; + +/* packet trace format function */ +static u8 * format_feat_bitmap_drop_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + feat_bitmap_drop_trace_t * t = va_arg (*args, feat_bitmap_drop_trace_t *); + + s = format (s, "feat_bitmap_drop: feature bitmap 0x%08x", t->feature_bitmap); + return s; +} + +static uword +feat_bitmap_drop_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + feat_bitmap_drop_next_t next_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & 
VLIB_BUFFER_IS_TRACED))) { + feat_bitmap_drop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->feature_bitmap = vnet_buffer(b0)->l2.feature_bitmap; + } + + if (vnet_buffer(b0)->l2.feature_bitmap == 1) { + // If we are executing the last feature, this is the + // No forwarding catch-all + b0->error = node->errors[FEAT_BITMAP_DROP_ERROR_NO_FWD]; + } else { + b0->error = node->errors[FEAT_BITMAP_DROP_ERROR_NYI]; + } + next0 = FEAT_BITMAP_DROP_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +clib_error_t *feat_bitmap_drop_init (vlib_main_t *vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (feat_bitmap_drop_init); + +VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = { + .function = feat_bitmap_drop_node_fn, + .name = "feature-bitmap-drop", + .vector_size = sizeof (u32), + .format_trace = format_feat_bitmap_drop_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(feat_bitmap_drop_error_strings), + .error_strings = feat_bitmap_drop_error_strings, + + .n_next_nodes = FEAT_BITMAP_DROP_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [FEAT_BITMAP_DROP_NEXT_DROP] = "error-drop", + }, +}; + + diff --git a/vnet/vnet/l2/feat_bitmap.h b/vnet/vnet/l2/feat_bitmap.h new file mode 100644 index 00000000000..7dd36a7712e --- /dev/null +++ b/vnet/vnet/l2/feat_bitmap.h @@ -0,0 +1,80 @@ +/* + * feat_bitmap.h: bitmap for managing feature invocation + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_l2_feat_bitmap_h +#define included_vnet_l2_feat_bitmap_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +/* + * The feature bitmap is a way of organizing input and output feature graph nodes. + * The set of features to be executed are arranged in a bitmap with one bit per + * feature and each bit positioned in the same order that the features should be + * executed. Features can be dynamically removed from the set by masking off their + * corresponding bits. The bitmap is stored in packet context. Each feature clears + * its bit and then calls feat_bitmap_get_next_node_index() to go to the next + * graph node. + */ + + +// 32 features in a u32 bitmap +#define FEAT_MAX 32 + +// Initialize the feature next-node indexes of a graph node. +// Should be called by the init function of each feature graph node. 
+always_inline +void feat_bitmap_init_next_nodes ( + vlib_main_t * vm, + u32 node_index, // the current graph node index + u32 num_features, // number of entries in feat_names + char ** feat_names, // array of feature graph node names + u32 * next_nodes) // array of 32 next indexes to init +{ + u32 idx; + + ASSERT(num_features <= FEAT_MAX); + + for (idx=0; idx<num_features; idx++) { + if (vlib_get_node_by_name(vm, (u8 *) feat_names[idx])) { + next_nodes[idx] = + vlib_node_add_named_next(vm, node_index, feat_names[idx]); + } else { // Node may be in plugin which is not installed, use drop node + next_nodes[idx] = + vlib_node_add_named_next(vm, node_index, "feature-bitmap-drop"); + } + } + + // All unassigned bits go to the drop node + for (; idx<FEAT_MAX; idx++) { + next_nodes[idx] = vlib_node_add_named_next(vm, node_index, "feature-bitmap-drop"); + } +} + +// Return the graph node index for the feature corresponding to the +// first set bit in the bitmap. +always_inline +u32 feat_bitmap_get_next_node_index (u32 * next_nodes, u32 bitmap) +{ + u32 first_bit; + + count_leading_zeros(first_bit, bitmap); + first_bit = uword_bits - 1 - first_bit; + return next_nodes[first_bit]; +} + +#endif // included_vnet_l2_feat_bitmap_h diff --git a/vnet/vnet/l2/l2_bd.c b/vnet/vnet/l2/l2_bd.c new file mode 100644 index 00000000000..24f96d5749c --- /dev/null +++ b/vnet/vnet/l2/l2_bd.c @@ -0,0 +1,695 @@ +/* + * l2_bd.c : layer 2 bridge domain + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vlib/cli.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/format.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_bd.h> +#include <vnet/l2/l2_fib.h> +#include <vnet/l2/l2_vtr.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/vec.h> + +bd_main_t bd_main; + +// Init bridge domain if not done already +// For feature bitmap, set all bits except ARP termination +inline void +bd_validate (l2_bridge_domain_t * bd_config) +{ + if (!bd_is_valid (bd_config)) { + bd_config->feature_bitmap = ~L2INPUT_FEAT_ARP_TERM; + bd_config->bvi_sw_if_index = ~0; + bd_config->members = 0; + bd_config->mac_by_ip4 = 0; +// bd_config->mac_by_ip6 = hash_create_mem (0, sizeof(ip6_address_t), +// sizeof(uword)); + } +} + +u32 bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id) +{ + uword * p; + u32 rv; + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p) + return (p[0]); + + rv = clib_bitmap_first_clear (bdm->bd_index_bitmap); + + // mark this index busy + bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, rv, 1); + + hash_set (bdm->bd_index_by_bd_id, bd_id, rv); + + vec_validate (l2input_main.bd_configs, rv); + l2input_main.bd_configs[rv].bd_id = bd_id; + + return rv; +} + +int bd_delete_bd_index (bd_main_t * bdm, u32 bd_id) +{ + uword * p; + u32 bd_index; + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p == 0) + return -1; + + bd_index = p[0]; + + // mark this index clear + bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, bd_index, 0); + hash_unset (bdm->bd_index_by_bd_id, bd_id); + + l2input_main.bd_configs[bd_index].bd_id = ~0; + l2input_main.bd_configs[bd_index].feature_bitmap = 0; + + return 0; +} + +void +bd_add_member 
(l2_bridge_domain_t * bd_config, + l2_flood_member_t * member) +{ + // Add one element to the vector + + // When flooding, the bvi interface (if present) must be the last member + // processed due to how BVI processing can change the packet. To enable + // this order, we make the bvi interface the first in the vector and + // flooding walks the vector in reverse. + if ((member->flags == L2_FLOOD_MEMBER_NORMAL) || + (vec_len(bd_config->members) == 0)) { + vec_add1 (bd_config->members, *member); + + } else { + // Move 0th element to the end + vec_add1 (bd_config->members, bd_config->members[0]); + bd_config->members[0] = *member; + } +} + + +#define BD_REMOVE_ERROR_OK 0 +#define BD_REMOVE_ERROR_NOT_FOUND 1 + +u32 +bd_remove_member (l2_bridge_domain_t * bd_config, + u32 sw_if_index) +{ + u32 ix; + + // Find and delete the member + vec_foreach_index(ix, bd_config->members) { + if (vec_elt(bd_config->members, ix).sw_if_index == sw_if_index) { + vec_del1 (bd_config->members, ix); + return BD_REMOVE_ERROR_OK; + } + } + + return BD_REMOVE_ERROR_NOT_FOUND; +} + + +clib_error_t *l2bd_init (vlib_main_t *vm) +{ + bd_main_t *bdm = &bd_main; + u32 bd_index; + bdm->bd_index_by_bd_id = hash_create (0, sizeof(uword)); + // create a dummy bd with bd_id of 0 and bd_index of 0 with feature set + // to packet drop only. Thus, packets received from any L2 interface with + // uninitialized bd_index of 0 can be dropped safely. + bd_index = bd_find_or_add_bd_index (bdm, 0); + ASSERT (bd_index == 0); + l2input_main.bd_configs[0].feature_bitmap = L2INPUT_FEAT_DROP; + return 0; +} + +VLIB_INIT_FUNCTION (l2bd_init); + + +// Set the learn/forward/flood flags for the bridge domain +// Return 0 if ok, non-zero if for an error. 
+u32 +bd_set_flags (vlib_main_t * vm, + u32 bd_index, + u32 flags, + u32 enable) { + + l2_bridge_domain_t * bd_config; + u32 feature_bitmap = 0; + + vec_validate (l2input_main.bd_configs, bd_index); + bd_config = vec_elt_at_index(l2input_main.bd_configs, bd_index); + + bd_validate (bd_config); + + if (flags & L2_LEARN) { + feature_bitmap |= L2INPUT_FEAT_LEARN; + } + if (flags & L2_FWD) { + feature_bitmap |= L2INPUT_FEAT_FWD; + } + if (flags & L2_FLOOD) { + feature_bitmap |= L2INPUT_FEAT_FLOOD; + } + if (flags & L2_UU_FLOOD) { + feature_bitmap |= L2INPUT_FEAT_UU_FLOOD; + } + if (flags & L2_ARP_TERM) { + feature_bitmap |= L2INPUT_FEAT_ARP_TERM; + } + + if (enable) { + bd_config->feature_bitmap |= feature_bitmap; + } else { + bd_config->feature_bitmap &= ~feature_bitmap; + } + + return 0; +} + +// set bridge-domain learn enable/disable +// The CLI format is: +// set bridge-domain learn <bd_id> [disable] +static clib_error_t * +bd_learn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index, bd_id; + u32 enable; + uword * p; + + if (! 
unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the bridge domain flag + if (bd_set_flags(vm, bd_index, L2_LEARN, enable)) { + error = clib_error_return (0, "bridge-domain id %d out of range", bd_index); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (bd_learn_cli, static) = { + .path = "set bridge-domain learn", + .short_help = "set bridge-domain learn <bridge-domain-id> [disable]", + .function = bd_learn, +}; + +// set bridge-domain forward enable/disable +// The CLI format is: +// set bridge-domain forward <bd_index> [disable] +static clib_error_t * +bd_fwd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index, bd_id; + u32 enable; + uword * p; + + if (! 
unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the bridge domain flag + if (bd_set_flags(vm, bd_index, L2_FWD, enable)) { + error = clib_error_return (0, "bridge-domain id %d out of range", bd_index); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (bd_fwd_cli, static) = { + .path = "set bridge-domain forward", + .short_help = "set bridge-domain forward <bridge-domain-id> [disable]", + .function = bd_fwd, +}; + +// set bridge-domain flood enable/disable +// The CLI format is: +// set bridge-domain flood <bd_index> [disable] +static clib_error_t * +bd_flood (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index, bd_id; + u32 enable; + uword * p; + + if (! 
unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the bridge domain flag + if (bd_set_flags(vm, bd_index, L2_FLOOD, enable)) { + error = clib_error_return (0, "bridge-domain id %d out of range", bd_index); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (bd_flood_cli, static) = { + .path = "set bridge-domain flood", + .short_help = "set bridge-domain flood <bridge-domain-id> [disable]", + .function = bd_flood, +}; + +// set bridge-domain unkown-unicast flood enable/disable +// The CLI format is: +// set bridge-domain uu-flood <bd_index> [disable] +static clib_error_t * +bd_uu_flood (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index, bd_id; + u32 enable; + uword * p; + + if (! 
unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the bridge domain flag + if (bd_set_flags(vm, bd_index, L2_UU_FLOOD, enable)) { + error = clib_error_return (0, "bridge-domain id %d out of range", bd_index); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (bd_uu_flood_cli, static) = { + .path = "set bridge-domain uu-flood", + .short_help = "set bridge-domain uu-flood <bridge-domain-id> [disable]", + .function = bd_uu_flood, +}; + +// set bridge-domain arp term enable/disable +// The CLI format is: +// set bridge-domain arp term <bridge-domain-id> [disable] +static clib_error_t * +bd_arp_term (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index, bd_id; + u32 enable; + uword * p; + + if (! 
unformat (input, "%d", &bd_id)) { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p) bd_index = *p; + else return clib_error_return (0, "No such bridge domain %d", bd_id); + + enable = 1; + if (unformat (input, "disable")) enable = 0; + + // set the bridge domain flag + if (bd_set_flags(vm, bd_index, L2_ARP_TERM, enable)) { + error = clib_error_return (0, "bridge-domain id %d out of range", bd_index); + goto done; + } + +done: + return error; +} + +VLIB_CLI_COMMAND (bd_arp_term_cli, static) = { + .path = "set bridge-domain arp term", + .short_help = "set bridge-domain arp term <bridge-domain-id> [disable]", + .function = bd_arp_term, +}; + + +// The clib hash implementation stores uword entries in the hash table. +// The hash table mac_by_ip4 is keyed via IP4 address and store the +// 6-byte MAC address directly in the hash table entry uword. +// This only works for 64-bit processor with 8-byte uword; which means +// this code *WILL NOT WORK* for a 32-bit prcessor with 4-byte uword. 
+u32 bd_add_del_ip_mac(u32 bd_index, + u8 *ip_addr, + u8 *mac_addr, + u8 is_ip6, + u8 is_add) +{ + l2input_main_t * l2im = &l2input_main; + l2_bridge_domain_t * bd_cfg = l2input_bd_config_from_index (l2im, bd_index); + u64 new_mac = *(u64 *) mac_addr; + u64 * old_mac; + u16 * mac16 = (u16 *) &new_mac; + + ASSERT (sizeof(uword) == sizeof(u64)); // make sure uword is 8 bytes + + mac16[3] = 0; // Clear last 2 unsed bytes of the 8-byte MAC address + if (is_ip6) { + // ip6_address_t ip6_addr = *(ip6_address_t *) ip_addr; + return 1; // not yet implemented + } else { + ip4_address_t ip4_addr = *(ip4_address_t *) ip_addr; + old_mac = (u64 *) hash_get (bd_cfg->mac_by_ip4, ip4_addr.as_u32); + if (is_add) { + if (old_mac && (*old_mac == new_mac)) return 0; // mac entry already exist + hash_set (bd_cfg->mac_by_ip4, ip4_addr.as_u32, new_mac); + } else { + if (old_mac && (*old_mac == new_mac)) { // mac entry match + hash_unset (bd_cfg->mac_by_ip4, ip4_addr.as_u32); // clear entry + } else { + return 1; + } + } + return 0; + } +} + +// set bridge-domain arp entry add/delete +// The CLI format is: +// set bridge-domain arp entry <bd-id> <ip-addr> <mac-addr> [del] +static clib_error_t * +bd_arp_entry (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index, bd_id; + u8 is_add = 1; + u8 is_ip6 = 0; + u8 ip_addr[16]; + u8 mac_addr[6]; + uword * p; + + if (! 
unformat (input, "%d", &bd_id)) { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p) bd_index = *p; + else return clib_error_return (0, "No such bridge domain %d", bd_id); + + if (unformat (input, "%U", unformat_ip4_address, ip_addr)) { + is_ip6 = 0; + } else if (unformat (input, "%U", unformat_ip6_address, ip_addr)) { + is_ip6 = 1; + } else { + error = clib_error_return (0, "expecting IP address but got `%U'", + format_unformat_error, input); + goto done; + } + + if (!unformat(input, "%U", unformat_ethernet_address, mac_addr)) { + error = clib_error_return (0, "expecting MAC address but got `%U'", + format_unformat_error, input); + goto done; + } + + if (unformat (input, "del")) { + is_add = 0; + } + + // set the bridge domain flagAdd IP-MAC entry into bridge domain + if (bd_add_del_ip_mac(bd_index, ip_addr, mac_addr, is_ip6, is_add)) { + error = clib_error_return (0, "MAC %s for IP %U and MAC %U failed", + is_add ? "add" : "del", + format_ip4_address, ip_addr, + format_ethernet_address, mac_addr); + } + +done: + return error; +} + +VLIB_CLI_COMMAND (bd_arp_entry_cli, static) = { + .path = "set bridge-domain arp entry", + .short_help = "set bridge-domain arp entry <bd-id> <ip-addr> <mac-addr> [del]", + .function = bd_arp_entry, +}; + +u8* format_vtr(u8 * s, va_list *args) +{ + u32 vtr_op = va_arg (*args, u32); + u32 dot1q = va_arg (*args, u32); + u32 tag1 = va_arg (*args, u32); + u32 tag2 = va_arg (*args, u32); + switch (vtr_op) { + case L2_VTR_DISABLED: + return format (s, "none"); + case L2_VTR_PUSH_1: + return format (s, "push-1 %s %d", dot1q? "dot1q":"dot1ad", tag1); + case L2_VTR_PUSH_2: + return format (s, "push-2 %s %d %d", dot1q? 
"dot1q":"dot1ad", tag1, tag2); + case L2_VTR_POP_1: + return format (s, "pop-1"); + case L2_VTR_POP_2: + return format (s, "pop-2"); + case L2_VTR_TRANSLATE_1_1: + return format (s, "trans-1-1 %s %d", dot1q? "dot1q":"dot1ad", tag1); + case L2_VTR_TRANSLATE_1_2: + return format (s, "trans-1-2 %s %d %d",dot1q? "dot1q":"dot1ad", tag1, tag2); + case L2_VTR_TRANSLATE_2_1: + return format (s, "trans-2-1 %s %d", dot1q? "dot1q":"dot1ad", tag1); + case L2_VTR_TRANSLATE_2_2: + return format (s, "trans-2-2 %s %d %d", dot1q? "dot1q":"dot1ad", tag1, tag2); + default: + return format (s, "none"); + } +} + +// show bridge-domain state +// The CLI format is: +// show bridge-domain [<bd_index>] +static clib_error_t * +bd_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u32 bd_index = ~0; + l2_bridge_domain_t * bd_config; + u32 start, end; + u32 printed; + u32 detail = 0; + u32 intf = 0; + u32 arp = 0; + u32 bd_id = ~0; + uword * p; + + start = 0; + end = vec_len(l2input_main.bd_configs); + + if (unformat (input, "%d", &bd_id)) { + if (unformat (input, "detail")) detail = 1; + else if (unformat (input, "det")) detail = 1; + if (unformat (input, "int")) intf = 1; + if (unformat (input, "arp")) arp = 1; + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p) bd_index = *p; + else return clib_error_return (0, "No such bridge domain %d", bd_id); + + vec_validate (l2input_main.bd_configs, bd_index); + bd_config = vec_elt_at_index(l2input_main.bd_configs, bd_index); + if (bd_is_valid (bd_config)) { + start = bd_index; + end = start + 1; + } else { + vlib_cli_output (vm, "bridge-domain %d not in use", bd_id); + goto done; + } + } + + // Show all bridge-domains that have been initialized + + printed = 0; + for (bd_index=start; bd_index<end; bd_index++) { + bd_config = vec_elt_at_index(l2input_main.bd_configs, bd_index); + if (bd_is_valid(bd_config)) { + if 
(!printed) { + printed = 1; + vlib_cli_output (vm, "%=5s %=7s %=10s %=10s %=10s %=10s %=10s %=14s", + "ID", + "Index", + "Learning", + "U-Forwrd", + "UU-Flood", + "Flooding", + "ARP-Term", + "BVI-Intf"); + } + + vlib_cli_output ( + vm, "%=5d %=7d %=10s %=10s %=10s %=10s %=10s %=14U", + bd_config->bd_id, bd_index, + bd_config->feature_bitmap & L2INPUT_FEAT_LEARN ? "on" : "off", + bd_config->feature_bitmap & L2INPUT_FEAT_FWD ? "on" : "off", + bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD ? "on" : "off", + bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD ? "on" : "off", + bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM ? "on" : "off", + format_vnet_sw_if_index_name_with_NA, vnm, bd_config->bvi_sw_if_index); + + if (detail || intf) { + // Show all member interfaces + + l2_flood_member_t * member; + u32 header = 0; + + vec_foreach(member, bd_config->members) { + u32 vtr_opr, dot1q, tag1, tag2; + if (!header) { + header = 1; + vlib_cli_output (vm, "\n%=30s%=7s%=5s%=5s%=30s", + "Interface", "Index", "SHG", "BVI","VLAN-Tag-Rewrite"); + } + l2vtr_get(vm, vnm, member->sw_if_index, &vtr_opr, &dot1q, &tag1, &tag2); + vlib_cli_output (vm, "%=30U%=7d%=5d%=5s%=30U", + format_vnet_sw_if_index_name, vnm, member->sw_if_index, + member->sw_if_index, + member->shg, + member->flags & L2_FLOOD_MEMBER_BVI ? 
"*" : "-", + format_vtr, vtr_opr, dot1q, tag1, tag2); + } + } + + if ((detail || arp) && + (bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM)) { + u32 ip4_addr; + u64 mac_addr; + vlib_cli_output (vm, "\n IP4 to MAC table for ARP Termination"); + hash_foreach (ip4_addr, mac_addr, bd_config->mac_by_ip4, ({ + vlib_cli_output (vm, "%=20U => %=20U", + format_ip4_address, &ip4_addr, + format_ethernet_address, &mac_addr); + })); + } + } + } + + if (!printed) { + vlib_cli_output (vm, "no bridge-domains in use"); + } + + done: + return error; +} + +VLIB_CLI_COMMAND (bd_show_cli, static) = { + .path = "show bridge-domain", + .short_help = "show bridge-domain [bridge-domain-id [detail|int|arp]]", + .function = bd_show, +}; diff --git a/vnet/vnet/l2/l2_bd.h b/vnet/vnet/l2/l2_bd.h new file mode 100644 index 00000000000..9d29a83b22f --- /dev/null +++ b/vnet/vnet/l2/l2_bd.h @@ -0,0 +1,120 @@ +/* + * l2_bd.h : layer 2 bridge domain + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_l2bd_h +#define included_l2bd_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +typedef struct { + // hash bd_id -> bd_index + uword * bd_index_by_bd_id; + + // Busy bd_index bitmap + uword * bd_index_bitmap; + + // convenience + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} bd_main_t; + +// The single bd_main object is defined in l2_bd.c. It is only declared +// here, so including this header never creates a second definition. +extern bd_main_t bd_main; + +// Bridge domain member + +#define L2_FLOOD_MEMBER_NORMAL 0 +#define L2_FLOOD_MEMBER_BVI 1 + +typedef struct { + u32 sw_if_index; // the output L2 interface + u8 flags; // 0=normal, 1=bvi + u8 shg; // split horizon group number + u16 spare; +} l2_flood_member_t; + + +// Per-bridge domain configuration + +typedef struct { + u32 feature_bitmap; + // Contains bit enables for flooding, learning, and forwarding. + // All other feature bits should always be set. + + // identity of the bridge-domain's BVI interface + // set to ~0 if there is no BVI + u32 bvi_sw_if_index; + + // output node index for bvi interface before it was changed to l2-input + u32 saved_bvi_output_node_index; + + // bridge domain id, not to be confused with bd_index + u32 bd_id; + + // Vector of members in the replication group + l2_flood_member_t * members; + + // hash ip4/ip6 -> mac for arp termination + uword *mac_by_ip4; + uword *mac_by_ip6; + +} l2_bridge_domain_t; + +// Return 1 if bridge domain has been initialized +always_inline u32 +bd_is_valid (l2_bridge_domain_t * bd_config) +{ + return (bd_config->feature_bitmap != 0); +} + +// Init bridge domain if not done already. +// Declared without "inline": the body lives in l2_bd.c, and a plain +// prototype makes that definition an ordinary external one. 
+void +bd_validate (l2_bridge_domain_t * bd_config); + + +void +bd_add_member (l2_bridge_domain_t * bd_config, + l2_flood_member_t * member); + +u32 +bd_remove_member (l2_bridge_domain_t * bd_config, + u32 sw_if_index); + + +#define L2_LEARN (1<<0) +#define L2_FWD (1<<1) +#define L2_FLOOD (1<<2) +#define L2_UU_FLOOD (1<<3) +#define L2_ARP_TERM (1<<4) + +u32 +bd_set_flags (vlib_main_t * vm, + u32 bd_index, + u32 flags, + u32 enable); + +u32 bd_find_or_add_bd_index (bd_main_t * 
bdm, u32 bd_id); +int bd_delete_bd_index (bd_main_t * bdm, u32 bd_id); + +u32 bd_add_del_ip_mac(u32 bd_index, + u8 *ip_addr, + u8 *mac_addr, + u8 is_ip6, + u8 is_add); + +#endif + diff --git a/vnet/vnet/l2/l2_bvi.c b/vnet/vnet/l2/l2_bvi.c new file mode 100644 index 00000000000..828e955617b --- /dev/null +++ b/vnet/vnet/l2/l2_bvi.c @@ -0,0 +1,35 @@ +/* + * l2_bvi.c : layer 2 Bridged Virtual Interface + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/l2/l2_fwd.h> +#include <vnet/l2/l2_flood.h> +#include <vnet/l2/l2_bvi.h> + + +// Call the L2 nodes that need the ethertype mapping +void +l2bvi_register_input_type (vlib_main_t * vm, + ethernet_type_t type, + u32 node_index) +{ + l2fwd_register_input_type (vm, type, node_index); + l2flood_register_input_type (vm, type, node_index); +} + + diff --git a/vnet/vnet/l2/l2_bvi.h b/vnet/vnet/l2/l2_bvi.h new file mode 100644 index 00000000000..ca5673373fb --- /dev/null +++ b/vnet/vnet/l2/l2_bvi.h @@ -0,0 +1,122 @@ +/* + * l2_bvi.h : layer 2 Bridged Virtual Interface + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_l2bvi_h +#define included_l2bvi_h + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> +#include <vppinfra/sparse_vec.h> + +#include <vnet/l2/l2_input.h> + +#define TO_BVI_ERR_OK 0 +#define TO_BVI_ERR_TAGGED 1 +#define TO_BVI_ERR_ETHERTYPE 2 + +// Send a packet from L2 processing to L3 via the BVI interface. +// Set next0 to the proper L3 input node. +// Return an error if the packet isn't what we expect. + +static_always_inline u32 +l2_to_bvi (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + vlib_buffer_t * b0, + u32 bvi_sw_if_index, + next_by_ethertype_t * l3_next, + u32 * next0) +{ + u8 l2_len; + u16 ethertype; + u8 * l3h; + + // Save L2 header position which may be changed due to packet replication + vnet_buffer (b0)->ethernet.start_of_ethernet_header = b0->current_data; + + // Strip L2 header + l2_len = vnet_buffer(b0)->l2.l2_len; + vlib_buffer_advance (b0, l2_len); + + l3h = vlib_buffer_get_current (b0); + ethertype = clib_net_to_host_u16(*(u16 *)(l3h - 2)); + + // Set the input interface to be the BVI interface + vnet_buffer(b0)->sw_if_index[VLIB_RX] = bvi_sw_if_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = ~0; + + // Go to appropriate L3 input node + if (ethertype == ETHERNET_TYPE_IP4) { + *next0 = l3_next->input_next_ip4; + } else if (ethertype == ETHERNET_TYPE_IP6) { + *next0 = l3_next->input_next_ip6; + } else { + // uncommon ethertype, check table + u32 i0; + + i0 = sparse_vec_index (l3_next->input_next_by_type, ethertype); + *next0 = vec_elt (l3_next->input_next_by_type, i0); + + if (i0 == 
SPARSE_VEC_INVALID_INDEX) { + return TO_BVI_ERR_ETHERTYPE; + } + } + + // increment BVI RX interface stat + vlib_increment_combined_counter + (vnet_main->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + vlib_main->cpu_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX], + 1, + vlib_buffer_length_in_chain (vlib_main, b0)); + return TO_BVI_ERR_OK; +} + + +// Prepare a packet that was sent to the BVI interface for L2 processing. + +static_always_inline void +bvi_to_l2 (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 cpu_index, + vlib_buffer_t * b0, + u32 bvi_sw_if_index) +{ + // Set the input interface to be the BVI interface + vnet_buffer(b0)->sw_if_index[VLIB_RX] = bvi_sw_if_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = ~0; + + // Update l2_len in packet which is expected by l2 path, + // including l2 tag push/pop code on output + vnet_update_l2_len(b0); + + // increment BVI TX interface stat + vlib_increment_combined_counter + (vnet_main->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + cpu_index, + bvi_sw_if_index, + 1, + vlib_buffer_length_in_chain (vlib_main, b0)); +} + + +void +l2bvi_register_input_type (vlib_main_t * vm, + ethernet_type_t type, + u32 node_index); +#endif diff --git a/vnet/vnet/l2/l2_classify.c b/vnet/vnet/l2/l2_classify.c new file mode 100644 index 00000000000..a6c8ebbc1b4 --- /dev/null +++ b/vnet/vnet/l2/l2_classify.c @@ -0,0 +1,551 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * l2_classify.c + */ + +#include <vnet/l2/l2_classify.h> +#include <vnet/api_errno.h> + +typedef struct { + /* per-pkt trace data */ + u32 sw_if_index; + u32 next_index; + u32 table_index; + u32 session_offset; +} l2_classify_trace_t; + +typedef struct { + vnet_classify_main_t * vcm; + l2_classify_main_t * l2cm; +} l2_classify_runtime_t; + +/* packet trace format function */ +static u8 * format_l2_classify_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_classify_trace_t * t = va_arg (*args, l2_classify_trace_t *); + + s = format (s, "l2-classify: sw_if_index %d, table %d, offset %x, next %d", + t->sw_if_index, t->table_index, t->session_offset, t->next_index); + return s; +} + +l2_classify_main_t l2_classify_main; + +vlib_node_registration_t l2_classify_node; + +#define foreach_l2_classify_error \ +_(MISS, "Classify misses") \ +_(HIT, "Classify hits") \ +_(CHAIN_HIT, "Classify hits after chain walk") \ +_(DROP, "L2 Classify Drops") + +typedef enum { +#define _(sym,str) L2_CLASSIFY_ERROR_##sym, + foreach_l2_classify_error +#undef _ + L2_CLASSIFY_N_ERROR, +} l2_classify_error_t; + +static char * l2_classify_error_strings[] = { +#define _(sym,string) string, + foreach_l2_classify_error +#undef _ +}; + +static uword +l2_classify_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2_classify_next_t next_index; + l2_classify_main_t * cm = &l2_classify_main; + vnet_classify_main_t * vcm = cm->vnet_classify_main; + l2_classify_runtime_t * rt = (l2_classify_runtime_t *)node->runtime_data; + u32 feature_bitmap; + u32 hits = 0; + u32 misses = 0; + u32 chain_hits = 0; + f64 now; + + now = vlib_time_now(vm); + + n_left_from = frame->n_vectors; + from = 
vlib_frame_vector_args (frame); + + /* First pass: compute hash */ + + while (n_left_from > 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, bi1; + ethernet_header_t * h0, * h1; + u32 sw_if_index0, sw_if_index1; + u16 type0, type1; + int type_index0, type_index1; + vnet_classify_table_t * t0, * t1; + u32 table_index0, table_index1; + u64 hash0, hash1; + + + /* prefetch next iteration */ + { + vlib_buffer_t * p1, * p2; + + p1 = vlib_get_buffer (vm, from[1]); + p2 = vlib_get_buffer (vm, from[2]); + + vlib_prefetch_buffer_header (p1, STORE); + CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + bi1 = from[1]; + b1 = vlib_get_buffer (vm, bi1); + h1 = vlib_buffer_get_current (b1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + vnet_buffer(b0)->l2_classify.table_index = ~0; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + vnet_buffer(b1)->l2_classify.table_index = ~0; + + /* Select classifier table based on ethertype */ + type0 = clib_net_to_host_u16 (h0->type); + type1 = clib_net_to_host_u16 (h1->type); + + type_index0 = (type0 == ETHERNET_TYPE_IP4) + ? L2_CLASSIFY_TABLE_IP4 : L2_CLASSIFY_TABLE_OTHER; + type_index0 = (type0 == ETHERNET_TYPE_IP6) + ? L2_CLASSIFY_TABLE_IP6 : type_index0; + + type_index1 = (type1 == ETHERNET_TYPE_IP4) + ? L2_CLASSIFY_TABLE_IP4 : L2_CLASSIFY_TABLE_OTHER; + type_index1 = (type1 == ETHERNET_TYPE_IP6) + ? 
L2_CLASSIFY_TABLE_IP6 : type_index1; + + vnet_buffer(b0)->l2_classify.table_index = + table_index0 = + rt->l2cm->classify_table_index_by_sw_if_index + [type_index0][sw_if_index0]; + + if (table_index0 != ~0) + { + t0 = pool_elt_at_index (vcm->tables, table_index0); + + vnet_buffer(b0)->l2_classify.hash = hash0 = + vnet_classify_hash_packet (t0, (u8 *) h0); + vnet_classify_prefetch_bucket (t0, hash0); + } + + vnet_buffer(b1)->l2_classify.table_index = + table_index1 = + rt->l2cm->classify_table_index_by_sw_if_index + [type_index1][sw_if_index1]; + + if (table_index1 != ~0) + { + t1 = pool_elt_at_index (vcm->tables, table_index1); + + vnet_buffer(b1)->l2_classify.hash = hash1 = + vnet_classify_hash_packet (t1, (u8 *) h1); + vnet_classify_prefetch_bucket (t1, hash1); + } + + from += 2; + n_left_from -= 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t * b0; + u32 bi0; + ethernet_header_t * h0; + u32 sw_if_index0; + u16 type0; + u32 type_index0; + vnet_classify_table_t * t0; + u32 table_index0; + u64 hash0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + vnet_buffer(b0)->l2_classify.table_index = ~0; + + /* Select classifier table based on ethertype */ + type0 = clib_net_to_host_u16 (h0->type); + + type_index0 = (type0 == ETHERNET_TYPE_IP4) + ? L2_CLASSIFY_TABLE_IP4 : L2_CLASSIFY_TABLE_OTHER; + type_index0 = (type0 == ETHERNET_TYPE_IP6) + ? 
L2_CLASSIFY_TABLE_IP6 : type_index0; + + vnet_buffer(b0)->l2_classify.table_index = + table_index0 = rt->l2cm->classify_table_index_by_sw_if_index + [type_index0][sw_if_index0]; + + if (table_index0 != ~0) + { + t0 = pool_elt_at_index (vcm->tables, table_index0); + + vnet_buffer(b0)->l2_classify.hash = hash0 = + vnet_classify_hash_packet (t0, (u8 *) h0); + vnet_classify_prefetch_bucket (t0, hash0); + } + from++; + n_left_from--; + } + + next_index = node->cached_next_index; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + /* Not enough load/store slots to dual loop... */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = L2_CLASSIFY_NEXT_ETHERNET_INPUT; + ethernet_header_t * h0; + u32 table_index0; + u64 hash0; + vnet_classify_table_t * t0; + vnet_classify_entry_t * e0; + + if (PREDICT_TRUE (n_left_from > 2)) + { + vlib_buffer_t * p2 = vlib_get_buffer(vm, from[2]); + u64 phash2; + u32 table_index2; + vnet_classify_table_t * tp2; + + /* + * Prefetch table entry two ahead. Buffer / data + * were prefetched above... 
+ */ + table_index2 = vnet_buffer(p2)->l2_classify.table_index; + + if (PREDICT_TRUE (table_index2 != ~0)) + { + tp2 = pool_elt_at_index (vcm->tables, table_index2); + phash2 = vnet_buffer(p2)->l2_classify.hash; + vnet_classify_prefetch_entry (tp2, phash2); + } + } + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current(b0); + table_index0 = vnet_buffer(b0)->l2_classify.table_index; + e0 = 0; + + if (PREDICT_TRUE(table_index0 != ~0)) + { + hash0 = vnet_buffer(b0)->l2_classify.hash; + t0 = pool_elt_at_index (vcm->tables, table_index0); + + e0 = vnet_classify_find_entry (t0, (u8 *) h0, + hash0, now); + if (e0) + { + vnet_buffer(b0)->l2_classify.opaque_index + = e0->opaque_index; + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < L2_CLASSIFY_N_NEXT)? + e0->next_index:next0; + hits++; + } + else + { + while (1) + { + if (t0->next_table_index != ~0) + t0 = pool_elt_at_index (vcm->tables, + t0->next_table_index); + else + { + next0 = (t0->miss_next_index < L2_CLASSIFY_N_NEXT)? + t0->miss_next_index:next0; + misses++; + break; + } + + hash0 = vnet_classify_hash_packet (t0, (u8 *) h0); + e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now); + if (e0) + { + vnet_buffer(b0)->l2_classify.opaque_index + = e0->opaque_index; + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < L2_CLASSIFY_N_NEXT)? 
+ e0->next_index:next0; + hits++; + chain_hits++; + break; + } + } + } + } + + if (PREDICT_FALSE(next0 == 0)) + b0->error = node->errors[L2_CLASSIFY_ERROR_DROP]; + + if (PREDICT_FALSE (next0 == ~0)) + { + + // Remove ourself from the feature bitmap + feature_bitmap = vnet_buffer(b0)->l2.feature_bitmap + & ~L2INPUT_FEAT_CLASSIFY; + + // save for next feature graph nodes + vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap; + + // Determine the next node + next0 = feat_bitmap_get_next_node_index(cm->feat_next_node_index, + feature_bitmap); + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_classify_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + t->table_index = table_index0; + t->next_index = next0; + t->session_offset = e0 ? vnet_classify_get_offset (t0, e0) : 0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + L2_CLASSIFY_ERROR_MISS, + misses); + vlib_node_increment_counter (vm, node->node_index, + L2_CLASSIFY_ERROR_HIT, + hits); + vlib_node_increment_counter (vm, node->node_index, + L2_CLASSIFY_ERROR_CHAIN_HIT, + chain_hits); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (l2_classify_node) = { + .function = l2_classify_node_fn, + .name = "l2-classify", + .vector_size = sizeof (u32), + .format_trace = format_l2_classify_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_classify_error_strings), + .error_strings = l2_classify_error_strings, + + .runtime_data_bytes = sizeof (l2_classify_runtime_t), + + .n_next_nodes = L2_CLASSIFY_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_CLASSIFY_NEXT_DROP] = "error-drop", + 
[L2_CLASSIFY_NEXT_ETHERNET_INPUT] = "ethernet-input-not-l2", + [L2_CLASSIFY_NEXT_IP4_INPUT] = "ip4-input", + [L2_CLASSIFY_NEXT_IP6_INPUT] = "ip6-input", + [L2_CLASSIFY_NEXT_LI] = "li-hit", + }, +}; + +clib_error_t *l2_classify_init (vlib_main_t *vm) +{ + l2_classify_main_t * cm = &l2_classify_main; + l2_classify_runtime_t * rt; + + rt = vlib_node_get_runtime_data (vm, l2_classify_node.index); + + cm->vlib_main = vm; + cm->vnet_main = vnet_get_main(); + cm->vnet_classify_main = &vnet_classify_main; + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2_classify_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names(), + cm->feat_next_node_index); + rt->l2cm = cm; + rt->vcm = cm->vnet_classify_main; + + return 0; +} + +VLIB_INIT_FUNCTION (l2_classify_init); + + +void vnet_l2_classify_enable_disable (u32 sw_if_index, + int enable_disable) +{ + vlib_main_t * vm = vlib_get_main(); + vnet_main_t * vnm = vnet_get_main(); + + if (enable_disable) + set_int_l2_mode (vm, vnm, MODE_L2_CLASSIFY, sw_if_index, + 0, 0, 0, 0); + else + set_int_l2_mode (vm, vnm, MODE_L3, sw_if_index, + 0, 0, 0, 0); +} + +int vnet_l2_classify_set_tables (u32 sw_if_index, + u32 ip4_table_index, + u32 ip6_table_index, + u32 other_table_index) +{ + l2_classify_main_t * cm = &l2_classify_main; + vnet_classify_main_t * vcm = cm->vnet_classify_main; + + /* Assume that we've validated sw_if_index in the API layer */ + + if (ip4_table_index != ~0 && + pool_is_free_index (vcm->tables, ip4_table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE; + + if (ip6_table_index != ~0 && + pool_is_free_index (vcm->tables, ip6_table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE2; + + if (other_table_index != ~0 && + pool_is_free_index (vcm->tables, other_table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE3; + + vec_validate + (cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_IP4], + sw_if_index); + + vec_validate + (cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_IP6], + 
sw_if_index); + + vec_validate + (cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_OTHER], + sw_if_index); + + cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_IP4] + [sw_if_index] = ip4_table_index; + + cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_IP6] + [sw_if_index] = ip6_table_index; + + cm->classify_table_index_by_sw_if_index[L2_CLASSIFY_TABLE_OTHER] + [sw_if_index] = other_table_index; + + return 0; +} + +static clib_error_t * +int_l2_classify_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 sw_if_index = ~0; + u32 ip4_table_index = ~0; + u32 ip6_table_index = ~0; + u32 other_table_index = ~0; + int rv; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "intfc %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else if (unformat (input, "ip4-table %d", &ip4_table_index)) + ; + else if (unformat (input, "ip6-table %d", &ip6_table_index)) + ; + else if (unformat (input, "other-table %d", &other_table_index)) + ; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "interface must be specified"); + + + if (ip4_table_index == ~0 && ip6_table_index == ~0 + && other_table_index == ~0) + { + vlib_cli_output (vm, "L2 classification disabled"); + vnet_l2_classify_enable_disable (sw_if_index, 0 /* enable */); + return 0; + } + + rv = vnet_l2_classify_set_tables (sw_if_index, ip4_table_index, + ip6_table_index, other_table_index); + switch(rv) + { + case 0: + vnet_l2_classify_enable_disable (sw_if_index, 1 /* enable */); + break; + + default: + return clib_error_return (0, "vnet_l2_classify_set_tables: %d", + rv); + break; + } + + return 0; +} + +VLIB_CLI_COMMAND (int_l2_classify_cli, static) = { + .path = "set interface l2 classify", + .short_help = + "set interface l2 classify intfc <int> [ip4-table <n>]\n" + " [ip6-table <n>] [other-table <n>]", + .function = 
int_l2_classify_command_fn, +}; + + diff --git a/vnet/vnet/l2/l2_classify.h b/vnet/vnet/l2/l2_classify.h new file mode 100644 index 00000000000..55c2fc8b00d --- /dev/null +++ b/vnet/vnet/l2/l2_classify.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_vnet_l2_classify_h__ +#define __included_vnet_l2_classify_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + +#include <vnet/classify/vnet_classify.h> + +typedef enum { + L2_CLASSIFY_NEXT_DROP, + L2_CLASSIFY_NEXT_ETHERNET_INPUT, + L2_CLASSIFY_NEXT_IP4_INPUT, + L2_CLASSIFY_NEXT_IP6_INPUT, + L2_CLASSIFY_NEXT_LI, + L2_CLASSIFY_N_NEXT, +} l2_classify_next_t; + +typedef enum { + L2_CLASSIFY_TABLE_IP4, + L2_CLASSIFY_TABLE_IP6, + L2_CLASSIFY_TABLE_OTHER, + L2_CLASSIFY_N_TABLES, +} l2_classify_table_id_t; + +typedef struct { + + // Next nodes for each feature + u32 feat_next_node_index[32]; + + /* Per-address-family classifier table vectors */ + u32 * classify_table_index_by_sw_if_index [L2_CLASSIFY_N_TABLES]; + + /* convenience variables */ + vlib_main_t 
* vlib_main; + vnet_main_t * vnet_main; + vnet_classify_main_t * vnet_classify_main; +} l2_classify_main_t; + +l2_classify_main_t l2_classify_main; + +vlib_node_registration_t l2_classify_node; + +void vnet_l2_classify_enable_disable (u32 sw_if_index, + int enable_disable); + +int vnet_l2_classify_set_tables (u32 sw_if_index, u32 ip4_table_index, + u32 ip6_table_index, u32 other_table_index); + +#endif /* __included_vnet_l2_classify_h__ */ diff --git a/vnet/vnet/l2/l2_efp_filter.c b/vnet/vnet/l2/l2_efp_filter.c new file mode 100644 index 00000000000..a8bceca13fe --- /dev/null +++ b/vnet/vnet/l2/l2_efp_filter.c @@ -0,0 +1,572 @@ +/* + * l2_efp_filter.c : layer 2 egress EFP Filter processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_output.h> +#include <vnet/ethernet/ethernet.h> + +#include <vppinfra/error.h> +#include <vppinfra/cache.h> + +typedef struct { + + // Next nodes for features and output interfaces + l2_output_next_nodes_st next_nodes; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2_efp_filter_main_t; + + +typedef struct { + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u8 raw[12]; // raw data (vlans) + u32 sw_if_index; +} l2_efp_filter_trace_t; + +/* packet trace format function */ +static u8 * format_l2_efp_filter_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_efp_filter_trace_t * t = va_arg (*args, l2_efp_filter_trace_t *); + + s = format (s, "l2-output-vtr: sw_if_index %d dst %U src %U data " + "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4], t->raw[5], + t->raw[6], t->raw[7], t->raw[8], t->raw[9], t->raw[10], t->raw[11]); + return s; +} + +l2_efp_filter_main_t l2_efp_filter_main; + +static vlib_node_registration_t l2_efp_filter_node; + +#define foreach_l2_efp_filter_error \ +_(L2_EFP_FILTER, "L2 EFP filter packets") \ +_(DROP, "L2 EFP filter post-rewrite drops") + +typedef enum { +#define _(sym,str) L2_EFP_FILTER_ERROR_##sym, + foreach_l2_efp_filter_error +#undef _ + L2_EFP_FILTER_N_ERROR, +} l2_efp_filter_error_t; + +static char * l2_efp_filter_error_strings[] = { +#define _(sym,string) string, + foreach_l2_efp_filter_error +#undef _ +}; + +typedef enum { + L2_EFP_FILTER_NEXT_DROP, + L2_EFP_FILTER_N_NEXT, +} l2_efp_filter_next_t; + + +// Extract fields from the packet 
that will be used in interface classification +static_always_inline void +extract_keys (vnet_main_t * vnet_main, + u32 sw_if_index0, + vlib_buffer_t * b0, + u32 * port_sw_if_index0, + u16 * first_ethertype0, + u16 * outer_id0, + u16 * inner_id0, + u32 * match_flags0) +{ + ethernet_header_t * e0; + ethernet_vlan_header_t * h0; + u32 tag_len; + u32 tag_num; + + *port_sw_if_index0 = vnet_get_sup_sw_interface (vnet_main, sw_if_index0)->sw_if_index; + + e0 = vlib_buffer_get_current (b0); + h0 = (ethernet_vlan_header_t *)(e0+1); + + *first_ethertype0 = clib_net_to_host_u16(e0->type); + *outer_id0 = clib_net_to_host_u16 (h0[0].priority_cfi_and_id); + *inner_id0 = clib_net_to_host_u16 (h0[1].priority_cfi_and_id); + + tag_len = vnet_buffer(b0)->l2.l2_len - sizeof(ethernet_header_t); + tag_num = tag_len / sizeof(ethernet_vlan_header_t); + *match_flags0 = eth_create_valid_subint_match_flags (tag_num); +} + +/* + * EFP filtering is a basic switch feature which prevents an interface from + * transmitting a packet that doesn't match the interface's ingress match + * criteria. The check has two parts, one performed before egress vlan tag + * rewrite and one after. + * + * The pre-rewrite check insures the packet matches what an ingress packet looks + * like after going through the interface's ingress tag rewrite operation. Only + * pushed tags are compared. So: + * - if the ingress vlan tag rewrite pushes no tags (or is not enabled), + * any packet passes the filter + * - if the ingress vlan tag rewrite pushes one tag, + * the packet must have at least one tag, and the outer tag must match the pushed tag + * - if the ingress vlan tag rewrite pushes two tags, + * the packet must have at least two tags, and the outer two tags must match the pushed tags + * + * The pre-rewrite check is performed in the l2-output node. + * + * The post-rewrite check insures the packet matches what an ingress packet looks + * like before going through the interface's ingress tag rewrite operation. 
It verifies + * that such a packet arriving on the wire at this port would be classified as arriving + * an input interface equal to the packet's output interface. This can be done by running + * the output packet's vlan tags and output port through the interface classification, + * and checking if the resulting interface matches the output interface. + * + * The post-rewrite check is performed here. + */ + +static uword +l2_efp_filter_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2_efp_filter_next_t next_index; + l2_efp_filter_main_t * msm = &l2_efp_filter_main; + vlib_node_t *n = vlib_get_node (vm, l2_efp_filter_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + u32 cached_sw_if_index = ~0; + u32 cached_next_index = ~0; + + /* invalidate cache to begin with */ + cached_sw_if_index = ~0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 6 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + u32 feature_bitmap0, feature_bitmap1; + u16 first_ethertype0, first_ethertype1; + u16 outer_id0, inner_id0, outer_id1, inner_id1; + u32 match_flags0, match_flags1; + u32 port_sw_if_index0, subint_sw_if_index0, port_sw_if_index1, subint_sw_if_index1; + vnet_hw_interface_t * hi0, * hi1; + main_intf_t * main_intf0, * main_intf1; + vlan_intf_t * vlan_intf0, * vlan_intf1; + qinq_intf_t * qinq_intf0, * qinq_intf1; + u32 is_l20, is_l21; + __attribute__((unused)) u32 matched0, matched1; + u8 error0, error1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3, * p4, * p5; + __attribute__((unused)) u32 sw_if_index2, sw_if_index3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + // Prefetch the buffer header and packet for the N+2 loop iteration + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + + // Prefetch the input config for the N+1 loop iteration + // This depends on the buffer header above + sw_if_index2 = vnet_buffer(p2)->sw_if_index[VLIB_TX]; + sw_if_index3 = vnet_buffer(p3)->sw_if_index[VLIB_TX]; + //TODO CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index2), CLIB_CACHE_LINE_BYTES, LOAD); + //TODO CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index3), CLIB_CACHE_LINE_BYTES, LOAD); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* TX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX]; + + // process 2 packets + em->counters[node_counter_base_index + L2_EFP_FILTER_ERROR_L2_EFP_FILTER] += 2; + + // Remove ourself from the feature bitmap + feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_EFP_FILTER; + feature_bitmap1 = vnet_buffer(b1)->l2.feature_bitmap & ~L2OUTPUT_FEAT_EFP_FILTER; + + // Determine next node + l2_output_dispatch (msm->vlib_main, + msm->vnet_main, + node, + l2_efp_filter_node.index, + &cached_sw_if_index, + &cached_next_index, + &msm->next_nodes, + b0, + sw_if_index0, + feature_bitmap0, + &next0); 
+ l2_output_dispatch (msm->vlib_main, + msm->vnet_main, + node, + l2_efp_filter_node.index, + &cached_sw_if_index, + &cached_next_index, + &msm->next_nodes, + b1, + sw_if_index1, + feature_bitmap1, + &next1); + + // perform the efp filter check on two packets + + extract_keys (msm->vnet_main, + sw_if_index0, + b0, + &port_sw_if_index0, + &first_ethertype0, + &outer_id0, + &inner_id0, + &match_flags0); + + extract_keys (msm->vnet_main, + sw_if_index1, + b1, + &port_sw_if_index1, + &first_ethertype1, + &outer_id1, + &inner_id1, + &match_flags1); + + eth_vlan_table_lookups (&ethernet_main, + msm->vnet_main, + port_sw_if_index0, + first_ethertype0, + outer_id0, + inner_id0, + &hi0, + &main_intf0, + &vlan_intf0, + &qinq_intf0); + + eth_vlan_table_lookups (&ethernet_main, + msm->vnet_main, + port_sw_if_index1, + first_ethertype1, + outer_id1, + inner_id1, + &hi1, + &main_intf1, + &vlan_intf1, + &qinq_intf1); + + matched0 = eth_identify_subint (hi0, + b0, + match_flags0, + main_intf0, + vlan_intf0, + qinq_intf0, + &subint_sw_if_index0, + &error0, + &is_l20); + + matched1 = eth_identify_subint (hi1, + b1, + match_flags1, + main_intf1, + vlan_intf1, + qinq_intf1, + &subint_sw_if_index1, + &error1, + &is_l21); + + if (PREDICT_FALSE (sw_if_index0 != subint_sw_if_index0)) { + // Drop packet + next0 = L2_EFP_FILTER_NEXT_DROP; + b0->error = node->errors[L2_EFP_FILTER_ERROR_DROP]; + } + + if (PREDICT_FALSE (sw_if_index1 != subint_sw_if_index1)) { + // Drop packet + next1 = L2_EFP_FILTER_NEXT_DROP; + b1->error = node->errors[L2_EFP_FILTER_ERROR_DROP]; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) { + if (b0->flags & VLIB_BUFFER_IS_TRACED) { + ethernet_header_t * h0 = vlib_buffer_get_current (b0); + l2_efp_filter_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + memcpy(t->raw, &h0->type, sizeof(t->raw)); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) { 
+ ethernet_header_t * h1 = vlib_buffer_get_current (b1); + l2_efp_filter_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + memcpy(t->raw, &h1->type, sizeof(t->raw)); + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + u32 feature_bitmap0; + u16 first_ethertype0; + u16 outer_id0, inner_id0; + u32 match_flags0; + u32 port_sw_if_index0, subint_sw_if_index0; + vnet_hw_interface_t * hi0; + main_intf_t * main_intf0; + vlan_intf_t * vlan_intf0; + qinq_intf_t * qinq_intf0; + u32 is_l20; + __attribute__((unused)) u32 matched0; + u8 error0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + + // process 1 packet + em->counters[node_counter_base_index + L2_EFP_FILTER_ERROR_L2_EFP_FILTER] += 1; + + // Remove ourself from the feature bitmap + feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_EFP_FILTER; + + // Determine next node + l2_output_dispatch (msm->vlib_main, + msm->vnet_main, + node, + l2_efp_filter_node.index, + &cached_sw_if_index, + &cached_next_index, + &msm->next_nodes, + b0, + sw_if_index0, + feature_bitmap0, + &next0); + + // perform the efp filter check on one packet + + extract_keys (msm->vnet_main, + sw_if_index0, + b0, + &port_sw_if_index0, + &first_ethertype0, + &outer_id0, + &inner_id0, + &match_flags0); + + eth_vlan_table_lookups (&ethernet_main, + msm->vnet_main, + 
port_sw_if_index0, + first_ethertype0, + outer_id0, + inner_id0, + &hi0, + &main_intf0, + &vlan_intf0, + &qinq_intf0); + + matched0 = eth_identify_subint (hi0, + b0, + match_flags0, + main_intf0, + vlan_intf0, + qinq_intf0, + &subint_sw_if_index0, + &error0, + &is_l20); + + if (PREDICT_FALSE (sw_if_index0 != subint_sw_if_index0)) { + // Drop packet + next0 = L2_EFP_FILTER_NEXT_DROP; + b0->error = node->errors[L2_EFP_FILTER_ERROR_DROP]; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + ethernet_header_t * h0 = vlib_buffer_get_current (b0); + l2_efp_filter_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + memcpy(t->raw, &h0->type, sizeof(t->raw)); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (l2_efp_filter_node,static) = { + .function = l2_efp_filter_node_fn, + .name = "l2-efp-filter", + .vector_size = sizeof (u32), + .format_trace = format_l2_efp_filter_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_efp_filter_error_strings), + .error_strings = l2_efp_filter_error_strings, + + .n_next_nodes = L2_EFP_FILTER_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_EFP_FILTER_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2_efp_filter_init (vlib_main_t *vm) +{ + l2_efp_filter_main_t * mp = &l2_efp_filter_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2_efp_filter_node.index, + L2OUTPUT_N_FEAT, + l2output_get_feat_names(), + mp->next_nodes.feat_next_node_index); + + // Initialize the output node 
mapping table + l2output_init_output_node_vec(&mp->next_nodes.output_node_index_vec); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_efp_filter_init); + + +// Enable/disable the EFP Filter check on the subinterface +void l2_efp_filter_configure (vnet_main_t * vnet_main, + u32 sw_if_index, + u32 enable) +{ + // set the interface flag + l2output_intf_bitmap_enable(sw_if_index, L2OUTPUT_FEAT_EFP_FILTER, enable); +} + + +// set subinterface egress efp filter enable/disable +// The CLI format is: +// set interface l2 efp-filter <interface> [disable]] +static clib_error_t * +int_l2_efp_filter (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 enable; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // enable/disable the feature + l2_efp_filter_configure (vnm, sw_if_index, enable); + + done: + return error; +} + + +VLIB_CLI_COMMAND (int_l2_efp_filter_cli, static) = { + .path = "set interface l2 efp-filter", + .short_help = "set interface l2 efp-filter <interface> [disable]", + .function = int_l2_efp_filter, +}; + diff --git a/vnet/vnet/l2/l2_efp_filter.h b/vnet/vnet/l2/l2_efp_filter.h new file mode 100644 index 00000000000..f8baf092fa8 --- /dev/null +++ b/vnet/vnet/l2/l2_efp_filter.h @@ -0,0 +1,28 @@ +/* + * l2_efp_filter.h : layer 2 egress EFP Filter processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef included_vnet_l2_efp_filter_h +#define included_vnet_l2_efp_filter_h + + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + + +#endif + diff --git a/vnet/vnet/l2/l2_fib.c b/vnet/vnet/l2/l2_fib.c new file mode 100644 index 00000000000..198ffd281bb --- /dev/null +++ b/vnet/vnet/l2/l2_fib.c @@ -0,0 +1,567 @@ +/* + * l2_fib.c : layer 2 forwarding table (aka mac table) + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/cli.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/l2/l2_fib.h> +#include <vnet/l2/l2_learn.h> +#include <vnet/l2/l2_bd.h> + +#include <vppinfra/bihash_template.c> + +typedef struct { + + /* hash table */ + BVT(clib_bihash) mac_table; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2fib_main_t; + +l2fib_main_t l2fib_main; + + +// Format sw_if_index. 
If the value is ~0, use the text "N/A" +u8 * format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + u32 sw_if_index = va_arg (*args, u32); + if (sw_if_index == ~0) + return format (s, "N/A"); + else + return format (s, "%U", + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, sw_if_index)); +} + +void l2fib_table_dump (u32 bd_index, l2fib_entry_key_t **l2fe_key, + l2fib_entry_result_t **l2fe_res) +{ + l2fib_main_t * msm = &l2fib_main; + BVT(clib_bihash) * h = &msm->mac_table; + clib_bihash_bucket_t * b; + BVT(clib_bihash_value) * v; + l2fib_entry_key_t key; + l2fib_entry_result_t result; + int i, j, k; + + for (i = 0; i < h->nbuckets; i++) + { + b = &h->buckets[i]; + if (b->offset == 0) + continue; + v = BV(clib_bihash_get_value) (h, b->offset); + for (j = 0; j < (1<<b->log2_pages); j++) + { + for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) + { + if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) + continue; + + key.raw = v->kvp[k].key; + result.raw = v->kvp[k].value; + + if ((bd_index == ~0) || (bd_index == key.fields.bd_index)) + { + vec_add1 (*l2fe_key, key); + vec_add1 (*l2fe_res, result); + } + } + v++; + } + } +} + +// Display the contents of the l2fib +static clib_error_t * +show_l2fib (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + l2fib_main_t * msm = &l2fib_main; + BVT(clib_bihash) * h = &msm->mac_table; + clib_bihash_bucket_t * b; + BVT(clib_bihash_value) * v; + l2fib_entry_key_t key; + l2fib_entry_result_t result; + u32 first_entry = 1; + u64 total_entries = 0; + int i, j, k; + u8 verbose = 0; + u8 raw = 0; + u32 bd_id, bd_index = ~0; + + if (unformat (input, "raw")) + raw = 1; + else if (unformat (input, "verbose")) + verbose = 1; + else if (unformat (input, "bd_index %d", &bd_index)) + verbose = 1; + else if (unformat (input, "bd_id %d", &bd_id)) + { + uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if 
(p) + { + verbose = 1; + bd_index = p[0]; + } + else + { + vlib_cli_output (vm, "no such bridge domain id"); + return 0; + } + } + + for (i = 0; i < h->nbuckets; i++) + { + b = &h->buckets[i]; + if (b->offset == 0) + continue; + v = BV(clib_bihash_get_value) (h, b->offset); + for (j = 0; j < (1<<b->log2_pages); j++) + { + for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) + { + if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) + continue; + + if (verbose && first_entry) + { + first_entry=0; + vlib_cli_output (vm, + "%=19s%=7s%=30s%=7s%=8s%=8s%=5s%=9s%=11s", + "Mac Address", "BD Idx", "Interface", + "Index", "static", "filter", "bvi", + "refresh", "timestamp"); + } + + key.raw = v->kvp[k].key; + result.raw = v->kvp[k].value; + + if (verbose + & ((bd_index >>31) || (bd_index == key.fields.bd_index))) + { + vlib_cli_output (vm, + "%=19U%=7d%=30U%=7d%=8d%=8d%=5d%=9d%=11X", + format_ethernet_address, key.fields.mac, + key.fields.bd_index, + format_vnet_sw_if_index_name_with_NA, + msm->vnet_main, result.fields.sw_if_index, + result.fields.sw_if_index == ~0 + ? 
-1 : result.fields.sw_if_index, + result.fields.static_mac, + result.fields.filter, + result.fields.bvi, + result.fields.refresh, + result.fields.timestamp); + } + total_entries++; + } + v++; + } + } + + if (total_entries == 0) + vlib_cli_output (vm, "no l2fib entries"); + else + vlib_cli_output (vm, "%lld l2fib entries", total_entries); + + if (raw) + vlib_cli_output (vm, "Raw Hash Table:\n%U\n", + BV(format_bihash), h, 1 /* verbose */); + + return 0; +} + +VLIB_CLI_COMMAND (show_l2fib_cli, static) = { + .path = "show l2fib", + .short_help = "show l2fib [verbose | bd_id <nn> | bd_index <nn> | raw]", + .function = show_l2fib, +}; + + +// Remove all entries from the l2fib +void l2fib_clear_table (uint keep_static) +{ + l2fib_main_t * mp = &l2fib_main; + + if (keep_static) { + // TODO: remove only non-static entries + } else { + // Remove all entries + BV(clib_bihash_free) (&mp->mac_table); + BV(clib_bihash_init) (&mp->mac_table, "l2fib mac table", + L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE); + } + + l2learn_main.global_learn_count = 0; +} + +// Clear all entries in L2FIB +// TODO: Later we may want a way to remove only the non-static entries +static clib_error_t * +clear_l2fib (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2fib_clear_table (0); + return 0; +} + +VLIB_CLI_COMMAND (clear_l2fib_cli, static) = { + .path = "clear l2fib", + .short_help = "Clear l2fib mac forwarding entries", + .function = clear_l2fib, +}; + + +// Add an entry to the l2fib. 
+// If the entry already exists then overwrite it +void l2fib_add_entry (u64 mac, + u32 bd_index, + u32 sw_if_index, + u32 static_mac, + u32 filter_mac, + u32 bvi_mac) { + l2fib_entry_key_t key; + l2fib_entry_result_t result; + __attribute__((unused)) u32 bucket_contents; + l2fib_main_t * mp = &l2fib_main; + BVT(clib_bihash_kv) kv; + + // set up key + key.raw = l2fib_make_key ((u8 *)&mac, bd_index); + + // set up result + result.raw = 0; // clear all fields + result.fields.sw_if_index = sw_if_index; + result.fields.static_mac = static_mac; + result.fields.filter = filter_mac; + result.fields.bvi = bvi_mac; + + kv.key = key.raw; + kv.value = result.raw; + + BV(clib_bihash_add_del) (&mp->mac_table, &kv, 1 /* is_add */); + + // increment counter if dynamically learned mac + if (result.fields.static_mac) { + l2learn_main.global_learn_count++; + } +} + +// Add an entry to the L2FIB +// The CLI format is: +// l2fib add <mac> <bd> <intf> [static] [bvi] +// l2fib add <mac> <bd> filter +// Note that filter and bvi entries are always static +static clib_error_t * +l2fib_add (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u64 mac; + u32 bd_id; + u32 bd_index; + u32 sw_if_index = ~0; + u32 filter_mac = 0; + u32 static_mac = 0; + u32 bvi_mac = 0; + uword * p; + + if (! 
unformat_user (input, unformat_ethernet_address, &mac)) + { + error = clib_error_return (0, "expected mac address `%U'", + format_unformat_error, input); + goto done; + } + + if (!unformat (input, "%d", &bd_id)) { + error = clib_error_return (0, "expected bridge domain ID `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (!p) { + error = clib_error_return (0, "bridge domain ID %d invalid", bd_id); + goto done; + } + bd_index = p[0]; + + if (unformat (input, "filter")) { + filter_mac = 1; + static_mac = 1; + + } else { + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + if (unformat (input, "static")) { + static_mac = 1; + } else if (unformat (input, "bvi")) { + bvi_mac = 1; + static_mac = 1; + } + } + + l2fib_add_entry(mac, bd_index, sw_if_index, static_mac, filter_mac, bvi_mac); + + done: + return error; +} + +VLIB_CLI_COMMAND (l2fib_add_cli, static) = { + .path = "l2fib add", + .short_help = "Add l2fib mac forwarding entry <mac> <bd-id> filter | <intf> [static | bvi]", + .function = l2fib_add, +}; + + +static clib_error_t * +l2fib_test_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t * error = 0; + u64 mac, save_mac; + u32 bd_index = 0; + u32 sw_if_index = 8; + u32 filter_mac = 0; + u32 bvi_mac = 0; + u32 is_add = 0; + u32 is_del = 0; + u32 is_check = 0; + u32 count = 1; + int mac_set = 0; + int i; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "mac %U", unformat_ethernet_address, &mac)) + mac_set = 1; + else if (unformat (input, "add")) + is_add = 1; + else if (unformat (input, "del")) + is_del = 1; + else if (unformat (input, "check")) + is_check = 1; + else if (unformat (input, "count %d", &count)) + ; + else + break; + } + + if (mac_set == 0) + return 
clib_error_return (0, "mac not set"); + + if (is_add == 0 && is_del == 0 && is_check == 0) + return clib_error_return (0, "noop: pick at least one of (add,del,check)"); + + save_mac = mac; + + if (is_add) + { + for (i = 0; i < count; i++) + { + u64 tmp; + l2fib_add_entry(mac, bd_index, sw_if_index, mac, + filter_mac, bvi_mac); + tmp = clib_net_to_host_u64(mac); + tmp >>= 16; + tmp++; + tmp <<= 16; + mac = clib_host_to_net_u64 (tmp); + } + } + + if (is_check) + { + BVT(clib_bihash_kv) kv; + l2fib_main_t * mp = &l2fib_main; + + mac = save_mac; + + for (i = 0; i < count; i++) + { + u64 tmp; + kv.key = l2fib_make_key ((u8 *)&mac, bd_index); + if (BV(clib_bihash_search) (&mp->mac_table, &kv, &kv)) + { + clib_warning ("key %U AWOL", format_ethernet_address, &mac); + break; + } + tmp = clib_net_to_host_u64(mac); + tmp >>= 16; + tmp++; + tmp <<= 16; + mac = clib_host_to_net_u64 (tmp); + } + } + + if (is_del) + { + for (i = 0; i < count; i++) + { + u64 tmp; + + l2fib_del_entry (mac, bd_index); + + tmp = clib_net_to_host_u64(mac); + tmp >>= 16; + tmp++; + tmp <<= 16; + mac = clib_host_to_net_u64 (tmp); + } + } + + return error; +} + +VLIB_CLI_COMMAND (l2fib_test_command, static) = { + .path = "test l2fib", + .short_help = "test l2fib [del] mac <base-addr> count <nn>", + .function = l2fib_test_command_fn, +}; + + +// Delete an entry from the l2fib. 
+// Return 0 if the entry was deleted, or 1 if it was not found +u32 l2fib_del_entry (u64 mac, + u32 bd_index) { + + l2fib_entry_result_t result; + l2fib_main_t * mp = &l2fib_main; + BVT(clib_bihash_kv) kv; + + // set up key + kv.key = l2fib_make_key ((u8 *)&mac, bd_index); + + if (BV(clib_bihash_search) (&mp->mac_table, &kv, &kv)) + return 1; + + result.raw = kv.value; + + // decrement counter if dynamically learned mac + if (result.fields.static_mac) { + if (l2learn_main.global_learn_count > 0) { + l2learn_main.global_learn_count--; + } + } + + // Remove entry from hash table + BV(clib_bihash_add_del) (&mp->mac_table, &kv, 0 /* is_add */); + return 0; +} + +// Delete an entry from the L2FIB +// The CLI format is: +// l2fib del <mac> <bd-id> +static clib_error_t * +l2fib_del (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bd_main_t * bdm = &bd_main; + clib_error_t * error = 0; + u64 mac; + u32 bd_id; + u32 bd_index; + uword * p; + + if (! unformat_user (input, unformat_ethernet_address, &mac)) + { + error = clib_error_return (0, "expected mac address `%U'", + format_unformat_error, input); + goto done; + } + + if (!unformat (input, "%d", &bd_id)) { + error = clib_error_return (0, "expected bridge domain ID `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (!p) { + error = clib_error_return (0, "bridge domain ID %d invalid", bd_id); + goto done; + } + bd_index = p[0]; + + // Delete the entry + if (l2fib_del_entry(mac, bd_index)) { + error = clib_error_return (0, "mac entry not found"); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (l2fib_del_cli, static) = { + .path = "l2fib del", + .short_help = "Delete l2fib mac forwarding entry <mac> <bd-id>", + .function = l2fib_del, +}; + + +BVT(clib_bihash) *get_mac_table(void) { + l2fib_main_t * mp = &l2fib_main; + return &mp->mac_table; +} + +clib_error_t *l2fib_init (vlib_main_t *vm) +{ + l2fib_main_t * mp = 
&l2fib_main; + l2fib_entry_key_t test_key; + u8 test_mac[6]; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Create the hash table + BV(clib_bihash_init) (&mp->mac_table, "l2fib mac table", + L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE); + + // verify the key constructor is good, since it is endian-sensitive + test_mac[0] = 0x11; + test_key.raw = 0; + test_key.raw = l2fib_make_key ((u8 *)&test_mac, 0x1234); + ASSERT (test_key.fields.mac[0] == 0x11); + ASSERT (test_key.fields.bd_index == 0x1234); + + return 0; +} + +VLIB_INIT_FUNCTION (l2fib_init); + diff --git a/vnet/vnet/l2/l2_fib.h b/vnet/vnet/l2/l2_fib.h new file mode 100644 index 00000000000..1dcc0200f60 --- /dev/null +++ b/vnet/vnet/l2/l2_fib.h @@ -0,0 +1,226 @@ +/* + * l2_fib.h : layer 2 forwarding table (aka mac table) + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_l2fib_h +#define included_l2fib_h + +#include <vlib/vlib.h> +#include <vppinfra/bihash_8_8.h> + +/* + * The size of the hash table + */ +#define L2FIB_NUM_BUCKETS (64 * 1024) +#define L2FIB_MEMORY_SIZE (256<<20) + +/* + * The L2fib key is the mac address and bridge domain ID + */ +typedef struct { + union { + struct { + u16 bd_index; + u8 mac[6]; + } fields; + struct { + u32 w0; + u32 w1; + } words; + u64 raw; + }; +} l2fib_entry_key_t; + +/* + * The l2fib entry results + */ +typedef struct { + union { + struct { + u32 sw_if_index; // output sw_if_index (L3 interface if bvi==1) + + u8 static_mac:1; // static mac, no dataplane learning + u8 bvi:1; // mac is for a bridged virtual interface + u8 filter:1; // drop packets to/from this mac + u8 refresh:1; // refresh flag for aging + u8 unused1:4; + u8 timestamp; // timestamp for aging + u16 unused2; + } fields; + u64 raw; + }; +} l2fib_entry_result_t; + + +// Compute the hash for the given key and return the corresponding bucket index +always_inline +u32 l2fib_compute_hash_bucket (l2fib_entry_key_t *key) { + u32 result; + u32 temp_a; + u32 temp_b; + + result = 0xa5a5a5a5; // some seed + temp_a = key->words.w0; + temp_b = key->words.w1; + hash_mix32(temp_a, temp_b, result); + + return result % L2FIB_NUM_BUCKETS; +} + +always_inline +u64 l2fib_make_key (u8 * mac_address, u16 bd_index) { + u64 temp; + + // The mac address in memory is A:B:C:D:E:F + // The bd id in register is H:L +#if CLIB_ARCH_IS_LITTLE_ENDIAN + // Create the in-register key as F:E:D:C:B:A:H:L + // In memory the key is L:H:A:B:C:D:E:F + temp = *((u64 *)(mac_address - 2)); + temp = (temp & ~0xffff) | (u64)(bd_index); +#else + // Create the in-register key as H:L:A:B:C:D:E:F + // In memory the key is H:L:A:B:C:D:E:F + temp = *((u64 *)(mac_address)) >> 16; + temp = temp | (bd_index << 48); +#endif + + return temp; +} + + + +// Lookup the entry for mac and bd_index in the mac table for 1 packet. 
+// Cached_key and cached_result are used as a one-entry cache. +// The function reads and updates them as needed. +// +// mac0 and bd_index0 are the keys. The entry is written to result0. +// If the entry was not found, result0 is set to ~0. +// +// key0 and bucket0 return with the computed key and hash bucket, +// convenient if the entry needs to be updated afterward. +// If the cached_result was used, bucket0 is set to ~0. + +static_always_inline void +l2fib_lookup_1 (BVT(clib_bihash) * mac_table, + l2fib_entry_key_t * cached_key, + l2fib_entry_result_t * cached_result, + u8 * mac0, + u16 bd_index0, + l2fib_entry_key_t * key0, + u32 * bucket0, + l2fib_entry_result_t *result0) +{ + // set up key + key0->raw = l2fib_make_key (mac0, bd_index0); + *bucket0 = ~0; + + if (key0->raw == cached_key->raw) { + // Hit in the one-entry cache + result0->raw = cached_result->raw; + } else { + // Do a regular mac table lookup + BVT(clib_bihash_kv) kv; + + kv.key = key0->raw; + kv.value = ~0ULL; + BV(clib_bihash_search_inline) (mac_table, &kv); + result0->raw = kv.value; + + // Update one-entry cache + cached_key->raw = key0->raw; + cached_result->raw = result0->raw; + } +} + + +// Lookup the entry for mac and bd_index in the mac table for 2 packets. +// The lookups for the two packets are interleaved. +// +// Cached_key and cached_result are used as a one-entry cache. +// The function reads and updates them as needed. +// +// mac0 and bd_index0 are the keys. The entry is written to result0. +// If the entry was not found, result0 is set to ~0. The same +// holds for mac1/bd_index1/result1. 
+ +static_always_inline void +l2fib_lookup_2 (BVT(clib_bihash) * mac_table, + l2fib_entry_key_t * cached_key, + l2fib_entry_result_t * cached_result, + u8 * mac0, + u8 * mac1, + u16 bd_index0, + u16 bd_index1, + l2fib_entry_key_t * key0, + l2fib_entry_key_t * key1, + u32 * bucket0, + u32 * bucket1, + l2fib_entry_result_t *result0, + l2fib_entry_result_t *result1) +{ + // set up key + key0->raw = l2fib_make_key (mac0, bd_index0); + key1->raw = l2fib_make_key (mac1, bd_index1); + + if ((key0->raw == cached_key->raw) && + (key1->raw == cached_key->raw)) { + // Both hit in the one-entry cache + result0->raw = cached_result->raw; + result1->raw = cached_result->raw; + *bucket0 = ~0; + *bucket1 = ~0; + + } else { + BVT(clib_bihash_kv) kv0, kv1; + + // Do a regular mac table lookup + // Interleave lookups for packet 0 and packet 1 + kv0.key = key0->raw; + kv1.key = key1->raw; + kv0.value = ~0ULL; + kv1.value = ~0ULL; + + BV(clib_bihash_search_inline) (mac_table, &kv0); + BV(clib_bihash_search_inline) (mac_table, &kv1); + + result0->raw = kv0.value; + result1->raw = kv1.value; + + // Update one-entry cache + cached_key->raw = key1->raw; + cached_result->raw = result1->raw; + } +} + + +BVT(clib_bihash) *get_mac_table(void); +void l2fib_clear_table (uint keep_static); +void l2fib_add_entry (u64 mac, + u32 bd_index, + u32 sw_if_index, + u32 static_mac, + u32 drop_mac, + u32 bvi_mac); +u32 l2fib_del_entry (u64 mac, + u32 bd_index); + +void l2fib_table_dump (u32 bd_index, l2fib_entry_key_t **l2fe_key, + l2fib_entry_result_t **l2fe_res); + +u8 * format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args); + +#endif diff --git a/vnet/vnet/l2/l2_flood.c b/vnet/vnet/l2/l2_flood.c new file mode 100644 index 00000000000..8a702168715 --- /dev/null +++ b/vnet/vnet/l2/l2_flood.c @@ -0,0 +1,520 @@ +/* + * l2_flood.c : layer 2 flooding + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_bvi.h> +#include <vnet/replication.h> +#include <vnet/l2/l2_fib.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> + + +/* + * Flooding uses the packet replication infrastructure to send a copy of the + * packet to each member interface. Logically the replication infrastructure + * expects two graph nodes: a prep node that initiates replication and sends the + * packet to the first destination, and a recycle node that is passed the packet + * after it has been transmitted. + * + * To decrease the amount of code, l2 flooding implements both functions in + * the same graph node. This node can tell if is it being called as the "prep" + * or "recycle" using replication_is_recycled(). 
+ */ + + +typedef struct { + + // Next nodes for each feature + u32 feat_next_node_index[32]; + + // next node index for the L3 input node of each ethertype + next_by_ethertype_t l3_next; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2flood_main_t; + +typedef struct { + u8 src[6]; + u8 dst[6]; + u32 sw_if_index; + u16 bd_index; +} l2flood_trace_t; + + +/* packet trace format function */ +static u8 * format_l2flood_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2flood_trace_t * t = va_arg (*args, l2flood_trace_t *); + + s = format (s, "l2-flood: sw_if_index %d dst %U src %U bd_index %d", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->bd_index); + return s; +} + +l2flood_main_t l2flood_main; + +static vlib_node_registration_t l2flood_node; + +#define foreach_l2flood_error \ +_(L2FLOOD, "L2 flood packets") \ +_(REPL_FAIL, "L2 replication failures") \ +_(NO_MEMBERS, "L2 replication complete") \ +_(BVI_TAGGED, "BVI packet with vlan tag") \ +_(BVI_ETHERTYPE, "BVI packet with unhandled ethertype") + +typedef enum { +#define _(sym,str) L2FLOOD_ERROR_##sym, + foreach_l2flood_error +#undef _ + L2FLOOD_N_ERROR, +} l2flood_error_t; + +static char * l2flood_error_strings[] = { +#define _(sym,string) string, + foreach_l2flood_error +#undef _ +}; + +typedef enum { + L2FLOOD_NEXT_L2_OUTPUT, + L2FLOOD_NEXT_DROP, + L2FLOOD_N_NEXT, +} l2flood_next_t; + +/* + * Perform flooding on one packet + * + * Due to the way BVI processing can modify the packet, the BVI interface + * (if present) must be processed last in the replication. The member vector + * is arranged so that the BVI interface is always the first element. + * Flooding walks the vector in reverse. + * + * BVI processing causes the packet to go to L3 processing. 
This strips the + * L2 header, which is fine because the replication infrastructure restores + * it. However L3 processing can trigger larger changes to the packet. For + * example, an ARP request could be turned into an ARP reply, an ICMP request + * could be turned into an ICMP reply. If BVI processing is not performed + * last, the modified packet would be replicated to the remaining members. + */ + +static_always_inline void +l2flood_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + l2flood_main_t * msm, + u64 * counter_base, + vlib_buffer_t * b0, + u32 sw_if_index0, + l2fib_entry_key_t * key0, + u32 * bucket0, + l2fib_entry_result_t * result0, + u32 * next0) +{ + u16 bd_index0; + l2_bridge_domain_t *bd_config; + l2_flood_member_t * members; + i32 current_member; // signed + replication_context_t * ctx; + u8 in_shg = vnet_buffer(b0)->l2.shg; + + if (!replication_is_recycled(b0)) { + + // Do flood "prep node" processing + + // Get config for the bridge domain interface + bd_index0 = vnet_buffer(b0)->l2.bd_index; + bd_config = vec_elt_at_index(l2input_main.bd_configs, bd_index0); + members = bd_config->members; + + // Find first member that passes the reflection and SHG checks + current_member = vec_len(members) - 1; + while ((current_member >= 0) && + ((members[current_member].sw_if_index == sw_if_index0) || + (in_shg && members[current_member].shg == in_shg))) { + current_member--; + } + + if (current_member < 0) { + // No members to flood to + *next0 = L2FLOOD_NEXT_DROP; + b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS]; + return; + } + + if ((current_member > 0) && + ((current_member > 1) || + ((members[0].sw_if_index != sw_if_index0) && + (!in_shg || members[0].shg != in_shg)))) { + // If more than one member then initiate replication + ctx = replication_prep (vm, b0, l2flood_node.index, 1 /* l2_packet */); + ctx->feature_replicas = (u64) members; + ctx->feature_counter = current_member; + } + + } else { + + // Do flood "recycle node" processing 
+ + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL)) + { + (void)replication_recycle (vm, b0, 1 /* is_last */); + *next0 = L2FLOOD_NEXT_DROP; + b0->error = node->errors[L2FLOOD_ERROR_REPL_FAIL]; + return; + } + + ctx = replication_get_ctx (b0); + replication_clear_recycled (b0); + + members = (l2_flood_member_t *) ctx->feature_replicas; + current_member = (i32)ctx->feature_counter - 1; + + // Find next member that passes the reflection and SHG check + while ((current_member >= 0) && + ((members[current_member].sw_if_index == sw_if_index0) || + (in_shg && members[current_member].shg == in_shg))) { + current_member--; + } + + if (current_member < 0) { + // No more members to flood to. + // Terminate replication and drop packet. + + replication_recycle (vm, b0, 1 /* is_last */); + + *next0 = L2FLOOD_NEXT_DROP; + // Ideally we woudn't bump a counter here, just silently complete + b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS]; + return; + } + + // Restore packet and context and continue replication + ctx->feature_counter = current_member; + replication_recycle (vm, b0, + ((current_member == 0) || /*is_last */ + ((current_member == 1) && + ((members[0].sw_if_index == sw_if_index0) || + (in_shg && members[0].shg == in_shg))))); + } + + // Forward packet to the current member + + if (PREDICT_TRUE(members[current_member].flags == L2_FLOOD_MEMBER_NORMAL)) { + // Do normal L2 forwarding + vnet_buffer(b0)->sw_if_index[VLIB_TX] = members[current_member].sw_if_index; + *next0 = L2FLOOD_NEXT_L2_OUTPUT; + + } else { + // Do BVI processing + u32 rc; + rc = l2_to_bvi (vm, + msm->vnet_main, + b0, + members[current_member].sw_if_index, + &msm->l3_next, + next0); + + if (PREDICT_FALSE(rc)) { + if (rc == TO_BVI_ERR_TAGGED) { + b0->error = node->errors[L2FLOOD_ERROR_BVI_TAGGED]; + *next0 = L2FLOOD_NEXT_DROP; + } else if (rc == TO_BVI_ERR_ETHERTYPE) { + b0->error = node->errors[L2FLOOD_ERROR_BVI_ETHERTYPE]; + *next0 = L2FLOOD_NEXT_DROP; + } + } + } + +} + + +static uword 
+l2flood_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2flood_next_t next_index; + l2flood_main_t * msm = &l2flood_main; + vlib_node_t *n = vlib_get_node (vm, l2flood_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 6 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ethernet_header_t * h0, * h1; + l2fib_entry_key_t key0, key1; + l2fib_entry_result_t result0, result1; + u32 bucket0, bucket1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3, * p4, * p5; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + // Prefetch the buffer header for the N+2 loop iteration + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + + // Prefetch the replication context for the N+1 loop iteration + // This depends on the buffer header above + replication_prefetch_ctx (p2); + replication_prefetch_ctx (p3); + + // Prefetch the packet for the N+1 loop iteration + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* RX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + /* Process 2 x pkts */ + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + /* process 2 pkts */ + em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 2; + + l2flood_process (vm, node, msm, &em->counters[node_counter_base_index], + b0, sw_if_index0, &key0, &bucket0, &result0, &next0); + + l2flood_process (vm, node, msm, &em->counters[node_counter_base_index], + b1, sw_if_index1, &key1, &bucket1, &result1, &next1); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + l2flood_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer(b0)->l2.bd_index; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + if 
(PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + l2flood_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->bd_index = vnet_buffer(b1)->l2.bd_index; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ethernet_header_t * h0; + l2fib_entry_key_t key0; + l2fib_entry_result_t result0; + u32 bucket0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + h0 = vlib_buffer_get_current (b0); + + /* process 1 pkt */ + em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 1; + + l2flood_process (vm, node, msm, &em->counters[node_counter_base_index], + b0, sw_if_index0, &key0, &bucket0, &result0, &next0); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2flood_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer(b0)->l2.bd_index; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (l2flood_node,static) = { + .function = l2flood_node_fn, + .name = 
"l2-flood", + .vector_size = sizeof (u32), + .format_trace = format_l2flood_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2flood_error_strings), + .error_strings = l2flood_error_strings, + + .n_next_nodes = L2FLOOD_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2FLOOD_NEXT_L2_OUTPUT] = "l2-output", + [L2FLOOD_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2flood_init (vlib_main_t *vm) +{ + l2flood_main_t * mp = &l2flood_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2flood_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names(), + mp->feat_next_node_index); + + return 0; +} + +VLIB_INIT_FUNCTION (l2flood_init); + + + +// Add the L3 input node for this ethertype to the next nodes structure +void +l2flood_register_input_type (vlib_main_t * vm, + ethernet_type_t type, + u32 node_index) +{ + l2flood_main_t * mp = &l2flood_main; + u32 next_index; + + next_index = vlib_node_add_next (vm, + l2flood_node.index, + node_index); + + next_by_ethertype_register (&mp->l3_next, type, next_index); +} + + +// set subinterface flood enable/disable +// The CLI format is: +// set interface l2 flood <interface> [disable] +static clib_error_t * +int_flood (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 enable; + + if (! 
unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the interface flag + l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_FLOOD, enable); + + done: + return error; +} + +VLIB_CLI_COMMAND (int_flood_cli, static) = { + .path = "set interface l2 flood", + .short_help = "set interface l2 flood <interface> [disable]", + .function = int_flood, +}; diff --git a/vnet/vnet/l2/l2_flood.h b/vnet/vnet/l2/l2_flood.h new file mode 100644 index 00000000000..3c9273d48d5 --- /dev/null +++ b/vnet/vnet/l2/l2_flood.h @@ -0,0 +1,28 @@ +/* + * l2_flood.h : layer 2 flooding + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_l2flood_h +#define included_l2flood_h + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> + +void +l2flood_register_input_type (vlib_main_t * vm, + ethernet_type_t type, + u32 node_index); +#endif diff --git a/vnet/vnet/l2/l2_fwd.c b/vnet/vnet/l2/l2_fwd.c new file mode 100644 index 00000000000..089d4008ea8 --- /dev/null +++ b/vnet/vnet/l2/l2_fwd.c @@ -0,0 +1,446 @@ +/* + * l2_fwd.c : layer 2 forwarding using l2fib + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_bvi.h>
+#include <vnet/l2/l2_fwd.h>
+#include <vnet/l2/l2_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/sparse_vec.h>
+
+
+// State of the l2-fwd node
+typedef struct {
+
+  // Hash table
+  BVT(clib_bihash) *mac_table;
+
+  // next node index for the L3 input node of each ethertype
+  next_by_ethertype_t l3_next;
+
+  /* convenience variables */
+  vlib_main_t * vlib_main;
+  vnet_main_t * vnet_main;
+} l2fwd_main_t;
+
+typedef struct {
+  /* per-pkt trace data */
+  u8 src[6];
+  u8 dst[6];
+  u32 sw_if_index;
+  u16 bd_index;
+} l2fwd_trace_t;
+
+/* packet trace format function */
+static u8 * format_l2fwd_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  l2fwd_trace_t * t = va_arg (*args, l2fwd_trace_t *);
+
+  s = format (s, "l2-fwd:   sw_if_index %d dst %U src %U bd_index %d",
+              t->sw_if_index,
+              format_ethernet_address, t->dst,
+              format_ethernet_address, t->src,
+              t->bd_index);
+  return s;
+}
+
+l2fwd_main_t l2fwd_main;
+
+static vlib_node_registration_t l2fwd_node;
+
+#define foreach_l2fwd_error                             \
+_(L2FWD,         "L2 forward packets")                  \
+_(FLOOD,         "L2 forward misses")                   \
+_(HIT,           "L2 forward hits")                     \
+_(BVI_TAGGED,    "BVI packet with vlan tag")            \
+_(BVI_ETHERTYPE, "BVI packet with unhandled ethertype") \
+_(FILTER_DROP,   "Filter Mac Drop")                     \
+_(REFLECT_DROP,  "Reflection Drop")
+
+typedef enum {
+#define _(sym,str) L2FWD_ERROR_##sym,
+  foreach_l2fwd_error
+#undef _
+  L2FWD_N_ERROR,
+} l2fwd_error_t;
+
+static char * l2fwd_error_strings[] = {
+#define _(sym,string) string,
+  foreach_l2fwd_error
+#undef _
+};
+
+typedef enum {
+  L2FWD_NEXT_L2_OUTPUT,
+  L2FWD_NEXT_FLOOD,
+  L2FWD_NEXT_DROP,
+  L2FWD_N_NEXT,
+} l2fwd_next_t;
+
+// Fill in one packet trace record: RX interface, bridge domain index and
+// the source/destination MAC addresses taken from the ethernet header.
+static_always_inline void
+l2fwd_add_trace (vlib_main_t * vm,
+                 vlib_node_runtime_t * node,
+                 vlib_buffer_t * b0,
+                 u32 sw_if_index0,
+                 ethernet_header_t * h0)
+{
+  l2fwd_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+
+  t->sw_if_index = sw_if_index0;
+  t->bd_index = vnet_buffer(b0)->l2.bd_index;
+  memcpy(t->src, h0->src_address, sizeof (t->src));
+  memcpy(t->dst, h0->dst_address, sizeof (t->dst));
+}
+
+// Forward one packet based on the mac table lookup result
+//
+//   em, node_counter_base_index - error counter block of this node; only
+//                                 touched when COUNTERS is defined
+//   b0            - the packet
+//   sw_if_index0  - RX interface of the packet
+//   result0       - mac table lookup result (raw == ~0 means miss)
+//   next0         - out: next node index for the packet
+
+static_always_inline void
+l2fwd_process (vlib_main_t * vm,
+               vlib_node_runtime_t * node,
+               l2fwd_main_t * msm,
+               vlib_error_main_t * em,
+               u32 node_counter_base_index,
+               vlib_buffer_t * b0,
+               u32 sw_if_index0,
+               l2fib_entry_result_t * result0,
+               u32 * next0)
+{
+  if (PREDICT_FALSE (result0->raw == ~0)) {
+    // lookup miss: hand the packet to l2-flood if unknown-unicast
+    // flooding is enabled for it, otherwise drop it
+    if (vnet_buffer(b0)->l2.feature_bitmap & L2INPUT_FEAT_UU_FLOOD) {
+      *next0 = L2FWD_NEXT_FLOOD;
+    } else {
+      // Flooding is disabled
+      b0->error = node->errors[L2FWD_ERROR_FLOOD];
+      *next0 = L2FWD_NEXT_DROP;
+    }
+
+  } else {
+
+    // lookup hit, forward packet
+#ifdef COUNTERS
+    em->counters[node_counter_base_index + L2FWD_ERROR_HIT] += 1;
+#endif
+
+    vnet_buffer(b0)->sw_if_index[VLIB_TX] = result0->fields.sw_if_index;
+    *next0 = L2FWD_NEXT_L2_OUTPUT;
+
+    // perform reflection check
+    if (PREDICT_FALSE (sw_if_index0 == result0->fields.sw_if_index)) {
+      b0->error = node->errors[L2FWD_ERROR_REFLECT_DROP];
+      *next0 = L2FWD_NEXT_DROP;
+
+    // perform filter check
+    } else if (PREDICT_FALSE (result0->fields.filter)) {
+      b0->error = node->errors[L2FWD_ERROR_FILTER_DROP];
+      *next0 = L2FWD_NEXT_DROP;
+
+    // perform BVI check
+    } else if (PREDICT_FALSE (result0->fields.bvi)) {
+      u32 rc;
+      rc = l2_to_bvi (vm,
+                      msm->vnet_main,
+                      b0,
+                      vnet_buffer(b0)->sw_if_index[VLIB_TX],
+                      &msm->l3_next,
+                      next0);
+
+      // NOTE(review): any other non-zero rc leaves *next0 as written by
+      // l2_to_bvi() and sets no error - confirm that is intended
+      if (PREDICT_FALSE(rc)) {
+        if (rc == TO_BVI_ERR_TAGGED) {
+          b0->error = node->errors[L2FWD_ERROR_BVI_TAGGED];
+          *next0 = L2FWD_NEXT_DROP;
+        } else if (rc == TO_BVI_ERR_ETHERTYPE) {
+          b0->error = node->errors[L2FWD_ERROR_BVI_ETHERTYPE];
+          *next0 = L2FWD_NEXT_DROP;
+        }
+      }
+    }
+  }
+}
+
+
+// l2-fwd node function: look up the destination MAC of every packet in the
+// frame and pick its next node via l2fwd_process().
+// Returns the number of packets in the frame.
+static uword
+l2fwd_node_fn (vlib_main_t * vm,
+               vlib_node_runtime_t * node,
+               vlib_frame_t * frame)
+{
+  u32 n_left_from, * from, * to_next;
+  l2fwd_next_t next_index;
+  l2fwd_main_t * msm = &l2fwd_main;
+  vlib_node_t *n = vlib_get_node (vm, l2fwd_node.index);
+  CLIB_UNUSED(u32 node_counter_base_index) = n->error_heap_index;
+  vlib_error_main_t * em = &vm->error_main;
+  l2fib_entry_key_t cached_key;
+  l2fib_entry_result_t cached_result;
+
+  // Clear the one-entry cache in case mac table was updated
+  cached_key.raw = ~0;
+  cached_result.raw = ~0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors; /* number of packets to process */
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      /* get space to enqueue frame to graph node "next_index" */
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          u32 bi0, bi1;
+          vlib_buffer_t * b0, * b1;
+          u32 next0, next1;
+          u32 sw_if_index0, sw_if_index1;
+          ethernet_header_t * h0, * h1;
+          l2fib_entry_key_t key0, key1;
+          l2fib_entry_result_t result0, result1;
+          u32 bucket0, bucket1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+
+            CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+            CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+          }
+
+          /* speculatively enqueue b0 and b1 to the current next frame */
+          /* bi is "buffer index", b is pointer to the buffer */
+          to_next[0] = bi0 = from[0];
+          to_next[1] = bi1 = from[1];
+          from += 2;
+          to_next += 2;
+          n_left_from -= 2;
+          n_left_to_next -= 2;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+
+          /* RX interface handles */
+          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+          sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+
+          h0 = vlib_buffer_get_current (b0);
+          h1 = vlib_buffer_get_current (b1);
+
+          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
+            {
+              if (b0->flags & VLIB_BUFFER_IS_TRACED)
+                l2fwd_add_trace (vm, node, b0, sw_if_index0, h0);
+              if (b1->flags & VLIB_BUFFER_IS_TRACED)
+                l2fwd_add_trace (vm, node, b1, sw_if_index1, h1);
+            }
+
+          /* process 2 pkts */
+#ifdef COUNTERS
+          em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 2;
+#endif
+          l2fib_lookup_2 (msm->mac_table, &cached_key, &cached_result,
+                          h0->dst_address,
+                          h1->dst_address,
+                          vnet_buffer(b0)->l2.bd_index,
+                          vnet_buffer(b1)->l2.bd_index,
+                          &key0,     // not used
+                          &key1,     // not used
+                          &bucket0,  // not used
+                          &bucket1,  // not used
+                          &result0,
+                          &result1);
+          l2fwd_process (vm, node, msm, em, node_counter_base_index,
+                         b0, sw_if_index0, &result0, &next0);
+          l2fwd_process (vm, node, msm, em, node_counter_base_index,
+                         b1, sw_if_index1, &result1, &next1);
+
+          /* verify speculative enqueues, maybe switch current next frame */
+          /* if next0==next1==next_index then nothing special needs to be done */
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1, next0, next1);
+        }
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t * b0;
+          u32 next0;
+          u32 sw_if_index0;
+          ethernet_header_t * h0;
+          l2fib_entry_key_t key0;
+          l2fib_entry_result_t result0;
+          u32 bucket0;
+
+          /* speculatively enqueue b0 to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+
+          h0 = vlib_buffer_get_current (b0);
+
+          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+                            && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
+            l2fwd_add_trace (vm, node, b0, sw_if_index0, h0);
+          }
+
+          /* process 1 pkt */
+#ifdef COUNTERS
+          em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 1;
+#endif
+          l2fib_lookup_1 (msm->mac_table, &cached_key, &cached_result,
+                          h0->dst_address, vnet_buffer(b0)->l2.bd_index,
+                          &key0,     // not used
+                          &bucket0,  // not used
+                          &result0);
+          l2fwd_process (vm, node, msm, em, node_counter_base_index,
+                         b0, sw_if_index0, &result0, &next0);
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (l2fwd_node,static) = {
+  .function = l2fwd_node_fn,
+  .name = "l2-fwd",
+  .vector_size = sizeof (u32),
+  .format_trace = format_l2fwd_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(l2fwd_error_strings),
+  .error_strings = l2fwd_error_strings,
+
+  .n_next_nodes = L2FWD_N_NEXT,
+
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [L2FWD_NEXT_L2_OUTPUT] = "l2-output",
+    [L2FWD_NEXT_FLOOD] = "l2-flood",
+    [L2FWD_NEXT_DROP] = "error-drop",
+  },
+};
+
+// Init function: record the main pointers, fetch the mac table pointer and
+// set up the per-ethertype next node table. Always returns 0.
+clib_error_t *l2fwd_init (vlib_main_t *vm)
+{
+  l2fwd_main_t * mp = &l2fwd_main;
+
+  mp->vlib_main = vm;
+  mp->vnet_main = vnet_get_main();
+
+  /* init the hash table ptr */
+  mp->mac_table = get_mac_table();
+
+  // Initialize the next nodes for each ethertype
+  next_by_ethertype_init (&mp->l3_next);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (l2fwd_init);
+
+
+// Add the L3 input node for this ethertype to the next nodes structure
+void
+l2fwd_register_input_type (vlib_main_t * vm,
+                           ethernet_type_t type,
+                           u32 node_index)
+{
+  l2fwd_main_t * mp = &l2fwd_main;
+  u32 next_index;
+
+  next_index = vlib_node_add_next (vm,
+                                   l2fwd_node.index,
+                                   node_index);
+
+  next_by_ethertype_register (&mp->l3_next, type, next_index);
+}
+
+
+// set subinterface forward enable/disable
+// The CLI format is:
+//    set interface l2 forward <interface> [disable]
+// On an xconnect interface the XCONNECT feature bit is toggled instead of
+// the FWD bit.
+static clib_error_t *
+int_fwd (vlib_main_t * vm,
+         unformat_input_t * input,
+         vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  clib_error_t * error = 0;
+  u32 sw_if_index;
+  u32 enable;
+
+  if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  enable = 1;
+  if (unformat (input, "disable")) {
+    enable = 0;
+  }
+
+  // set the interface flag
+  if (l2input_intf_config(sw_if_index)->xconnect) {
+    l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_XCONNECT, enable);
+  } else {
+    l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_FWD, enable);
+  }
+
+ done:
+  return error;
+}
+
+VLIB_CLI_COMMAND (int_fwd_cli, static) = {
+  .path = "set interface l2 forward",
+  .short_help = "set interface l2 forward <interface> [disable]",
+  .function = int_fwd,
+};
diff --git a/vnet/vnet/l2/l2_fwd.h b/vnet/vnet/l2/l2_fwd.h
new file mode 100644
index 00000000000..f08717dfdf8
--- /dev/null
+++ b/vnet/vnet/l2/l2_fwd.h
@@ -0,0 +1,29 @@
+/*
+ * l2_fwd.h : layer 2 forwarding using l2fib
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2fwd_h
+#define included_l2fwd_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+/*
+ * Make node_index a next node of l2-fwd and record the resulting next
+ * index as the L3 input node for the given ethertype.
+ */
+void l2fwd_register_input_type (vlib_main_t * vm,
+                                ethernet_type_t type,
+                                u32 node_index);
+
+#endif /* included_l2fwd_h */
diff --git a/vnet/vnet/l2/l2_input.c b/vnet/vnet/l2/l2_input.c
new file mode 100644
index 00000000000..34f8a77184f
--- /dev/null
+++ b/vnet/vnet/l2/l2_input.c
@@ -0,0 +1,963 @@
+/*
+ * l2_input.c : layer 2 input packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_bvi.h>
+#include <vnet/l2/l2_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+extern clib_error_t *
+ethernet_arp_hw_interface_link_up_down (vnet_main_t * vnm,
+                                        u32 hw_if_index,
+                                        u32 flags);
+
+// Graph node name of every L2 input feature, generated from the
+// foreach_l2input_feat list
+static char * l2input_feat_names[] = {
+#define _(sym,name) name,
+  foreach_l2input_feat
+#undef _
+};
+
+// Accessor for the feature node name table above
+char **
+l2input_get_feat_names (void)
+{
+  return l2input_feat_names;
+}
+
+
+// Data captured for each traced packet
+typedef struct {
+  u8 src[6];        // source MAC address
+  u8 dst[6];        // destination MAC address
+  u32 next_index;   // not printed by format_l2input_trace
+  u32 sw_if_index;  // interface index stored by the node when tracing
+} l2input_trace_t;
+
+// Render one l2input_trace_t record for the packet tracer
+static u8 *
+format_l2input_trace (u8 * s, va_list * args)
+{
+  l2input_trace_t * trace;
+
+  // the vlib_main_t and vlib_node_t arguments are not needed here, but
+  // they have to be consumed to reach the trace record
+  (void) va_arg (*args, vlib_main_t *);
+  (void) va_arg (*args, vlib_node_t *);
+  trace = va_arg (*args, l2input_trace_t *);
+
+  return format (s, "l2-input: sw_if_index %d dst %U src %U",
+                 trace->sw_if_index,
+                 format_ethernet_address, trace->dst,
+                 format_ethernet_address, trace->src);
+}
+
+l2input_main_t l2input_main;
+
+static vlib_node_registration_t l2input_node;
+
+// Error counters of the l2-input node: (symbol, description)
+#define foreach_l2input_error       \
+_(L2INPUT,  "L2 input packets")     \
+_(DROP,     "L2 input drops")
+
+typedef enum {
+#define _(sym,str) L2INPUT_ERROR_##sym,
+  foreach_l2input_error
+#undef _
+  L2INPUT_N_ERROR,
+} l2input_error_t;
+
+static char * l2input_error_strings[] = {
+#define _(sym,string) string,
+  foreach_l2input_error
+#undef _
+};
+
+// Next node slots of the l2-input node
+typedef enum {
+  L2INPUT_NEXT_LEARN,
+  L2INPUT_NEXT_FWD,
+  L2INPUT_NEXT_DROP,
+  L2INPUT_N_NEXT,
+} l2input_next_t;
+
+
+static_always_inline void +classify_and_dispatch (vlib_main_t * vm, + vlib_node_runtime_t * node, + u32 cpu_index, + l2input_main_t * msm, + vlib_buffer_t * b0, + u32 *next0) +{ + // Load L2 input feature struct + // Load bridge domain struct + // Parse ethernet header to determine unicast/mcast/broadcast + // take L2 input stat + // classify packet as IP/UDP/TCP, control, other + // mask feature bitmap + // go to first node in bitmap + // Later: optimize VTM + // + // For L2XC, + // set tx sw-if-handle + + u8 mcast_dmac; + __attribute__((unused)) u8 l2bcast; + __attribute__((unused)) u8 l2mcast; + __attribute__((unused)) u8 l2_stat_kind; + u16 ethertype; + u8 protocol; + l2_input_config_t *config; + l2_bridge_domain_t *bd_config; + u16 bd_index0; + u32 feature_bitmap; + u32 feat_mask; + ethernet_header_t * h0; + u8 * l3h0; + u32 sw_if_index0; + u8 bvi_flg = 0; + +#define get_u32(addr) ( *((u32 *)(addr)) ) +#define get_u16(addr) ( *((u16 *)(addr)) ) +#define STATS_IF_LAYER2_UCAST_INPUT_CNT 0 +#define STATS_IF_LAYER2_MCAST_INPUT_CNT 1 +#define STATS_IF_LAYER2_BCAST_INPUT_CNT 2 + + // Check for from-BVI processing + // When we come from ethernet-input, TX is ~0 + if (PREDICT_FALSE (vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0)) { + // Set up for a from-bvi packet + bvi_to_l2 (vm, + msm->vnet_main, + cpu_index, + b0, + vnet_buffer(b0)->sw_if_index[VLIB_TX]); + bvi_flg = 1; + } + + // The RX interface can be changed by bvi_to_l2() + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + h0 = vlib_buffer_get_current (b0); + l3h0 = (u8 *)h0 + vnet_buffer(b0)->l2.l2_len; + + // Determine L3 packet type. Only need to check the common types. + // Used to filter out features that don't apply to common packets. 
+ ethertype = clib_net_to_host_u16(get_u16(l3h0 - 2)); + if (ethertype == ETHERNET_TYPE_IP4) { + protocol = ((ip4_header_t *)l3h0)->protocol; + if ((protocol == IP_PROTOCOL_UDP) || + (protocol == IP_PROTOCOL_TCP)) { + feat_mask = IP_UDP_TCP_FEAT_MASK; + } else { + feat_mask = IP4_FEAT_MASK; + } + } else if (ethertype == ETHERNET_TYPE_IP6) { + protocol = ((ip6_header_t *)l3h0)->protocol; + // Don't bother checking for extension headers for now + if ((protocol == IP_PROTOCOL_UDP) || + (protocol == IP_PROTOCOL_TCP)) { + feat_mask = IP_UDP_TCP_FEAT_MASK; + } else { + feat_mask = IP6_FEAT_MASK; + } + } else if (ethertype == ETHERNET_TYPE_MPLS_UNICAST) { + feat_mask = IP6_FEAT_MASK; + } else { + // allow all features + feat_mask = ~0; + } + + // determine layer2 kind for stat and mask + mcast_dmac = ethernet_address_cast(h0->dst_address); + l2bcast = 0; + l2mcast = 0; + l2_stat_kind = STATS_IF_LAYER2_UCAST_INPUT_CNT; + if (PREDICT_FALSE (mcast_dmac)) { + u32 *dsthi = (u32 *) &h0->dst_address[0]; + u32 *dstlo = (u32 *) &h0->dst_address[2]; + + // Disable bridge forwarding (flooding will execute instead if not xconnect) + feat_mask &= ~(L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD); + if (ethertype != ETHERNET_TYPE_ARP) // Disable ARP-term for non-ARP packet + feat_mask &= ~(L2INPUT_FEAT_ARP_TERM); + + // dest mac is multicast or broadcast + if ((*dstlo == 0xFFFFFFFF) && (*dsthi == 0xFFFFFFFF)) { + // dest mac == FF:FF:FF:FF:FF:FF + l2_stat_kind = STATS_IF_LAYER2_BCAST_INPUT_CNT; + l2bcast=1; + } else { + l2_stat_kind = STATS_IF_LAYER2_MCAST_INPUT_CNT; + l2mcast=1; + } + } + // TODO: take l2 stat + + // Get config for the input interface + config = vec_elt_at_index(msm->configs, sw_if_index0); + + // Save split horizon group, use 0 for BVI to make sure not dropped + vnet_buffer(b0)->l2.shg = bvi_flg ? 
0 : config->shg; + + if (config->xconnect) { + // Set the output interface + vnet_buffer(b0)->sw_if_index[VLIB_TX] = config->output_sw_if_index; + + } else { + + // Do bridge-domain processing + bd_index0 = config->bd_index; + // save BD ID for next feature graph nodes + vnet_buffer(b0)->l2.bd_index = bd_index0; + + // Get config for the bridge domain interface + bd_config = vec_elt_at_index(msm->bd_configs, bd_index0); + + // Process bridge domain feature enables. + // To perform learning/flooding/forwarding, the corresponding bit + // must be enabled in both the input interface config and in the + // bridge domain config. In the bd_bitmap, bits for features other + // than learning/flooding/forwarding should always be set. + feat_mask = feat_mask & bd_config->feature_bitmap; + } + + // mask out features from bitmap using packet type and bd config + feature_bitmap = config->feature_bitmap & feat_mask; + + // save for next feature graph nodes + vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap; + + // Determine the next node + *next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index, + feature_bitmap); +} + + +static uword +l2input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2input_next_t next_index; + l2input_main_t * msm = &l2input_main; + vlib_node_t *n = vlib_get_node (vm, l2input_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 6 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 
next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3, * p4 , * p5; + u32 sw_if_index2, sw_if_index3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + // Prefetch the buffer header and packet for the N+2 loop iteration + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + + // Prefetch the input config for the N+1 loop iteration + // This depends on the buffer header above + sw_if_index2 = vnet_buffer(p2)->sw_if_index[VLIB_RX]; + sw_if_index3 = vnet_buffer(p3)->sw_if_index[VLIB_RX]; + CLIB_PREFETCH (&msm->configs[sw_if_index2], CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&msm->configs[sw_if_index3], CLIB_CACHE_LINE_BYTES, LOAD); + + // Don't bother prefetching the bridge-domain config (which + // depends on the input config above). Only a small number of + // bridge domains are expected. Plus the structure is small + // and several fit in a cache line. 
+ } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) { + /* RX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + if (b0->flags & VLIB_BUFFER_IS_TRACED) { + ethernet_header_t * h0 = vlib_buffer_get_current (b0); + l2input_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) { + ethernet_header_t * h1 = vlib_buffer_get_current (b1); + l2input_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + } + } + + em->counters[node_counter_base_index + L2INPUT_ERROR_L2INPUT] += 2; + + classify_and_dispatch (vm, + node, + cpu_index, + msm, + b0, + &next0); + + classify_and_dispatch (vm, + node, + cpu_index, + msm, + b1, + &next1); + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & 
VLIB_BUFFER_IS_TRACED))) { + ethernet_header_t * h0 = vlib_buffer_get_current (b0); + l2input_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + + em->counters[node_counter_base_index + L2INPUT_ERROR_L2INPUT] += 1; + + classify_and_dispatch (vm, + node, + cpu_index, + msm, + b0, + &next0); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (l2input_node,static) = { + .function = l2input_node_fn, + .name = "l2-input", + .vector_size = sizeof (u32), + .format_trace = format_l2input_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2input_error_strings), + .error_strings = l2input_error_strings, + + .n_next_nodes = L2INPUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2INPUT_NEXT_LEARN] = "l2-learn", + [L2INPUT_NEXT_FWD] = "l2-fwd", + [L2INPUT_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2input_init (vlib_main_t *vm) +{ + l2input_main_t * mp = &l2input_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Get packets RX'd from L2 interfaces + ethernet_register_l2_input (vm, l2input_node.index); + + // Create the config vector + vec_validate(mp->configs, 100); + // create 100 sw interface entries and zero them + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2input_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names(), + mp->feat_next_node_index); + + return 0; +} + +VLIB_INIT_FUNCTION (l2input_init); + + +// Get a pointer to the config for the given interface +l2_input_config_t * l2input_intf_config (u32 sw_if_index) +{ + l2input_main_t * mp = 
&l2input_main; + + vec_validate(mp->configs, sw_if_index); + return vec_elt_at_index(mp->configs, sw_if_index); +} + +// Enable (or disable) the feature in the bitmap for the given interface +u32 l2input_intf_bitmap_enable (u32 sw_if_index, + u32 feature_bitmap, + u32 enable) +{ + l2input_main_t * mp = &l2input_main; + l2_input_config_t *config; + + vec_validate(mp->configs, sw_if_index); + config = vec_elt_at_index(mp->configs, sw_if_index); + + if (enable) { + config->feature_bitmap |= feature_bitmap; + } else { + config->feature_bitmap &= ~feature_bitmap; + } + + return config->feature_bitmap; +} + + + +// Set the subinterface to run in l2 or l3 mode. +// for L3 mode, just the sw_if_index is specified +// for bridged mode, the bd id and bvi flag are also specified +// for xconnect mode, the peer sw_if_index is also specified +// Return 0 if ok, or non-0 if there was an error + +u32 set_int_l2_mode (vlib_main_t * vm, + vnet_main_t * vnet_main, + u32 mode, + u32 sw_if_index, + u32 bd_index, // for bridged interface + u32 bvi, // the bridged interface is the BVI + u32 shg, // the bridged interface's split horizon group + u32 xc_sw_if_index) // peer interface for xconnect +{ + l2input_main_t * mp = &l2input_main; + vnet_main_t * vnm = vnet_get_main(); + vnet_hw_interface_t * hi; + l2_output_config_t * out_config; + l2_input_config_t * config; + l2_bridge_domain_t * bd_config; + l2_flood_member_t member; + u64 mac; + i32 l2_if_adjust = 0; + + hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index); + + vec_validate(mp->configs, sw_if_index); + config = vec_elt_at_index(mp->configs, sw_if_index); + + if (config->bridge) { + // Interface is already in bridge mode. Undo the existing config. 
+ bd_config = vec_elt_at_index(mp->bd_configs, config->bd_index); + + // remove interface from flood vector + bd_remove_member (bd_config, sw_if_index); + + // undo any BVI-related config + if (bd_config->bvi_sw_if_index == sw_if_index) { + bd_config->bvi_sw_if_index = ~0; + config->bvi = 0; + + // restore output node + hi->output_node_index = bd_config->saved_bvi_output_node_index; + + // delete the l2fib entry for the bvi interface + mac = *((u64 *)hi->hw_address); + l2fib_del_entry (mac, config->bd_index); + } + l2_if_adjust--; + } else if (config->xconnect) { + l2_if_adjust--; + } + + // Initialize the l2-input configuration for the interface + if (mode == MODE_L3) { + config->xconnect = 0; + config->bridge = 0; + config->shg = 0; + config->bd_index = 0; + config->feature_bitmap = L2INPUT_FEAT_DROP; + } else if (mode == MODE_L2_CLASSIFY) { + config->xconnect = 1; + config->bridge = 0; + config->output_sw_if_index = xc_sw_if_index; + + // Make sure last-chance drop is configured + config->feature_bitmap |= L2INPUT_FEAT_DROP | L2INPUT_FEAT_CLASSIFY; + + // Make sure bridging features are disabled + config->feature_bitmap &= + ~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD); + shg = 0; // not used in xconnect + + // Insure all packets go to ethernet-input + ethernet_set_rx_redirect (vnet_main, hi, 1); + } else { + + if (mode == MODE_L2_BRIDGE) { + /* + * Remove a check that the interface must be an Ethernet. + * Specifically so we can bridge to L3 tunnel interfaces. 
+ * Here's the check: + * if (hi->hw_class_index != ethernet_hw_interface_class.index) + * + */ + if (!hi) + return MODE_ERROR_ETH; // non-ethernet + + config->xconnect = 0; + config->bridge = 1; + config->bd_index = bd_index; + + // Enable forwarding, flooding, learning and ARP termination by default + // (note that ARP term is disabled on BD feature bitmap by default) + config->feature_bitmap |= L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD | + L2INPUT_FEAT_FLOOD | L2INPUT_FEAT_LEARN | L2INPUT_FEAT_ARP_TERM; + + // Make sure last-chance drop is configured + config->feature_bitmap |= L2INPUT_FEAT_DROP; + + // Make sure xconnect is disabled + config->feature_bitmap &= ~L2INPUT_FEAT_XCONNECT; + + // Set up bridge domain + vec_validate(mp->bd_configs, bd_index); + bd_config = vec_elt_at_index(mp->bd_configs, bd_index); + bd_validate (bd_config); + + // TODO: think: add l2fib entry even for non-bvi interface? + + // Do BVI interface initializations + if (bvi) { + // insure BD has no bvi interface (or replace that one with this??) + if (bd_config->bvi_sw_if_index != ~0) { + return MODE_ERROR_BVI_DEF; // bd already has a bvi interface + } + bd_config->bvi_sw_if_index = sw_if_index; + config->bvi = 1; + + // make BVI outputs go to l2-input + bd_config->saved_bvi_output_node_index = hi->output_node_index; + hi->output_node_index = l2input_node.index; + + // create the l2fib entry for the bvi interface + mac = *((u64 *)hi->hw_address); + l2fib_add_entry (mac, bd_index, sw_if_index, 1, 0, 1); // static + bvi + + // Disable learning by default. no use since l2fib entry is static. + config->feature_bitmap &= ~L2INPUT_FEAT_LEARN; + + // Add BVI to arp_input_next_index_by_hw_if_index table so arp-input + // node can send out ARP response via BVI to BD + ethernet_arp_hw_interface_link_up_down(vnet_main, hi->hw_if_index, 0); + + } + + // Add interface to bridge-domain flood vector + member.sw_if_index = sw_if_index; + member.flags = bvi ? 
L2_FLOOD_MEMBER_BVI : L2_FLOOD_MEMBER_NORMAL; + member.shg = shg; + bd_add_member (bd_config, &member); + + } else { + config->xconnect = 1; + config->bridge = 0; + config->output_sw_if_index = xc_sw_if_index; + + // Make sure last-chance drop is configured + config->feature_bitmap |= L2INPUT_FEAT_DROP; + + // Make sure bridging features are disabled + config->feature_bitmap &= ~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD); + + config->feature_bitmap |= L2INPUT_FEAT_XCONNECT; + shg = 0; // not used in xconnect + } + + // set up split-horizon group + config->shg = shg; + out_config = l2output_intf_config (sw_if_index); + out_config->shg = shg; + + // Test: remove this when non-IP features can be configured. + // Enable a non-IP feature to test IP feature masking + // config->feature_bitmap |= L2INPUT_FEAT_CTRL_PKT; + + l2_if_adjust++; + } + + // Adjust count of L2 interfaces + hi->l2_if_count += l2_if_adjust; + + if (hi->hw_class_index == ethernet_hw_interface_class.index) { + if ((hi->l2_if_count == 1) && (l2_if_adjust == 1)) { + // Just added first L2 interface on this port + + // Set promiscuous mode on the l2 interface + ethernet_set_flags (vnet_main, hi->hw_if_index, + ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); + + // Insure all packets go to ethernet-input + ethernet_set_rx_redirect (vnet_main, hi, 1); + + } else if ((hi->l2_if_count == 0) && (l2_if_adjust == -1)) { + // Just removed only L2 subinterface on this port + + // Disable promiscuous mode on the l2 interface + ethernet_set_flags (vnet_main, hi->hw_if_index, 0); + + // Allow ip packets to go directly to ip4-input etc + ethernet_set_rx_redirect (vnet_main, hi, 0); + } + } + + // Set up the L2/L3 flag in the interface parsing tables + ethernet_sw_interface_set_l2_mode(vnm, sw_if_index, (mode!=MODE_L3)); + + return 0; +} + +// set subinterface in bridging mode with a bridge-domain ID +// The CLI format is: +// set interface l2 bridge <interface> <bd> [bvi] [split-horizon-group] +static 
clib_error_t * +int_l2_bridge (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 bd_index, bd_id; + u32 sw_if_index; + u32 bvi; + u32 rc; + u32 shg; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (!unformat (input, "%d", &bd_id)) { + error = clib_error_return (0, "expected bridge domain ID `%U'", + format_unformat_error, input); + goto done; + } + + bd_index = bd_find_or_add_bd_index (&bd_main, bd_id); + + // optional bvi + bvi = unformat (input, "bvi"); + + // optional split horizon group + shg = 0; + (void) unformat (input, "%d", &shg); + + // set the interface mode + if ((rc = set_int_l2_mode(vm, vnm, MODE_L2_BRIDGE, sw_if_index, bd_index, bvi, shg, 0))) { + if (rc == MODE_ERROR_ETH) { + error = clib_error_return (0, "bridged interface must be ethernet", + format_unformat_error, input); + } else if (rc == MODE_ERROR_BVI_DEF) { + error = clib_error_return (0, "bridge-domain already has a bvi interface", + format_unformat_error, input); + } else { + error = clib_error_return (0, "invalid configuration for interface", + format_unformat_error, input); + } + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (int_l2_bridge_cli, static) = { + .path = "set interface l2 bridge", + .short_help = "set interface to L2 bridging mode in <bridge-domain ID> [bvi] [shg]", + .function = int_l2_bridge, +}; + +// set subinterface in xconnect mode with another interface +// The CLI format is: +// set interface l2 xconnect <interface> <peer interface> +static clib_error_t * +int_l2_xc (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 xc_sw_if_index; + + if (! 
unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &xc_sw_if_index)) + { + error = clib_error_return (0, "unknown peer interface `%U'", + format_unformat_error, input); + goto done; + } + + // set the interface mode + if (set_int_l2_mode(vm, vnm, MODE_L2_XC, sw_if_index, 0, 0, 0, xc_sw_if_index)) { + error = clib_error_return (0, "invalid configuration for interface", + format_unformat_error, input); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (int_l2_xc_cli, static) = { + .path = "set interface l2 xconnect", + .short_help = "set interface to L2 cross-connect mode with <peer interface>", + .function = int_l2_xc, +}; + +// set subinterface in L3 mode +// The CLI format is: +// set interface l3 <interface> +static clib_error_t * +int_l3 (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + // set the interface mode + if (set_int_l2_mode(vm, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0)) { + error = clib_error_return (0, "invalid configuration for interface", + format_unformat_error, input); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (int_l3_cli, static) = { + .path = "set interface l3", + .short_help = "set interface to L3 mode", + .function = int_l3, +}; + +// The CLI format is: +// show mode [<if-name1> <if-name2> ...] 
+static clib_error_t * +show_int_mode (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + char * mode; + u8 * args; + vnet_interface_main_t * im = &vnm->interface_main; + vnet_sw_interface_t * si, * sis = 0; + l2input_main_t * mp = &l2input_main; + l2_input_config_t * config; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + u32 sw_if_index; + + /* See if user wants to show specific interface */ + if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + si = pool_elt_at_index (im->sw_interfaces, sw_if_index); + vec_add1 (sis, si[0]); + } + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + + } + + if (vec_len (sis) == 0) /* Get all interfaces */ + { + /* Gather interfaces. */ + sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces)); + _vec_len (sis) = 0; + pool_foreach (si, im->sw_interfaces, ({ vec_add1 (sis, si[0]); })); + } + + vec_foreach (si, sis) + { + vec_validate(mp->configs, si->sw_if_index); + config = vec_elt_at_index(mp->configs, si->sw_if_index); + if (config->bridge) { + u32 bd_id; + mode = "l2 bridge"; + bd_id = l2input_main.bd_configs[config->bd_index].bd_id; + + args = format (0, "bd_id %d%s%d", bd_id, + config->bvi ? 
" bvi shg " : " shg ", config->shg); + } else if (config->xconnect) { + mode = "l2 xconnect"; + args = format (0, "%U", + format_vnet_sw_if_index_name, + vnm, config->output_sw_if_index); + } else { + mode = "l3"; + args = format (0, " "); + } + vlib_cli_output (vm, "%s %U %v\n", + mode, + format_vnet_sw_if_index_name, + vnm, si->sw_if_index, + args); + vec_free (args); + } + +done: + vec_free (sis); + + return error; +} + +VLIB_CLI_COMMAND (show_l2_mode, static) = { + .path = "show mode", + .short_help = "show mode [<if-name1> <if-name2> ...]", + .function = show_int_mode, +}; + +#define foreach_l2_init_function \ +_(feat_bitmap_drop_init) \ +_(l2fib_init) \ +_(l2_classify_init) \ +_(l2bd_init) \ +_(l2fwd_init) \ +_(l2_inacl_init) \ +_(l2input_init) \ +_(l2_vtr_init) \ +_(l2_invtr_init) \ +_(l2_efp_filter_init) \ +_(l2learn_init) \ +_(l2flood_init) \ +_(l2_outacl_init) \ +_(l2output_init) \ +_(l2_patch_init) \ +_(l2_xcrw_init) + +clib_error_t *l2_init (vlib_main_t * vm) +{ + clib_error_t * error; + +#define _(a) do { \ + if ((error = vlib_call_init_function (vm, a))) return error; } \ +while (0); + foreach_l2_init_function; +#undef _ + return 0; +} + +VLIB_INIT_FUNCTION (l2_init); diff --git a/vnet/vnet/l2/l2_input.h b/vnet/vnet/l2/l2_input.h new file mode 100644 index 00000000000..e650162b593 --- /dev/null +++ b/vnet/vnet/l2/l2_input.h @@ -0,0 +1,279 @@ +/* + * l2_input.h : layer 2 input packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// Per-subinterface L2 feature configuration

typedef struct {

  // The two members below share storage. set_int_l2_mode() writes bd_index
  // when the interface is put in bridge mode and output_sw_if_index when it
  // is put in xconnect (or classify) mode, so only the one matching the
  // current mode is meaningful.
  union {
    u16 bd_index;            // index into l2input_main.bd_configs; the
                             // user-visible bridge domain ID is that
                             // entry's bd_id field
    u32 output_sw_if_index;  // for xconnect: peer interface
  };

  // Interface mode. If both are 0, this interface is in L3 mode
  u8  xconnect;
  u8  bridge;

  // this is the bvi interface for the bridge-domain
  u8  bvi;

  // config for which input features are configured on this interface
  // (bit mask of L2INPUT_FEAT_* values)
  u32 feature_bitmap;

  // some of these flags are also in the feature bitmap
  // NOTE(review): no code in view reads or writes these three fields -
  // confirm whether they are still used.
  u8 learn_enable;
  u8 fwd_enable;
  u8 flood_enable;

  // split horizon group
  u8 shg;

} l2_input_config_t;
bd_config : NULL; +} + +// L2 input features + +// Mappings from feature ID to graph node name +#define foreach_l2input_feat \ + _(DROP, "feature-bitmap-drop") \ + _(CLASSIFY, "l2-classify") \ + _(XCONNECT, "l2-output") \ + _(IPIW, "feature-bitmap-drop") \ + _(FLOOD, "l2-flood") \ + _(ARP_TERM, "arp-term-l2bd") \ + _(UU_FLOOD, "l2-flood") \ + _(FWD, "l2-fwd") \ + _(LEARN, "l2-learn") \ + _(VTR, "l2-input-vtr") \ + _(VPATH, "vpath-input-l2") \ + _(CTRL_PKT, "feature-bitmap-drop") \ + _(L2PT, "feature-bitmap-drop") \ + _(IGMP_SNOOP, "feature-bitmap-drop") \ + _(MLD_SNOOP, "feature-bitmap-drop") \ + _(DHCP_SNOOP, "feature-bitmap-drop") \ + _(DAI, "feature-bitmap-drop") \ + _(IPSG, "feature-bitmap-drop") \ + _(ACL, "l2-input-acl") \ + _(QOS, "feature-bitmap-drop") \ + _(CFM, "feature-bitmap-drop") \ + _(SPAN, "feature-bitmap-drop") + +// Feature bitmap positions +typedef enum { +#define _(sym,str) L2INPUT_FEAT_##sym##_BIT, + foreach_l2input_feat +#undef _ + L2INPUT_N_FEAT, +} l2input_feat_t; + +// Feature bit masks +typedef enum { +#define _(sym,str) L2INPUT_FEAT_##sym = (1<<L2INPUT_FEAT_##sym##_BIT), + foreach_l2input_feat +#undef _ +} l2input_feat_masks_t; + +// Return an array of strings containing graph node names of each feature +char **l2input_get_feat_names(void); + + +static_always_inline u8 bd_feature_flood (l2_bridge_domain_t * bd_config) +{ + return ((bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD) == + L2INPUT_FEAT_FLOOD); +} + +static_always_inline u8 bd_feature_uu_flood (l2_bridge_domain_t * bd_config) +{ + return ((bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD) == + L2INPUT_FEAT_UU_FLOOD); +} + +static_always_inline u8 bd_feature_forward (l2_bridge_domain_t * bd_config) +{ + return ((bd_config->feature_bitmap & L2INPUT_FEAT_FWD) == + L2INPUT_FEAT_FWD); +} + +static_always_inline u8 bd_feature_learn (l2_bridge_domain_t * bd_config) +{ + return ((bd_config->feature_bitmap & L2INPUT_FEAT_LEARN) == + L2INPUT_FEAT_LEARN); +} + +static_always_inline u8 
bd_feature_arp_term (l2_bridge_domain_t * bd_config) +{ + return ((bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM) == + L2INPUT_FEAT_ARP_TERM); +} + +// Masks for eliminating features that do not apply to a packet + +#define IP4_FEAT_MASK ~(L2INPUT_FEAT_CTRL_PKT | \ + L2INPUT_FEAT_MLD_SNOOP | \ + L2INPUT_FEAT_L2PT | \ + L2INPUT_FEAT_CFM | \ + L2INPUT_FEAT_DAI) + +#define IP6_FEAT_MASK ~(L2INPUT_FEAT_CTRL_PKT | \ + L2INPUT_FEAT_IGMP_SNOOP | \ + L2INPUT_FEAT_L2PT | \ + L2INPUT_FEAT_CFM | \ + L2INPUT_FEAT_DAI) + +#define IP_UDP_TCP_FEAT_MASK ~(L2INPUT_FEAT_CTRL_PKT | \ + L2INPUT_FEAT_L2PT | \ + L2INPUT_FEAT_IGMP_SNOOP | \ + L2INPUT_FEAT_MLD_SNOOP | \ + L2INPUT_FEAT_DHCP_SNOOP | \ + L2INPUT_FEAT_CFM | \ + L2INPUT_FEAT_DAI) + +#define MPLS_FEAT_MASK ~(L2INPUT_FEAT_CTRL_PKT | \ + L2INPUT_FEAT_L2PT | \ + L2INPUT_FEAT_IGMP_SNOOP | \ + L2INPUT_FEAT_MLD_SNOOP | \ + L2INPUT_FEAT_DHCP_SNOOP | \ + L2INPUT_FEAT_CFM | \ + L2INPUT_FEAT_DAI) + + +// Get a pointer to the config for the given interface +l2_input_config_t * l2input_intf_config (u32 sw_if_index); + +// Enable (or disable) the feature in the bitmap for the given interface +u32 l2input_intf_bitmap_enable (u32 sw_if_index, + u32 feature_bitmap, + u32 enable); + + +#define MODE_L3 0 +#define MODE_L2_BRIDGE 1 +#define MODE_L2_XC 2 +#define MODE_L2_CLASSIFY 3 + +#define MODE_ERROR_ETH 1 +#define MODE_ERROR_BVI_DEF 2 + +u32 set_int_l2_mode (vlib_main_t * vm, + vnet_main_t * vnet_main, + u32 mode, + u32 sw_if_index, + u32 bd_index, + u32 bvi, + u32 shg, + u32 xc_sw_if_index); + +static inline void +vnet_update_l2_len (vlib_buffer_t * b) +{ + ethernet_header_t * eth; + u16 ethertype; + + /* point at currrent l2 hdr */ + eth = vlib_buffer_get_current (b); + + /* + * l2-output pays no attention to this + * but the tag push/pop code on an l2 subif needs it. 
+ * + * Determine l2 header len, check for up to 2 vlans + */ + vnet_buffer(b)->l2.l2_len = sizeof(ethernet_header_t); + ethertype = clib_net_to_host_u16(eth->type); + if ((ethertype == ETHERNET_TYPE_VLAN) || + (ethertype == ETHERNET_TYPE_DOT1AD) || + (ethertype == ETHERNET_TYPE_VLAN_9100) || + (ethertype == ETHERNET_TYPE_VLAN_9200)) { + ethernet_vlan_header_t * vlan; + vnet_buffer(b)->l2.l2_len += sizeof (*vlan); + vlan = (void *) (eth+1); + ethertype = clib_net_to_host_u16 (vlan->type); + if (ethertype == ETHERNET_TYPE_VLAN) { + vnet_buffer(b)->l2.l2_len += sizeof (*vlan); + } + } +} + +/* + * Compute flow hash of an ethernet packet, use 5-tuple hash if L3 packet + * is ip4 or ip6. Otherwise hash on smac/dmac/etype. + * The vlib buffer current pointer is expected to be at ethernet header + * and vnet l2.l2_len is exppected to be setup already. + */ +static inline u32 vnet_l2_compute_flow_hash (vlib_buffer_t *b) +{ + ethernet_header_t * eh = vlib_buffer_get_current(b); + u8 * l3h = (u8 *)eh + vnet_buffer(b)->l2.l2_len; + u16 ethertype = clib_net_to_host_u16(*(u16 *)(l3h - 2)); + + if (ethertype == ETHERNET_TYPE_IP4) + return ip4_compute_flow_hash((ip4_header_t *) l3h, IP_FLOW_HASH_DEFAULT); + else if (ethertype == ETHERNET_TYPE_IP6) + return ip6_compute_flow_hash((ip6_header_t *) l3h, IP_FLOW_HASH_DEFAULT); + else + { + u32 a, b, c; + u32 * ap = (u32 *) &eh->dst_address[2]; + u32 * bp = (u32 *) &eh->src_address[2]; + a = * ap; + b = * bp; + c = ethertype; + hash_v3_mix32 (a, b, c); + hash_v3_finalize32 (a, b, c); + return c; + } +} + +#endif + diff --git a/vnet/vnet/l2/l2_input_acl.c b/vnet/vnet/l2/l2_input_acl.c new file mode 100644 index 00000000000..77fa8944e9f --- /dev/null +++ b/vnet/vnet/l2/l2_input_acl.c @@ -0,0 +1,427 @@ +/* + * l2_input_acl.c : layer 2 input acl processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + +#include <vnet/classify/vnet_classify.h> +#include <vnet/classify/input_acl.h> + +typedef struct { + + // Next nodes for each feature + u32 feat_next_node_index[32]; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2_inacl_main_t; + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 table_index; + u32 offset; +} l2_inacl_trace_t; + +/* packet trace format function */ +static u8 * format_l2_inacl_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_inacl_trace_t * t = va_arg (*args, l2_inacl_trace_t *); + + s = format (s, "INACL: sw_if_index %d, next_index %d, table %d, offset %d", + t->sw_if_index, t->next_index, t->table_index, t->offset); + return s; +} + +l2_inacl_main_t l2_inacl_main; + +static vlib_node_registration_t l2_inacl_node; + +#define foreach_l2_inacl_error \ +_(NONE, "valid input ACL packets") \ +_(MISS, 
/*
 * l2-input-acl node function.
 *
 * Works in two passes over the frame:
 *   1. For every buffer, look up the classify table configured for its RX
 *      interface, hash the packet against that table, and stash the hash and
 *      table index in the buffer's l2_classify opaque data. The hash bucket
 *      is prefetched.
 *   2. For every buffer, clear the ACL bit in the L2 feature bitmap, pick the
 *      default next node from the remaining features, then search the
 *      classify table (and any chained tables) for a matching entry. A hit
 *      or a table miss may override the next node and sets b0->error.
 *
 * Hit, miss and chain-hit totals are added to the node counters at the end.
 * Returns the number of buffers in the frame.
 */
static uword
l2_inacl_node_fn (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
                  vlib_frame_t * frame)
{
  u32 n_left_from, * from, * to_next;
  acl_next_index_t next_index;
  l2_inacl_main_t * msm = &l2_inacl_main;
  input_acl_main_t * am = &input_acl_main;
  vnet_classify_main_t * vcm = am->vnet_classify_main;
  input_acl_table_id_t tid = INPUT_ACL_TABLE_L2;
  f64 now = vlib_time_now (vm);
  u32 hits = 0;
  u32 misses = 0;
  u32 chain_hits = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors; /* number of packets to process */
  next_index = node->cached_next_index;

  /* First pass: compute hashes */
  /*
   * NOTE(review): unlike the second pass, neither loop of this pass tests
   * table_index against ~0 before indexing vcm->tables - confirm that the
   * ACL feature is only enabled on interfaces with a table assigned.
   */
  while (n_left_from > 2)
    {
      vlib_buffer_t * b0, * b1;
      u32 bi0, bi1;
      u8 * h0, * h1;
      u32 sw_if_index0, sw_if_index1;
      u32 table_index0, table_index1;
      vnet_classify_table_t * t0, * t1;

      /* prefetch next iteration */
      {
        vlib_buffer_t * p1, * p2;

        p1 = vlib_get_buffer (vm, from[1]);
        p2 = vlib_get_buffer (vm, from[2]);

        vlib_prefetch_buffer_header (p1, STORE);
        CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
        vlib_prefetch_buffer_header (p2, STORE);
        CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
      }

      /* Hashing starts at the beginning of the buffer data area */
      bi0 = from[0];
      b0 = vlib_get_buffer (vm, bi0);
      h0 = b0->data;

      bi1 = from[1];
      b1 = vlib_get_buffer (vm, bi1);
      h1 = b1->data;

      /* The table to use is selected by the RX interface */
      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
      table_index0 = am->classify_table_index_by_sw_if_index[tid][sw_if_index0];

      sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
      table_index1 = am->classify_table_index_by_sw_if_index[tid][sw_if_index1];

      t0 = pool_elt_at_index (vcm->tables, table_index0);

      t1 = pool_elt_at_index (vcm->tables, table_index1);

      vnet_buffer(b0)->l2_classify.hash =
        vnet_classify_hash_packet (t0, (u8 *) h0);

      vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash);

      vnet_buffer(b1)->l2_classify.hash =
        vnet_classify_hash_packet (t1, (u8 *) h1);

      vnet_classify_prefetch_bucket (t1, vnet_buffer(b1)->l2_classify.hash);

      /* Remember the table so the second pass need not look it up again */
      vnet_buffer(b0)->l2_classify.table_index = table_index0;

      vnet_buffer(b1)->l2_classify.table_index = table_index1;

      from += 2;
      n_left_from -= 2;
    }

  /* First pass, remaining buffers: same work, one buffer at a time */
  while (n_left_from > 0)
    {
      vlib_buffer_t * b0;
      u32 bi0;
      u8 * h0;
      u32 sw_if_index0;
      u32 table_index0;
      vnet_classify_table_t * t0;

      bi0 = from[0];
      b0 = vlib_get_buffer (vm, bi0);
      h0 = b0->data;

      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
      table_index0 = am->classify_table_index_by_sw_if_index[tid][sw_if_index0];

      t0 = pool_elt_at_index (vcm->tables, table_index0);
      vnet_buffer(b0)->l2_classify.hash =
        vnet_classify_hash_packet (t0, (u8 *) h0);

      vnet_buffer(b0)->l2_classify.table_index = table_index0;
      vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash);

      from++;
      n_left_from--;
    }

  /* Second pass: rewind to the start of the frame and classify */
  next_index = node->cached_next_index;
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);

      /* Not enough load/store slots to dual loop... */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t * b0;
          u32 next0 = ACL_NEXT_INDEX_DENY;
          u32 table_index0;
          vnet_classify_table_t * t0;
          vnet_classify_entry_t * e0;
          u64 hash0;
          u8 * h0;
          u8 error0;

          /* Stride 3 seems to work best */
          /* Prefetch the classify entry for the buffer 3 slots ahead */
          if (PREDICT_TRUE (n_left_from > 3))
            {
              vlib_buffer_t * p1 = vlib_get_buffer(vm, from[3]);
              vnet_classify_table_t * tp1;
              u32 table_index1;
              u64 phash1;

              table_index1 = vnet_buffer(p1)->l2_classify.table_index;

              if (PREDICT_TRUE (table_index1 != ~0))
                {
                  tp1 = pool_elt_at_index (vcm->tables, table_index1);
                  phash1 = vnet_buffer(p1)->l2_classify.hash;
                  vnet_classify_prefetch_entry (tp1, phash1);
                }
            }

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          h0 = b0->data;
          table_index0 = vnet_buffer(b0)->l2_classify.table_index;
          /* e0/t0 stay 0 when no table is searched; the trace code checks them */
          e0 = 0;
          t0 = 0;

          /* Feature bitmap update */
          vnet_buffer(b0)->l2.feature_bitmap &= ~L2INPUT_FEAT_ACL;

          /* Determine the next node */
          /* This is the default; a matching entry or table miss may override it */
          next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index,
                                                  vnet_buffer(b0)->l2.feature_bitmap);

          if (PREDICT_TRUE(table_index0 != ~0))
            {
              hash0 = vnet_buffer(b0)->l2_classify.hash;
              t0 = pool_elt_at_index (vcm->tables, table_index0);

              e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0,
                                             now);
              if (e0)
                {
                  /* Hit in the first table */
                  vlib_buffer_advance (b0, e0->advance);

                  /* Use the entry's next index only if it is a valid one */
                  next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT)?
                    e0->next_index:next0;

                  hits++;

                  error0 = (next0 == ACL_NEXT_INDEX_DENY)?
                    L2_INACL_ERROR_SESSION_DENY:L2_INACL_ERROR_NONE;
                  b0->error = node->errors[error0];
                }
              else
                {
                  /* Miss: walk the chain of tables until a hit or the end */
                  while (1)
                    {
                      if (PREDICT_TRUE(t0->next_table_index != ~0))
                        t0 = pool_elt_at_index (vcm->tables,
                                                t0->next_table_index);
                      else
                        {
                          /* End of chain: apply the last table's miss action */
                          next0 = (t0->miss_next_index < ACL_NEXT_INDEX_N_NEXT)?
                            t0->miss_next_index:next0;

                          misses++;

                          error0 = (next0 == ACL_NEXT_INDEX_DENY)?
                            L2_INACL_ERROR_TABLE_MISS:L2_INACL_ERROR_NONE;
                          b0->error = node->errors[error0];
                          break;
                        }

                      /* The hash must be recomputed for each chained table */
                      hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
                      e0 = vnet_classify_find_entry
                        (t0, (u8 *) h0, hash0, now);
                      if (e0)
                        {
                          vlib_buffer_advance (b0, e0->advance);
                          next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT)?
                            e0->next_index:next0;
                          hits++;
                          chain_hits++;

                          error0 = (next0 == ACL_NEXT_INDEX_DENY)?
                            L2_INACL_ERROR_SESSION_DENY:L2_INACL_ERROR_NONE;
                          b0->error = node->errors[error0];
                          break;
                        }
                    }
                }
            }

          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              l2_inacl_trace_t *t =
                vlib_add_trace (vm, node, b0, sizeof (*t));
              t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX];
              t->next_index = next0;
              /* ~0 marks "no table searched" / "no entry matched" */
              t->table_index = t0 ? t0 - vcm->tables : ~0;
              t->offset = e0 ? vnet_classify_get_offset (t0, e0): ~0;
            }

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  /* Publish the per-frame totals */
  vlib_node_increment_counter (vm, node->node_index,
                               L2_INACL_ERROR_MISS,
                               misses);
  vlib_node_increment_counter (vm, node->node_index,
                               L2_INACL_ERROR_HIT,
                               hits);
  vlib_node_increment_counter (vm, node->node_index,
                               L2_INACL_ERROR_CHAIN_HIT,
                               chain_hits);
  return frame->n_vectors;
}
*l2_inacl_init (vlib_main_t *vm) +{ + l2_inacl_main_t * mp = &l2_inacl_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2_inacl_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names(), + mp->feat_next_node_index); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_inacl_init); + + +// set subinterface inacl enable/disable +// The CLI format is: +// set interface acl input <interface> [disable] +static clib_error_t * +int_l2_inacl (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 enable; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the interface flag + l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_ACL, enable); + + done: + return error; +} + +VLIB_CLI_COMMAND (int_l2_inacl_cli, static) = { + .path = "set interface acl input", + .short_help = "set interface acl input <interface> [disable]", + .function = int_l2_inacl, +}; diff --git a/vnet/vnet/l2/l2_input_vtr.c b/vnet/vnet/l2/l2_input_vtr.c new file mode 100644 index 00000000000..d07a0287d04 --- /dev/null +++ b/vnet/vnet/l2/l2_input_vtr.c @@ -0,0 +1,314 @@ +/* + * l2_input_vtr.c : layer 2 input vlan tag rewrite processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_vtr.h> +#include <vnet/l2/l2_input_vtr.h> +#include <vnet/l2/l2_output.h> + +#include <vppinfra/error.h> +#include <vppinfra/cache.h> + + +typedef struct { + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u8 raw[12]; // raw data (vlans) + u32 sw_if_index; +} l2_invtr_trace_t; + +/* packet trace format function */ +static u8 * format_l2_invtr_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_invtr_trace_t * t = va_arg (*args, l2_invtr_trace_t *); + + s = format (s, "l2-input-vtr: sw_if_index %d dst %U src %U data " + "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4], t->raw[5], + t->raw[6], t->raw[7], t->raw[8], t->raw[9], t->raw[10], t->raw[11]); + return s; +} + +l2_invtr_main_t l2_invtr_main; + +static vlib_node_registration_t l2_invtr_node; + +#define foreach_l2_invtr_error \ +_(L2_INVTR, "L2 inverter packets") \ +_(DROP, "L2 input tag rewrite drops") + +typedef enum { +#define _(sym,str) L2_INVTR_ERROR_##sym, + foreach_l2_invtr_error +#undef _ + L2_INVTR_N_ERROR, +} l2_invtr_error_t; + +static char * l2_invtr_error_strings[] = { +#define _(sym,string) 
string, + foreach_l2_invtr_error +#undef _ +}; + +typedef enum { + L2_INVTR_NEXT_DROP, + L2_INVTR_N_NEXT, +} l2_invtr_next_t; + + +static uword +l2_invtr_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2_invtr_next_t next_index; + l2_invtr_main_t * msm = &l2_invtr_main; + // vlib_node_t *n = vlib_get_node (vm, l2_invtr_node.index); + // u32 node_counter_base_index = n->error_heap_index; + // vlib_error_main_t * em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 6 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + u32 feature_bitmap0, feature_bitmap1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3, * p4, * p5; + u32 sw_if_index2, sw_if_index3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + // Prefetch the buffer header and packet for the N+2 loop iteration + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + + // Prefetch the input config for the N+1 loop iteration + // This depends on the buffer header above + sw_if_index2 = vnet_buffer(p2)->sw_if_index[VLIB_RX]; + sw_if_index3 = vnet_buffer(p3)->sw_if_index[VLIB_RX]; + CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index2), CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index3), CLIB_CACHE_LINE_BYTES, LOAD); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* RX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + // process 2 packets + // em->counters[node_counter_base_index + L2_INVTR_ERROR_L2_INVTR] += 2; + + // Remove ourself from the feature bitmap + feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2INPUT_FEAT_VTR; + feature_bitmap1 = vnet_buffer(b1)->l2.feature_bitmap & ~L2INPUT_FEAT_VTR; + + // save for next feature graph nodes + vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap0; + vnet_buffer(b1)->l2.feature_bitmap = feature_bitmap1; + + // Determine the next node + next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index, + feature_bitmap0); + next1 = 
feat_bitmap_get_next_node_index(msm->feat_next_node_index, + feature_bitmap1); + + // perform the tag rewrite on two packets + if (l2_vtr_process(b0, &(vec_elt_at_index(l2output_main.configs, sw_if_index0)->input_vtr))) { + // Drop packet + next0 = L2_INVTR_NEXT_DROP; + b0->error = node->errors[L2_INVTR_ERROR_DROP]; + } + if (l2_vtr_process(b1, &(vec_elt_at_index(l2output_main.configs, sw_if_index1)->input_vtr))) { + // Drop packet + next1 = L2_INVTR_NEXT_DROP; + b1->error = node->errors[L2_INVTR_ERROR_DROP]; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) { + if (b0->flags & VLIB_BUFFER_IS_TRACED) { + l2_invtr_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + ethernet_header_t * h0 = vlib_buffer_get_current (b0); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + memcpy(t->raw, &h0->type, sizeof(t->raw)); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) { + l2_invtr_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + ethernet_header_t * h1 = vlib_buffer_get_current (b1); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + memcpy(t->raw, &h1->type, sizeof(t->raw)); + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + u32 feature_bitmap0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + // process 1 packet + // em->counters[node_counter_base_index + 
L2_INVTR_ERROR_L2_INVTR] += 1; + + // Remove ourself from the feature bitmap + feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2INPUT_FEAT_VTR; + + // save for next feature graph nodes + vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap0; + + // Determine the next node + next0 = feat_bitmap_get_next_node_index(msm->feat_next_node_index, + feature_bitmap0); + + // perform the tag rewrite on one packet + if (l2_vtr_process(b0, &(vec_elt_at_index(l2output_main.configs, sw_if_index0)->input_vtr))) { + // Drop packet + next0 = L2_INVTR_NEXT_DROP; + b0->error = node->errors[L2_INVTR_ERROR_DROP]; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + l2_invtr_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + ethernet_header_t * h0 = vlib_buffer_get_current (b0); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + memcpy(t->raw, &h0->type, sizeof(t->raw)); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (l2_invtr_node,static) = { + .function = l2_invtr_node_fn, + .name = "l2-input-vtr", + .vector_size = sizeof (u32), + .format_trace = format_l2_invtr_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_invtr_error_strings), + .error_strings = l2_invtr_error_strings, + + .n_next_nodes = L2_INVTR_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_INVTR_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2_invtr_init (vlib_main_t *vm) +{ + l2_invtr_main_t * mp = &l2_invtr_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2_invtr_node.index, + 
L2INPUT_N_FEAT, + l2input_get_feat_names(), + mp->feat_next_node_index); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_invtr_init); + diff --git a/vnet/vnet/l2/l2_input_vtr.h b/vnet/vnet/l2/l2_input_vtr.h new file mode 100644 index 00000000000..57c8e409dea --- /dev/null +++ b/vnet/vnet/l2/l2_input_vtr.h @@ -0,0 +1,43 @@ +/* + * l2_input_vtr.h : layer 2 input vlan tag rewrite processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_l2_input_vtr_h +#define included_vnet_l2_input_vtr_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_vtr.h> + + +typedef struct { + + // The input vtr data is located in l2_output_config_t because + // the same config data is used for the egress EFP Filter check. + + // Next nodes for each feature + u32 feat_next_node_index[32]; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2_invtr_main_t; + +extern l2_invtr_main_t l2_invtr_main; + +#endif // included_vnet_l2_input_vtr_h + diff --git a/vnet/vnet/l2/l2_learn.c b/vnet/vnet/l2/l2_learn.c new file mode 100644 index 00000000000..29315bedc98 --- /dev/null +++ b/vnet/vnet/l2/l2_learn.c @@ -0,0 +1,504 @@ +/* + * l2_learn.c : layer 2 learning using l2fib + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
#include <vnet/ethernet/ethernet.h>
#include <vlib/cli.h>

#include <vnet/l2/l2_input.h>
#include <vnet/l2/feat_bitmap.h>
#include <vnet/l2/l2_fib.h>
#include <vnet/l2/l2_learn.h>

#include <vppinfra/error.h>
#include <vppinfra/hash.h>

/*
 * Ethernet bridge learning
 *
 * Populate the mac table with entries mapping the packet's source mac + bridge
 * domain ID to the input sw_if_index.
 *
 * Note that learning and forwarding are separate graph nodes. This means that
 * for a set of packets, all learning is performed first, then all packets are
 * forwarded. The forwarding is done based on the end-state of the mac table,
 * instead of the state after each packet. Thus the forwarding results could
 * differ in certain cases (mac move tests), but this is not expected to cause
 * problems in real-world networks. It is much simpler to separate learning
 * and forwarding into separate nodes.
+ */ + + +typedef struct { + u8 src[6]; + u8 dst[6]; + u32 sw_if_index; + u16 bd_index; +} l2learn_trace_t; + + +/* packet trace format function */ +static u8 * format_l2learn_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2learn_trace_t * t = va_arg (*args, l2learn_trace_t *); + + s = format (s, "l2-learn: sw_if_index %d dst %U src %U bd_index %d", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->bd_index); + return s; +} + +static vlib_node_registration_t l2learn_node; + +#define foreach_l2learn_error \ +_(L2LEARN, "L2 learn packets") \ +_(MISS, "L2 learn misses") \ +_(MAC_MOVE, "L2 mac moves") \ +_(MAC_MOVE_VIOLATE, "L2 mac move violations") \ +_(LIMIT, "L2 not learned due to limit") \ +_(HIT, "L2 learn hits") \ +_(FILTER_DROP, "L2 filter mac drops") + +typedef enum { +#define _(sym,str) L2LEARN_ERROR_##sym, + foreach_l2learn_error +#undef _ + L2LEARN_N_ERROR, +} l2learn_error_t; + +static char * l2learn_error_strings[] = { +#define _(sym,string) string, + foreach_l2learn_error +#undef _ +}; + +typedef enum { + L2LEARN_NEXT_L2FWD, + L2LEARN_NEXT_DROP, + L2LEARN_N_NEXT, +} l2learn_next_t; + + +// Perform learning on one packet based on the mac table lookup result + +static_always_inline void +l2learn_process (vlib_node_runtime_t * node, + l2learn_main_t * msm, + u64 * counter_base, + vlib_buffer_t * b0, + u32 sw_if_index0, + l2fib_entry_key_t * key0, + l2fib_entry_key_t * cached_key, + u32 * bucket0, + l2fib_entry_result_t * result0, + u32 * next0) +{ + u32 feature_bitmap; + + // Set up the default next node (typically L2FWD) + + // Remove ourself from the feature bitmap + feature_bitmap = vnet_buffer(b0)->l2.feature_bitmap & ~L2INPUT_FEAT_LEARN; + + // Save for next feature graph nodes + vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap; + + // Determine the next node + *next0 = 
feat_bitmap_get_next_node_index(msm->feat_next_node_index, + feature_bitmap); + + // Check mac table lookup result + + if (PREDICT_TRUE (result0->fields.sw_if_index == sw_if_index0)) { + // The entry was in the table, and the sw_if_index matched, the normal case + + // TODO: for dataplane learning and aging, do this: + // if refresh=0 and not a static mac, set refresh=1 + counter_base[L2LEARN_ERROR_HIT] += 1; + + } else if (result0->raw == ~0) { + + // The entry was not in table, so add it + + counter_base[L2LEARN_ERROR_MISS] += 1; + + if (msm->global_learn_count == msm->global_learn_limit) { + // Global limit reached. Do not learn the mac but forward the packet. + // In the future, limits could also be per-interface or bridge-domain. + counter_base[L2LEARN_ERROR_LIMIT] += 1; + goto done; + + } else { + BVT(clib_bihash_kv) kv; + // It is ok to learn + + result0->raw = 0; // clear all fields + result0->fields.sw_if_index = sw_if_index0; + // TODO: set timestamp in entry to clock for dataplane aging + kv.key = key0->raw; + kv.value = result0->raw; + + BV(clib_bihash_add_del) (msm->mac_table, &kv, 1 /* is_add */); + + cached_key->raw = ~0; // invalidate the cache + msm->global_learn_count++; + } + + } else { + + // The entry was in the table, but with the wrong sw_if_index mapping (mac move) + counter_base[L2LEARN_ERROR_MAC_MOVE] += 1; + + if (result0->fields.static_mac) { + // Don't overwrite a static mac + // TODO: Check violation policy. 
For now drop the packet + b0->error = node->errors[L2LEARN_ERROR_MAC_MOVE_VIOLATE]; + *next0 = L2LEARN_NEXT_DROP; + } else { + // Update the entry + // TODO: may want to rate limit mac moves + // TODO: check global/bridge domain/interface learn limits + BVT(clib_bihash_kv) kv; + + result0->raw = 0; // clear all fields + result0->fields.sw_if_index = sw_if_index0; + + kv.key = key0->raw; + kv.value = result0->raw; + + cached_key->raw = ~0; // invalidate the cache + + BV(clib_bihash_add_del) (msm->mac_table, &kv, 1 /* is_add */); + } + } + + if (result0->fields.filter) { + // drop packet because lookup matched a filter mac entry + + if (*next0 != L2LEARN_NEXT_DROP) { + // if we're not already dropping the packet, do it now + b0->error = node->errors[L2LEARN_ERROR_FILTER_DROP]; + *next0 = L2LEARN_NEXT_DROP; + } + } + +done: + return; +} + + +static uword +l2learn_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2learn_next_t next_index; + l2learn_main_t * msm = &l2learn_main; + vlib_node_t *n = vlib_get_node (vm, l2learn_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + l2fib_entry_key_t cached_key; + l2fib_entry_result_t cached_result; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + // Clear the one-entry cache in case mac table was updated + cached_key.raw = ~0; + cached_result.raw = ~0; /* warning be gone */ + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ethernet_header_t * h0, * h1; + l2fib_entry_key_t key0, key1; + 
l2fib_entry_result_t result0, result1; + u32 bucket0, bucket1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* RX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + /* Process 2 x pkts */ + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2learn_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer(b0)->l2.bd_index; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + l2learn_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->bd_index = vnet_buffer(b1)->l2.bd_index; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + } + } + + /* process 2 pkts */ + em->counters[node_counter_base_index + L2LEARN_ERROR_L2LEARN] += 2; + + l2fib_lookup_2 (msm->mac_table, &cached_key, &cached_result, + h0->src_address, + h1->src_address, + vnet_buffer(b0)->l2.bd_index, + vnet_buffer(b1)->l2.bd_index, + &key0, + &key1, + &bucket0, + &bucket1, + &result0, + &result1); + + l2learn_process (node, msm, 
&em->counters[node_counter_base_index], + b0, sw_if_index0, &key0, &cached_key, + &bucket0, &result0, &next0); + + l2learn_process (node, msm, &em->counters[node_counter_base_index], + b1, sw_if_index1, &key1, &cached_key, + &bucket1, &result1, &next1); + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ethernet_header_t * h0; + l2fib_entry_key_t key0; + l2fib_entry_result_t result0; + u32 bucket0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + h0 = vlib_buffer_get_current (b0); + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + l2learn_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer(b0)->l2.bd_index; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + + /* process 1 pkt */ + em->counters[node_counter_base_index + L2LEARN_ERROR_L2LEARN] += 1; + + l2fib_lookup_1 (msm->mac_table, &cached_key, &cached_result, + h0->src_address, vnet_buffer(b0)->l2.bd_index, + &key0, + &bucket0, + &result0); + + l2learn_process (node, msm, &em->counters[node_counter_base_index], + b0, sw_if_index0, &key0, &cached_key, + &bucket0, &result0, &next0); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + 
return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (l2learn_node,static) = { + .function = l2learn_node_fn, + .name = "l2-learn", + .vector_size = sizeof (u32), + .format_trace = format_l2learn_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2learn_error_strings), + .error_strings = l2learn_error_strings, + + .n_next_nodes = L2LEARN_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2LEARN_NEXT_DROP] = "error-drop", + [L2LEARN_NEXT_L2FWD] = "l2-fwd", + }, +}; + + +clib_error_t *l2learn_init (vlib_main_t *vm) +{ + l2learn_main_t * mp = &l2learn_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2learn_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names(), + mp->feat_next_node_index); + + /* init the hash table ptr */ + mp->mac_table = get_mac_table(); + + // Set the default number of dynamically learned macs to the number + // of buckets. + mp->global_learn_limit = L2FIB_NUM_BUCKETS * 16; + + return 0; +} + +VLIB_INIT_FUNCTION (l2learn_init); + + +// set subinterface learn enable/disable +// The CLI format is: +// set interface l2 learn <interface> [disable] +static clib_error_t * +int_learn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 enable; + + if (! 
unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the interface flag + l2input_intf_bitmap_enable(sw_if_index, L2INPUT_FEAT_LEARN, enable); + + done: + return error; +} + +VLIB_CLI_COMMAND (int_learn_cli, static) = { + .path = "set interface l2 learn", + .short_help = "set interface l2 learn <interface> [disable]", + .function = int_learn, +}; + + +static clib_error_t * +l2learn_config (vlib_main_t * vm, unformat_input_t * input) +{ + l2learn_main_t *mp = &l2learn_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "limit %d", &mp->global_learn_limit)) + ; + + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + return 0; +} + +VLIB_CONFIG_FUNCTION (l2learn_config, "l2learn"); + diff --git a/vnet/vnet/l2/l2_learn.h b/vnet/vnet/l2/l2_learn.h new file mode 100644 index 00000000000..25674858fc9 --- /dev/null +++ b/vnet/vnet/l2/l2_learn.h @@ -0,0 +1,47 @@ +/* + * l2_learn.c : layer 2 learning using l2fib + * + * Copyright (c) 2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#ifndef included_l2learn_h
#define included_l2learn_h

#include <vlib/vlib.h>
#include <vnet/ethernet/ethernet.h>


// State shared by the l2-learn graph node, its init function and its
// CLI/config handlers.
typedef struct {

  // Hash table
  BVT(clib_bihash) *mac_table;

  // number of dynamically learned mac entries
  u32 global_learn_count;

  // maximum number of dynamically learned mac entries
  u32 global_learn_limit;

  // Next nodes for each feature
  u32 feat_next_node_index[32];

  /* convenience variables */
  vlib_main_t * vlib_main;
  vnet_main_t * vnet_main;
} l2learn_main_t;


// NOTE(review): this is an object definition (no `extern`) in a header, so
// every translation unit that includes it emits its own tentative
// definition. Presumably it should be `extern` here with a single
// definition in l2_learn.c - confirm before changing.
l2learn_main_t l2learn_main;

#endif
diff --git a/vnet/vnet/l2/l2_output.c b/vnet/vnet/l2/l2_output.c
new file mode 100644
index 00000000000..72c3d0374e3
--- /dev/null
+++ b/vnet/vnet/l2/l2_output.c
@@ -0,0 +1,541 @@
/*
 * l2_output.c : layer 2 output packet processing
 *
 * Copyright (c) 2013 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
#include <vnet/ethernet/ethernet.h>
#include <vlib/cli.h>

#include <vppinfra/error.h>
#include <vppinfra/hash.h>
#include <vnet/l2/feat_bitmap.h>
#include <vnet/l2/l2_output.h>


// Feature graph node names
// One string per entry of the foreach_l2output_feat list, in list order.
static char * l2output_feat_names[] = {
#define _(sym,name) name,
  foreach_l2output_feat
#undef _
};

// Accessor for the feature node name table above.
char **l2output_get_feat_names(void) {
  return l2output_feat_names;
}

l2output_main_t l2output_main;

typedef struct {
  /* per-pkt trace data */
  u8 src[6];
  u8 dst[6];
  u32 sw_if_index;
} l2output_trace_t;

/*
 * Packet trace format function.
 * Consumes (vm, node, trace record) from args and appends the
 * sw_if_index and both MAC addresses to s.
 */
static u8 * format_l2output_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  l2output_trace_t * t = va_arg (*args, l2output_trace_t *);

  s = format (s, "l2-output: sw_if_index %d dst %U src %U",
              t->sw_if_index,
              format_ethernet_address, t->dst,
              format_ethernet_address, t->src);
  return s;
}


// Error counters for the l2-output node. The list below generates both
// the L2OUTPUT_ERROR_* enum and the matching counter description strings.
#define foreach_l2output_error                          \
_(L2OUTPUT,     "L2 output packets")                    \
_(EFP_DROP,     "L2 EFP filter pre-rewrite drops")      \
_(VTR_DROP,     "L2 output tag rewrite drops")          \
_(SHG_DROP,     "L2 split horizon drops")               \
_(DROP,         "L2 output drops")

typedef enum {
#define _(sym,str) L2OUTPUT_ERROR_##sym,
  foreach_l2output_error
#undef _
  L2OUTPUT_N_ERROR,
} l2output_error_t;

static char * l2output_error_strings[] = {
#define _(sym,string) string,
  foreach_l2output_error
#undef _
};

// Statically registered next nodes of l2-output.
typedef enum {
  L2OUTPUT_NEXT_DROP,
  L2OUTPUT_N_NEXT,
} l2output_next_t;

// Return 0 if split horizon check passes, otherwise return non-zero
// Packets should not be transmitted out an interface with the same
// split-horizon group as the input interface, except if the shg is 0
// in which case the check always passes.
static_always_inline u32
split_horizon_violation (u8 shg1, u8 shg2)
{
  // Only shg1 is tested for zero; callers pass the output interface's
  // configured shg as shg1 and the buffer's l2.shg as shg2.
  if (PREDICT_TRUE (shg1 == 0)) {
    return 0;
  } else {
    return shg1 == shg2;
  }
}


/*
 * l2-output node function.
 *
 * For every buffer: optionally record a trace, bump the L2OUTPUT counter,
 * fetch the config of the TX interface, let l2_output_dispatch() pick the
 * next node from the config's feature bitmap, then apply the output VLAN
 * tag rewrite (with the pre-rewrite EFP filter check when that feature is
 * set) and the split-horizon check. A failed check redirects the buffer
 * to error-drop. Returns the number of buffers in the frame.
 */
static uword
l2output_node_fn (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
                  vlib_frame_t * frame)
{
  u32 n_left_from, * from, * to_next;
  l2output_next_t next_index;
  l2output_main_t * msm = &l2output_main;
  vlib_node_t *n = vlib_get_node (vm, l2output_node.index);
  u32 node_counter_base_index = n->error_heap_index;
  vlib_error_main_t * em = &vm->error_main;
  u32 cached_sw_if_index;
  u32 cached_next_index;

  /* Invalidate cache */
  cached_sw_if_index = ~0;
  cached_next_index = ~0; /* warning be gone */

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors; /* number of packets to process */
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      /* get space to enqueue frame to graph node "next_index" */
      vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);

      /* dual loop: needs 6 buffers because it prefetches from[2..5] */
      while (n_left_from >= 6 && n_left_to_next >= 2)
        {
          u32 bi0, bi1;
          vlib_buffer_t * b0, * b1;
          u32 next0, next1;
          u32 sw_if_index0, sw_if_index1;
          ethernet_header_t * h0, * h1;
          l2_output_config_t * config0, * config1;
          u32 feature_bitmap0, feature_bitmap1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t * p2, * p3, * p4 , * p5;
            u32 sw_if_index2, sw_if_index3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);
            p4 = vlib_get_buffer (vm, from[4]);
            p5 = vlib_get_buffer (vm, from[5]);

            // Prefetch the buffer header for the N+2 loop iteration
            vlib_prefetch_buffer_header (p4, LOAD);
            vlib_prefetch_buffer_header (p5, LOAD);
            // Note: no need to prefetch packet data. This node doesn't reference it.

            // Prefetch the input config for the N+1 loop iteration
            // This depends on the buffer header above
            sw_if_index2 = vnet_buffer(p2)->sw_if_index[VLIB_TX];
            sw_if_index3 = vnet_buffer(p3)->sw_if_index[VLIB_TX];
            CLIB_PREFETCH (&msm->configs[sw_if_index2], CLIB_CACHE_LINE_BYTES, LOAD);
            CLIB_PREFETCH (&msm->configs[sw_if_index3], CLIB_CACHE_LINE_BYTES, LOAD);
          }

          /* speculatively enqueue b0 and b1 to the current next frame */
          /* bi is "buffer index", b is pointer to the buffer */
          to_next[0] = bi0 = from[0];
          to_next[1] = bi1 = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          /* TX interface handles */
          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
          sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];

          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
            {
              h0 = vlib_buffer_get_current (b0);
              h1 = vlib_buffer_get_current (b1);
              if (b0->flags & VLIB_BUFFER_IS_TRACED)
                {
                  l2output_trace_t *t =
                    vlib_add_trace (vm, node, b0, sizeof (*t));
                  t->sw_if_index = sw_if_index0;
                  memcpy(t->src, h0->src_address, 6);
                  memcpy(t->dst, h0->dst_address, 6);
                }
              if (b1->flags & VLIB_BUFFER_IS_TRACED)
                {
                  l2output_trace_t *t =
                    vlib_add_trace (vm, node, b1, sizeof (*t));
                  t->sw_if_index = sw_if_index1;
                  memcpy(t->src, h1->src_address, 6);
                  memcpy(t->dst, h1->dst_address, 6);
                }
            }

          em->counters[node_counter_base_index + L2OUTPUT_ERROR_L2OUTPUT] += 2;

          // Get config for the output interface
          config0 = vec_elt_at_index(msm->configs, sw_if_index0);
          config1 = vec_elt_at_index(msm->configs, sw_if_index1);

          // Get features from the config
          // TODO: mask out any non-applicable features
          feature_bitmap0 = config0->feature_bitmap;
          feature_bitmap1 = config1->feature_bitmap;

          // Determine next node
          l2_output_dispatch (msm->vlib_main,
                              msm->vnet_main,
                              node,
                              l2output_node.index,
                              &cached_sw_if_index,
                              &cached_next_index,
                              &msm->next_nodes,
                              b0,
                              sw_if_index0,
                              feature_bitmap0,
                              &next0);

          l2_output_dispatch (msm->vlib_main,
                              msm->vnet_main,
                              node,
                              l2output_node.index,
                              &cached_sw_if_index,
                              &cached_next_index,
                              &msm->next_nodes,
                              b1,
                              sw_if_index1,
                              feature_bitmap1,
                              &next1);

          // Perform output vlan tag rewrite and the pre-vtr EFP filter check.
          // The EFP Filter only needs to be run if there is an output VTR
          // configured. The flag for the post-vtr EFP Filter node is used
          // to trigger the pre-vtr check as well.

          if (PREDICT_FALSE (config0->output_vtr.push_and_pop_bytes)) {
            // Perform pre-vtr EFP filter check if configured
            u32 failed1 = (feature_bitmap0 & L2OUTPUT_FEAT_EFP_FILTER) &&
                          (l2_efp_filter_process(b0, &(config0->input_vtr)));
            u32 failed2 = l2_vtr_process(b0, &(config0->output_vtr));

            if (PREDICT_FALSE (failed1 | failed2)) {
              next0 = L2OUTPUT_NEXT_DROP;
              // When both checks fail, the EFP error is assigned last
              // and is therefore the one reported.
              if (failed2) {
                b0->error = node->errors[L2OUTPUT_ERROR_VTR_DROP];
              }
              if (failed1) {
                b0->error = node->errors[L2OUTPUT_ERROR_EFP_DROP];
              }
            }
          }

          if (PREDICT_FALSE (config1->output_vtr.push_and_pop_bytes)) {
            // Perform pre-vtr EFP filter check if configured
            u32 failed1 = (feature_bitmap1 & L2OUTPUT_FEAT_EFP_FILTER) &&
                          (l2_efp_filter_process(b1, &(config1->input_vtr)));
            u32 failed2 = l2_vtr_process(b1, &(config1->output_vtr));

            if (PREDICT_FALSE (failed1 | failed2)) {
              next1 = L2OUTPUT_NEXT_DROP;
              // Same precedence as for b0: EFP error wins over VTR error.
              if (failed2) {
                b1->error = node->errors[L2OUTPUT_ERROR_VTR_DROP];
              }
              if (failed1) {
                b1->error = node->errors[L2OUTPUT_ERROR_EFP_DROP];
              }
            }
          }

          // Perform the split horizon check
          // The check can only fail for non-zero shg's
          // (the sum is non-zero iff at least one of the two shg's is non-zero)
          if (PREDICT_FALSE (config0->shg + config1->shg)) {
            // one of the checks might fail, check both
            if (split_horizon_violation (config0->shg, vnet_buffer(b0)->l2.shg)) {
              next0 = L2OUTPUT_NEXT_DROP;
              b0->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
            }
            if (split_horizon_violation (config1->shg, vnet_buffer(b1)->l2.shg)) {
              next1 = L2OUTPUT_NEXT_DROP;
              b1->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
            }
          }

          /* verify speculative enqueues, maybe switch current next frame */
          /* if next0==next1==next_index then nothing special needs to be done */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
        }

      /* single loop: handles whatever the dual loop left over */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t * b0;
          u32 next0;
          u32 sw_if_index0;
          ethernet_header_t * h0;
          l2_output_config_t *config0;
          u32 feature_bitmap0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);

          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];

          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b0->flags & VLIB_BUFFER_IS_TRACED))) {
            l2output_trace_t *t =
               vlib_add_trace (vm, node, b0, sizeof (*t));
            t->sw_if_index = sw_if_index0;
            h0 = vlib_buffer_get_current (b0);
            memcpy(t->src, h0->src_address, 6);
            memcpy(t->dst, h0->dst_address, 6);
          }

          em->counters[node_counter_base_index + L2OUTPUT_ERROR_L2OUTPUT] += 1;

          // Get config for the output interface
          config0 = vec_elt_at_index(msm->configs, sw_if_index0);

          // Get features from the config
          // TODO: mask out any non-applicable features
          feature_bitmap0 = config0->feature_bitmap;

          // Determine next node
          l2_output_dispatch (msm->vlib_main,
                              msm->vnet_main,
                              node,
                              l2output_node.index,
                              &cached_sw_if_index,
                              &cached_next_index,
                              &msm->next_nodes,
                              b0,
                              sw_if_index0,
                              feature_bitmap0,
                              &next0);

          // Perform output vlan tag rewrite and the pre-vtr EFP filter check.
          // The EFP Filter only needs to be run if there is an output VTR
          // configured. The flag for the post-vtr EFP Filter node is used
          // to trigger the pre-vtr check as well.

          // NOTE(review): the dual loop wraps this same test in
          // PREDICT_FALSE; the single loop does not.
          if (config0->output_vtr.push_and_pop_bytes) {
            // Perform pre-vtr EFP filter check if configured
            u32 failed1 = (feature_bitmap0 & L2OUTPUT_FEAT_EFP_FILTER) &&
                          (l2_efp_filter_process(b0, &(config0->input_vtr)));
            u32 failed2 = l2_vtr_process(b0, &(config0->output_vtr));

            if (PREDICT_FALSE (failed1 | failed2)) {
              next0 = L2OUTPUT_NEXT_DROP;
              // When both checks fail, the EFP error is assigned last
              // and is therefore the one reported.
              if (failed2) {
                b0->error = node->errors[L2OUTPUT_ERROR_VTR_DROP];
              }
              if (failed1) {
                b0->error = node->errors[L2OUTPUT_ERROR_EFP_DROP];
              }
            }
          }

          // Perform the split horizon check
          if (PREDICT_FALSE (split_horizon_violation (config0->shg, vnet_buffer(b0)->l2.shg))) {
            next0 = L2OUTPUT_NEXT_DROP;
            b0->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
          }

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}


VLIB_REGISTER_NODE (l2output_node) = {
  .function = l2output_node_fn,
  .name = "l2-output",
  .vector_size = sizeof (u32),
  .format_trace = format_l2output_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = ARRAY_LEN(l2output_error_strings),
  .error_strings = l2output_error_strings,

  .n_next_nodes = L2OUTPUT_N_NEXT,

  /* edit / add dispositions here */
  .next_nodes = {
        [L2OUTPUT_NEXT_DROP] = "error-drop",
  },
};

clib_error_t *l2output_init (vlib_main_t *vm)
{
  l2output_main_t * mp = &l2output_main;

  mp->vlib_main = vm;
  mp->vnet_main = vnet_get_main();

  // Create the config vector
  vec_validate(mp->configs, 100);
  // Until we hook up the CLI config, just create 100 sw interface entries and zero them

  // Initialize the feature next-node indexes
  feat_bitmap_init_next_nodes(vm,
                              l2output_node.index,
                              L2OUTPUT_N_FEAT,
                              l2output_get_feat_names(),
                              mp->next_nodes.feat_next_node_index);

  // Initialize the output node mapping table
l2output_init_output_node_vec(&mp->next_nodes.output_node_index_vec); + + return 0; +} + +VLIB_INIT_FUNCTION (l2output_init); + +typedef struct { + u32 node_index; + u32 sw_if_index; +} output_node_mapping_rpc_args_t; + +#if DPDK > 0 +static void output_node_rpc_callback +( output_node_mapping_rpc_args_t * a); + +static void output_node_mapping_send_rpc +(u32 node_index, + u32 sw_if_index) +{ + output_node_mapping_rpc_args_t args; + + args.node_index = node_index; + args.sw_if_index = sw_if_index; + + vl_api_rpc_call_main_thread (output_node_rpc_callback, + (u8 *) &args, sizeof (args)); +} +#endif + + +// Create a mapping in the next node mapping table for the given sw_if_index +u32 l2output_create_output_node_mapping ( + vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 node_index, // index of current node + u32 * output_node_index_vec, + u32 sw_if_index) { + + u32 next; // index of next graph node + vnet_hw_interface_t *hw0; + u32 *node; +#if DPDK > 0 + uword cpu_number; + + cpu_number = os_get_cpu_number(); + + if (cpu_number) + { + output_node_mapping_send_rpc (node_index, sw_if_index); + return 0; + } +#endif + + hw0 = vnet_get_sup_hw_interface (vnet_main, sw_if_index); + + // dynamically create graph node arc + next = vlib_node_add_next (vlib_main, + node_index, + hw0->output_node_index); + + // Initialize vector with the mapping + + node = vec_elt_at_index(output_node_index_vec, sw_if_index); + *node = next; + + return next; +} + +#if DPDK > 0 +void output_node_rpc_callback (output_node_mapping_rpc_args_t *a) +{ + vlib_main_t * vm = vlib_get_main(); + vnet_main_t * vnm = vnet_get_main(); + l2output_main_t * mp = &l2output_main; + + (void) l2output_create_output_node_mapping + (vm, vnm, a->node_index, mp->next_nodes.output_node_index_vec, + a->sw_if_index); +} +#endif + +// Get a pointer to the config for the given interface +l2_output_config_t * l2output_intf_config (u32 sw_if_index) +{ + l2output_main_t * mp = &l2output_main; + + 
vec_validate(mp->configs, sw_if_index); + return vec_elt_at_index(mp->configs, sw_if_index); +} + +// Enable (or disable) the feature in the bitmap for the given interface +void l2output_intf_bitmap_enable (u32 sw_if_index, + u32 feature_bitmap, + u32 enable) +{ + l2output_main_t * mp = &l2output_main; + l2_output_config_t *config; + + vec_validate(mp->configs, sw_if_index); + config = vec_elt_at_index(mp->configs, sw_if_index); + + if (enable) { + config->feature_bitmap |= feature_bitmap; + } else { + config->feature_bitmap &= ~feature_bitmap; + } +} diff --git a/vnet/vnet/l2/l2_output.h b/vnet/vnet/l2/l2_output.h new file mode 100644 index 00000000000..0d171b82541 --- /dev/null +++ b/vnet/vnet/l2/l2_output.h @@ -0,0 +1,219 @@ +/* + * l2_output.h : layer 2 output packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_vnet_l2_output_h +#define included_vnet_l2_output_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_vtr.h> + + +// The L2 output feature configuration, a per-interface struct +typedef struct { + + u32 feature_bitmap; + + // vlan tag rewrite for ingress and egress + // ingress vtr is located here because the same config data is used for + // the egress EFP filter check + vtr_config_t input_vtr; + vtr_config_t output_vtr; + + // some of these flags may get integrated into the feature bitmap + u8 fwd_enable; + u8 flood_enable; + + // split horizon group + u8 shg; + +} l2_output_config_t; + + +// The set of next nodes for features and interface output. +// Each output feature node should include this. +typedef struct { + // vector of output next node index, indexed by sw_if_index. + // used when all output features have been executed and the + // next nodes are the interface output nodes. + u32 * output_node_index_vec; + + // array of next node index for each output feature, indexed + // by l2output_feat_t. Used to determine next feature node. 
+ u32 feat_next_node_index[32]; + +} l2_output_next_nodes_st; + + +typedef struct { + // Next nodes for features and output interfaces + l2_output_next_nodes_st next_nodes; + + /* config vector indexed by sw_if_index */ + l2_output_config_t *configs; + + /* Convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2output_main_t; + +l2output_main_t l2output_main; +vlib_node_registration_t l2output_node; + +// L2 output features + +// Mappings from feature ID to graph node name +#define foreach_l2output_feat \ + _(SPAN, "feature-bitmap-drop") \ + _(CFM, "feature-bitmap-drop") \ + _(QOS, "feature-bitmap-drop") \ + _(ACL, "l2-output-acl") \ + _(L2PT, "feature-bitmap-drop") \ + _(EFP_FILTER, "l2-efp-filter") \ + _(IPIW, "feature-bitmap-drop") \ + _(STP_BLOCKED, "feature-bitmap-drop") \ + _(LINESTATUS_DOWN, "feature-bitmap-drop") \ + _(XCRW, "l2-xcrw") + +// Feature bitmap positions +typedef enum { +#define _(sym,str) L2OUTPUT_FEAT_##sym##_BIT, + foreach_l2output_feat +#undef _ + L2OUTPUT_N_FEAT, +} l2output_feat_t; + +// Feature bit masks +typedef enum { +#define _(sym,str) L2OUTPUT_FEAT_##sym = (1<<L2OUTPUT_FEAT_##sym##_BIT), + foreach_l2output_feat +#undef _ +} l2output_feat_masks_t; + +// Return an array of strings containing graph node names of each feature +char **l2output_get_feat_names(void); + + +// The next set of functions is for use by output feature graph nodes. +// When the last bit has been cleared from the output feature bitmap, +// the next node is the output graph node for the TX sw_if_index. +// These functions help the feature nodes get that node index. 
+ +// Create a mapping to the output graph node for the given sw_if_index +u32 l2output_create_output_node_mapping ( + vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 node_index, // index of current node + u32 * output_node_index_vec, + u32 sw_if_index); + +// Initialize the next node mapping table +always_inline +void l2output_init_output_node_vec (u32 **output_node_index_vec) { + + // Size it at 100 sw_if_indexes initially + // Uninitialized mappings are set to ~0 + vec_validate_init_empty(*output_node_index_vec, 100, ~0); +} + + +// Get a mapping from the output node mapping table, +// creating the entry if necessary. +always_inline +u32 l2output_get_output_node (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 node_index, // index of current node + u32 sw_if_index, + u32 ** output_node_index_vec) // may be updated +{ + u32 next; // index of next graph node + + // Insure the vector is big enough + vec_validate_init_empty(*output_node_index_vec, sw_if_index, ~0); + + // Get the mapping for the sw_if_index + next = vec_elt(*output_node_index_vec, sw_if_index); + + if (next == ~0) { + // Mapping doesn't exist so create it + next = l2output_create_output_node_mapping (vlib_main, + vnet_main, + node_index, + *output_node_index_vec, + sw_if_index); + } + + return next; +} + + +// Determine the next L2 node based on the output feature bitmap +always_inline void +l2_output_dispatch (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + vlib_node_runtime_t * node, + u32 node_index, + u32 * cached_sw_if_index, + u32 * cached_next_index, + l2_output_next_nodes_st *next_nodes, + vlib_buffer_t * b0, + u32 sw_if_index, + u32 feature_bitmap, + u32 *next0) +{ + if (feature_bitmap) { + // There are some features to execute + + // Save bitmap for the next feature graph nodes + vnet_buffer(b0)->l2.feature_bitmap = feature_bitmap; + + // Determine the next node + *next0 = feat_bitmap_get_next_node_index(next_nodes->feat_next_node_index, + feature_bitmap); + } 
else { + // There are no features. Send packet to TX node for sw_if_index0 + // This is a little tricky in that the output interface next node indexes + // are not precomputed at init time. + + if (sw_if_index == *cached_sw_if_index) { + // We hit in the one-entry cache. Use it. + *next0 = *cached_next_index; + } else { + // Look up the output TX node + *next0 = l2output_get_output_node(vlib_main, + vnet_main, + node_index, + sw_if_index, + &next_nodes->output_node_index_vec); + + // Update the one-entry cache + *cached_sw_if_index = sw_if_index; + *cached_next_index = *next0; + } + } +} + +// Get a pointer to the config for the given interface +l2_output_config_t * l2output_intf_config (u32 sw_if_index); + +// Enable (or disable) the feature in the bitmap for the given interface +void l2output_intf_bitmap_enable (u32 sw_if_index, + u32 feature_bitmap, + u32 enable); + +#endif diff --git a/vnet/vnet/l2/l2_output_acl.c b/vnet/vnet/l2/l2_output_acl.c new file mode 100644 index 00000000000..2f6c1dce41f --- /dev/null +++ b/vnet/vnet/l2/l2_output_acl.c @@ -0,0 +1,335 @@ +/* + * l2_output_acl.c : layer 2 output acl processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vlib/cli.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_output.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + + +typedef struct { + // Next nodes for features and output interfaces + l2_output_next_nodes_st next_nodes; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2_outacl_main_t; + + + +typedef struct { + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u32 next_index; + u32 sw_if_index; +} l2_outacl_trace_t; + +/* packet trace format function */ +static u8 * format_l2_outacl_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_outacl_trace_t * t = va_arg (*args, l2_outacl_trace_t *); + + s = format (s, "l2-output-acl: sw_if_index %d dst %U src %U", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src); + return s; +} + +l2_outacl_main_t l2_outacl_main; + +static vlib_node_registration_t l2_outacl_node; + +#define foreach_l2_outacl_error \ +_(L2_OUTACL, "L2 output ACL packets") \ +_(DROP, "L2 output drops") + +typedef enum { +#define _(sym,str) L2_OUTACL_ERROR_##sym, + foreach_l2_outacl_error +#undef _ + L2_OUTACL_N_ERROR, +} l2_outacl_error_t; + +static char * l2_outacl_error_strings[] = { +#define _(sym,string) string, + foreach_l2_outacl_error +#undef _ +}; + +typedef enum { + L2_OUTACL_NEXT_DROP, + L2_OUTACL_N_NEXT, +} l2_outacl_next_t; + + + +static uword +l2_outacl_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2_outacl_next_t next_index; + l2_outacl_main_t * msm = 
&l2_outacl_main; + vlib_node_t *n = vlib_get_node (vm, l2_outacl_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + u32 cached_sw_if_index = (u32)~0; + u32 cached_next_index = (u32)~0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (0 && n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ethernet_header_t * h0, * h1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* TX interface handles */ + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX]; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2_outacl_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + l2_outacl_trace_t *t = + 
vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + memcpy(t->src, h1->src_address, 6); + memcpy(t->dst, h1->dst_address, 6); + } + } + + em->counters[node_counter_base_index + L2_OUTACL_ERROR_L2_OUTACL] += 2; + + /* add core loop code here */ + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ethernet_header_t * h0; + u32 feature_bitmap0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + l2_outacl_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + memcpy(t->src, h0->src_address, 6); + memcpy(t->dst, h0->dst_address, 6); + } + + em->counters[node_counter_base_index + L2_OUTACL_ERROR_L2_OUTACL] += 1; + + // L2_OUTACL code + // Dummy for now, just go to next feature node + + + // Remove ourself from the feature bitmap + feature_bitmap0 = vnet_buffer(b0)->l2.feature_bitmap & ~L2OUTPUT_FEAT_ACL; + + // Determine next node + l2_output_dispatch (msm->vlib_main, + msm->vnet_main, + node, + l2_outacl_node.index, + &cached_sw_if_index, + &cached_next_index, + &msm->next_nodes, + b0, + sw_if_index0, + feature_bitmap0, + &next0); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + 
vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (l2_outacl_node,static) = { + .function = l2_outacl_node_fn, + .name = "l2-output-acl", + .vector_size = sizeof (u32), + .format_trace = format_l2_outacl_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_outacl_error_strings), + .error_strings = l2_outacl_error_strings, + + .n_next_nodes = L2_OUTACL_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_OUTACL_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2_outacl_init (vlib_main_t *vm) +{ + l2_outacl_main_t * mp = &l2_outacl_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes(vm, + l2_outacl_node.index, + L2OUTPUT_N_FEAT, + l2output_get_feat_names(), + mp->next_nodes.feat_next_node_index); + + // Initialize the output node mapping table + l2output_init_output_node_vec(&mp->next_nodes.output_node_index_vec); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_outacl_init); + +// set subinterface outacl enable/disable +// The CLI format is: +// set interface acl output <interface> [disable] +static clib_error_t * +int_l2_outacl (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 enable; + + if (! 
unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) { + enable = 0; + } + + // set the interface flag + l2output_intf_bitmap_enable(sw_if_index, L2OUTPUT_FEAT_ACL, enable); + + done: + return error; +} + +VLIB_CLI_COMMAND (int_l2_outacl_cli, static) = { + .path = "set interface acl output", + .short_help = "set interface acl output <interface> [disable]", + .function = int_l2_outacl, +}; diff --git a/vnet/vnet/l2/l2_patch.c b/vnet/vnet/l2/l2_patch.c new file mode 100644 index 00000000000..63be409d3b8 --- /dev/null +++ b/vnet/vnet/l2/l2_patch.c @@ -0,0 +1,432 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vppinfra/error.h> + +typedef struct { + u32 cached_next_index; + u32 cached_rx_sw_if_index; + + /* vector of dispositions, indexed by rx_sw_if_index */ + u32 *tx_next_by_rx_sw_if_index; + u32 *tx_sw_if_index_by_rx_sw_if_index; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2_patch_main_t; + +typedef struct { + u32 rx_sw_if_index; + u32 tx_sw_if_index; +} l2_patch_trace_t; + +/* packet trace format function */ +static u8 * format_l2_patch_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_patch_trace_t * t = va_arg (*args, l2_patch_trace_t *); + + s = format (s, "L2_PATCH: rx %d tx %d", t->rx_sw_if_index, + t->tx_sw_if_index); + return s; +} + +l2_patch_main_t l2_patch_main; + +static vlib_node_registration_t l2_patch_node; + +#define foreach_l2_patch_error \ +_(PATCHED, "L2 patch packets") \ +_(DROPPED, "L2 patch misconfigured drops") + +typedef enum { +#define _(sym,str) L2_PATCH_ERROR_##sym, + foreach_l2_patch_error +#undef _ + L2_PATCH_N_ERROR, +} l2_patch_error_t; + +static char * l2_patch_error_strings[] = { +#define _(sym,string) string, + foreach_l2_patch_error +#undef _ +}; + +typedef enum { + L2_PATCH_NEXT_DROP, + L2_PATCH_N_NEXT, +} l2_patch_next_t; + +static uword +l2_patch_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2_patch_next_t next_index; + l2_patch_main_t * l2pm = &l2_patch_main; + vlib_node_t *n = vlib_get_node (vm, l2_patch_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + 
u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + /* So stupid / simple, we don't need to prefetch data */ + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + ASSERT(l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0); + ASSERT(l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0); + ASSERT(l2pm->tx_next_by_rx_sw_if_index[sw_if_index1] != ~0); + ASSERT(l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1] != ~0); + + if (PREDICT_TRUE (sw_if_index0 == l2pm->cached_rx_sw_if_index)) + next0 = l2pm->cached_next_index; + else + { + next0 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index0]; + l2pm->cached_rx_sw_if_index = sw_if_index0; + l2pm->cached_next_index = next0; + } + + if (PREDICT_TRUE (sw_if_index1 == l2pm->cached_rx_sw_if_index)) + next1 = l2pm->cached_next_index; + else + next1 = l2pm->tx_next_by_rx_sw_if_index [sw_if_index1]; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2_patch_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->rx_sw_if_index = sw_if_index0; + t->tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index [sw_if_index0]; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + l2_patch_trace_t *t = + 
vlib_add_trace (vm, node, b1, sizeof (*t)); + t->rx_sw_if_index = sw_if_index1; + t->tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index [sw_if_index1]; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + ASSERT(l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0); + ASSERT(l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0); + + if (PREDICT_TRUE (sw_if_index0 == l2pm->cached_rx_sw_if_index)) + next0 = l2pm->cached_next_index; + else + { + next0 = l2pm->tx_next_by_rx_sw_if_index [sw_if_index0]; + l2pm->cached_rx_sw_if_index = sw_if_index0; + l2pm->cached_next_index = next0; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2_patch_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->rx_sw_if_index = sw_if_index0; + t->tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index [sw_if_index0]; + } + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + em->counters[node_counter_base_index + L2_PATCH_ERROR_PATCHED] += + frame->n_vectors; + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (l2_patch_node, static) = { + .function = l2_patch_node_fn, + .name = "l2_patch", + .vector_size = sizeof (u32), + .format_trace = format_l2_patch_trace, + .type = 
VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_patch_error_strings), + .error_strings = l2_patch_error_strings, + + .n_next_nodes = L2_PATCH_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_PATCH_NEXT_DROP] = "error-drop", + }, +}; + +int vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add) +{ + l2_patch_main_t * l2pm = &l2_patch_main; + vnet_hw_interface_t * rxhi, *txhi; + u32 tx_next_index; + + /* + * We assume that the API msg handler has used 2x VALIDATE_SW_IF_INDEX + * macros... + */ + + rxhi = vnet_get_sup_hw_interface (l2pm->vnet_main, rx_sw_if_index); + + /* Make sure caller didn't pass a vlan subif, etc. */ + if (rxhi->sw_if_index != rx_sw_if_index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + txhi = vnet_get_sup_hw_interface (l2pm->vnet_main, tx_sw_if_index); + if (txhi->sw_if_index != tx_sw_if_index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX_2; + + if (is_add) + { + tx_next_index = vlib_node_add_next (l2pm->vlib_main, + l2_patch_node.index, + txhi->output_node_index); + + vec_validate_init_empty (l2pm->tx_next_by_rx_sw_if_index, + rx_sw_if_index, ~0); + + l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = tx_next_index; + vec_validate_init_empty (l2pm->tx_sw_if_index_by_rx_sw_if_index, + rx_sw_if_index, ~0); + l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index] + = txhi->sw_if_index; + + ethernet_set_flags (l2pm->vnet_main, rxhi->hw_if_index, + ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); + + vnet_hw_interface_rx_redirect_to_node (l2pm->vnet_main, + rxhi->hw_if_index, + l2_patch_node.index); + } + else + { + ethernet_set_flags (l2pm->vnet_main, rxhi->hw_if_index, + 0 /* disable promiscuous mode */); + + vnet_hw_interface_rx_redirect_to_node (l2pm->vnet_main, + rxhi->hw_if_index, + ~0 /* disable */); + if (vec_len (l2pm->tx_next_by_rx_sw_if_index) > rx_sw_if_index) + { + l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = ~0; + l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index] = ~0; + } + } + + return 
0; +} + +static clib_error_t * +test_patch_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2_patch_main_t * l2pm = &l2_patch_main; + unformat_input_t _line_input, * line_input = &_line_input; + u32 rx_sw_if_index, tx_sw_if_index; + int rv; + int rx_set = 0; + int tx_set = 0; + int is_add = 1; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "rx %U", unformat_vnet_sw_interface, + l2pm->vnet_main, &rx_sw_if_index)) + rx_set = 1; + else if (unformat (line_input, "tx %U", unformat_vnet_sw_interface, + l2pm->vnet_main, &tx_sw_if_index)) + tx_set = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else break; + } + + if (rx_set == 0) + return clib_error_return (0, "rx interface not set"); + + if (tx_set == 0) + return clib_error_return (0, "tx interface not set"); + + rv = vnet_l2_patch_add_del (rx_sw_if_index, tx_sw_if_index, is_add); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return (0, "rx interface not a physical port"); + + case VNET_API_ERROR_INVALID_SW_IF_INDEX_2: + return clib_error_return (0, "tx interface not a physical port"); + + default: + return clib_error_return + (0, "WARNING: vnet_l2_patch_add_del returned %d", rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (test_patch_command, static) = { + .path = "test l2patch", + .short_help = + "rx <intfc> tx <intfc> [del]", + .function = test_patch_command_fn, +}; + +// Display the contents of the l2patch table. 
+static clib_error_t * +show_l2patch (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2_patch_main_t * l2pm = &l2_patch_main; + u32 rx_sw_if_index; + u32 no_entries = 1; + + ASSERT(vec_len(l2pm->tx_next_by_rx_sw_if_index) == + vec_len(l2pm->tx_sw_if_index_by_rx_sw_if_index)); + + for (rx_sw_if_index = 0; + rx_sw_if_index < vec_len (l2pm->tx_sw_if_index_by_rx_sw_if_index); + rx_sw_if_index++) + { + u32 tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index]; + if (tx_sw_if_index != ~0) + { + no_entries = 0; + vlib_cli_output (vm, "%26U -> %U", + format_vnet_sw_if_index_name, + l2pm->vnet_main, rx_sw_if_index, + format_vnet_sw_if_index_name, + l2pm->vnet_main,tx_sw_if_index); + } + } + + if (no_entries) + vlib_cli_output (vm, "no l2patch entries"); + + return 0; +} + +VLIB_CLI_COMMAND (show_l2patch_cli, static) = { + .path = "show l2patch", + .short_help = "Show l2 interface cross-connect entries", + .function = show_l2patch, +}; + +clib_error_t *l2_patch_init (vlib_main_t *vm) +{ + l2_patch_main_t * mp = &l2_patch_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main(); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_patch_init); diff --git a/vnet/vnet/l2/l2_vtr.c b/vnet/vnet/l2/l2_vtr.c new file mode 100644 index 00000000000..a7499041009 --- /dev/null +++ b/vnet/vnet/l2/l2_vtr.c @@ -0,0 +1,448 @@ +/* + * l2_vtr.c : layer 2 vlan tag rewrite configuration + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_output.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_vtr.h> +#include <vnet/l2/l2_input_vtr.h> +#include <vnet/l2/l2_output.h> + +#include <vppinfra/error.h> +#include <vlib/cli.h> + + +// Just a placeholder. Also ensures the file is not eliminated by the linker. +clib_error_t *l2_vtr_init (vlib_main_t *vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION(l2_vtr_init); + + +// Configure vlan tag rewrite on the given interface. +// Return 0 if ok, otherwise a VNET_API_ERROR_* code +u32 l2vtr_configure (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index, + u32 vtr_op, + u32 push_dot1q, // ethertype of first pushed tag is dot1q/dot1ad + u32 vtr_tag1, // first pushed tag + u32 vtr_tag2) // second pushed tag +{ + vnet_hw_interface_t * hi; + vnet_sw_interface_t * si; + u32 hw_no_tags; + u32 error = 0; + vtr_config_t * in_config; + vtr_config_t * out_config; + u32 enable; + u32 push_inner_et; + u32 push_outer_et; + u32 cfg_tags; + + hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index); + if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index)) { + error = VNET_API_ERROR_INVALID_INTERFACE; // non-ethernet interface + goto done; + } + + // Init the config for this interface + vec_validate (l2output_main.configs, sw_if_index); + in_config = &(vec_elt_at_index(l2output_main.configs, sw_if_index)->input_vtr); + out_config = &(vec_elt_at_index(l2output_main.configs, sw_if_index)->output_vtr); + in_config->raw_tags = 0; + out_config->raw_tags = 0; + + // Get the configured tags for the interface + si = vnet_get_sw_interface (vnet_main, sw_if_index); + hw_no_tags = (si->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + + // Construct the input tag-rewrite config + + push_outer_et = 
clib_net_to_host_u16 (push_dot1q ? ETHERNET_TYPE_VLAN : ETHERNET_TYPE_DOT1AD); + push_inner_et = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN); + vtr_tag1 = clib_net_to_host_u16 (vtr_tag1); + vtr_tag2 = clib_net_to_host_u16 (vtr_tag2); + + // Determine number of vlan tags with explictly configured values + cfg_tags = 0; + if (hw_no_tags || si->sub.eth.flags.no_tags) { + cfg_tags = 0; + } else if (si->sub.eth.flags.one_tag) { + cfg_tags = 1; + if (si->sub.eth.flags.outer_vlan_id_any) { + cfg_tags = 0; + } + } else if (si->sub.eth.flags.two_tags) { + cfg_tags = 2; + if (si->sub.eth.flags.inner_vlan_id_any) { + cfg_tags = 1; + } + if (si->sub.eth.flags.outer_vlan_id_any) { + cfg_tags = 0; + } + } + + switch (vtr_op) { + case L2_VTR_DISABLED: + in_config->push_and_pop_bytes = 0; + break; + + case L2_VTR_POP_1: + if (cfg_tags < 1) { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need one or two tags + goto done; + } + in_config->pop_bytes = 4; + in_config->push_bytes = 0; + break; + + case L2_VTR_POP_2: + if (cfg_tags < 2) { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need two tags + goto done; + } + in_config->pop_bytes = 8; + in_config->push_bytes = 0; + + out_config->push_bytes = in_config->pop_bytes; + out_config->pop_bytes = in_config->push_bytes; + break; + + case L2_VTR_PUSH_1: + in_config->pop_bytes = 0; + in_config->push_bytes = 4; + in_config->tags[1].priority_cfi_and_id = vtr_tag1; + in_config->tags[1].type = push_outer_et; + break; + + case L2_VTR_PUSH_2: + in_config->pop_bytes = 0; + in_config->push_bytes = 8; + in_config->tags[0].priority_cfi_and_id = vtr_tag1; + in_config->tags[0].type = push_outer_et; + in_config->tags[1].priority_cfi_and_id = vtr_tag2; + in_config->tags[1].type = push_inner_et; + break; + + case L2_VTR_TRANSLATE_1_1: + if (cfg_tags < 1) { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need one or two tags + goto done; + } + in_config->pop_bytes = 4; + in_config->push_bytes = 4; + in_config->tags[1].priority_cfi_and_id = 
vtr_tag1; + in_config->tags[1].type = push_outer_et; + break; + + case L2_VTR_TRANSLATE_1_2: + if (cfg_tags < 1) { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need one or two tags + goto done; + } + in_config->pop_bytes = 4; + in_config->push_bytes = 8; + in_config->tags[0].priority_cfi_and_id = vtr_tag1; + in_config->tags[0].type = push_outer_et; + in_config->tags[1].priority_cfi_and_id = vtr_tag2; + in_config->tags[1].type = push_inner_et; + break; + + case L2_VTR_TRANSLATE_2_1: + if (cfg_tags < 2) { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need two tags + goto done; + } + in_config->pop_bytes = 8; + in_config->push_bytes = 4; + in_config->tags[1].priority_cfi_and_id = vtr_tag1; + in_config->tags[1].type = push_outer_et; + break; + + case L2_VTR_TRANSLATE_2_2: + if (cfg_tags < 2) { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; // Need two tags + goto done; + } + in_config->pop_bytes = 8; + in_config->push_bytes = 8; + in_config->tags[0].priority_cfi_and_id = vtr_tag1; + in_config->tags[0].type = push_outer_et; + in_config->tags[1].priority_cfi_and_id = vtr_tag2; + in_config->tags[1].type = push_inner_et; + break; + } + + // Construct the output tag-rewrite config + + // The push/pop values are always reversed + out_config->push_bytes = in_config->pop_bytes; + out_config->pop_bytes = in_config->push_bytes; + + // Any pushed tags are derived from the subinterface config + push_outer_et = clib_net_to_host_u16 (si->sub.eth.flags.dot1ad ? 
ETHERNET_TYPE_DOT1AD : ETHERNET_TYPE_VLAN); + push_inner_et = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN); + vtr_tag1 = clib_net_to_host_u16 (si->sub.eth.outer_vlan_id); + vtr_tag2 = clib_net_to_host_u16 (si->sub.eth.inner_vlan_id); + + if (out_config->push_bytes == 4) { + out_config->tags[1].priority_cfi_and_id = vtr_tag1; + out_config->tags[1].type = push_outer_et; + } else if (out_config->push_bytes == 8) { + out_config->tags[0].priority_cfi_and_id = vtr_tag1; + out_config->tags[0].type = push_outer_et; + out_config->tags[1].priority_cfi_and_id = vtr_tag2; + out_config->tags[1].type = push_inner_et; + } + + // set the interface enable flags + enable = (vtr_op != L2_VTR_DISABLED); + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_VTR, enable); + // output vtr enable is checked explicitly in l2_output + + done: + return error; +} + +// Get vtag tag rewrite on the given interface. +// Return 1 if there is an error, 0 if ok +u32 l2vtr_get (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index, + u32 *vtr_op, + u32 *push_dot1q, // ethertype of first pushed tag is dot1q/dot1ad + u32 *vtr_tag1, // first pushed tag + u32 *vtr_tag2) // second pushed tag +{ + vnet_hw_interface_t * hi; + u32 error = 0; + vtr_config_t * in_config; + + if (!vtr_op || !push_dot1q || !vtr_tag1 || !vtr_tag2) { + clib_warning ("invalid arguments"); + error = VNET_API_ERROR_INVALID_ARGUMENT; + goto done; + } + + *vtr_op = L2_VTR_DISABLED; + *vtr_tag1 = 0; + *vtr_tag2 = 0; + *push_dot1q = 0; + + hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index); + if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index)) { + // non-ethernet interface + goto done; + } + + if (sw_if_index >= vec_len(l2output_main.configs)) { + // no specific config (return disabled) + goto done; + } + + // Get the config for this interface + in_config = &(vec_elt_at_index(l2output_main.configs, sw_if_index)->input_vtr); + + // DISABLED + if (in_config->push_and_pop_bytes == 0) { + goto done; + } 
+ + // find out vtr_op + switch (in_config->pop_bytes) { + case 0: + switch (in_config->push_bytes) { + case 0: + // DISABLED + goto done; + case 4: + *vtr_op = L2_VTR_PUSH_1; + *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[1].type)); + break; + case 8: + *vtr_op = L2_VTR_PUSH_2; + *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id); + *vtr_tag2 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[0].type)); + break; + default: + clib_warning ("invalid push_bytes count: %d", in_config->push_bytes); + error = VNET_API_ERROR_UNEXPECTED_INTF_STATE; + goto done; + } + break; + + case 4: + switch (in_config->push_bytes) { + case 0: + *vtr_op = L2_VTR_POP_1; + break; + case 4: + *vtr_op = L2_VTR_TRANSLATE_1_1; + *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[1].type)); + break; + case 8: + *vtr_op = L2_VTR_TRANSLATE_1_2; + *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id); + *vtr_tag2 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[0].type)); + break; + default: + clib_warning ("invalid push_bytes count: %d", in_config->push_bytes); + error = VNET_API_ERROR_UNEXPECTED_INTF_STATE; + goto done; + } + break; + + case 8: + switch (in_config->push_bytes) { + case 0: + *vtr_op = L2_VTR_POP_2; + break; + case 4: + *vtr_op = L2_VTR_TRANSLATE_2_1; + *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[1].type)); + break; + case 8: + *vtr_op = L2_VTR_TRANSLATE_2_2; + *vtr_tag1 = clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id); + *vtr_tag2 
= clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = (ETHERNET_TYPE_VLAN == clib_host_to_net_u16 (in_config->tags[0].type)); + break; + default: + clib_warning ("invalid push_bytes count: %d", in_config->push_bytes); + error = VNET_API_ERROR_UNEXPECTED_INTF_STATE; + goto done; + } + break; + + default: + clib_warning ("invalid pop_bytes count: %d", in_config->pop_bytes); + error = VNET_API_ERROR_UNEXPECTED_INTF_STATE; + goto done; + } + + done: + return error; +} + +// set subinterface vtr enable/disable +// The CLI format is: +// set interface l2 tag-rewrite <interface> [disable | pop 1 | pop 2 | push {dot1q|dot1ad} <tag> [<tag>]] +// "push" can also be replaced by "translate-{1|2}-{1|2}" +static clib_error_t * +int_l2_vtr (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 vtr_op; + u32 push_dot1q = 0; + u32 tag1 = 0, tag2 = 0; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + vtr_op = L2_VTR_DISABLED; + + if (unformat (input, "disable")) { + vtr_op = L2_VTR_DISABLED; + } else if (unformat (input, "pop 1")) { + vtr_op = L2_VTR_POP_1; + } else if (unformat (input, "pop 2")) { + vtr_op = L2_VTR_POP_2; + + } else if (unformat (input, "push dot1q %d %d", &tag1, &tag2)) { + vtr_op = L2_VTR_PUSH_2; + push_dot1q = 1; + } else if (unformat (input, "push dot1ad %d %d", &tag1, &tag2)) { + vtr_op = L2_VTR_PUSH_2; + + } else if (unformat (input, "push dot1q %d", &tag1)) { + vtr_op = L2_VTR_PUSH_1; + push_dot1q = 1; + } else if (unformat (input, "push dot1ad %d", &tag1)) { + vtr_op = L2_VTR_PUSH_1; + + } else if (unformat (input, "translate 1-1 dot1q %d", &tag1)) { + vtr_op = L2_VTR_TRANSLATE_1_1; + push_dot1q = 1; + } else if (unformat (input, "translate 1-1 dot1ad %d", &tag1)) { + vtr_op 
= L2_VTR_TRANSLATE_1_1; + + } else if (unformat (input, "translate 2-1 dot1q %d", &tag1)) { + vtr_op = L2_VTR_TRANSLATE_2_1; + push_dot1q = 1; + } else if (unformat (input, "translate 2-1 dot1ad %d", &tag1)) { + vtr_op = L2_VTR_TRANSLATE_2_1; + + } else if (unformat (input, "translate 2-2 dot1q %d %d", &tag1, &tag2)) { + vtr_op = L2_VTR_TRANSLATE_2_2; + push_dot1q = 1; + } else if (unformat (input, "translate 2-2 dot1ad %d %d", &tag1, &tag2)) { + vtr_op = L2_VTR_TRANSLATE_2_2; + + } else if (unformat (input, "translate 1-2 dot1q %d %d", &tag1, &tag2)) { + vtr_op = L2_VTR_TRANSLATE_1_2; + push_dot1q = 1; + } else if (unformat (input, "translate 1-2 dot1ad %d %d", &tag1, &tag2)) { + vtr_op = L2_VTR_TRANSLATE_1_2; + + } else { + error = clib_error_return (0, "expecting [disable | pop 1 | pop 2 | push {dot1q|dot1ah} <tag> [<tag>]\n" + " | translate {1|2}-{1|2} {dot1q|dot1ah} <tag> [<tag>]] but got `%U'", + format_unformat_error, input); + goto done; + } + + if (l2vtr_configure (vm, + vnm, + sw_if_index, + vtr_op, + push_dot1q, + tag1, + tag2)) { + error = clib_error_return (0, "vlan tag rewrite is not compatible with interface"); + goto done; + } + + done: + return error; +} + +VLIB_CLI_COMMAND (int_l2_vtr_cli, static) = { + .path = "set interface l2 tag-rewrite", + .short_help = "set interface l2 tag-rewrite <interface> [disable | pop {1|2} | push {dot1q|dot1ad} <tag> <tag>]", + .function = int_l2_vtr, +}; + diff --git a/vnet/vnet/l2/l2_vtr.h b/vnet/vnet/l2/l2_vtr.h new file mode 100644 index 00000000000..aef6c6d255e --- /dev/null +++ b/vnet/vnet/l2/l2_vtr.h @@ -0,0 +1,167 @@ +/* + * l2_vtr.h : layer 2 vlan tag rewrite processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_l2_vtr_h +#define included_vnet_l2_vtr_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/l2/l2_vtr.h> + +// VTR config options for API and CLI support +typedef enum { + L2_VTR_DISABLED, + L2_VTR_PUSH_1, + L2_VTR_PUSH_2, + L2_VTR_POP_1, + L2_VTR_POP_2, + L2_VTR_TRANSLATE_1_1, + L2_VTR_TRANSLATE_1_2, + L2_VTR_TRANSLATE_2_1, + L2_VTR_TRANSLATE_2_2 +} l2_vtr_op_t; + +// Per-interface vlan tag rewrite configuration +// There will be one instance of this struct for each sw_if_index +// for both input vtr and output vtr +typedef struct { + union { + // Up to two vlan tags to push. + // if there is only one vlan tag to push, it is in tags[1]. + ethernet_vlan_header_tv_t tags[2]; + u64 raw_tags; + }; + + union { + struct { + u8 push_bytes; // number of bytes to push for up to 2 vlans (0,4,8) + u8 pop_bytes; // number of bytes to pop for up to 2 vlans (0,4,8) + }; + u16 push_and_pop_bytes; // if 0 then the feature is disabled + }; +} vtr_config_t; + + +// Perform the configured tag rewrite on the packet. +// Return 0 if ok, 1 if packet should be dropped (e.g. 
tried to pop too many tags) +always_inline u32 +l2_vtr_process (vlib_buffer_t * b0, + vtr_config_t * config) +{ + u64 temp_8; + u32 temp_4; + u8 * eth; + + eth = vlib_buffer_get_current (b0); + + // copy the 12B dmac and smac to a temporary location + temp_8 = *((u64 *)eth); + temp_4 = *((u32 *)(eth+8)); + + // adjust for popped tags + eth += config->pop_bytes; + + // if not enough tags to pop then drop packet + if (PREDICT_FALSE ((vnet_buffer(b0)->l2.l2_len - 12) < config->pop_bytes)) { + return 1; + } + + // copy the 2 new tags to the start of the packet + *((u64 *)(eth + 12 - 8)) = config->raw_tags; + + // TODO: set cos bits + + // adjust for pushed tags: + eth -= config->push_bytes; + + // copy the 12 dmac and smac back to the packet + *((u64 *)eth) = temp_8; + *((u32 *)(eth+8)) = temp_4; + + // Update l2_len + vnet_buffer(b0)->l2.l2_len += (word)config->push_bytes - (word)config->pop_bytes; + + // Update packet len + vlib_buffer_advance(b0, (word)config->pop_bytes - (word)config->push_bytes); + + return 0; +} + + +// Perform the egress pre-vlan tag rewrite EFP Filter check. The post-vlan tag rewrite +// check is a separate graph node. +// +// This check insures that a packet being output to an interface (before output vtr +// is performed) has vlan tags that match those on a packet received from that +// interface (after vtr has been performed). +// This means verifying that any tags pushed by input vtr are present on the packet. +// +// Return 0 if ok, 1 if packet should be dropped. +// This function should be passed the input vtr config for the interface. +always_inline u8 +l2_efp_filter_process (vlib_buffer_t * b0, + vtr_config_t * in_config) +{ + u8 * eth; + u64 packet_tags; + u64 tag_mask; + + eth = vlib_buffer_get_current (b0); + + // If there are 2 tags pushed, they must match config->tags[0] and config->tags[1]. + // If there is one tag pushed, it must match config->tag[1]. + // If there are 0 tags pushed, the check passes. 
+ + // mask for two vlan id and ethertypes, no cos bits + tag_mask = clib_net_to_host_u64(0xFFFF0FFFFFFF0FFF); + // mask for one vlan id and ethertype, no cos bits + tag_mask = (in_config->push_bytes == 4) ? clib_net_to_host_u64(0xFFFF0FFF) : tag_mask; + // mask for always match + tag_mask = (in_config->push_bytes == 0) ? 0 : tag_mask; + + // Read 8B from the packet, getting the proper set of vlan tags + // For 0 push bytes, the address doesn't matter since the mask clears the data to 0. + packet_tags = *((u64 *)(eth + 4 + in_config->push_bytes)); + + // Check if the packet tags match the configured tags + return (packet_tags & tag_mask) != in_config->raw_tags; +} + + +// Configure vtag tag rewrite on the given interface. +// Return 1 if there is an error, 0 if ok +u32 l2vtr_configure(vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index, + u32 vtr_op, + u32 push_dot1q, + u32 vtr_tag1, + u32 vtr_tag2); + +// Get vtag tag rewrite on the given interface. +// Return 1 if there is an error, 0 if ok +u32 l2vtr_get (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index, + u32 *vtr_op, + u32 *push_dot1q, + u32 *vtr_tag1, + u32 *vtr_tag2); + +#endif // included_vnet_l2_vtr_h + diff --git a/vnet/vnet/l2/l2_xcrw.c b/vnet/vnet/l2/l2_xcrw.c new file mode 100644 index 00000000000..f5fe3ca14e4 --- /dev/null +++ b/vnet/vnet/l2/l2_xcrw.c @@ -0,0 +1,559 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/l2/l2_xcrw.h> + +/* + * General L2 / L3 cross-connect, used to set up + * "L2 interface <--> your-favorite-tunnel-encap" tunnels. + * + * We set up a typical L2 cross-connect or (future) bridge + * to hook L2 interface(s) up to the L3 stack in arbitrary ways. + * + * Each l2_xcrw adjacency specifies 3 things: + * + * 1. The next graph node (presumably in the L3 stack) to + * process the (L2 -> L3) packet + * + * 2. A new value for vnet_buffer(b)->sw_if_index[VLIB_TX] + * (i.e. a lookup FIB index), + * + * 3. A rewrite string to apply. + * + * Example: to cross-connect an L2 interface or (future) bridge + * to an mpls-o-gre tunnel, set up the L2 rewrite string as shown in + * mpls_gre_rewrite, and use "mpls-post-rewrite" to fix the + * GRE IP header checksum and length fields. + */ + +typedef struct { + u32 next_index; + u32 tx_fib_index; +} l2_xcrw_trace_t; + +/* packet trace format function */ +static u8 * format_l2_xcrw_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_xcrw_trace_t * t = va_arg (*args, l2_xcrw_trace_t *); + + s = format (s, "L2_XCRW: next index %d tx_fib_index %d", + t->next_index, t->tx_fib_index); + return s; +} + +l2_xcrw_main_t l2_xcrw_main; + +static vlib_node_registration_t l2_xcrw_node; + +static char * l2_xcrw_error_strings[] = { +#define _(sym,string) string, + foreach_l2_xcrw_error +#undef _ +}; + +static uword +l2_xcrw_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2_xcrw_next_t next_index; + l2_xcrw_main_t * xcm = &l2_xcrw_main; + vlib_node_t *n = vlib_get_node (vm, l2_xcrw_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + + from = 
vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + l2_xcrw_adjacency_t * adj0, * adj1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + adj0 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index0); + adj1 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index1); + + next0 = adj0->rewrite_header.next_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = + adj0->rewrite_header.sw_if_index; + + next1 = adj1->rewrite_header.next_index; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = + adj1->rewrite_header.sw_if_index; + + em->counters[node_counter_base_index + next1]++; + + if (PREDICT_TRUE(next0 > 0)) + { + u8 * h0 = vlib_buffer_get_current (b0); + vnet_rewrite_one_header (adj0[0], h0, + adj0->rewrite_header.data_bytes); + vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); + em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++; + } + + if (PREDICT_TRUE(next1 > 0)) + { + u8 * h1 = vlib_buffer_get_current (b1); + vnet_rewrite_one_header (adj1[0], 
h1, + adj1->rewrite_header.data_bytes); + vlib_buffer_advance (b1, -adj1->rewrite_header.data_bytes); + em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++; + } + + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_xcrw_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->tx_fib_index = adj0->rewrite_header.sw_if_index; + } + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_xcrw_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->next_index = next1; + t->tx_fib_index = adj1->rewrite_header.sw_if_index; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + l2_xcrw_adjacency_t * adj0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + adj0 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index0); + + next0 = adj0->rewrite_header.next_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = + adj0->rewrite_header.sw_if_index; + + if (PREDICT_TRUE(next0 > 0)) + { + u8 *h0 = vlib_buffer_get_current (b0); + vnet_rewrite_one_header (adj0[0], h0, + adj0->rewrite_header.data_bytes); + vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); + em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_xcrw_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof 
(*t)); + t->next_index = next0; + t->tx_fib_index = adj0->rewrite_header.sw_if_index; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (l2_xcrw_node, static) = { + .function = l2_xcrw_node_fn, + .name = "l2-xcrw", + .vector_size = sizeof (u32), + .format_trace = format_l2_xcrw_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_xcrw_error_strings), + .error_strings = l2_xcrw_error_strings, + + .n_next_nodes = L2_XCRW_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_XCRW_NEXT_DROP] = "error-drop", + }, +}; + +clib_error_t *l2_xcrw_init (vlib_main_t *vm) +{ + l2_xcrw_main_t * mp = &l2_xcrw_main; + + mp->vlib_main = vm; + mp->vnet_main = &vnet_main; + mp->tunnel_index_by_l2_sw_if_index = hash_create (0, sizeof(uword)); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_xcrw_init); + +static uword dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +static u8 * format_xcrw_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "xcrw%d", dev_instance); +} + +VNET_DEVICE_CLASS (xcrw_device_class,static) = { + .name = "Xcrw", + .format_device_name = format_xcrw_name, + .tx_function = dummy_interface_tx, +}; + +/* Create a sham tunnel interface and return its sw_if_index */ +static u32 +create_xcrw_interface (vlib_main_t * vm) +{ + vnet_main_t * vnm = vnet_get_main(); + static u32 instance; + u8 address[6]; + u32 hw_if_index; + vnet_hw_interface_t * hi; + u32 sw_if_index; + + /* mac address doesn't really matter */ + memset (address, 0, sizeof (address)); + address[2] = 0x12; + + /* can returns error iff phy != 0 */ + 
(void) ethernet_register_interface + (vnm, + xcrw_device_class.index, + instance++, + address, + &hw_if_index, + /* flag change */ 0); + + hi = vnet_get_hw_interface (vnm, hw_if_index); + sw_if_index = hi->sw_if_index; + vnet_sw_interface_set_flags (vnm, sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + /* Output to the sham tunnel invokes the encap node */ + hi->output_node_index = l2_xcrw_node.index; + + return sw_if_index; +} + +int vnet_configure_l2_xcrw (vlib_main_t * vm, vnet_main_t *vnm, + u32 l2_sw_if_index, u32 tx_fib_index, + u8 * rewrite, u32 next_node_index, int is_add) +{ + l2_xcrw_main_t * xcm = &l2_xcrw_main; + l2_xcrw_adjacency_t * a; + l2_xcrw_tunnel_t * t; + uword * p; + + if (is_add) + { + + pool_get (xcm->tunnels, t); + + /* No interface allocated? Do it. Otherwise, set admin up */ + if (t->tunnel_sw_if_index == 0) + t->tunnel_sw_if_index = create_xcrw_interface (vm); + else + vnet_sw_interface_set_flags (vnm, t->tunnel_sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + t->l2_sw_if_index = l2_sw_if_index; + + vec_validate (xcm->adj_by_sw_if_index, t->l2_sw_if_index); + + a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index); + memset (a, 0, sizeof (*a)); + + a->rewrite_header.sw_if_index = tx_fib_index; + + /* + * Add or find a dynamic disposition for the successor node, + * e.g. so we can ship pkts to mpls_post_rewrite... 
+ */ + a->rewrite_header.next_index = + vlib_node_add_next (vm, l2_xcrw_node.index, next_node_index); + + if (vec_len (rewrite)) + vnet_rewrite_set_data (a[0], rewrite, vec_len(rewrite)); + + set_int_l2_mode (vm, vnm, MODE_L2_XC, t->l2_sw_if_index, 0, 0, 0, + t->tunnel_sw_if_index); + hash_set (xcm->tunnel_index_by_l2_sw_if_index, + t->l2_sw_if_index, t - xcm->tunnels); + return 0; + } + else + { + p = hash_get (xcm->tunnel_index_by_l2_sw_if_index, l2_sw_if_index); + if (p == 0) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + t = pool_elt_at_index (xcm->tunnels, p[0]); + + a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index); + /* Reset adj to drop traffic */ + memset (a, 0, sizeof (*a)); + + set_int_l2_mode (vm, vnm, MODE_L3, t->l2_sw_if_index, 0, 0, 0, 0); + + vnet_sw_interface_set_flags (vnm, t->tunnel_sw_if_index, 0 /* down */); + + hash_unset (xcm->tunnel_index_by_l2_sw_if_index, l2_sw_if_index); + pool_put (xcm->tunnels, t); + } + return 0; +} + + +static clib_error_t * +set_l2_xcrw_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + int is_add = 1; + int is_ipv6 = 0; /* for fib id -> fib index mapping */ + u32 tx_fib_id = ~0; + u32 tx_fib_index = ~0; + u32 next_node_index = ~0; + u32 l2_sw_if_index; + u8 * rw = 0; + vnet_main_t * vnm = vnet_get_main(); + int rv; + + + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + if (! 
unformat (line_input, "%U", + unformat_vnet_sw_interface, vnm, &l2_sw_if_index)) + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "next %U", + unformat_vlib_node, vm, &next_node_index)) + ; + else if (unformat (line_input, "tx-fib-id %d", &tx_fib_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "ipv6")) + is_ipv6 = 1; + else if (unformat (line_input, "rw %U", + unformat_hex_string, &rw)); + else + break; + } + + if (next_node_index == ~0) + return clib_error_return (0, "next node not specified"); + + if (tx_fib_id != ~0) + { + uword * p; + + if (is_ipv6) + p = hash_get (ip6_main.fib_index_by_table_id, tx_fib_id); + else + p = hash_get (ip4_main.fib_index_by_table_id, tx_fib_id); + + if (p == 0) + return clib_error_return (0, "nonexistent tx_fib_id %d", + tx_fib_id); + + tx_fib_index = p[0]; + } + + rv = vnet_configure_l2_xcrw (vm, vnm, l2_sw_if_index, tx_fib_index, + rw, next_node_index, is_add); + + switch (rv) + { + + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return (0, "%U not cross-connected", + format_vnet_sw_if_index_name, + vnm, l2_sw_if_index); + default: + return clib_error_return (0, "vnet_configure_l2_xcrw returned %d", + rv); + } + + vec_free (rw); + + return 0; +} + /* CLI registration: the path set interface l2 xcrw is handled by set_l2_xcrw_command_fn. */ +VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = { + .path = "set interface l2 xcrw", + .short_help = + "set int l2 xcrw <interface> next <node-name>\n" + " [del] [tx-fib-id <id>] [ipv6] rw <hex-bytes>", + .function = set_l2_xcrw_command_fn, +}; + /* format_l2xcrw: format callback that appends one xcrw display row to s and returns the vector. Reads two varargs in order: vnet_main_t *vnm, then l2_xcrw_tunnel_t *t. When t is 0 only the column header is appended; otherwise the row holds the L2 and tunnel interface names, the next node name, the tx fib index (only when rewrite_header.sw_if_index != ~0), the rewrite bytes in hex (only when data_bytes is nonzero) and a trailing newline. */ +static u8 * format_l2xcrw (u8 * s, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + l2_xcrw_tunnel_t * t = va_arg (*args, l2_xcrw_tunnel_t *); + l2_xcrw_main_t * xcm = &l2_xcrw_main; + vlib_main_t * vm = vlib_get_main (); + l2_xcrw_adjacency_t * a; + u8 * rewrite_string; + /* Null tunnel: emit only the column header and return. */ + if (t == 0) + { + 
s = format (s, "%-25s%s", "L2 interface", "Tunnel Details"); + return s; + } + /* Row start: L2 interface name left-justified in 25 columns, then the tunnel interface name. */ + s = format (s, "%-25U %U ", + format_vnet_sw_if_index_name, vnm, t->l2_sw_if_index, + format_vnet_sw_if_index_name, vnm, t->tunnel_sw_if_index); + /* The adjacency for this tunnel is looked up by its L2 sw_if_index. */ + a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index); + + s = format (s, "next %U ", + format_vlib_next_node_name, vm, l2_xcrw_node.index, + a->rewrite_header.next_index); + /* rewrite_header.sw_if_index is reported as the tx fib index; the value ~0 suppresses the field. */ + if (a->rewrite_header.sw_if_index != ~0) + s = format (s, "tx fib index %d ", a->rewrite_header.sw_if_index); + /* The rewrite bytes printed are the data_bytes that end at the end of the adjacency: the pointer starts at (a + 1) and is moved back by data_bytes. */ + if (a->rewrite_header.data_bytes) + { + rewrite_string = (u8 *)(a + 1); + rewrite_string -= a->rewrite_header.data_bytes; + s = format (s, "rewrite data: %U ", + format_hex_bytes, rewrite_string, + a->rewrite_header.data_bytes); + } + + s = format (s, "\n"); + + return s; +} + + /* show_l2xcrw_command_fn: CLI handler for show l2xcrw. If the tunnels pool is empty it prints a single notice; otherwise it prints the header row (format_l2xcrw with a null tunnel) and then one row per element of xcm->tunnels. input and cmd are not read. Returns 0 on every path. */ +static clib_error_t * +show_l2xcrw_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + l2_xcrw_main_t * xcm = &l2_xcrw_main; + l2_xcrw_tunnel_t * t; + + if (pool_elts (xcm->tunnels) == 0) + { + vlib_cli_output (vm, "No L2 / L3 rewrite cross-connects configured"); + return 0; + } + /* Header row. NOTE(review): the two literal 0 arguments are ints, but format_l2xcrw fetches them with va_arg as pointer types; consider passing typed null pointers. */ + vlib_cli_output (vm, "%U", format_l2xcrw, 0, 0); + + pool_foreach (t, xcm->tunnels, + ({ + vlib_cli_output (vm, "%U", format_l2xcrw, vnm, t); + })); + + return 0; +} + /* CLI registration: the path show l2xcrw is handled by show_l2xcrw_command_fn. */ +VLIB_CLI_COMMAND (show_l2xcrw_command, static) = { + .path = "show l2xcrw", + .short_help = "Display L2/L3 rewrite cross-connects", + .function = show_l2xcrw_command_fn, +}; diff --git a/vnet/vnet/l2/l2_xcrw.h b/vnet/vnet/l2/l2_xcrw.h new file mode 100644 index 00000000000..d32d1e8df5c --- /dev/null +++ b/vnet/vnet/l2/l2_xcrw.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_l2_xcrw_h__ +#define __included_l2_xcrw_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <vnet/ip/ip.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_output.h> +#include <vnet/api_errno.h> +#include <vnet/ethernet/ethernet.h> + /* l2_xcrw_adjacency_t: element type of l2_xcrw_main_t.adj_by_sw_if_index. Its only member is the rewrite declared by vnet_declare_rewrite, sized with VLIB_BUFFER_PRE_DATA_SIZE. */ +typedef struct { + /* + * Let: rewrite_header.sw_if_index = tx_fib_index or ~0. + * rewrite_header.next_index = L2_XCRW_NEXT_XXX + */ + vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE); +} l2_xcrw_adjacency_t; + /* l2_xcrw_tunnel_t: element type of the l2_xcrw_main_t.tunnels pool; pairs an L2 interface with a tunnel interface, both held as sw_if_index values. */ +typedef struct { + /* L2 interface */ + u32 l2_sw_if_index; + + /* Tunnel interface */ + u32 tunnel_sw_if_index; /* This field remains set in freed pool elts */ + +} l2_xcrw_tunnel_t; + /* l2_xcrw_main_t: state for the L2 xcrw code: a cached next index, the rewrite vector, the tunnel pool, a tunnel lookup hash and the vlib/vnet main pointers. */ +typedef struct { + u32 cached_next_index; + + /* Vector of cross-connect rewrites */ + l2_xcrw_adjacency_t * adj_by_sw_if_index; + + /* Pool of xcrw tunnels */ + l2_xcrw_tunnel_t * tunnels; + + /* Tunnel index by L2 sw_if_index, going by the field name. NOTE(review): this comment previously said tunnel sw_if_index; confirm the key against the hash_set call sites. */ + uword * tunnel_index_by_l2_sw_if_index; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} l2_xcrw_main_t; + /* l2_xcrw_next_t: named next indices for the xcrw node. L2_XCRW_NEXT_DROP is the only named value; L2_XCRW_N_NEXT follows it. */ +typedef enum { + L2_XCRW_NEXT_DROP, + L2_XCRW_N_NEXT, +} l2_xcrw_next_t; + /* foreach_l2_xcrw_error: X-macro list of (symbol, description) pairs, expanded below through _() to build l2_xcrw_error_t. */ +#define foreach_l2_xcrw_error \ +_(DROP, "Packets dropped") \ +_(FWD, "Packets forwarded") + /* l2_xcrw_error_t: one L2_XCRW_ERROR_<sym> enumerator per list entry, followed by L2_XCRW_N_ERROR. */ +typedef enum { +#define _(sym,str) L2_XCRW_ERROR_##sym, + foreach_l2_xcrw_error +#undef _ + L2_XCRW_N_ERROR, +} l2_xcrw_error_t; + +#endif /* __included_l2_xcrw_h__ */ |