Diffstat (limited to 'src/vnet/l2')
37 files changed, 15327 insertions, 0 deletions
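Review note: most of the new files below dispatch through the L2 feature bitmap declared in feat_bitmap.h: one bit per input feature, the dispatcher jumps to the graph node for the highest set bit, and bit 0 is reserved for the "feature-bitmap-drop" no-forward catch-all. A minimal sketch of that pattern, using only the helpers this patch adds; the node "my-l2-feature", its bit position, and the feature list are hypothetical, not part of the patch:

/* Hypothetical L2 feature node wired up with the feat_bitmap helpers. */
#include <vlib/vlib.h>
#include <vnet/l2/l2_input.h>
#include <vnet/l2/feat_bitmap.h>

/* Registered elsewhere with VLIB_REGISTER_NODE; shown only for .index. */
static vlib_node_registration_t my_l2_feature_node;

/* One graph node name per bit; higher bits run first, bit 0 runs last. */
static char *my_feat_names[] = {
  "feature-bitmap-drop",	/* bit 0: no-forward catch-all */
  "l2-fwd",			/* bit 1 */
  "my-l2-feature",		/* bit 2: this node (hypothetical) */
};

static u32 my_feat_next_nodes[FEAT_MAX];

static clib_error_t *
my_l2_feature_init (vlib_main_t * vm)
{
  /* Resolve names to next-node indexes once at init; missing nodes and
     unused bits all fall back to "feature-bitmap-drop". */
  feat_bitmap_init_next_nodes (vm, my_l2_feature_node.index,
			       ARRAY_LEN (my_feat_names),
			       my_feat_names, my_feat_next_nodes);
  return 0;
}

/* Per packet: clear our own bit (bit 2), then jump to the node for the
   next-highest bit still set in the buffer's feature bitmap. */
static_always_inline u32
my_l2_feature_dispatch (vlib_buffer_t * b)
{
  return vnet_l2_feature_next (b, my_feat_next_nodes, 1 << 2);
}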
diff --git a/src/vnet/l2/dir.dox b/src/vnet/l2/dir.dox new file mode 100644 index 00000000..8497a2f6 --- /dev/null +++ b/src/vnet/l2/dir.dox @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2013 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** +@dir +@brief Layer 2 Forwarding Code. + +This directory contains the source code for basic Layer 2 forwarding. + +*/ +/*? %%clicmd:group_label Layer 2 CLI %% ?*/ diff --git a/src/vnet/l2/feat_bitmap.c b/src/vnet/l2/feat_bitmap.c new file mode 100644 index 00000000..6c046467 --- /dev/null +++ b/src/vnet/l2/feat_bitmap.c @@ -0,0 +1,185 @@ +/* + * feat_bitmap.c: bitmap for managing feature invocation + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + + +/* + * Drop node for feature bitmaps + * For features that just do a drop, or are not yet implemented. + * Initial feature dispatch nodes don't need to set b0->error + * in case of a possible drop because that will be done here. + *The next node is always error-drop. 
+ */ + +static vlib_node_registration_t feat_bitmap_drop_node; + +#define foreach_feat_bitmap_drop_error \ +_(NO_FWD, "L2 feature forwarding disabled") \ +_(NYI, "L2 feature not implemented") + +typedef enum +{ +#define _(sym,str) FEAT_BITMAP_DROP_ERROR_##sym, + foreach_feat_bitmap_drop_error +#undef _ + FEAT_BITMAP_DROP_N_ERROR, +} feat_bitmap_drop_error_t; + +static char *feat_bitmap_drop_error_strings[] = { +#define _(sym,string) string, + foreach_feat_bitmap_drop_error +#undef _ +}; + +typedef enum +{ + FEAT_BITMAP_DROP_NEXT_DROP, + FEAT_BITMAP_DROP_N_NEXT, +} feat_bitmap_drop_next_t; + +typedef struct +{ + u32 feature_bitmap; +} feat_bitmap_drop_trace_t; + +/* packet trace format function */ +static u8 * +format_feat_bitmap_drop_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + feat_bitmap_drop_trace_t *t = va_arg (*args, feat_bitmap_drop_trace_t *); + + s = + format (s, "feat_bitmap_drop: feature bitmap 0x%08x", t->feature_bitmap); + return s; +} + +static uword +feat_bitmap_drop_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + feat_bitmap_drop_next_t next_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + feat_bitmap_drop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->feature_bitmap = vnet_buffer (b0)->l2.feature_bitmap; + } + + if (vnet_buffer (b0)->l2.feature_bitmap == 1) + { + /* + * If we are executing the last feature, this is the + * No forwarding catch-all + */ + b0->error = node->errors[FEAT_BITMAP_DROP_ERROR_NO_FWD]; + } + else + { + b0->error = node->errors[FEAT_BITMAP_DROP_ERROR_NYI]; + } + next0 = FEAT_BITMAP_DROP_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +clib_error_t * +feat_bitmap_drop_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (feat_bitmap_drop_init); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = { + .function = feat_bitmap_drop_node_fn, + .name = "feature-bitmap-drop", + .vector_size = sizeof (u32), + .format_trace = format_feat_bitmap_drop_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(feat_bitmap_drop_error_strings), + .error_strings = feat_bitmap_drop_error_strings, + + .n_next_nodes = FEAT_BITMAP_DROP_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [FEAT_BITMAP_DROP_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + 
*/ diff --git a/src/vnet/l2/feat_bitmap.h b/src/vnet/l2/feat_bitmap.h new file mode 100644 index 00000000..5940ff7e --- /dev/null +++ b/src/vnet/l2/feat_bitmap.h @@ -0,0 +1,110 @@ +/* + * feat_bitmap.h: bitmap for managing feature invocation + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_l2_feat_bitmap_h +#define included_vnet_l2_feat_bitmap_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +/* + * The feature bitmap is a way of organizing input and output feature graph nodes. + * The set of features to be executed are arranged in a bitmap with one bit per + * feature and each bit positioned in the same order that the features should be + * executed. Features can be dynamically removed from the set by masking off their + * corresponding bits. The bitmap is stored in packet context. Each feature clears + * its bit and then calls feat_bitmap_get_next_node_index() to go to the next + * graph node. + */ + + +/* 32 features in a u32 bitmap */ +#define FEAT_MAX 32 + +/** + Initialize the feature next-node indexes of a graph node. + Should be called by the init function of each feature graph node. +*/ +always_inline void +feat_bitmap_init_next_nodes (vlib_main_t * vm, u32 node_index, /* the current graph node index */ + u32 num_features, /* number of entries in feat_names */ + char **feat_names, /* array of feature graph node names */ + u32 * next_nodes) /* array of 32 next indexes to init */ +{ + u32 idx; + + ASSERT (num_features <= FEAT_MAX); + + for (idx = 0; idx < num_features; idx++) + { + if (vlib_get_node_by_name (vm, (u8 *) feat_names[idx])) + { + next_nodes[idx] = + vlib_node_add_named_next (vm, node_index, feat_names[idx]); + } + else + { // Node may be in plugin which is not installed, use drop node + next_nodes[idx] = + vlib_node_add_named_next (vm, node_index, "feature-bitmap-drop"); + } + } + + /* All unassigned bits go to the drop node */ + for (; idx < FEAT_MAX; idx++) + { + next_nodes[idx] = vlib_node_add_named_next (vm, node_index, + "feature-bitmap-drop"); + } +} + +/** + Return the graph node index for the feature corresponding to the + first set bit in the bitmap. +*/ +always_inline u32 +feat_bitmap_get_next_node_index (u32 * next_nodes, u32 bitmap) +{ + u32 first_bit; + + count_leading_zeros (first_bit, bitmap); + first_bit = uword_bits - 1 - first_bit; + return next_nodes[first_bit]; +} + +/** + Return the graph node index for the feature corresponding to the next + set bit after clearing the current feature bit in the feature_bitmap + of the current packet. 
+*/ +always_inline u32 +vnet_l2_feature_next (vlib_buffer_t * b, u32 * next_nodes, u32 feat_bit) +{ + vnet_buffer (b)->l2.feature_bitmap &= ~feat_bit; + u32 fb = vnet_buffer (b)->l2.feature_bitmap; + ASSERT (fb != 0); + return feat_bitmap_get_next_node_index (next_nodes, fb); +} + +#endif /* included_vnet_l2_feat_bitmap_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api new file mode 100644 index 00000000..ac923de4 --- /dev/null +++ b/src/vnet/l2/l2.api @@ -0,0 +1,385 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief Reply to l2_xconnect_dump + @param context - sender context which was passed in the request + @param rx_sw_if_index - Receive interface index + @param tx_sw_if_index - Transmit interface index + */ +define l2_xconnect_details +{ + u32 context; + u32 rx_sw_if_index; + u32 tx_sw_if_index; +}; + +/** \brief Dump L2 XConnects + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define l2_xconnect_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief l2 fib table details structure + @param bd_id - the l2 fib / bridge domain table id + @param mac - the entry's mac address + @param sw_if_index - index of the interface + @param static_mac - the entry is statically configured. + @param filter_mac - the entry is a mac filter entry. 
+ @param bvi_mac - the mac address is a bridge virtual interface +*/ +define l2_fib_table_details +{ + u32 context; + u32 bd_id; + u64 mac; + u32 sw_if_index; + u8 static_mac; + u8 filter_mac; + u8 bvi_mac; +}; + +/** \brief Dump l2 fib (aka bridge domain) table + @param client_index - opaque cookie to identify the sender + @param bd_id - the l2 fib / bridge domain table identifier +*/ +define l2_fib_table_dump +{ + u32 client_index; + u32 context; + u32 bd_id; +}; + +/** \brief L2 fib clear table request, clear all mac entries in the l2 fib + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +autoreply define l2_fib_clear_table +{ + u32 client_index; + u32 context; +}; + +/** \brief L2 FIB flush all entries + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +autoreply define l2fib_flush_all +{ + u32 client_index; + u32 context; +}; + +/** \brief L2 FIB flush bridge domain entries + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the entry's bridge domain id +*/ +autoreply define l2fib_flush_bd +{ + u32 client_index; + u32 context; + u32 bd_id; +}; + +/** \brief L2 FIB flush interface entries + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface to flush entries for +*/ +autoreply define l2fib_flush_int +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief L2 FIB add entry request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mac - the entry's mac address + @param bd_id - the entry's bridge domain id + @param sw_if_index - the interface + @param is_add - If non zero add the entry, else delete it + @param static_mac - the entry is statically configured + @param filter_mac - the entry is a mac filter entry + @param bvi_mac - the mac address is a bridge virtual interface +*/ +autoreply define l2fib_add_del +{ + u32 client_index; + u32 context; + u64 mac; + u32 bd_id; + u32 sw_if_index; + u8 is_add; + u8 static_mac; + u8 filter_mac; + u8 bvi_mac; +}; + +/** \brief Register to receive L2 MAC events for learned and aged MACs + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param learn_limit - MAC learn limit, 0 => default to 1000 + @param scan_delay - event scan delay in 10 msec unit, 0 => default to 100 msec + @param max_macs_in_event - in units of 10 mac entries, 0 => default to 100 entries + @param enable_disable - 1 => register for MAC events, 0 => cancel registration + @param pid - sender's pid +*/ +autoreply define want_l2_macs_events +{ + u32 client_index; + u32 context; + u32 learn_limit; + u8 scan_delay; + u8 max_macs_in_event; + u8 enable_disable; + u32 pid; +}; + +/** \brief Entry for learned or aged MAC in L2 MAC Events + @param sw_if_index - sw_if_index in the domain + @param mac_addr - mac_address + @param is_del - 0 => newly learned MAC, 1 => aged out MAC +*/ +typeonly define mac_entry +{ + u32 sw_if_index; + u8 mac_addr[6]; + u8 is_del; + u8 spare; +}; + +/** \brief L2 MAC event for a list of learned or aged MACs + @param client_index - opaque cookie to identify the sender + @param pid - client pid registered to receive notification + @param n_macs - number of learned/aged MAC entries + @param mac - array of learned/aged MAC entries +*/ +define l2_macs_event +{ + u32 client_index; + u32 pid; + u32 n_macs; + 
vl_api_mac_entry_t mac[n_macs]; +}; + +/** \brief Set interface L2 flags (such as L2_LEARN, L2_FWD, + L2_FLOOD, L2_UU_FLOOD, or L2_ARP_TERM bits). This can be used + to disable one or more of the features represented by the + flag bits on an interface to override what is set as default + for all interfaces in the bridge domain + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface + @param is_set - if non-zero, set the bits, else clear them + @param feature_bitmap - non-zero bits (as above) to set or clear +*/ +define l2_flags +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 is_set; + u32 feature_bitmap; +}; + +/** \brief Set interface L2 flags response + @param context - sender context, to match reply w/ request + @param retval - return code for the set l2 bits request + @param resulting_feature_bitmap - the internal l2 feature bitmap after the request is implemented +*/ +define l2_flags_reply +{ + u32 context; + i32 retval; + u32 resulting_feature_bitmap; +}; + +/** \brief L2 bridge domain set mac age + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the bridge domain id + @param mac_age - mac aging time in min, 0 for disabled +*/ +autoreply define bridge_domain_set_mac_age +{ + u32 client_index; + u32 context; + u32 bd_id; + u8 mac_age; +}; + +/** \brief L2 bridge domain add or delete request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the bridge domain to create or delete + @param flood - enable/disable bcast/mcast flooding in the bd + @param uu_flood - enable/disable unknown unicast flood in the bd + @param forward - enable/disable forwarding on all interfaces in the bd + @param learn - enable/disable learning on all interfaces in the bd + @param arp_term - enable/disable arp termination in the bd + @param mac_age - mac aging time in min, 0 for disabled + @param bd_tag - optional textual tag for the bridge domain + @param is_add - add or delete flag +*/ +autoreply define bridge_domain_add_del +{ + u32 client_index; + u32 context; + u32 bd_id; + u8 flood; + u8 uu_flood; + u8 forward; + u8 learn; + u8 arp_term; + u8 mac_age; + u8 bd_tag[64]; + u8 is_add; +}; + +/** \brief L2 bridge domain request operational state details + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the bridge domain id desired or ~0 to request all bds +*/ +define bridge_domain_dump +{ + u32 client_index; + u32 context; + u32 bd_id; +}; + +/** \brief L2 bridge domain sw interface operational state response + @param bd_id - the bridge domain id + @param sw_if_index - sw_if_index in the domain + @param shg - split horizon group for the interface +*/ +typeonly manual_print manual_endian define bridge_domain_sw_if +{ + u32 context; + u32 sw_if_index; + u8 shg; +}; + +/** \brief L2 bridge domain operational state response + @param bd_id - the bridge domain id + @param flood - bcast/mcast flooding state on all interfaces in the bd + @param uu_flood - unknown unicast flooding state on all interfaces in the bd + @param forward - forwarding state on all interfaces in the bd + @param learn - learning state on all interfaces in the bd + @param arp_term - arp termination state on all interfaces in the bd + @param mac_age - mac aging time in min, 0 for disabled + @param bd_tag - optional textual tag for the bridge domain + @param n_sw_ifs - number of sw_if_index's in the domain +*/ +manual_print manual_endian define bridge_domain_details +{ + u32 context; + u32 bd_id; + u8 flood; + u8 uu_flood; + u8 forward; + u8 learn; + u8 arp_term; + u8 mac_age; + u8 bd_tag[64]; + u32 bvi_sw_if_index; + u32 n_sw_ifs; + vl_api_bridge_domain_sw_if_t sw_if_details[n_sw_ifs]; +}; + +/** \brief Set bridge flags (such as L2_LEARN, L2_FWD, L2_FLOOD, + L2_UU_FLOOD, or L2_ARP_TERM bits) request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the bridge domain to set the flags for + @param is_set - if non-zero, set the flags, else clear them + @param feature_bitmap - bits (as above) that are non-zero to set or clear +*/ +define bridge_flags +{ + u32 client_index; + u32 context; + u32 bd_id; + u8 is_set; + u32 feature_bitmap; +}; + +/** \brief Set bridge flags response + @param context - sender context, to match reply w/ request + @param retval - return code for the set bridge flags request + @param resulting_feature_bitmap - the internal L2 feature bitmap after the request is implemented +*/ +define bridge_flags_reply +{ + u32 context; + i32 retval; + u32 resulting_feature_bitmap; +}; + +/** \brief L2 interface vlan tag rewrite configure request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface the operation is applied to + @param vtr_op - Choose from l2_vtr_op_t enum values + @param push_dot1q - if non-zero, ethertype of first pushed tag is dot1q, else dot1ad + @param tag1 - Needed for any push or translate vtr op + @param tag2 - Needed for any push 2 or translate x-2 vtr ops +*/ +autoreply define l2_interface_vlan_tag_rewrite +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 vtr_op; + u32 push_dot1q; // ethertype of first pushed tag is dot1q/dot1ad + u32 tag1; // first pushed tag + u32 tag2; // second pushed tag +}; + +/** \brief L2 interface pbb tag rewrite configure request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface the operation is applied to + @param vtr_op - Choose from l2_vtr_op_t enum values + @param outer_tag - needed for translate_qinq vtr op only + @param b_dmac - B-tag remote mac address, needed for any push or translate_qinq vtr op + @param b_smac - B-tag local mac address, needed for any push or translate qinq vtr op + @param b_vlanid - B-tag vlanid, needed for any push or translate qinq vtr op + @param i_sid - I-tag service id, needed for any push or translate qinq vtr op +*/ +autoreply define l2_interface_pbb_tag_rewrite +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 vtr_op; + u16 outer_tag; + u8 b_dmac[6]; + u8 b_smac[6]; + u16 b_vlanid; + u32 i_sid; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c new file mode 100644 index 00000000..20d6ab32 --- /dev/null +++ b/src/vnet/l2/l2_api.c @@ -0,0 +1,679 @@ +/* + *------------------------------------------------------------------ + * l2_api.c - layer 2 forwarding api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_fib.h> +#include <vnet/l2/l2_vtr.h> +#include <vnet/l2/l2_learn.h> + +#include <vnet/vnet_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_endianfun + +#define vl_api_bridge_domain_details_t_endian vl_noop_handler +#define vl_api_bridge_domain_details_t_print vl_noop_handler + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <vnet/vnet_all_api_h.h> +#undef vl_printfun + +#include <vlibapi/api_helper_macros.h> + +#define foreach_vpe_api_msg \ +_(L2_XCONNECT_DUMP, l2_xconnect_dump) \ +_(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \ +_(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \ +_(L2FIB_FLUSH_ALL, l2fib_flush_all) \ +_(L2FIB_FLUSH_INT, l2fib_flush_int) \ +_(L2FIB_FLUSH_BD, l2fib_flush_bd) \ +_(L2FIB_ADD_DEL, l2fib_add_del) \ +_(WANT_L2_MACS_EVENTS, want_l2_macs_events) \ +_(L2_FLAGS, l2_flags) \ +_(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ +_(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ +_(BRIDGE_FLAGS, bridge_flags) \ +_(L2_INTERFACE_VLAN_TAG_REWRITE, l2_interface_vlan_tag_rewrite) \ +_(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) \ +_(BRIDGE_DOMAIN_SET_MAC_AGE, bridge_domain_set_mac_age) + +static void +send_l2_xconnect_details (unix_shared_memory_queue_t * q, u32 context, + u32 rx_sw_if_index, u32 tx_sw_if_index) +{ + vl_api_l2_xconnect_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_L2_XCONNECT_DETAILS); + mp->context = context; + mp->rx_sw_if_index = htonl (rx_sw_if_index); + mp->tx_sw_if_index = htonl (tx_sw_if_index); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + l2input_main_t *l2im = &l2input_main; + vnet_sw_interface_t *swif; + l2_input_config_t *config; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* *INDENT-OFF* */ + pool_foreach (swif, im->sw_interfaces, + ({ + config = vec_elt_at_index (l2im->configs, swif->sw_if_index); + if (config->xconnect) + send_l2_xconnect_details (q, mp->context, swif->sw_if_index, + config->output_sw_if_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_l2_fib_clear_table_t_handler (vl_api_l2_fib_clear_table_t * mp) +{ + int rv = 0; + vl_api_l2_fib_clear_table_reply_t *rmp; + + /* Clear all MACs including static MACs */ + l2fib_clear_table (); + + REPLY_MACRO (VL_API_L2_FIB_CLEAR_TABLE_REPLY); +} + +static void +send_l2fib_table_entry (vpe_api_main_t * am, + 
unix_shared_memory_queue_t * q, + l2fib_entry_key_t * l2fe_key, + l2fib_entry_result_t * l2fe_res, u32 context) +{ + vl_api_l2_fib_table_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_L2_FIB_TABLE_DETAILS); + + mp->bd_id = + ntohl (l2input_main.bd_configs[l2fe_key->fields.bd_index].bd_id); + + mp->mac = l2fib_make_key (l2fe_key->fields.mac, 0); + mp->sw_if_index = ntohl (l2fe_res->fields.sw_if_index); + mp->static_mac = l2fe_res->fields.static_mac; + mp->filter_mac = l2fe_res->fields.filter; + mp->bvi_mac = l2fe_res->fields.bvi; + mp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_l2_fib_table_dump_t_handler (vl_api_l2_fib_table_dump_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + bd_main_t *bdm = &bd_main; + l2fib_entry_key_t *l2fe_key = NULL; + l2fib_entry_result_t *l2fe_res = NULL; + u32 ni, bd_id = ntohl (mp->bd_id); + u32 bd_index; + unix_shared_memory_queue_t *q; + uword *p; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* see l2fib_table_dump: ~0 means "any" */ + if (bd_id == ~0) + bd_index = ~0; + else + { + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p == 0) + return; + + bd_index = p[0]; + } + + l2fib_table_dump (bd_index, &l2fe_key, &l2fe_res); + + vec_foreach_index (ni, l2fe_key) + { + send_l2fib_table_entry (am, q, vec_elt_at_index (l2fe_key, ni), + vec_elt_at_index (l2fe_res, ni), mp->context); + } + vec_free (l2fe_key); + vec_free (l2fe_res); +} + +static void +vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp) +{ + bd_main_t *bdm = &bd_main; + l2input_main_t *l2im = &l2input_main; + vl_api_l2fib_add_del_reply_t *rmp; + int rv = 0; + u32 bd_id = ntohl (mp->bd_id); + uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (!p) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto bad_sw_if_index; + } + u32 bd_index = p[0]; + + u64 mac = mp->mac; + if (mp->is_add) + { + if (mp->filter_mac) + l2fib_add_filter_entry (mac, bd_index); + else + { + u32 sw_if_index = ntohl (mp->sw_if_index); + VALIDATE_SW_IF_INDEX (mp); + if (vec_len (l2im->configs) <= sw_if_index) + { + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + goto bad_sw_if_index; + } + else + { + l2_input_config_t *config; + config = vec_elt_at_index (l2im->configs, sw_if_index); + if (config->bridge == 0) + { + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + goto bad_sw_if_index; + } + } + u8 static_mac = mp->static_mac ? 1 : 0; + u8 bvi_mac = mp->bvi_mac ? 
1 : 0; + l2fib_add_fwd_entry (mac, bd_index, sw_if_index, static_mac, + bvi_mac); + } + } + else + { + l2fib_del_entry (mac, bd_index); + } + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_L2FIB_ADD_DEL_REPLY); +} + +static void +vl_api_want_l2_macs_events_t_handler (vl_api_want_l2_macs_events_t * mp) +{ + int rv = 0; + vl_api_want_l2_macs_events_reply_t *rmp; + l2learn_main_t *lm = &l2learn_main; + l2fib_main_t *fm = &l2fib_main; + u32 pid = ntohl (mp->pid); + u32 learn_limit = ntohl (mp->learn_limit); + + if (mp->enable_disable) + { + if (lm->client_pid == 0) + { + lm->client_pid = pid; + lm->client_index = mp->client_index; + + if (mp->max_macs_in_event) + fm->max_macs_in_event = mp->max_macs_in_event * 10; + else + fm->max_macs_in_event = L2FIB_EVENT_MAX_MACS_DEFAULT; + + if (mp->scan_delay) + fm->event_scan_delay = (f64) (mp->scan_delay) * 10e-3; + else + fm->event_scan_delay = L2FIB_EVENT_SCAN_DELAY_DEFAULT; + + /* change learn limit and flush all learned MACs */ + if (learn_limit && (learn_limit < L2LEARN_DEFAULT_LIMIT)) + lm->global_learn_limit = learn_limit; + else + lm->global_learn_limit = L2FIB_EVENT_LEARN_LIMIT_DEFAULT; + + l2fib_flush_all_mac (vlib_get_main ()); + } + else if (lm->client_pid != pid) + { + rv = VNET_API_ERROR_L2_MACS_EVENT_CLINET_PRESENT; + goto exit; + } + } + else if (lm->client_pid) + { + lm->client_pid = 0; + lm->client_index = 0; + if (learn_limit && (learn_limit < L2LEARN_DEFAULT_LIMIT)) + lm->global_learn_limit = learn_limit; + else + lm->global_learn_limit = L2LEARN_DEFAULT_LIMIT; + } + +exit: + REPLY_MACRO (VL_API_WANT_L2_MACS_EVENTS_REPLY); +} + +static void +vl_api_l2fib_flush_int_t_handler (vl_api_l2fib_flush_int_t * mp) +{ + int rv = 0; + vlib_main_t *vm = vlib_get_main (); + vl_api_l2fib_flush_int_reply_t *rmp; + + VALIDATE_SW_IF_INDEX (mp); + + u32 sw_if_index = ntohl (mp->sw_if_index); + l2fib_flush_int_mac (vm, sw_if_index); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_L2FIB_FLUSH_INT_REPLY); +} + +static void +vl_api_l2fib_flush_all_t_handler (vl_api_l2fib_flush_all_t * mp) +{ + int rv = 0; + vl_api_l2fib_flush_all_reply_t *rmp; + + l2fib_flush_all_mac (vlib_get_main ()); + REPLY_MACRO (VL_API_L2FIB_FLUSH_ALL_REPLY); +} + +static void +vl_api_l2fib_flush_bd_t_handler (vl_api_l2fib_flush_bd_t * mp) +{ + int rv = 0; + vlib_main_t *vm = vlib_get_main (); + bd_main_t *bdm = &bd_main; + vl_api_l2fib_flush_bd_reply_t *rmp; + + u32 bd_id = ntohl (mp->bd_id); + uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p == 0) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto out; + } + l2fib_flush_bd_mac (vm, *p); +out: + REPLY_MACRO (VL_API_L2FIB_FLUSH_BD_REPLY); +} + +static void +vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp) +{ + vl_api_l2_flags_reply_t *rmp; + int rv = 0; + u32 rbm = 0; + + VALIDATE_SW_IF_INDEX (mp); + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 flags = ntohl (mp->feature_bitmap); + u32 bitmap = 0; + + if (flags & L2_LEARN) + bitmap |= L2INPUT_FEAT_LEARN; + + if (flags & L2_FWD) + bitmap |= L2INPUT_FEAT_FWD; + + if (flags & L2_FLOOD) + bitmap |= L2INPUT_FEAT_FLOOD; + + if (flags & L2_UU_FLOOD) + bitmap |= L2INPUT_FEAT_UU_FLOOD; + + if (flags & L2_ARP_TERM) + bitmap |= L2INPUT_FEAT_ARP_TERM; + + rbm = l2input_intf_bitmap_enable (sw_if_index, bitmap, mp->is_set); + + BAD_SW_IF_INDEX_LABEL; + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_L2_FLAGS_REPLY, + ({ + rmp->resulting_feature_bitmap = ntohl(rbm); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_bridge_domain_set_mac_age_t_handler 
(vl_api_bridge_domain_set_mac_age_t + * mp) +{ + vlib_main_t *vm = vlib_get_main (); + bd_main_t *bdm = &bd_main; + vl_api_bridge_domain_set_mac_age_reply_t *rmp; + int rv = 0; + u32 bd_id = ntohl (mp->bd_id); + uword *p; + + if (bd_id == 0) + { + rv = VNET_API_ERROR_BD_NOT_MODIFIABLE; + goto out; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p == 0) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto out; + } + bd_set_mac_age (vm, *p, mp->mac_age); +out: + REPLY_MACRO (VL_API_BRIDGE_DOMAIN_SET_MAC_AGE_REPLY); +} + +static void +vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) +{ + l2_bridge_domain_add_del_args_t a = { + .is_add = mp->is_add, + .flood = mp->flood, + .uu_flood = mp->uu_flood, + .forward = mp->forward, + .learn = mp->learn, + .arp_term = mp->arp_term, + .mac_age = mp->mac_age, + .bd_id = ntohl (mp->bd_id), + .bd_tag = mp->bd_tag + }; + + int rv = bd_add_del (&a); + + vl_api_bridge_domain_add_del_reply_t *rmp; + REPLY_MACRO (VL_API_BRIDGE_DOMAIN_ADD_DEL_REPLY); +} + +static void +send_bridge_domain_details (l2input_main_t * l2im, + unix_shared_memory_queue_t * q, + l2_bridge_domain_t * bd_config, + u32 n_sw_ifs, u32 context) +{ + vl_api_bridge_domain_details_t *mp; + l2_flood_member_t *m; + vl_api_bridge_domain_sw_if_t *sw_ifs; + l2_input_config_t *input_cfg; + + mp = vl_msg_api_alloc (sizeof (*mp) + + (n_sw_ifs * sizeof (vl_api_bridge_domain_sw_if_t))); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_BRIDGE_DOMAIN_DETAILS); + mp->bd_id = ntohl (bd_config->bd_id); + mp->flood = bd_feature_flood (bd_config); + mp->uu_flood = bd_feature_uu_flood (bd_config); + mp->forward = bd_feature_forward (bd_config); + mp->learn = bd_feature_learn (bd_config); + mp->arp_term = bd_feature_arp_term (bd_config); + mp->bvi_sw_if_index = ntohl (bd_config->bvi_sw_if_index); + mp->mac_age = bd_config->mac_age; + if (bd_config->bd_tag) + { + strncpy ((char *) mp->bd_tag, (char *) bd_config->bd_tag, + ARRAY_LEN (mp->bd_tag) - 1); + mp->bd_tag[ARRAY_LEN (mp->bd_tag) - 1] = 0; + } + + mp->context = context; + + sw_ifs = (vl_api_bridge_domain_sw_if_t *) mp->sw_if_details; + vec_foreach (m, bd_config->members) + { + sw_ifs->sw_if_index = ntohl (m->sw_if_index); + input_cfg = vec_elt_at_index (l2im->configs, m->sw_if_index); + sw_ifs->shg = input_cfg->shg; + sw_ifs++; + mp->n_sw_ifs++; + } + mp->n_sw_ifs = htonl (mp->n_sw_ifs); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_bridge_domain_dump_t_handler (vl_api_bridge_domain_dump_t * mp) +{ + bd_main_t *bdm = &bd_main; + l2input_main_t *l2im = &l2input_main; + + unix_shared_memory_queue_t *q = + vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + u32 bd_id = ntohl (mp->bd_id); + if (bd_id == 0) + return; + + u32 bd_index, end; + if (bd_id == ~0) + bd_index = 0, end = vec_len (l2im->bd_configs); + else + { + bd_index = bd_find_index (bdm, bd_id); + if (bd_index == ~0) + return; + + end = bd_index + 1; + } + + for (; bd_index < end; bd_index++) + { + l2_bridge_domain_t *bd_config = + l2input_bd_config_from_index (l2im, bd_index); + /* skip dummy bd_id 0 */ + if (bd_config && (bd_config->bd_id > 0)) + send_bridge_domain_details (l2im, q, bd_config, + vec_len (bd_config->members), + mp->context); + } +} + +static void +vl_api_bridge_flags_t_handler (vl_api_bridge_flags_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + bd_main_t *bdm = &bd_main; + vl_api_bridge_flags_reply_t *rmp; + int rv = 0; + + u32 flags = ntohl (mp->feature_bitmap); + u32 bd_id = 
ntohl (mp->bd_id); + if (bd_id == 0) + { + rv = VNET_API_ERROR_BD_NOT_MODIFIABLE; + goto out; + } + + u32 bd_index = bd_find_index (bdm, bd_id); + if (bd_index == ~0) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto out; + } + + u32 bitmap = bd_set_flags (vm, bd_index, flags, mp->is_set); + +out: + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_BRIDGE_FLAGS_REPLY, + ({ + rmp->resulting_feature_bitmap = ntohl(bitmap); + })); + /* *INDENT-ON* */ +} + +static void + vl_api_l2_interface_vlan_tag_rewrite_t_handler + (vl_api_l2_interface_vlan_tag_rewrite_t * mp) +{ + int rv = 0; + vl_api_l2_interface_vlan_tag_rewrite_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + u32 vtr_op; + + VALIDATE_SW_IF_INDEX (mp); + + vtr_op = ntohl (mp->vtr_op); + + /* The L2 code does not check vtr_op, so validate it here */ + switch (vtr_op) + { + case L2_VTR_DISABLED: + case L2_VTR_PUSH_1: + case L2_VTR_PUSH_2: + case L2_VTR_POP_1: + case L2_VTR_POP_2: + case L2_VTR_TRANSLATE_1_1: + case L2_VTR_TRANSLATE_1_2: + case L2_VTR_TRANSLATE_2_1: + case L2_VTR_TRANSLATE_2_2: + break; + + default: + rv = VNET_API_ERROR_INVALID_VALUE; + goto bad_sw_if_index; + } + + rv = l2vtr_configure (vm, vnm, ntohl (mp->sw_if_index), vtr_op, + ntohl (mp->push_dot1q), ntohl (mp->tag1), + ntohl (mp->tag2)); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_L2_INTERFACE_VLAN_TAG_REWRITE_REPLY); +} + +static void + vl_api_l2_interface_pbb_tag_rewrite_t_handler + (vl_api_l2_interface_pbb_tag_rewrite_t * mp) +{ + vl_api_l2_interface_pbb_tag_rewrite_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + u32 vtr_op; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + vtr_op = ntohl (mp->vtr_op); + + switch (vtr_op) + { + case L2_VTR_DISABLED: + case L2_VTR_PUSH_2: + case L2_VTR_POP_2: + case L2_VTR_TRANSLATE_2_1: + break; + + default: + rv = VNET_API_ERROR_INVALID_VALUE; + goto bad_sw_if_index; + } + + rv = l2pbb_configure (vm, vnm, ntohl (mp->sw_if_index), vtr_op, + mp->b_dmac, mp->b_smac, ntohs (mp->b_vlanid), + ntohl (mp->i_sid), ntohs (mp->outer_tag)); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_L2_INTERFACE_PBB_TAG_REWRITE_REPLY); +} + +/* + * l2_api_hookup + * Add vpe's API message handlers to the table. + * vlib has already mapped shared memory and + * added the client registration handlers. + * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include <vnet/vnet_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_l2; +#undef _ +} + +static clib_error_t * +l2_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (l2_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c new file mode 100644 index 00000000..b1abb4c0 --- /dev/null +++ b/src/vnet/l2/l2_bd.c @@ -0,0 +1,1359 @@ +/* + * l2_bd.c : layer 2 bridge domain + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vlib/cli.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/format.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_bd.h> +#include <vnet/l2/l2_learn.h> +#include <vnet/l2/l2_fib.h> +#include <vnet/l2/l2_vtr.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/vec.h> + +/** + * @file + * @brief Ethernet Bridge Domain. + * + * Code in this file manages Layer 2 bridge domains. + * + */ + +bd_main_t bd_main; + +/** + Init bridge domain if not done already. + For feature bitmap, set all bits except ARP termination +*/ +void +bd_validate (l2_bridge_domain_t * bd_config) +{ + if (bd_is_valid (bd_config)) + return; + bd_config->feature_bitmap = ~L2INPUT_FEAT_ARP_TERM; + bd_config->bvi_sw_if_index = ~0; + bd_config->members = 0; + bd_config->flood_count = 0; + bd_config->tun_master_count = 0; + bd_config->tun_normal_count = 0; + bd_config->mac_by_ip4 = 0; + bd_config->mac_by_ip6 = hash_create_mem (0, sizeof (ip6_address_t), + sizeof (uword)); +} + +u32 +bd_find_index (bd_main_t * bdm, u32 bd_id) +{ + u32 *p = (u32 *) hash_get (bdm->bd_index_by_bd_id, bd_id); + if (!p) + return ~0; + return p[0]; +} + +u32 +bd_add_bd_index (bd_main_t * bdm, u32 bd_id) +{ + ASSERT (!hash_get (bdm->bd_index_by_bd_id, bd_id)); + u32 rv = clib_bitmap_first_clear (bdm->bd_index_bitmap); + + /* mark this index taken */ + bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, rv, 1); + + hash_set (bdm->bd_index_by_bd_id, bd_id, rv); + + vec_validate (l2input_main.bd_configs, rv); + l2input_main.bd_configs[rv].bd_id = bd_id; + + return rv; +} + +static int +bd_delete (bd_main_t * bdm, u32 bd_index) +{ + l2_bridge_domain_t *bd = &l2input_main.bd_configs[bd_index]; + u32 bd_id = bd->bd_id; + u64 mac_addr; + ip6_address_t *ip6_addr_key; + + /* flush non-static MACs in BD and remove bd_id from hash table */ + l2fib_flush_bd_mac (vlib_get_main (), bd_index); + hash_unset (bdm->bd_index_by_bd_id, bd_id); + + /* mark this index clear */ + bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, bd_index, 0); + + /* clear BD config for reuse: bd_id to -1 and clear feature_bitmap */ + bd->bd_id = ~0; + bd->feature_bitmap = 0; + + /* free BD tag */ + vec_free (bd->bd_tag); + + /* free memory used by BD */ + vec_free (bd->members); + hash_free (bd->mac_by_ip4); + /* *INDENT-OFF* */ + hash_foreach_mem (ip6_addr_key, mac_addr, bd->mac_by_ip6, + ({ + clib_mem_free (ip6_addr_key); /* free memory used for ip6 addr key */ + })); + /* *INDENT-ON* */ + hash_free (bd->mac_by_ip6); + + return 0; +} + +static void +update_flood_count (l2_bridge_domain_t * bd_config) +{ + bd_config->flood_count = vec_len (bd_config->members) - + (bd_config->tun_master_count ? 
bd_config->tun_normal_count : 0); +} + +void +bd_add_member (l2_bridge_domain_t * bd_config, l2_flood_member_t * member) +{ + u32 ix; + vnet_sw_interface_t *sw_if = vnet_get_sw_interface + (vnet_get_main (), member->sw_if_index); + + /* + * Add one element to the vector + * vector is ordered [ bvi, normal/tun_masters..., tun_normals... ] + * When flooding, the bvi interface (if present) must be the last member + * processed due to how BVI processing can change the packet. To enable + * this order, we make the bvi interface the first in the vector and + * flooding walks the vector in reverse. + */ + switch (sw_if->flood_class) + { + case VNET_FLOOD_CLASS_TUNNEL_MASTER: + bd_config->tun_master_count++; + /* Fall through */ + default: + /* Fall through */ + case VNET_FLOOD_CLASS_NORMAL: + ix = (member->flags & L2_FLOOD_MEMBER_BVI) ? 0 : + vec_len (bd_config->members) - bd_config->tun_normal_count; + break; + case VNET_FLOOD_CLASS_TUNNEL_NORMAL: + ix = vec_len (bd_config->members); + bd_config->tun_normal_count++; + break; + } + + vec_insert_elts (bd_config->members, member, 1, ix); + update_flood_count (bd_config); +} + +#define BD_REMOVE_ERROR_OK 0 +#define BD_REMOVE_ERROR_NOT_FOUND 1 + +u32 +bd_remove_member (l2_bridge_domain_t * bd_config, u32 sw_if_index) +{ + u32 ix; + + /* Find and delete the member */ + vec_foreach_index (ix, bd_config->members) + { + l2_flood_member_t *m = vec_elt_at_index (bd_config->members, ix); + if (m->sw_if_index == sw_if_index) + { + vnet_sw_interface_t *sw_if = vnet_get_sw_interface + (vnet_get_main (), sw_if_index); + + if (sw_if->flood_class != VNET_FLOOD_CLASS_NORMAL) + { + if (sw_if->flood_class == VNET_FLOOD_CLASS_TUNNEL_MASTER) + bd_config->tun_master_count--; + else if (sw_if->flood_class == VNET_FLOOD_CLASS_TUNNEL_NORMAL) + bd_config->tun_normal_count--; + } + vec_delete (bd_config->members, 1, ix); + update_flood_count (bd_config); + + return BD_REMOVE_ERROR_OK; + } + } + + return BD_REMOVE_ERROR_NOT_FOUND; +} + + +clib_error_t * +l2bd_init (vlib_main_t * vm) +{ + bd_main_t *bdm = &bd_main; + bdm->bd_index_by_bd_id = hash_create (0, sizeof (uword)); + /* + * create a dummy bd with bd_id of 0 and bd_index of 0 with feature set + * to packet drop only. Thus, packets received from any L2 interface with + * uninitialized bd_index of 0 can be dropped safely. + */ + u32 bd_index = bd_add_bd_index (bdm, 0); + ASSERT (bd_index == 0); + l2input_main.bd_configs[0].feature_bitmap = L2INPUT_FEAT_DROP; + + bdm->vlib_main = vm; + return 0; +} + +VLIB_INIT_FUNCTION (l2bd_init); + + +/** + Set the learn/forward/flood flags for the bridge domain. + Return the resulting feature bitmap. +*/ +u32 +bd_set_flags (vlib_main_t * vm, u32 bd_index, u32 flags, u32 enable) +{ + + l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index); + bd_validate (bd_config); + u32 feature_bitmap = 0; + + if (flags & L2_LEARN) + { + feature_bitmap |= L2INPUT_FEAT_LEARN; + } + if (flags & L2_FWD) + { + feature_bitmap |= L2INPUT_FEAT_FWD; + } + if (flags & L2_FLOOD) + { + feature_bitmap |= L2INPUT_FEAT_FLOOD; + } + if (flags & L2_UU_FLOOD) + { + feature_bitmap |= L2INPUT_FEAT_UU_FLOOD; + } + if (flags & L2_ARP_TERM) + { + feature_bitmap |= L2INPUT_FEAT_ARP_TERM; + } + + if (enable) + { + bd_config->feature_bitmap |= feature_bitmap; + } + else + { + bd_config->feature_bitmap &= ~feature_bitmap; + } + + return bd_config->feature_bitmap; +} + +/** + Set the mac age for the bridge domain. 
+*/ +void +bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age) +{ + l2_bridge_domain_t *bd_config; + int enable = 0; + + vec_validate (l2input_main.bd_configs, bd_index); + bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index); + bd_config->mac_age = age; + + /* check if there is at least one bd with mac aging enabled */ + vec_foreach (bd_config, l2input_main.bd_configs) + enable |= bd_config->bd_id != ~0 && bd_config->mac_age != 0; + + vlib_process_signal_event (vm, l2fib_mac_age_scanner_process_node.index, + enable ? L2_MAC_AGE_PROCESS_EVENT_START : + L2_MAC_AGE_PROCESS_EVENT_STOP, 0); +} + +/** + Set the tag for the bridge domain. +*/ + +static void +bd_set_bd_tag (vlib_main_t * vm, u32 bd_index, u8 * bd_tag) +{ + u8 *old; + l2_bridge_domain_t *bd_config; + vec_validate (l2input_main.bd_configs, bd_index); + bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index); + + old = bd_config->bd_tag; + + if (bd_tag[0]) + { + bd_config->bd_tag = format (0, "%s%c", bd_tag, 0); + } + else + { + bd_config->bd_tag = NULL; + } + + vec_free (old); +} + +/** + Set bridge-domain learn enable/disable. + The CLI format is: + set bridge-domain learn <bd_id> [disable] +*/ +static clib_error_t * +bd_learn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + bd_main_t *bdm = &bd_main; + clib_error_t *error = 0; + u32 bd_index, bd_id; + u32 enable; + uword *p; + + if (!unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + enable = 1; + if (unformat (input, "disable")) + { + enable = 0; + } + + /* set the bridge domain flag */ + bd_set_flags (vm, bd_index, L2_LEARN, enable); + +done: + return error; +} + +/*? + * Layer 2 learning can be enabled and disabled on each + * interface and on each bridge-domain. Use this command to + * manage bridge-domains. It is enabled by default. + * + * @cliexpar + * Example of how to enable learning (where 200 is the bridge-domain-id): + * @cliexcmd{set bridge-domain learn 200} + * Example of how to disable learning (where 200 is the bridge-domain-id): + * @cliexcmd{set bridge-domain learn 200 disable} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bd_learn_cli, static) = { + .path = "set bridge-domain learn", + .short_help = "set bridge-domain learn <bridge-domain-id> [disable]", + .function = bd_learn, +}; +/* *INDENT-ON* */ + +/** + Set bridge-domain forward enable/disable. 
+ The CLI format is: + set bridge-domain forward <bd_index> [disable] +*/ +static clib_error_t * +bd_fwd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) +{ + bd_main_t *bdm = &bd_main; + clib_error_t *error = 0; + u32 bd_index, bd_id; + u32 enable; + uword *p; + + if (!unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + enable = 1; + if (unformat (input, "disable")) + { + enable = 0; + } + + /* set the bridge domain flag */ + bd_set_flags (vm, bd_index, L2_FWD, enable); + +done: + return error; +} + + +/*? + * Layer 2 unicast forwarding can be enabled and disabled on each + * interface and on each bridge-domain. Use this command to + * manage bridge-domains. It is enabled by default. + * + * @cliexpar + * Example of how to enable forwarding (where 200 is the bridge-domain-id): + * @cliexcmd{set bridge-domain forward 200} + * Example of how to disable forwarding (where 200 is the bridge-domain-id): + * @cliexcmd{set bridge-domain forward 200 disable} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bd_fwd_cli, static) = { + .path = "set bridge-domain forward", + .short_help = "set bridge-domain forward <bridge-domain-id> [disable]", + .function = bd_fwd, +}; +/* *INDENT-ON* */ + +/** + Set bridge-domain flood enable/disable. + The CLI format is: + set bridge-domain flood <bd_index> [disable] +*/ +static clib_error_t * +bd_flood (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + bd_main_t *bdm = &bd_main; + clib_error_t *error = 0; + u32 bd_index, bd_id; + u32 enable; + uword *p; + + if (!unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + enable = 1; + if (unformat (input, "disable")) + { + enable = 0; + } + + /* set the bridge domain flag */ + bd_set_flags (vm, bd_index, L2_FLOOD, enable); + +done: + return error; +} + +/*? + * Layer 2 flooding can be enabled and disabled on each + * interface and on each bridge-domain. Use this command to + * manage bridge-domains. It is enabled by default. + * + * @cliexpar + * Example of how to enable flooding (where 200 is the bridge-domain-id): + * @cliexcmd{set bridge-domain flood 200} + * Example of how to disable flooding (where 200 is the bridge-domain-id): + * @cliexcmd{set bridge-domain flood 200 disable} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bd_flood_cli, static) = { + .path = "set bridge-domain flood", + .short_help = "set bridge-domain flood <bridge-domain-id> [disable]", + .function = bd_flood, +}; +/* *INDENT-ON* */ + +/** + Set bridge-domain unknown-unicast flood enable/disable. 
+ The CLI format is: + set bridge-domain uu-flood <bd_index> [disable] +*/ +static clib_error_t * +bd_uu_flood (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + bd_main_t *bdm = &bd_main; + clib_error_t *error = 0; + u32 bd_index, bd_id; + u32 enable; + uword *p; + + if (!unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + enable = 1; + if (unformat (input, "disable")) + { + enable = 0; + } + + /* set the bridge domain flag */ + bd_set_flags (vm, bd_index, L2_UU_FLOOD, enable); + +done: + return error; +} + +/*? + * Layer 2 unknown-unicast flooding can be enabled and disabled on each + * bridge-domain. It is enabled by default. + * + * @cliexpar + * Example of how to enable unknown-unicast flooding (where 200 is the + * bridge-domain-id): + * @cliexcmd{set bridge-domain uu-flood 200} + * Example of how to disable unknown-unicast flooding (where 200 is the bridge-domain-id): + * @cliexcmd{set bridge-domain uu-flood 200 disable} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bd_uu_flood_cli, static) = { + .path = "set bridge-domain uu-flood", + .short_help = "set bridge-domain uu-flood <bridge-domain-id> [disable]", + .function = bd_uu_flood, +}; +/* *INDENT-ON* */ + +/** + Set bridge-domain arp term enable/disable. + The CLI format is: + set bridge-domain arp term <bridge-domain-id> [disable] +*/ +static clib_error_t * +bd_arp_term (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + bd_main_t *bdm = &bd_main; + clib_error_t *error = 0; + u32 bd_index, bd_id; + u32 enable; + uword *p; + + if (!unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p) + bd_index = *p; + else + return clib_error_return (0, "No such bridge domain %d", bd_id); + + enable = 1; + if (unformat (input, "disable")) + enable = 0; + + /* set the bridge domain flag */ + bd_set_flags (vm, bd_index, L2_ARP_TERM, enable); + +done: + return error; +} + +static clib_error_t * +bd_mac_age (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + bd_main_t *bdm = &bd_main; + clib_error_t *error = 0; + u32 bd_index, bd_id; + u32 age; + uword *p; + + if (!unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + + if (p == 0) + return clib_error_return (0, "No such bridge domain %d", bd_id); + + bd_index = p[0]; + + if (!unformat (input, "%u", &age)) + { + error = + clib_error_return (0, "expecting ageing time in minutes but got `%U'", + format_unformat_error, input); + goto done; + } + + /* set the bridge domain flag */ + if (age > 255) + { + error = + clib_error_return (0, "mac aging time cannot be bigger than 255"); + goto done; + } + 
bd_set_mac_age (vm, bd_index, (u8) age); + +done: + return error; +} + +/*? + * Layer 2 mac aging can be enabled and disabled on each + * bridge-domain. Use this command to set or disable mac aging + * on specific bridge-domains. It is disabled by default. + * + * @cliexpar + * Example of how to set mac aging (where 200 is the bridge-domain-id and + * 5 is aging time in minutes): + * @cliexcmd{set bridge-domain mac-age 200 5} + * Example of how to disable mac aging (where 200 is the bridge-domain-id): + * @cliexcmd{set bridge-domain mac-age 200 0} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bd_mac_age_cli, static) = { + .path = "set bridge-domain mac-age", + .short_help = "set bridge-domain mac-age <bridge-domain-id> <mins>", + .function = bd_mac_age, +}; +/* *INDENT-ON* */ + +/*? + * Modify whether or not an existing bridge-domain should terminate and respond + * to ARP Requests. ARP Termination is disabled by default. + * + * @cliexpar + * Example of how to enable ARP termination (where 200 is the bridge-domain-id): + * @cliexcmd{set bridge-domain arp term 200} + * Example of how to disable ARP termination (where 200 is the bridge-domain-id): + * @cliexcmd{set bridge-domain arp term 200 disable} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bd_arp_term_cli, static) = { + .path = "set bridge-domain arp term", + .short_help = "set bridge-domain arp term <bridge-domain-id> [disable]", + .function = bd_arp_term, +}; +/* *INDENT-ON* */ + + +/** + * Add/delete IP address to MAC address mapping. + * + * The clib hash implementation stores uword entries in the hash table. + * The hash table mac_by_ip4 is keyed via IP4 address and stores the + * 6-byte MAC address directly in the hash table entry uword. + * + * @warning This only works for 64-bit processor with 8-byte uword; + * which means this code *WILL NOT WORK* for a 32-bit processor with + * 4-byte uword. 
+u32
+bd_add_del_ip_mac (u32 bd_index,
+                   u8 * ip_addr, u8 * mac_addr, u8 is_ip6, u8 is_add)
+{
+  l2_bridge_domain_t *bd_cfg = l2input_bd_config (bd_index);
+  u64 new_mac = *(u64 *) mac_addr;
+  u64 *old_mac;
+  u16 *mac16 = (u16 *) & new_mac;
+
+  ASSERT (sizeof (uword) == sizeof (u64));      /* make sure uword is 8 bytes */
+  ASSERT (bd_is_valid (bd_cfg));
+
+  mac16[3] = 0;                 /* clear the last 2 unused bytes of the 8-byte MAC address */
+  if (is_ip6)
+    {
+      ip6_address_t *ip6_addr_key;
+      hash_pair_t *hp;
+      old_mac = (u64 *) hash_get_mem (bd_cfg->mac_by_ip6, ip_addr);
+      if (is_add)
+        {
+          if (old_mac == 0)
+            {                   /* new entry - allocate and create ip6 address key */
+              ip6_addr_key = clib_mem_alloc (sizeof (ip6_address_t));
+              clib_memcpy (ip6_addr_key, ip_addr, sizeof (ip6_address_t));
+            }
+          else if (*old_mac == new_mac)
+            {                   /* the same mac entry already exists for the ip6 address */
+              return 0;
+            }
+          else
+            {                   /* update the mac for the ip6 address */
+              hp = hash_get_pair (bd_cfg->mac_by_ip6, ip_addr);
+              ip6_addr_key = (ip6_address_t *) hp->key;
+            }
+          hash_set_mem (bd_cfg->mac_by_ip6, ip6_addr_key, new_mac);
+        }
+      else
+        {
+          if (old_mac && (*old_mac == new_mac))
+            {
+              hp = hash_get_pair (bd_cfg->mac_by_ip6, ip_addr);
+              ip6_addr_key = (ip6_address_t *) hp->key;
+              hash_unset_mem (bd_cfg->mac_by_ip6, ip_addr);
+              clib_mem_free (ip6_addr_key);
+            }
+          else
+            return 1;
+        }
+    }
+  else
+    {
+      ip4_address_t ip4_addr = *(ip4_address_t *) ip_addr;
+      old_mac = (u64 *) hash_get (bd_cfg->mac_by_ip4, ip4_addr.as_u32);
+      if (is_add)
+        {
+          if (old_mac && (*old_mac == new_mac))
+            return 0;           /* mac entry already exists */
+          hash_set (bd_cfg->mac_by_ip4, ip4_addr.as_u32, new_mac);
+        }
+      else
+        {
+          if (old_mac && (*old_mac == new_mac))
+            hash_unset (bd_cfg->mac_by_ip4, ip4_addr.as_u32);
+          else
+            return 1;
+        }
+    }
+  return 0;
+}
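+/*
+ * Illustrative sketch, not part of this patch: adding an IP4-to-MAC
+ * binding for ARP termination with bd_add_del_ip_mac() above. The
+ * addresses are made-up example values; bd_index must reference an
+ * initialized bridge domain.
+ *
+ *   ip4_address_t ip4;
+ *   u8 mac[6] = { 0x52, 0x54, 0x00, 0x53, 0x18, 0x33 };
+ *   ip4.as_u32 = clib_host_to_net_u32 (0xc0a8482d);     // 192.168.72.45
+ *   if (bd_add_del_ip_mac (bd_index, (u8 *) &ip4, mac,
+ *                          0, 1))                       // is_ip6=0, is_add=1
+ *     clib_warning ("IP-MAC add failed");
+ */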
+/**
+    Set bridge-domain arp entry add/delete.
+    The CLI format is:
+    set bridge-domain arp entry <bridge-domain-id> <ip-addr> <mac-addr> [del]
+*/
+static clib_error_t *
+bd_arp_entry (vlib_main_t * vm,
+              unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  bd_main_t *bdm = &bd_main;
+  clib_error_t *error = 0;
+  u32 bd_index, bd_id;
+  u8 is_add = 1;
+  u8 is_ip6 = 0;
+  u8 ip_addr[16];
+  u8 mac_addr[6];
+  uword *p;
+
+  if (!unformat (input, "%d", &bd_id))
+    {
+      error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  if (bd_id == 0)
+    return clib_error_return (0,
+                              "No operations on the default bridge domain are supported");
+
+  p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+
+  if (p)
+    bd_index = *p;
+  else
+    return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+  if (unformat (input, "%U", unformat_ip4_address, ip_addr))
+    {
+      is_ip6 = 0;
+    }
+  else if (unformat (input, "%U", unformat_ip6_address, ip_addr))
+    {
+      is_ip6 = 1;
+    }
+  else
+    {
+      error = clib_error_return (0, "expecting IP address but got `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  if (!unformat (input, "%U", unformat_ethernet_address, mac_addr))
+    {
+      error = clib_error_return (0, "expecting MAC address but got `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  if (unformat (input, "del"))
+    {
+      is_add = 0;
+    }
+
+  /* add or delete the IP-MAC entry in the bridge domain */
+  if (bd_add_del_ip_mac (bd_index, ip_addr, mac_addr, is_ip6, is_add))
+    {
+      error = clib_error_return (0, "MAC %s for IP %U and MAC %U failed",
+                                 is_add ? "add" : "del",
+                                 is_ip6 ?
+                                 format_ip6_address : format_ip4_address,
+                                 ip_addr, format_ethernet_address, mac_addr);
+    }
+
+done:
+  return error;
+}
+
+/*?
+ * Add an ARP entry to an existing bridge-domain.
+ *
+ * @cliexpar
+ * Example of how to add an ARP entry (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain arp entry 200 192.168.72.45 52:54:00:3b:83:1a}
+ * Example of how to delete an ARP entry (where 200 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain arp entry 200 192.168.72.45 52:54:00:3b:83:1a del}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_arp_entry_cli, static) = {
+  .path = "set bridge-domain arp entry",
+  .short_help = "set bridge-domain arp entry <bridge-domain-id> <ip-addr> <mac-addr> [del]",
+  .function = bd_arp_entry,
+};
+/* *INDENT-ON* */
+
+u8 *
+format_vtr (u8 * s, va_list * args)
+{
+  u32 vtr_op = va_arg (*args, u32);
+  u32 dot1q = va_arg (*args, u32);
+  u32 tag1 = va_arg (*args, u32);
+  u32 tag2 = va_arg (*args, u32);
+  switch (vtr_op)
+    {
+    case L2_VTR_DISABLED:
+      return format (s, "none");
+    case L2_VTR_PUSH_1:
+      return format (s, "push-1 %s %d", dot1q ? "dot1q" : "dot1ad", tag1);
+    case L2_VTR_PUSH_2:
+      return format (s, "push-2 %s %d %d", dot1q ? "dot1q" : "dot1ad", tag1,
+                     tag2);
+    case L2_VTR_POP_1:
+      return format (s, "pop-1");
+    case L2_VTR_POP_2:
+      return format (s, "pop-2");
+    case L2_VTR_TRANSLATE_1_1:
+      return format (s, "trans-1-1 %s %d", dot1q ? "dot1q" : "dot1ad", tag1);
+    case L2_VTR_TRANSLATE_1_2:
+      return format (s, "trans-1-2 %s %d %d", dot1q ? "dot1q" : "dot1ad",
+                     tag1, tag2);
+    case L2_VTR_TRANSLATE_2_1:
+      return format (s, "trans-2-1 %s %d", dot1q ? "dot1q" : "dot1ad", tag1);
+    case L2_VTR_TRANSLATE_2_2:
+      return format (s, "trans-2-2 %s %d %d", dot1q ? "dot1q" : "dot1ad",
+                     tag1, tag2);
+    default:
+      return format (s, "none");
+    }
+}
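+/*
+ * Illustrative sketch, not part of this patch: format_vtr plugs into the
+ * standard %U format mechanism, so callers can render an interface's VLAN
+ * tag rewrite configuration directly:
+ *
+ *   u32 vtr_op, dot1q, tag1, tag2;
+ *   l2vtr_get (vm, vnm, sw_if_index, &vtr_op, &dot1q, &tag1, &tag2);
+ *   vlib_cli_output (vm, "%U", format_vtr, vtr_op, dot1q, tag1, tag2);
+ *
+ * yielding strings such as "push-2 dot1q 100 200" or "none".
+ */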
+/**
+    Show bridge-domain state.
+    The CLI format is:
+    show bridge-domain [<bridge-domain-id>]
+*/
+static clib_error_t *
+bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  bd_main_t *bdm = &bd_main;
+  clib_error_t *error = 0;
+  u32 bd_index = ~0;
+  l2_bridge_domain_t *bd_config;
+  u32 start, end;
+  u32 detail = 0;
+  u32 intf = 0;
+  u32 arp = 0;
+  u32 bd_tag = 0;
+  u32 bd_id = ~0;
+  uword *p;
+
+  start = 1;
+  end = vec_len (l2input_main.bd_configs);
+
+  if (unformat (input, "%d", &bd_id))
+    {
+      if (unformat (input, "detail"))
+        detail = 1;
+      else if (unformat (input, "det"))
+        detail = 1;
+      if (unformat (input, "int"))
+        intf = 1;
+      if (unformat (input, "arp"))
+        arp = 1;
+      if (unformat (input, "bd-tag"))
+        bd_tag = 1;
+
+      if (bd_id == 0)
+        return clib_error_return (0,
+                                  "No operations on the default bridge domain are supported");
+
+      p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+      if (p)
+        bd_index = *p;
+      else
+        return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+      vec_validate (l2input_main.bd_configs, bd_index);
+      bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+      if (bd_is_valid (bd_config))
+        {
+          start = bd_index;
+          end = start + 1;
+        }
+      else
+        {
+          vlib_cli_output (vm, "bridge-domain %d not in use", bd_id);
+          goto done;
+        }
+    }
+
+  /* Show all bridge-domains that have been initialized */
+  u32 printed = 0;
+  u8 *as = 0;
+  for (bd_index = start; bd_index < end; bd_index++)
+    {
+      bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+      if (bd_is_valid (bd_config))
+        {
+          if (!printed)
+            {
+              printed = 1;
+              vlib_cli_output (vm,
+                               "%=8s %=7s %=4s %=9s %=9s %=9s %=9s %=9s %=9s %=9s",
+                               "BD-ID", "Index", "BSN", "Age(min)",
+                               "Learning", "U-Forwrd", "UU-Flood", "Flooding",
+                               "ARP-Term", "BVI-Intf");
+            }
+
+          if (bd_config->mac_age)
+            as = format (as, "%d", bd_config->mac_age);
+          else
+            as = format (as, "off");
+          vlib_cli_output (vm,
+                           "%=8d %=7d %=4d %=9v %=9s %=9s %=9s %=9s %=9s %=9U",
+                           bd_config->bd_id, bd_index, bd_config->seq_num, as,
+                           bd_config->feature_bitmap & L2INPUT_FEAT_LEARN ?
+                           "on" : "off",
+                           bd_config->feature_bitmap & L2INPUT_FEAT_FWD ?
+                           "on" : "off",
+                           bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD ?
+                           "on" : "off",
+                           bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD ?
+                           "on" : "off",
+                           bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM ?
+                           "on" : "off",
+                           format_vnet_sw_if_index_name_with_NA,
+                           vnm, bd_config->bvi_sw_if_index);
+          vec_reset_length (as);
+
+          if (detail || intf)
+            {
+              /* Show all member interfaces */
+              int i;
+              vec_foreach_index (i, bd_config->members)
+              {
+                l2_flood_member_t *member =
+                  vec_elt_at_index (bd_config->members, i);
+                u8 swif_seq_num = *l2fib_swif_seq_num (member->sw_if_index);
+                u32 vtr_opr, dot1q, tag1, tag2;
+                if (i == 0)
+                  {
+                    vlib_cli_output (vm, "\n%=30s%=7s%=5s%=5s%=5s%=9s%=30s",
+                                     "Interface", "If-idx", "ISN", "SHG",
+                                     "BVI", "TxFlood", "VLAN-Tag-Rewrite");
+                  }
+                l2vtr_get (vm, vnm, member->sw_if_index, &vtr_opr, &dot1q,
+                           &tag1, &tag2);
+                vlib_cli_output (vm, "%=30U%=7d%=5d%=5d%=5s%=9s%=30U",
+                                 format_vnet_sw_if_index_name, vnm,
+                                 member->sw_if_index, member->sw_if_index,
+                                 swif_seq_num, member->shg,
+                                 member->flags & L2_FLOOD_MEMBER_BVI ? "*" :
+                                 "-", i < bd_config->flood_count ?
"*" : "-", + format_vtr, vtr_opr, dot1q, tag1, tag2); + } + } + + if ((detail || arp) && + (bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM)) + { + u32 ip4_addr; + ip6_address_t *ip6_addr; + u64 mac_addr; + vlib_cli_output (vm, + "\n IP4/IP6 to MAC table for ARP Termination"); + + /* *INDENT-OFF* */ + hash_foreach (ip4_addr, mac_addr, bd_config->mac_by_ip4, + ({ + vlib_cli_output (vm, "%=40U => %=20U", + format_ip4_address, &ip4_addr, + format_ethernet_address, &mac_addr); + })); + + hash_foreach_mem (ip6_addr, mac_addr, bd_config->mac_by_ip6, + ({ + vlib_cli_output (vm, "%=40U => %=20U", + format_ip6_address, ip6_addr, + format_ethernet_address, &mac_addr); + })); + /* *INDENT-ON* */ + } + + if ((detail || bd_tag) && (bd_config->bd_tag)) + { + vlib_cli_output (vm, "\n BD-Tag: %s", bd_config->bd_tag); + + } + } + } + vec_free (as); + + if (!printed) + { + vlib_cli_output (vm, "no bridge-domains in use"); + } + +done: + return error; +} + +/*? + * Show a summary of all the bridge-domain instances or detailed view of a + * single bridge-domain. Bridge-domains are created by adding an interface + * to a bridge using the '<em>set interface l2 bridge</em>' command. + * + * @cliexpar + * @parblock + * Example of displaying all bridge-domains: + * @cliexstart{show bridge-domain} + * ID Index Learning U-Forwrd UU-Flood Flooding ARP-Term BVI-Intf + * 0 0 off off off off off local0 + * 200 1 on on on on off N/A + * @cliexend + * + * Example of displaying details of a single bridge-domains: + * @cliexstart{show bridge-domain 200 detail} + * ID Index Learning U-Forwrd UU-Flood Flooding ARP-Term BVI-Intf + * 200 1 on on on on off N/A + * + * Interface Index SHG BVI VLAN-Tag-Rewrite + * GigabitEthernet0/8/0.200 3 0 - none + * GigabitEthernet0/9/0.200 4 0 - none + * @cliexend + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bd_show_cli, static) = { + .path = "show bridge-domain", + .short_help = "show bridge-domain [bridge-domain-id [detail|int|arp|bd-tag]]", + .function = bd_show, +}; +/* *INDENT-ON* */ + +int +bd_add_del (l2_bridge_domain_add_del_args_t * a) +{ + bd_main_t *bdm = &bd_main; + vlib_main_t *vm = bdm->vlib_main; + int rv = 0; + + u32 bd_index = bd_find_index (bdm, a->bd_id); + if (a->is_add) + { + if (bd_index != ~0) + return VNET_API_ERROR_BD_ALREADY_EXISTS; + if (a->bd_id > L2_BD_ID_MAX) + return VNET_API_ERROR_BD_ID_EXCEED_MAX; + bd_index = bd_add_bd_index (bdm, a->bd_id); + + u32 enable_flags = 0, disable_flags = 0; + if (a->flood) + enable_flags |= L2_FLOOD; + else + disable_flags |= L2_FLOOD; + + if (a->uu_flood) + enable_flags |= L2_UU_FLOOD; + else + disable_flags |= L2_UU_FLOOD; + + if (a->forward) + enable_flags |= L2_FWD; + else + disable_flags |= L2_FWD; + + if (a->learn) + enable_flags |= L2_LEARN; + else + disable_flags |= L2_LEARN; + + if (a->arp_term) + enable_flags |= L2_ARP_TERM; + else + disable_flags |= L2_ARP_TERM; + + if (enable_flags) + bd_set_flags (vm, bd_index, enable_flags, 1 /* enable */ ); + + if (disable_flags) + bd_set_flags (vm, bd_index, disable_flags, 0 /* disable */ ); + + bd_set_mac_age (vm, bd_index, a->mac_age); + + if (a->bd_tag) + bd_set_bd_tag (vm, bd_index, a->bd_tag); + + } + else + { + if (bd_index == ~0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + if (bd_index == 0) + return VNET_API_ERROR_BD_NOT_MODIFIABLE; + if (vec_len (l2input_main.bd_configs[bd_index].members)) + return VNET_API_ERROR_BD_IN_USE; + rv = bd_delete (bdm, bd_index); + } + + return rv; +} + +/** + Create or delete bridge-domain. 
+/**
+    Create or delete bridge-domain.
+    The CLI format:
+    create bridge-domain <bridge-domain-id> [learn <0|1>] [forward <0|1>] [uu-flood <0|1>] [flood <0|1>]
+                         [arp-term <0|1>] [mac-age <nn>] [bd-tag <tag>] [del]
+*/
+
+static clib_error_t *
+bd_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                       vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = 0;
+  u8 is_add = 1;
+  u32 bd_id = ~0;
+  u32 flood = 1, forward = 1, learn = 1, uu_flood = 1, arp_term = 0;
+  u32 mac_age = 0;
+  u8 *bd_tag = NULL;
+  l2_bridge_domain_add_del_args_t _a, *a = &_a;
+  int rv;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%d", &bd_id))
+        ;
+      else if (unformat (line_input, "flood %d", &flood))
+        ;
+      else if (unformat (line_input, "uu-flood %d", &uu_flood))
+        ;
+      else if (unformat (line_input, "forward %d", &forward))
+        ;
+      else if (unformat (line_input, "learn %d", &learn))
+        ;
+      else if (unformat (line_input, "arp-term %d", &arp_term))
+        ;
+      else if (unformat (line_input, "mac-age %d", &mac_age))
+        ;
+      else if (unformat (line_input, "bd-tag %s", &bd_tag))
+        ;
+      else if (unformat (line_input, "del"))
+        {
+          is_add = 0;
+          flood = uu_flood = forward = learn = 0;
+        }
+      else
+        break;
+    }
+
+  if (bd_id == ~0)
+    {
+      error = clib_error_return (0, "bridge-domain-id not specified");
+      goto done;
+    }
+
+  if (bd_id == 0)
+    {
+      error = clib_error_return (0, "bridge domain 0 cannot be modified");
+      goto done;
+    }
+
+  if (mac_age > 255)
+    {
+      error = clib_error_return (0, "mac age must be less than 256");
+      goto done;
+    }
+  if ((bd_tag) && (strlen ((char *) bd_tag) > 63))
+    {
+      error = clib_error_return (0,
+                                 "bd-tag cannot be longer than 63 characters");
+      goto done;
+    }
+
+  memset (a, 0, sizeof (*a));
+  a->is_add = is_add;
+  a->bd_id = bd_id;
+  a->flood = (u8) flood;
+  a->uu_flood = (u8) uu_flood;
+  a->forward = (u8) forward;
+  a->learn = (u8) learn;
+  a->arp_term = (u8) arp_term;
+  a->mac_age = (u8) mac_age;
+  a->bd_tag = bd_tag;
+
+  rv = bd_add_del (a);
+
+  switch (rv)
+    {
+    case 0:
+      if (is_add)
+        vlib_cli_output (vm, "bridge-domain %d", bd_id);
+      break;
+    case VNET_API_ERROR_BD_IN_USE:
+      error = clib_error_return (0, "bridge domain in use - remove members");
+      goto done;
+    case VNET_API_ERROR_NO_SUCH_ENTRY:
+      error = clib_error_return (0, "bridge domain ID does not exist");
+      goto done;
+    case VNET_API_ERROR_BD_NOT_MODIFIABLE:
+      error = clib_error_return (0, "bridge domain 0 cannot be modified");
+      goto done;
+    case VNET_API_ERROR_BD_ID_EXCEED_MAX:
+      error = clib_error_return (0, "bridge domain ID exceeds the 16M limit");
+      goto done;
+    default:
+      error = clib_error_return (0, "bd_add_del returned %d", rv);
+      goto done;
+    }
+
+done:
+  vec_free (bd_tag);
+  unformat_free (line_input);
+
+  return error;
+}
+
+
+/*?
+ * Create/Delete bridge-domain instance
+ *
+ * @cliexpar
+ * @parblock
+ * Example of creating bridge-domain 1:
+ * @cliexstart{create bridge-domain 1}
+ * bridge-domain 1
+ * @cliexend
+ *
+ * Example of creating bridge-domain 2 with arp-term enabled and a mac-age of 60 minutes:
+ * @cliexstart{create bridge-domain 2 arp-term 1 mac-age 60}
+ * bridge-domain 2
+ *
+ * vpp# show bridge-domain
+ *  ID  Index  BSN  Age(min)  Learning  U-Forwrd  UU-Flood  Flooding  ARP-Term  BVI-Intf
+ *  0     0     0     off       off       off       off       off       off      local0
+ *  1     1     0     off       on        on        off       on        off       N/A
+ *  2     2     0     60        on        on        off       on        on        N/A
+ *
+ * @cliexend
+ *
+ * Example of deleting bridge-domain 1:
+ * @cliexstart{create bridge-domain 1 del}
+ * @cliexend
+ * @endparblock
+?*/
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bd_create_cli, static) = {
+  .path = "create bridge-domain",
+  .short_help = "create bridge-domain <bridge-domain-id>"
+                " [learn <0|1>] [forward <0|1>] [uu-flood <0|1>] [flood <0|1>] [arp-term <0|1>]"
+                " [mac-age <nn>] [bd-tag <tag>] [del]",
+  .function = bd_add_del_command_fn,
+};
+/* *INDENT-ON* */
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_bd.h b/src/vnet/l2/l2_bd.h
new file mode 100644
index 00000000..fd34ae67
--- /dev/null
+++ b/src/vnet/l2/l2_bd.h
@@ -0,0 +1,190 @@
+/*
+ * l2_bd.h : layer 2 bridge domain
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2bd_h
+#define included_l2bd_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+
+typedef struct
+{
+  /* hash bd_id -> bd_index */
+  uword *bd_index_by_bd_id;
+
+  /* Busy bd_index bitmap */
+  uword *bd_index_bitmap;
+
+  /* convenience */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+} bd_main_t;
+
+extern bd_main_t bd_main;
+
+/* Bridge domain member */
+
+#define L2_FLOOD_MEMBER_NORMAL 0
+#define L2_FLOOD_MEMBER_BVI    1
+
+typedef struct
+{
+  u32 sw_if_index;		/* the output L2 interface */
+  u8 flags;			/* 0=normal, 1=bvi */
+  u8 shg;			/* split horizon group number */
+  u16 spare;
+} l2_flood_member_t;
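+/*
+ * Illustrative sketch, not part of this patch: initializing a normal
+ * (non-BVI) flood member in split-horizon group 0, as typically passed
+ * to bd_add_member() declared further below:
+ *
+ *   l2_flood_member_t member = {
+ *     .sw_if_index = sw_if_index,
+ *     .flags = L2_FLOOD_MEMBER_NORMAL,
+ *     .shg = 0,
+ *   };
+ *   bd_add_member (bd_config, &member);
+ */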
+/* Per-bridge domain configuration */
+
+typedef struct
+{
+  /*
+   * Contains bit enables for flooding, learning, and forwarding.
+   * All other feature bits should always be set.
+   */
+  u32 feature_bitmap;
+
+  /*
+   * identity of the bridge-domain's BVI interface
+   * set to ~0 if there is no BVI
+   */
+  u32 bvi_sw_if_index;
+
+  /* bridge domain id, not to be confused with bd_index */
+  u32 bd_id;
+
+  /* Vector of member ports */
+  l2_flood_member_t *members;
+
+  /* First flood_count member ports are flooded */
+  u32 flood_count;
+
+  /* Tunnel Master (multicast VXLAN) members are always flooded */
+  u32 tun_master_count;
+
+  /* Tunnels (unicast VXLAN) are flooded if there are no masters */
+  u32 tun_normal_count;
+
+  /* hash ip4/ip6 -> mac for arp/nd termination */
+  uword *mac_by_ip4;
+  uword *mac_by_ip6;
+
+  /* mac aging */
+  u8 mac_age;
+
+  /* sequence number for bridge domain based flush of MACs */
+  u8 seq_num;
+
+  /* Bridge domain tag (C string NULL terminated) */
+  u8 *bd_tag;
+
+} l2_bridge_domain_t;
+
+/* Limit Bridge Domain ID to 24 bits to match 24-bit VNI range */
+#define L2_BD_ID_MAX ((1<<24)-1)
+
+typedef struct
+{
+  u32 bd_id;
+  u8 flood;
+  u8 uu_flood;
+  u8 forward;
+  u8 learn;
+  u8 arp_term;
+  u8 mac_age;
+  u8 *bd_tag;
+  u8 is_add;
+} l2_bridge_domain_add_del_args_t;
+
+/* Return 1 if bridge domain has been initialized */
+always_inline u32
+bd_is_valid (l2_bridge_domain_t * bd_config)
+{
+  return (bd_config->feature_bitmap != 0);
+}
+
+/* Init bridge domain if not done already */
+void bd_validate (l2_bridge_domain_t * bd_config);
+
+
+void
+bd_add_member (l2_bridge_domain_t * bd_config, l2_flood_member_t * member);
+
+u32 bd_remove_member (l2_bridge_domain_t * bd_config, u32 sw_if_index);
+
+
+#define L2_LEARN    (1<<0)
+#define L2_FWD      (1<<1)
+#define L2_FLOOD    (1<<2)
+#define L2_UU_FLOOD (1<<3)
+#define L2_ARP_TERM (1<<4)
+
+u32 bd_set_flags (vlib_main_t * vm, u32 bd_index, u32 flags, u32 enable);
+void bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age);
+int bd_add_del (l2_bridge_domain_add_del_args_t * args);
+
+/**
+ * \brief Get a bridge domain.
+ *
+ * Get a bridge domain with the given bridge domain ID.
+ *
+ * \param bdm bd_main pointer.
+ * \param bd_id The bridge domain ID
+ * \return The bridge domain index in \c l2input_main->l2_bridge_domain_t vector.
+ */
+u32 bd_find_index (bd_main_t * bdm, u32 bd_id);
+
+/**
+ * \brief Create a bridge domain.
+ *
+ * Create a bridge domain with the given bridge domain ID.
+ *
+ * \param bdm bd_main pointer.
+ * \param bd_id The bridge domain ID
+ * \return The bridge domain index in \c l2input_main->l2_bridge_domain_t vector.
+ */
+u32 bd_add_bd_index (bd_main_t * bdm, u32 bd_id);
+
+/**
+ * \brief Get or create a bridge domain.
+ *
+ * Get a bridge domain with the given bridge domain ID, if one exists, otherwise
+ * create one with the given ID, or the first unused ID if the given ID is ~0.
+ *
+ * \param bdm bd_main pointer.
+ * \param bd_id The bridge domain ID
+ * \return The bridge domain index in \c l2input_main->l2_bridge_domain_t vector.
+ */
+static inline u32
+bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id)
+{
+  u32 bd_index = bd_find_index (bdm, bd_id);
+  if (bd_index == ~0)
+    return bd_add_bd_index (bdm, bd_id);
+  return bd_index;
+}
+
+u32 bd_add_del_ip_mac (u32 bd_index,
+		       u8 * ip_addr, u8 * mac_addr, u8 is_ip6, u8 is_add);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_bvi.c b/src/vnet/l2/l2_bvi.c
new file mode 100644
index 00000000..f239743a
--- /dev/null
+++ b/src/vnet/l2/l2_bvi.c
@@ -0,0 +1,40 @@
+/*
+ * l2_bvi.c : layer 2 Bridged Virtual Interface
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/l2/l2_fwd.h> +#include <vnet/l2/l2_flood.h> +#include <vnet/l2/l2_bvi.h> + + +/* Call the L2 nodes that need the ethertype mapping */ +void +l2bvi_register_input_type (vlib_main_t * vm, + ethernet_type_t type, u32 node_index) +{ + l2fwd_register_input_type (vm, type, node_index); + l2flood_register_input_type (vm, type, node_index); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_bvi.h b/src/vnet/l2/l2_bvi.h new file mode 100644 index 00000000..662ec402 --- /dev/null +++ b/src/vnet/l2/l2_bvi.h @@ -0,0 +1,117 @@ +/* + * l2_bvi.h : layer 2 Bridged Virtual Interface + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_l2bvi_h +#define included_l2bvi_h + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> +#include <vppinfra/sparse_vec.h> + +#include <vnet/l2/l2_input.h> + +#define TO_BVI_ERR_OK 0 +#define TO_BVI_ERR_BAD_MAC 1 +#define TO_BVI_ERR_ETHERTYPE 2 + +/** + * Send a packet from L2 processing to L3 via the BVI interface. + * Set next0 to the proper L3 input node. + * Return an error if the packet isn't what we expect. 
+ */ + +static_always_inline u32 +l2_to_bvi (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + vlib_buffer_t * b0, + u32 bvi_sw_if_index, next_by_ethertype_t * l3_next, u32 * next0) +{ + u8 l2_len; + u16 ethertype; + u8 *l3h; + ethernet_header_t *e0; + vnet_hw_interface_t *hi; + + e0 = vlib_buffer_get_current (b0); + hi = vnet_get_sup_hw_interface (vnet_main, bvi_sw_if_index); + + /* Perform L3 my-mac filter */ + if ((!ethernet_address_cast (e0->dst_address)) && + (!eth_mac_equal ((u8 *) e0, hi->hw_address))) + { + return TO_BVI_ERR_BAD_MAC; + } + + /* Save L2 header position which may be changed due to packet replication */ + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; + + /* Strip L2 header */ + l2_len = vnet_buffer (b0)->l2.l2_len; + vlib_buffer_advance (b0, l2_len); + + l3h = vlib_buffer_get_current (b0); + ethertype = clib_net_to_host_u16 (*(u16 *) (l3h - 2)); + + /* Set the input interface to be the BVI interface */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = bvi_sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0; + + /* Go to appropriate L3 input node */ + if (ethertype == ETHERNET_TYPE_IP4) + { + *next0 = l3_next->input_next_ip4; + } + else if (ethertype == ETHERNET_TYPE_IP6) + { + *next0 = l3_next->input_next_ip6; + } + else + { + /* uncommon ethertype, check table */ + u32 i0; + + i0 = sparse_vec_index (l3_next->input_next_by_type, ethertype); + *next0 = vec_elt (l3_next->input_next_by_type, i0); + + if (i0 == SPARSE_VEC_INVALID_INDEX) + { + return TO_BVI_ERR_ETHERTYPE; + } + } + + /* increment BVI RX interface stat */ + vlib_increment_combined_counter + (vnet_main->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + vlib_main->thread_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX], + 1, vlib_buffer_length_in_chain (vlib_main, b0)); + return TO_BVI_ERR_OK; +} + +void +l2bvi_register_input_type (vlib_main_t * vm, + ethernet_type_t type, u32 node_index); +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_classify.h b/src/vnet/l2/l2_classify.h new file mode 100644 index 00000000..100c584a --- /dev/null +++ b/src/vnet/l2/l2_classify.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __included_vnet_l2_input_classify_h__ +#define __included_vnet_l2_input_classify_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_output.h> +#include <vnet/l2/feat_bitmap.h> +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + +#include <vnet/classify/vnet_classify.h> + +typedef enum +{ + L2_INPUT_CLASSIFY_NEXT_DROP, + L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT, + L2_INPUT_CLASSIFY_NEXT_IP4_INPUT, + L2_INPUT_CLASSIFY_NEXT_IP6_INPUT, + L2_INPUT_CLASSIFY_NEXT_LI, + L2_INPUT_CLASSIFY_N_NEXT, +} l2_input_classify_next_t; + +typedef enum +{ + L2_INPUT_CLASSIFY_TABLE_IP4, + L2_INPUT_CLASSIFY_TABLE_IP6, + L2_INPUT_CLASSIFY_TABLE_OTHER, + L2_INPUT_CLASSIFY_N_TABLES, +} l2_input_classify_table_id_t; + +typedef enum +{ + L2_OUTPUT_CLASSIFY_NEXT_DROP, + L2_OUTPUT_CLASSIFY_N_NEXT, +} l2_output_classify_next_t; + +typedef enum +{ + L2_OUTPUT_CLASSIFY_TABLE_IP4, + L2_OUTPUT_CLASSIFY_TABLE_IP6, + L2_OUTPUT_CLASSIFY_TABLE_OTHER, + L2_OUTPUT_CLASSIFY_N_TABLES, +} l2_output_classify_table_id_t; + +typedef struct _l2_classify_main +{ + /* Next nodes for L2 input and output features */ + u32 l2_inp_feat_next[32]; + u32 l2_out_feat_next[32]; + + /* Per-address-family classifier table vectors */ + u32 *classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_N_TABLES]; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + vnet_classify_main_t *vnet_classify_main; +} l2_input_classify_main_t; + +typedef struct _l2_classify_main l2_output_classify_main_t; + +extern l2_input_classify_main_t l2_input_classify_main; +extern vlib_node_registration_t l2_input_classify_node; + +extern l2_output_classify_main_t l2_output_classify_main; +extern vlib_node_registration_t l2_output_classify_node; + +void vnet_l2_input_classify_enable_disable (u32 sw_if_index, + int enable_disable); + +int vnet_l2_input_classify_set_tables (u32 sw_if_index, u32 ip4_table_index, + u32 ip6_table_index, + u32 other_table_index); + +void vnet_l2_output_classify_enable_disable (u32 sw_if_index, + int enable_disable); + +int vnet_l2_output_classify_set_tables (u32 sw_if_index, u32 ip4_table_index, + u32 ip6_table_index, + u32 other_table_index); + +#endif /* __included_vnet_l2_input_classify_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_efp_filter.c b/src/vnet/l2/l2_efp_filter.c new file mode 100644 index 00000000..faf78153 --- /dev/null +++ b/src/vnet/l2/l2_efp_filter.c @@ -0,0 +1,575 @@ +/* + * l2_efp_filter.c : layer 2 egress EFP Filter processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_output.h> +#include <vnet/ethernet/ethernet.h> + +#include <vppinfra/error.h> +#include <vppinfra/cache.h> + +/** + * @file + * @brief EFP-filter - Ethernet Flow Point Filter. + * + * It is possible to transmit a packet out a subinterface with VLAN tags + * that are not compatible with that subinterface. In other words, if that + * packet arrived on the output port, it would not be classified as coming + * from the output subinterface. This can happen in various ways: through + * misconfiguration, by putting subinterfaces with different VLAN encaps in + * the same bridge-domain, etc. The EFP Filter Check detects such packets + * and drops them. It consists of two checks, one that verifies the packet + * prior to output VLAN tag rewrite and one that verifies the packet after + * VLAN tag rewrite. + * + */ +typedef struct +{ + /* Next nodes for L2 output features */ + u32 l2_out_feat_next[32]; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} l2_efp_filter_main_t; + + +typedef struct +{ + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u8 raw[12]; /* raw data (vlans) */ + u32 sw_if_index; +} l2_efp_filter_trace_t; + +/* packet trace format function */ +static u8 * +format_l2_efp_filter_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_efp_filter_trace_t *t = va_arg (*args, l2_efp_filter_trace_t *); + + s = format (s, "l2-output-vtr: sw_if_index %d dst %U src %U data " + "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4], + t->raw[5], t->raw[6], t->raw[7], t->raw[8], t->raw[9], + t->raw[10], t->raw[11]); + return s; +} + +l2_efp_filter_main_t l2_efp_filter_main; + +static vlib_node_registration_t l2_efp_filter_node; + +#define foreach_l2_efp_filter_error \ +_(L2_EFP_FILTER, "L2 EFP filter packets") \ +_(DROP, "L2 EFP filter post-rewrite drops") + +typedef enum +{ +#define _(sym,str) L2_EFP_FILTER_ERROR_##sym, + foreach_l2_efp_filter_error +#undef _ + L2_EFP_FILTER_N_ERROR, +} l2_efp_filter_error_t; + +static char *l2_efp_filter_error_strings[] = { +#define _(sym,string) string, + foreach_l2_efp_filter_error +#undef _ +}; + +typedef enum +{ + L2_EFP_FILTER_NEXT_DROP, + L2_EFP_FILTER_N_NEXT, +} l2_efp_filter_next_t; + + +/** + * Extract fields from the packet that will be used in interface + * classification. 
+ */
+static_always_inline void
+extract_keys (vnet_main_t * vnet_main,
+              u32 sw_if_index0,
+              vlib_buffer_t * b0,
+              u32 * port_sw_if_index0,
+              u16 * first_ethertype0,
+              u16 * outer_id0, u16 * inner_id0, u32 * match_flags0)
+{
+  ethernet_header_t *e0;
+  ethernet_vlan_header_t *h0;
+  u32 tag_len;
+  u32 tag_num;
+
+  *port_sw_if_index0 =
+    vnet_get_sup_sw_interface (vnet_main, sw_if_index0)->sw_if_index;
+
+  e0 = vlib_buffer_get_current (b0);
+  h0 = (ethernet_vlan_header_t *) (e0 + 1);
+
+  *first_ethertype0 = clib_net_to_host_u16 (e0->type);
+  *outer_id0 = clib_net_to_host_u16 (h0[0].priority_cfi_and_id);
+  *inner_id0 = clib_net_to_host_u16 (h0[1].priority_cfi_and_id);
+
+  tag_len = vnet_buffer (b0)->l2.l2_len - sizeof (ethernet_header_t);
+  tag_num = tag_len / sizeof (ethernet_vlan_header_t);
+  *match_flags0 = eth_create_valid_subint_match_flags (tag_num);
+}
+
+/*
+ * EFP filtering is a basic switch feature which prevents an interface from
+ * transmitting a packet that doesn't match the interface's ingress match
+ * criteria. The check has two parts, one performed before egress vlan tag
+ * rewrite and one after.
+ *
+ * The pre-rewrite check ensures the packet matches what an ingress packet looks
+ * like after going through the interface's ingress tag rewrite operation. Only
+ * pushed tags are compared. So:
+ * - if the ingress vlan tag rewrite pushes no tags (or is not enabled),
+ *   any packet passes the filter
+ * - if the ingress vlan tag rewrite pushes one tag,
+ *   the packet must have at least one tag, and the outer tag must match the pushed tag
+ * - if the ingress vlan tag rewrite pushes two tags,
+ *   the packet must have at least two tags, and the outer two tags must match the pushed tags
+ *
+ * The pre-rewrite check is performed in the l2-output node.
+ *
+ * The post-rewrite check ensures the packet matches what an ingress packet looks
+ * like before going through the interface's ingress tag rewrite operation. It verifies
+ * that such a packet arriving on the wire at this port would be classified as arriving
+ * on an input interface equal to the packet's output interface. This can be done by running
+ * the output packet's vlan tags and output port through the interface classification,
+ * and checking if the resulting interface matches the output interface.
+ *
+ * The post-rewrite check is performed here.
+ */
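+/*
+ * Condensed sketch of the post-rewrite check implemented below (the
+ * function names are the ones used in this file; argument lists are
+ * abbreviated):
+ *
+ *   extract_keys (...);            // port, first ethertype, outer/inner VLAN ids
+ *   eth_vlan_table_lookups (...);  // classify exactly as ingress would
+ *   eth_identify_subint (...);     // yields subint_sw_if_index
+ *   if (sw_if_index != subint_sw_if_index)
+ *     next = L2_EFP_FILTER_NEXT_DROP;  // packet could not have arrived here
+ */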
+
+static uword
+l2_efp_filter_node_fn (vlib_main_t * vm,
+                       vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, *to_next;
+  l2_efp_filter_next_t next_index;
+  l2_efp_filter_main_t *msm = &l2_efp_filter_main;
+  vlib_node_t *n = vlib_get_node (vm, l2_efp_filter_node.index);
+  u32 node_counter_base_index = n->error_heap_index;
+  vlib_error_main_t *em = &vm->error_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;	/* number of packets to process */
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      /* get space to enqueue frame to graph node "next_index" */
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 6 && n_left_to_next >= 2)
+        {
+          u32 bi0, bi1;
+          vlib_buffer_t *b0, *b1;
+          u32 next0, next1;
+          u32 sw_if_index0, sw_if_index1;
+          u16 first_ethertype0, first_ethertype1;
+          u16 outer_id0, inner_id0, outer_id1, inner_id1;
+          u32 match_flags0, match_flags1;
+          u32 port_sw_if_index0, subint_sw_if_index0, port_sw_if_index1,
+            subint_sw_if_index1;
+          vnet_hw_interface_t *hi0, *hi1;
+          main_intf_t *main_intf0, *main_intf1;
+          vlan_intf_t *vlan_intf0, *vlan_intf1;
+          qinq_intf_t *qinq_intf0, *qinq_intf1;
+          u32 is_l20, is_l21;
+          __attribute__ ((unused)) u32 matched0, matched1;
+          u8 error0, error1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t *p2, *p3, *p4, *p5;
+            __attribute__ ((unused)) u32 sw_if_index2, sw_if_index3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+            p4 = vlib_get_buffer (vm, from[4]);
+            p5 = vlib_get_buffer (vm, from[5]);
+
+            /* Prefetch the buffer header and packet for the N+2 loop iteration */
+            vlib_prefetch_buffer_header (p4, LOAD);
+            vlib_prefetch_buffer_header (p5, LOAD);
+
+            CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+            CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+
+            /*
+             * Prefetch the input config for the N+1 loop iteration
+             * This depends on the buffer header above
+             */
+            sw_if_index2 = vnet_buffer (p2)->sw_if_index[VLIB_TX];
+            sw_if_index3 = vnet_buffer (p3)->sw_if_index[VLIB_TX];
+            /*
+             * $$$ TODO
+             * CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index2), CLIB_CACHE_LINE_BYTES, LOAD);
+             * CLIB_PREFETCH (vec_elt_at_index(l2output_main.configs, sw_if_index3), CLIB_CACHE_LINE_BYTES, LOAD);
+             */
+          }
+
+          /* speculatively enqueue b0 and b1 to the current next frame */
+          /* bi is "buffer index", b is pointer to the buffer */
+          to_next[0] = bi0 = from[0];
+          to_next[1] = bi1 = from[1];
+          from += 2;
+          to_next += 2;
+          n_left_from -= 2;
+          n_left_to_next -= 2;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+
+          /* TX interface handles */
+          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+          sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+
+          /* process 2 packets */
+          em->counters[node_counter_base_index +
+                       L2_EFP_FILTER_ERROR_L2_EFP_FILTER] += 2;
+
+          /* Determine next node */
+          next0 = vnet_l2_feature_next (b0, msm->l2_out_feat_next,
+                                        L2OUTPUT_FEAT_EFP_FILTER);
+          next1 = vnet_l2_feature_next (b1, msm->l2_out_feat_next,
+                                        L2OUTPUT_FEAT_EFP_FILTER);
+
+          /* perform the efp filter check on two packets */
+
+          extract_keys (msm->vnet_main,
+                        sw_if_index0,
+                        b0,
+                        &port_sw_if_index0,
+                        &first_ethertype0,
+                        &outer_id0, &inner_id0, &match_flags0);
+
+          extract_keys (msm->vnet_main,
+                        sw_if_index1,
+                        b1,
+                        &port_sw_if_index1,
+                        &first_ethertype1,
+                        &outer_id1, &inner_id1, &match_flags1);
+
+          eth_vlan_table_lookups (&ethernet_main,
+                                  msm->vnet_main,
+                                  port_sw_if_index0,
+                                  first_ethertype0,
+                                  outer_id0,
+                                  inner_id0,
+                                  &hi0,
+                                  &main_intf0, &vlan_intf0, &qinq_intf0);
+
+          eth_vlan_table_lookups (&ethernet_main,
+                                  msm->vnet_main,
+                                  port_sw_if_index1,
+                                  first_ethertype1,
+                                  outer_id1,
+                                  inner_id1,
+                                  &hi1,
+                                  &main_intf1, &vlan_intf1, &qinq_intf1);
+
+          matched0 = eth_identify_subint (hi0,
+                                          b0,
+                                          match_flags0,
+                                          main_intf0,
+                                          vlan_intf0,
+                                          qinq_intf0,
+                                          &subint_sw_if_index0,
+                                          &error0, &is_l20);
+
+          matched1 = eth_identify_subint (hi1,
+                                          b1,
+                                          match_flags1,
+                                          main_intf1,
+                                          vlan_intf1,
+                                          qinq_intf1,
+                                          &subint_sw_if_index1,
+                                          &error1, &is_l21);
+
+          if (PREDICT_FALSE (sw_if_index0 != subint_sw_if_index0))
+            {
+              /* Drop packet */
+              next0 = L2_EFP_FILTER_NEXT_DROP;
+              b0->error = node->errors[L2_EFP_FILTER_ERROR_DROP];
+            }
+
+          if (PREDICT_FALSE (sw_if_index1 != subint_sw_if_index1))
+            {
+              /* Drop packet */
+              next1 = L2_EFP_FILTER_NEXT_DROP;
+              b1->error = node->errors[L2_EFP_FILTER_ERROR_DROP];
+            }
+
+          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+            {
+              if (b0->flags & VLIB_BUFFER_IS_TRACED)
+                {
+                  ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+                  l2_efp_filter_trace_t *t =
+                    vlib_add_trace (vm, node, b0, sizeof (*t));
+                  t->sw_if_index = sw_if_index0;
+                  clib_memcpy (t->src, h0->src_address, 6);
+                  clib_memcpy (t->dst, h0->dst_address, 6);
+                  clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+                }
+              if (b1->flags & VLIB_BUFFER_IS_TRACED)
+                {
+                  ethernet_header_t *h1 = vlib_buffer_get_current (b1);
+                  l2_efp_filter_trace_t *t =
+                    vlib_add_trace (vm, node, b1, sizeof (*t));
+                  t->sw_if_index = sw_if_index1;
+                  clib_memcpy (t->src, h1->src_address, 6);
+                  clib_memcpy (t->dst, h1->dst_address, 6);
+                  clib_memcpy (t->raw, &h1->type, sizeof (t->raw));
+                }
+            }
+
+          /* verify speculative enqueues, maybe switch current next frame */
+          /* if next0==next1==next_index then nothing special needs to be done */
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1, next0, next1);
+        }
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t *b0;
+          u32 next0;
+          u32 sw_if_index0;
+          u16 first_ethertype0;
+          u16 outer_id0, inner_id0;
+          u32 match_flags0;
+          u32 port_sw_if_index0, subint_sw_if_index0;
+          vnet_hw_interface_t *hi0;
+          main_intf_t *main_intf0;
+          vlan_intf_t *vlan_intf0;
+          qinq_intf_t *qinq_intf0;
+          u32 is_l20;
+          __attribute__ ((unused)) u32 matched0;
+          u8 error0;
+
+          /* speculatively enqueue b0 to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+          /* process 1 packet */
+          em->counters[node_counter_base_index +
+                       L2_EFP_FILTER_ERROR_L2_EFP_FILTER] += 1;
+
+          /* Determine next node */
+          next0 = vnet_l2_feature_next (b0, msm->l2_out_feat_next,
+                                        L2OUTPUT_FEAT_EFP_FILTER);
+
+          /* perform the efp filter check on one packet */
+
+          extract_keys (msm->vnet_main,
+                        sw_if_index0,
+                        b0,
+                        &port_sw_if_index0,
+                        &first_ethertype0,
+                        &outer_id0, &inner_id0, &match_flags0);
+
+          eth_vlan_table_lookups (&ethernet_main,
+                                  msm->vnet_main,
+                                  port_sw_if_index0,
+                                  first_ethertype0,
+                                  outer_id0,
+                                  inner_id0,
+                                  &hi0,
+                                  &main_intf0, &vlan_intf0, &qinq_intf0);
+
+          matched0 = eth_identify_subint (hi0,
+                                          b0,
+                                          match_flags0,
+                                          main_intf0,
+                                          vlan_intf0,
+                                          qinq_intf0,
+                                          &subint_sw_if_index0,
+                                          &error0, &is_l20);
+
+          if (PREDICT_FALSE (sw_if_index0 != subint_sw_if_index0))
+            {
+              /* Drop packet */
+              next0 = L2_EFP_FILTER_NEXT_DROP;
+              b0->error = node->errors[L2_EFP_FILTER_ERROR_DROP];
+            }
+
+          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+              l2_efp_filter_trace_t *t =
+                vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->sw_if_index = sw_if_index0;
+              clib_memcpy (t->src, h0->src_address, 6);
+              clib_memcpy (t->dst, h0->dst_address, 6);
+              clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+            }
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_efp_filter_node,static) = {
+  .function = l2_efp_filter_node_fn,
+  .name = "l2-efp-filter",
+  .vector_size = sizeof (u32),
+  .format_trace = format_l2_efp_filter_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(l2_efp_filter_error_strings),
+  .error_strings = l2_efp_filter_error_strings,
+
+  .n_next_nodes = L2_EFP_FILTER_N_NEXT,
+
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [L2_EFP_FILTER_NEXT_DROP] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_efp_filter_node, l2_efp_filter_node_fn)
+     clib_error_t *l2_efp_filter_init (vlib_main_t * vm)
+{
+  l2_efp_filter_main_t *mp = &l2_efp_filter_main;
+
+  mp->vlib_main = vm;
+  mp->vnet_main = vnet_get_main ();
+
+  /* Initialize the feature next-node indexes */
+  feat_bitmap_init_next_nodes (vm,
+                               l2_efp_filter_node.index,
+                               L2OUTPUT_N_FEAT,
+                               l2output_get_feat_names (),
+                               mp->l2_out_feat_next);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_efp_filter_init);
+
+
+/** Enable/disable the EFP Filter check on the subinterface. */
+void
+l2_efp_filter_configure (vnet_main_t * vnet_main, u32 sw_if_index, u32 enable)
+{
+  /* set the interface flag */
+  l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_EFP_FILTER, enable);
+}
+
+
+/**
+ * Set subinterface egress efp filter enable/disable.
+ * The CLI format is:
+ *    set interface l2 efp-filter <interface> [disable]
+ */
+static clib_error_t *
+int_l2_efp_filter (vlib_main_t * vm,
+                   unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  u32 sw_if_index;
+  u32 enable;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  enable = 1;
+  if (unformat (input, "disable"))
+    {
+      enable = 0;
+    }
+
+  /* enable/disable the feature */
+  l2_efp_filter_configure (vnm, sw_if_index, enable);
+
+done:
+  return error;
+}
+
+
+/*?
+ * EFP filtering is a basic switch feature which prevents an interface from
+ * transmitting a packet that doesn't match the interface's ingress match
+ * criteria. The check has two parts, one performed before egress vlan tag
+ * rewrite and one after. This command enables or disables the EFP filtering
+ * for a given sub-interface.
+ * + * @cliexpar + * Example of how to enable a Layer 2 efp-filter on a sub-interface: + * @cliexcmd{set interface l2 efp-filter GigabitEthernet0/8/0.200} + * Example of how to disable a Layer 2 efp-filter on a sub-interface: + * @cliexcmd{set interface l2 efp-filter GigabitEthernet0/8/0.200 disable} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (int_l2_efp_filter_cli, static) = { + .path = "set interface l2 efp-filter", + .short_help = "set interface l2 efp-filter <interface> [disable]", + .function = int_l2_efp_filter, +}; +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_efp_filter.h b/src/vnet/l2/l2_efp_filter.h new file mode 100644 index 00000000..f40851df --- /dev/null +++ b/src/vnet/l2/l2_efp_filter.h @@ -0,0 +1,33 @@ +/* + * l2_efp_filter.h : layer 2 egress EFP Filter processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef included_vnet_l2_efp_filter_h +#define included_vnet_l2_efp_filter_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c new file mode 100644 index 00000000..64b3275b --- /dev/null +++ b/src/vnet/l2/l2_fib.c @@ -0,0 +1,1250 @@ +/* + * l2_fib.c : layer 2 forwarding table (aka mac table) + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/cli.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_fib.h> +#include <vnet/l2/l2_learn.h> +#include <vnet/l2/l2_bd.h> + +#include <vppinfra/bihash_template.c> + +#include <vlibmemory/api.h> +#include <vnet/vnet_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_endianfun + +/** + * @file + * @brief Ethernet MAC Address FIB Table Management. + * + * The MAC Address forwarding table for bridge-domains is called the l2fib. 
+ * Entries are added automatically as part of mac learning, but MAC Address
+ * entries can also be added manually.
+ *
+ */
+
+l2fib_main_t l2fib_main;
+
+/** Format sw_if_index. If the value is ~0, use the text "N/A" */
+u8 *
+format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args)
+{
+  vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+  u32 sw_if_index = va_arg (*args, u32);
+  if (sw_if_index == ~0)
+    return format (s, "N/A");
+
+  vnet_sw_interface_t *swif = vnet_get_sw_interface_safe (vnm, sw_if_index);
+  if (!swif)
+    return format (s, "Stale");
+
+  return format (s, "%U", format_vnet_sw_interface_name, vnm,
+                 vnet_get_sw_interface_safe (vnm, sw_if_index));
+}
+
+void
+l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key,
+                  l2fib_entry_result_t ** l2fe_res)
+{
+  l2fib_main_t *msm = &l2fib_main;
+  BVT (clib_bihash) * h = &msm->mac_table;
+  BVT (clib_bihash_bucket) * b;
+  BVT (clib_bihash_value) * v;
+  l2fib_entry_key_t key;
+  l2fib_entry_result_t result;
+  int i, j, k;
+
+  for (i = 0; i < h->nbuckets; i++)
+    {
+      b = &h->buckets[i];
+      if (b->offset == 0)
+        continue;
+      v = BV (clib_bihash_get_value) (h, b->offset);
+      for (j = 0; j < (1 << b->log2_pages); j++)
+        {
+          for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+            {
+              if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+                continue;
+
+              key.raw = v->kvp[k].key;
+              result.raw = v->kvp[k].value;
+
+              if ((bd_index == ~0) || (bd_index == key.fields.bd_index))
+                {
+                  vec_add1 (*l2fe_key, key);
+                  vec_add1 (*l2fe_res, result);
+                }
+            }
+          v++;
+        }
+    }
+}
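+/*
+ * Illustrative sketch, not part of this patch: dumping every MAC entry of
+ * one bridge domain with l2fib_table_dump() above (pass ~0 as bd_index to
+ * dump all bridge domains):
+ *
+ *   l2fib_entry_key_t *l2fe_key = 0;
+ *   l2fib_entry_result_t *l2fe_res = 0;
+ *   int i;
+ *   l2fib_table_dump (bd_index, &l2fe_key, &l2fe_res);
+ *   for (i = 0; i < vec_len (l2fe_key); i++)
+ *     clib_warning ("%U -> sw_if_index %d",
+ *                   format_ethernet_address, l2fe_key[i].fields.mac,
+ *                   l2fe_res[i].fields.sw_if_index);
+ *   vec_free (l2fe_key);
+ *   vec_free (l2fe_res);
+ */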
+/** Display the contents of the l2fib. */
+static clib_error_t *
+show_l2fib (vlib_main_t * vm,
+            unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  bd_main_t *bdm = &bd_main;
+  l2fib_main_t *msm = &l2fib_main;
+  l2_bridge_domain_t *bd_config;
+  BVT (clib_bihash) * h = &msm->mac_table;
+  BVT (clib_bihash_bucket) * b;
+  BVT (clib_bihash_value) * v;
+  l2fib_entry_key_t key;
+  l2fib_entry_result_t result;
+  u32 first_entry = 1;
+  u64 total_entries = 0;
+  int i, j, k;
+  u8 verbose = 0;
+  u8 raw = 0;
+  u8 learn = 0;
+  u32 bd_id, bd_index = ~0;
+  u8 now = (u8) (vlib_time_now (vm) / 60);
+  u8 *s = 0;
+
+  if (unformat (input, "raw"))
+    raw = 1;
+  else if (unformat (input, "verbose"))
+    verbose = 1;
+  else if (unformat (input, "bd_index %d", &bd_index))
+    verbose = 1;
+  else if (unformat (input, "learn"))
+    {
+      learn = 1;
+      verbose = 0;
+    }
+  else if (unformat (input, "bd_id %d", &bd_id))
+    {
+      uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+      if (p)
+        {
+          if (learn == 0)
+            verbose = 1;
+          bd_index = p[0];
+        }
+      else
+        {
+          vlib_cli_output (vm, "no such bridge domain id");
+          return 0;
+        }
+    }
+
+  for (i = 0; i < h->nbuckets; i++)
+    {
+      b = &h->buckets[i];
+      if (b->offset == 0)
+        continue;
+      v = BV (clib_bihash_get_value) (h, b->offset);
+      for (j = 0; j < (1 << b->log2_pages); j++)
+        {
+          for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+            {
+              if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+                continue;
+
+              if ((verbose || learn) && first_entry)
+                {
+                  first_entry = 0;
+                  vlib_cli_output (vm,
+                                   "%=19s%=7s%=7s%=8s%=9s%=7s%=7s%=5s%=30s",
+                                   "Mac-Address", "BD-Idx", "If-Idx",
+                                   "BSN-ISN", "Age(min)", "static", "filter",
+                                   "bvi", "Interface-Name");
+                }
+
+              key.raw = v->kvp[k].key;
+              result.raw = v->kvp[k].value;
+
+              if ((verbose || learn)
+                  && ((bd_index >> 31) || (bd_index == key.fields.bd_index)))
+                {
+                  if (learn && result.fields.age_not)
+                    {
+                      total_entries++;
+                      continue;	/* skip provisioned macs */
+                    }
+
+                  bd_config = vec_elt_at_index (l2input_main.bd_configs,
+                                                key.fields.bd_index);
+
+                  if (bd_config->mac_age && !result.fields.age_not)
+                    {
+                      i16 delta = now - result.fields.timestamp;
+                      delta += delta < 0 ? 256 : 0;
+                      s = format (s, "%d", delta);
+                    }
+                  else
+                    s = format (s, "-");
+
+                  vlib_cli_output (vm,
+                                   "%=19U%=7d%=7d %3d/%-3d%=9v%=7s%=7s%=5s%=30U",
+                                   format_ethernet_address, key.fields.mac,
+                                   key.fields.bd_index,
+                                   result.fields.sw_if_index == ~0
+                                   ? -1 : result.fields.sw_if_index,
+                                   result.fields.sn.bd, result.fields.sn.swif,
+                                   s, result.fields.static_mac ? "*" : "-",
+                                   result.fields.filter ? "*" : "-",
+                                   result.fields.bvi ? "*" : "-",
+                                   format_vnet_sw_if_index_name_with_NA,
+                                   msm->vnet_main, result.fields.sw_if_index);
+                  vec_reset_length (s);
+                }
+              total_entries++;
+            }
+          v++;
+        }
+    }
+
+  if (total_entries == 0)
+    vlib_cli_output (vm, "no l2fib entries");
+  else
+    {
+      l2learn_main_t *lm = &l2learn_main;
+      vlib_cli_output (vm, "L2FIB total/learned entries: %d/%d "
+                       "Last scan time: %.4esec Learn limit: %d ",
+                       total_entries, lm->global_learn_count,
+                       msm->age_scan_duration, lm->global_learn_limit);
+      if (lm->client_pid)
+        vlib_cli_output (vm, "L2MAC events client PID: %d "
+                         "Last e-scan time: %.4esec Delay: %.2esec "
+                         "Max macs in event: %d",
+                         lm->client_pid, msm->evt_scan_duration,
+                         msm->event_scan_delay, msm->max_macs_in_event);
+    }
+
+  if (raw)
+    vlib_cli_output (vm, "Raw Hash Table:\n%U\n",
+                     BV (format_bihash), h, 1 /* verbose */ );
+
+  vec_free (s);
+  return 0;
+}
+
+/*?
+ * This command displays the MAC Address entries of the L2 FIB table.
+ * Output can be filtered to just get the number of MAC Addresses or display
+ * each MAC Address for all bridge domains or just a single bridge domain.
+ *
+ * @cliexpar
+ * Example of how to display the number of MAC Address entries in the L2
+ * FIB table:
+ * @cliexstart{show l2fib}
+ * 3 l2fib entries
+ * @cliexend
+ * Example of how to display all the MAC Address entries in the L2
+ * FIB table:
+ * @cliexstart{show l2fib verbose}
+ *    Mac Address    BD Idx        Interface          Index  static  filter  bvi  refresh  timestamp
+ * 52:54:00:53:18:33   1    GigabitEthernet0/8/0.200    3      0       0      0      0         0
+ * 52:54:00:53:18:55   1    GigabitEthernet0/8/0.200    3      1       0      0      0         0
+ * 52:54:00:53:18:77   1              N/A              -1      1       1      0      0         0
+ * 3 l2fib entries
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_l2fib_cli, static) = {
+  .path = "show l2fib",
+  .short_help = "show l2fib [verbose | learn | bd_id <nn> | bd_index <nn> | raw]",
+  .function = show_l2fib,
+};
+/* *INDENT-ON* */
+
+
+/* Remove all entries from the l2fib */
+void
+l2fib_clear_table (void)
+{
+  l2fib_main_t *mp = &l2fib_main;
+
+  /* Remove all entries */
+  BV (clib_bihash_free) (&mp->mac_table);
+  BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table",
+                         L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE);
+  l2learn_main.global_learn_count = 0;
+}
+
+/** Clear all entries in L2FIB.
+ * @TODO: Later we may want a way to remove only the non-static entries
+ */
+static clib_error_t *
+clear_l2fib (vlib_main_t * vm,
+             unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  l2fib_clear_table ();
+  return 0;
+}
+
+/*?
+ * This command clears all the MAC Address entries from the L2 FIB table.
+ *
+ * @cliexpar
+ * Example of how to clear the L2 FIB Table:
+ * @cliexcmd{clear l2fib}
+ * Example to show the L2 FIB Table has been cleared:
+ * @cliexstart{show l2fib verbose}
+ * no l2fib entries
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_l2fib_cli, static) = {
+  .path = "clear l2fib",
+  .short_help = "clear l2fib",
+  .function = clear_l2fib,
+};
+/* *INDENT-ON* */
+
+static inline l2fib_seq_num_t
+l2fib_cur_seq_num (u32 bd_index, u32 sw_if_index)
+{
+  l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index);
+  /* *INDENT-OFF* */
+  return (l2fib_seq_num_t) {
+    .swif = *l2fib_swif_seq_num (sw_if_index),
+    .bd = bd_config->seq_num,
+  };
+  /* *INDENT-ON* */
+}
+
+/**
+ * Add an entry to the l2fib.
+ * If the entry already exists then overwrite it
+ */
+void
+l2fib_add_entry (u64 mac, u32 bd_index,
+                 u32 sw_if_index, u8 static_mac, u8 filter_mac, u8 bvi_mac)
+{
+  l2fib_entry_key_t key;
+  l2fib_entry_result_t result;
+  __attribute__ ((unused)) u32 bucket_contents;
+  l2fib_main_t *fm = &l2fib_main;
+  l2learn_main_t *lm = &l2learn_main;
+  BVT (clib_bihash_kv) kv;
+
+  /* set up key */
+  key.raw = l2fib_make_key ((u8 *) & mac, bd_index);
+  kv.key = key.raw;
+
+  /* check if the entry already exists */
+  if (BV (clib_bihash_search) (&fm->mac_table, &kv, &kv) == 0)
+    {
+      /* decrement counter if overwriting a learned mac */
+      result.raw = kv.value;
+      if ((result.fields.age_not == 0) && (lm->global_learn_count))
+        lm->global_learn_count--;
+    }
+
+  /* set up result */
+  result.raw = 0;		/* clear all fields */
+  result.fields.sw_if_index = sw_if_index;
+  result.fields.static_mac = static_mac;
+  result.fields.filter = filter_mac;
+  result.fields.bvi = bvi_mac;
+  result.fields.age_not = 1;	/* no aging for provisioned entry */
+
+  kv.key = key.raw;
+  kv.value = result.raw;
+
+  BV (clib_bihash_add_del) (&fm->mac_table, &kv, 1 /* is_add */ );
+}
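+/*
+ * Illustrative sketch, not part of this patch: provisioning a static,
+ * non-filter, non-BVI MAC entry with l2fib_add_entry() above. The mac
+ * value holds the 6 MAC bytes in its first 6 bytes, as produced by
+ * unformat_ethernet_address in the CLI handler below.
+ *
+ *   u64 mac;   // filled by unformat_ethernet_address
+ *   l2fib_add_entry (mac, bd_index, sw_if_index,
+ *                    1, 0, 0);   // static_mac=1, filter_mac=0, bvi_mac=0
+ */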
+
+/**
+ * Add an entry to the L2FIB.
+ * The CLI format is:
+ *    l2fib add <mac> <bd> <intf> [static] [bvi]
+ *    l2fib add <mac> <bd> filter
+ * Note that filter and bvi entries are always static
+ */
+static clib_error_t *
+l2fib_add (vlib_main_t * vm,
+	   unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  bd_main_t *bdm = &bd_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  u64 mac;
+  u32 bd_id;
+  u32 bd_index;
+  u32 sw_if_index = ~0;
+  u32 filter_mac = 0;
+  u32 static_mac = 0;
+  u32 bvi_mac = 0;
+  uword *p;
+
+  if (!unformat_user (input, unformat_ethernet_address, &mac))
+    {
+      error = clib_error_return (0, "expected mac address `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  if (!unformat (input, "%d", &bd_id))
+    {
+      error = clib_error_return (0, "expected bridge domain ID `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+  if (!p)
+    {
+      error = clib_error_return (0, "bridge domain ID %d invalid", bd_id);
+      goto done;
+    }
+  bd_index = p[0];
+
+  if (unformat (input, "filter"))
+    {
+      filter_mac = 1;
+      static_mac = 1;
+    }
+  else
+    {
+      if (!unformat_user
+	  (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+	{
+	  error = clib_error_return (0, "unknown interface `%U'",
+				     format_unformat_error, input);
+	  goto done;
+	}
+      if (unformat (input, "static"))
+	{
+	  static_mac = 1;
+	}
+      else if (unformat (input, "bvi"))
+	{
+	  bvi_mac = 1;
+	  static_mac = 1;
+	}
+      /* only forwarding entries need an interface in L2 mode;
+       * filter entries carry no output interface */
+      if (vec_len (l2input_main.configs) <= sw_if_index)
+	{
+	  error = clib_error_return (0,
+				     "Interface sw_if_index %d not in L2 mode",
+				     sw_if_index);
+	  goto done;
+	}
+    }
+
+  if (filter_mac)
+    l2fib_add_filter_entry (mac, bd_index);
+  else
+    l2fib_add_fwd_entry (mac, bd_index, sw_if_index, static_mac, bvi_mac);
+
+done:
+  return error;
+}
+
+/*?
+ * This command adds a MAC Address entry to the L2 FIB table
+ * of an existing bridge-domain. The MAC Address can be static
+ * or dynamic. This command also allows a filter to be added,
+ * such that packets with given MAC Addresses (source mac or
+ * destination mac match) are dropped.
+ *
+ * @cliexpar
+ * Example of how to add a dynamic MAC Address entry to the L2 FIB table
+ * of a bridge-domain (where 200 is the bridge-domain-id):
+ * @cliexcmd{l2fib add 52:54:00:53:18:33 200 GigabitEthernet0/8/0.200}
+ * Example of how to add a static MAC Address entry to the L2 FIB table
+ * of a bridge-domain (where 200 is the bridge-domain-id):
+ * @cliexcmd{l2fib add 52:54:00:53:18:55 200 GigabitEthernet0/8/0.200 static}
+ * Example of how to add a filter such that a packet with the given MAC
+ * Address will be dropped in a given bridge-domain (where 200 is the
+ * bridge-domain-id):
+ * @cliexcmd{l2fib add 52:54:00:53:18:77 200 filter}
+ * Example of the show command output for the provisioned MAC Addresses
+ * and filters:
+ * @cliexstart{show l2fib verbose}
+ *   Mac Address     BD Idx          Interface          Index  static  filter  bvi  refresh  timestamp
+ * 52:54:00:53:18:33    1      GigabitEthernet0/8/0.200   3      0       0      0      0         0
+ * 52:54:00:53:18:55    1      GigabitEthernet0/8/0.200   3      1       0      0      0         0
+ * 52:54:00:53:18:77    1                 N/A            -1      1       1      0      0         0
+ * 3 l2fib entries
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_add_cli, static) = {
+  .path = "l2fib add",
+  .short_help = "l2fib add <mac> <bridge-domain-id> filter | <intf> [static | bvi]",
+  .function = l2fib_add,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+l2fib_test_command_fn (vlib_main_t * vm,
+		       unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  clib_error_t *error = 0;
+  u64 mac, save_mac;
+  u32 bd_index = 0;
+  u32 sw_if_index = 8;
+  u32 bvi_mac = 0;
+  u32 is_add = 0;
+  u32 is_del = 0;
+  u32 is_check = 0;
+  u32 count = 1;
+  int mac_set = 0;
+  int i;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "mac %U", unformat_ethernet_address, &mac))
+	mac_set = 1;
+      else if (unformat (input, "add"))
+	is_add = 1;
+      else if (unformat (input, "del"))
+	is_del = 1;
+      else if (unformat (input, "check"))
+	is_check = 1;
+      else if (unformat (input, "count %d", &count))
+	;
+      else
+	break;
+    }
+
+  if (mac_set == 0)
+    return clib_error_return (0, "mac not set");
+
+  if (is_add == 0 && is_del == 0 && is_check == 0)
+    return clib_error_return (0,
+			      "noop: pick at least one of (add,del,check)");
+
+  save_mac = mac;
+
+  if (is_add)
+    {
+      for (i = 0; i < count; i++)
+	{
+	  u64 tmp;
+	  l2fib_add_fwd_entry (mac, bd_index, sw_if_index,
+			       0 /* static_mac */ , bvi_mac);
+	  tmp = clib_net_to_host_u64 (mac);
+	  tmp >>= 16;
+	  tmp++;
+	  tmp <<= 16;
+	  mac = clib_host_to_net_u64 (tmp);
+	}
+    }
+
+  if (is_check)
+    {
+      BVT (clib_bihash_kv) kv;
+      l2fib_main_t *mp = &l2fib_main;
+
+      mac = save_mac;
+
+      for (i = 0; i < count; i++)
+	{
+	  u64 tmp;
+	  kv.key = l2fib_make_key ((u8 *) & mac, bd_index);
+	  if (BV (clib_bihash_search) (&mp->mac_table, &kv, &kv))
+	    {
+	      clib_warning ("key %U AWOL", format_ethernet_address, &mac);
+	      break;
+	    }
+	  tmp = clib_net_to_host_u64 (mac);
+	  tmp >>= 16;
+	  tmp++;
+	  tmp <<= 16;
+	  mac = clib_host_to_net_u64 (tmp);
+	}
+    }
+
+  if (is_del)
+    {
+      for (i = 0; i < count; i++)
+	{
+	  u64 tmp;
+
+	  l2fib_del_entry (mac, bd_index);
+
+	  tmp = clib_net_to_host_u64 (mac);
+	  tmp >>= 16;
+	  tmp++;
+	  tmp <<= 16;
+	  mac = clib_host_to_net_u64 (tmp);
+	}
+    }
+
+  return error;
+}
+
+/*?
+ * The set of '<em>test l2fib</em>' commands allows the L2 FIB table of the default
+ * bridge domain (bridge-domain-id of 0) to be modified.
+ * + * @cliexpar + * @parblock + * Example of how to add a set of 4 sequential MAC Address entries to L2 + * FIB table of the default bridge-domain: + * @cliexcmd{test l2fib add mac 52:54:00:53:00:00 count 4} + * + * Show the set of 4 sequential MAC Address entries that were added: + * @cliexstart{show l2fib verbose} + * Mac Address BD Idx Interface Index static filter bvi refresh timestamp + * 52:54:00:53:00:00 0 GigabitEthernet0/8/0.300 8 0 0 0 0 0 + * 52:54:00:53:00:01 0 GigabitEthernet0/8/0.300 8 0 0 0 0 0 + * 52:54:00:53:00:03 0 GigabitEthernet0/8/0.300 8 0 0 0 0 0 + * 52:54:00:53:00:02 0 GigabitEthernet0/8/0.300 8 0 0 0 0 0 + * 4 l2fib entries + * @cliexend + * + * Example of how to check that the set of 4 sequential MAC Address + * entries were added to L2 FIB table of the default + * bridge-domain. Used a count of 5 to produce an error: + * + * @cliexcmd{test l2fib check mac 52:54:00:53:00:00 count 5} + * The output of the check command is in the log files. Log file + * location may vary based on your OS and Version: + * + * <b><em># tail -f /var/log/messages | grep l2fib_test_command_fn</em></b> + * + * Sep 7 17:15:24 localhost vnet[4952]: l2fib_test_command_fn:446: key 52:54:00:53:00:04 AWOL + * + * Example of how to delete a set of 4 sequential MAC Address entries + * from L2 FIB table of the default bridge-domain: + * @cliexcmd{test l2fib del mac 52:54:00:53:00:00 count 4} + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (l2fib_test_command, static) = { + .path = "test l2fib", + .short_help = "test l2fib [add|del|check] mac <base-addr> count <nn>", + .function = l2fib_test_command_fn, +}; +/* *INDENT-ON* */ + + +/** + * Delete an entry from the l2fib. + * Return 0 if the entry was deleted, or 1 if it was not found + */ +static u32 +l2fib_del_entry_by_key (u64 raw_key) +{ + + l2fib_entry_result_t result; + l2fib_main_t *mp = &l2fib_main; + BVT (clib_bihash_kv) kv; + + /* set up key */ + kv.key = raw_key; + + if (BV (clib_bihash_search) (&mp->mac_table, &kv, &kv)) + return 1; + + result.raw = kv.value; + + /* decrement counter if dynamically learned mac */ + if ((result.fields.age_not == 0) && (l2learn_main.global_learn_count)) + l2learn_main.global_learn_count--; + + /* Remove entry from hash table */ + BV (clib_bihash_add_del) (&mp->mac_table, &kv, 0 /* is_add */ ); + return 0; +} + +/** + * Delete an entry from the l2fib. + * Return 0 if the entry was deleted, or 1 if it was not found + */ +u32 +l2fib_del_entry (u64 mac, u32 bd_index) +{ + return l2fib_del_entry_by_key (l2fib_make_key ((u8 *) & mac, bd_index)); +} + +/** + * Delete an entry from the L2FIB. 
+ * The CLI format is:
+ *    l2fib del <mac> <bd-id>
+ */
+static clib_error_t *
+l2fib_del (vlib_main_t * vm,
+	   unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  bd_main_t *bdm = &bd_main;
+  clib_error_t *error = 0;
+  u64 mac;
+  u32 bd_id;
+  u32 bd_index;
+  uword *p;
+
+  if (!unformat_user (input, unformat_ethernet_address, &mac))
+    {
+      error = clib_error_return (0, "expected mac address `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  if (!unformat (input, "%d", &bd_id))
+    {
+      error = clib_error_return (0, "expected bridge domain ID `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+  if (!p)
+    {
+      error = clib_error_return (0, "bridge domain ID %d invalid", bd_id);
+      goto done;
+    }
+  bd_index = p[0];
+
+  /* Delete the entry */
+  if (l2fib_del_entry (mac, bd_index))
+    {
+      error = clib_error_return (0, "mac entry not found");
+      goto done;
+    }
+
+done:
+  return error;
+}
+
+/*?
+ * This command deletes an existing MAC Address entry from the L2 FIB
+ * table of an existing bridge-domain.
+ *
+ * @cliexpar
+ * Example of how to delete a MAC Address entry from the L2 FIB table of a bridge-domain (where 200 is the bridge-domain-id):
+ * @cliexcmd{l2fib del 52:54:00:53:18:33 200}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_del_cli, static) = {
+  .path = "l2fib del",
+  .short_help = "l2fib del <mac> <bridge-domain-id>",
+  .function = l2fib_del,
+};
+/* *INDENT-ON* */
+
+/**
+ Kick off the ager to scan MAC entries and age/delete them
+*/
+void
+l2fib_start_ager_scan (vlib_main_t * vm)
+{
+  uword evt = L2_MAC_AGE_PROCESS_EVENT_ONE_PASS;
+
+  /* check if there is at least one bd with mac aging enabled */
+  l2_bridge_domain_t *bd_config;
+  vec_foreach (bd_config, l2input_main.bd_configs)
+  {
+    if (bd_config->bd_id != ~0 && bd_config->mac_age != 0)
+      {
+	evt = L2_MAC_AGE_PROCESS_EVENT_START;
+	break;
+      }
+  }
+
+  vlib_process_signal_event (vm, l2fib_mac_age_scanner_process_node.index,
+			     evt, 0);
+}
+
+/**
+ Flush all non-static MACs from an interface
+*/
+void
+l2fib_flush_int_mac (vlib_main_t * vm, u32 sw_if_index)
+{
+  *l2fib_swif_seq_num (sw_if_index) += 1;
+  l2fib_start_ager_scan (vm);
+}
+
+/**
+ Flush all non-static MACs in a bridge domain
+*/
+void
+l2fib_flush_bd_mac (vlib_main_t * vm, u32 bd_index)
+{
+  l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index);
+  bd_config->seq_num += 1;
+  l2fib_start_ager_scan (vm);
+}
+
+/**
+ Flush all non-static MACs - flushes all valid BDs
+*/
+void
+l2fib_flush_all_mac (vlib_main_t * vm)
+{
+  l2_bridge_domain_t *bd_config;
+  vec_foreach (bd_config, l2input_main.bd_configs)
+    if (bd_is_valid (bd_config))
+      bd_config->seq_num += 1;
+
+  l2fib_start_ager_scan (vm);
+}
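+
+/*
+ * Editor's note (illustrative values): none of the flush calls above walk
+ * the table synchronously. They bump a per-interface or per-bridge-domain
+ * sequence number and let the ager reclaim entries whose stored sequence
+ * numbers no longer match:
+ *
+ *   // entry learned while swif seq num == 3 and bd seq num == 7
+ *   // stores result.fields.sn == { .swif = 3, .bd = 7 }
+ *   *l2fib_swif_seq_num (sw_if_index) += 1;  // flush: swif seq num -> 4
+ *   // l2fib_cur_seq_num (bd_index, sw_if_index) now returns {4, 7},
+ *   // so the ager scan (and the l2-fwd stale-entry check) sees a
+ *   // mismatch and treats the entry as gone.
+ */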
+
+/**
+ Flush MACs, except static ones, associated with an interface
+ The CLI format is:
+ l2fib flush-mac interface <if-name>
+*/
+static clib_error_t *
+l2fib_flush_mac_int (vlib_main_t * vm,
+		     unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  u32 sw_if_index;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  l2fib_flush_int_mac (vm, sw_if_index);
+
+done:
+  return error;
+}
+
+/**
+ Flush all MACs, except static ones
+ The CLI format is:
+ l2fib flush-mac all
+*/
+static clib_error_t *
+l2fib_flush_mac_all (vlib_main_t * vm,
+		     unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  l2fib_flush_all_mac (vm);
+  return 0;
+}
+
+/*?
+ * This command kicks off the ager to delete all MAC Address entries,
+ * except static ones, from the L2 FIB table, across all bridge domains.
+ *
+ * @cliexpar
+ * Example of how to flush all MAC Address entries, except static ones, from the L2 FIB table:
+ * @cliexcmd{l2fib flush-mac all}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_flush_mac_all_cli, static) = {
+  .path = "l2fib flush-mac all",
+  .short_help = "l2fib flush-mac all",
+  .function = l2fib_flush_mac_all,
+};
+/* *INDENT-ON* */
+
+/*?
+ * This command kicks off the ager to delete all existing MAC Address
+ * entries, except static ones, associated with an interface from the
+ * L2 FIB table.
+ *
+ * @cliexpar
+ * Example of how to flush MAC Address entries learned on an interface from the L2 FIB table:
+ * @cliexcmd{l2fib flush-mac interface GigabitEthernet2/1/0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_flush_mac_int_cli, static) = {
+  .path = "l2fib flush-mac interface",
+  .short_help = "l2fib flush-mac interface <if-name>",
+  .function = l2fib_flush_mac_int,
+};
+/* *INDENT-ON* */
+
+/**
+ Flush bridge-domain MACs except static ones.
+ The CLI format is:
+ l2fib flush-mac bridge-domain <bd-id>
+*/
+static clib_error_t *
+l2fib_flush_mac_bd (vlib_main_t * vm,
+		    unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  bd_main_t *bdm = &bd_main;
+  clib_error_t *error = 0;
+  u32 bd_index, bd_id;
+  uword *p;
+
+  if (!unformat (input, "%d", &bd_id))
+    {
+      error = clib_error_return (0, "expecting bridge-domain id but got `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+  if (p)
+    bd_index = *p;
+  else
+    return clib_error_return (0, "No such bridge domain %d", bd_id);
+
+  l2fib_flush_bd_mac (vm, bd_index);
+
+done:
+  return error;
+}
+
+/*?
+ * This command kicks off the ager to delete all existing MAC Address entries,
+ * except static ones, in a bridge domain from the L2 FIB table.
+ * + * @cliexpar + * Example of how to flush MAC Address entries learned in a bridge domain from the L2 FIB table: + * @cliexcmd{l2fib flush-mac bridge-domain 1000} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (l2fib_flush_mac_bd_cli, static) = { + .path = "l2fib flush-mac bridge-domain", + .short_help = "l2fib flush-mac bridge-domain <bd-id>", + .function = l2fib_flush_mac_bd, +}; +/* *INDENT-ON* */ + +clib_error_t * +l2fib_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) +{ + l2_input_config_t *config = l2input_intf_config (sw_if_index); + if ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0 && config->bridge) + l2fib_flush_int_mac (vnm->vlib_main, sw_if_index); + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (l2fib_sw_interface_up_down); + +BVT (clib_bihash) * get_mac_table (void) +{ + l2fib_main_t *mp = &l2fib_main; + return &mp->mac_table; +} + +static_always_inline void * +allocate_mac_evt_buf (u32 client, u32 client_index) +{ + l2fib_main_t *fm = &l2fib_main; + vl_api_l2_macs_event_t *mp = vl_msg_api_alloc + (sizeof (*mp) + (fm->max_macs_in_event * sizeof (vl_api_mac_entry_t))); + mp->_vl_msg_id = htons (VL_API_L2_MACS_EVENT); + mp->pid = htonl (client); + mp->client_index = client_index; + return mp; +} + +static_always_inline f64 +l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) +{ + l2fib_main_t *fm = &l2fib_main; + l2learn_main_t *lm = &l2learn_main; + + BVT (clib_bihash) * h = &fm->mac_table; + int i, j, k; + f64 last_start = start_time; + f64 accum_t = 0; + f64 delta_t = 0; + u32 evt_idx = 0; + u32 learn_count = 0; + u32 client = lm->client_pid; + u32 cl_idx = lm->client_index; + vl_api_l2_macs_event_t *mp = 0; + unix_shared_memory_queue_t *q = 0; + + if (client) + { + mp = allocate_mac_evt_buf (client, cl_idx); + q = vl_api_client_index_to_input_queue (lm->client_index); + } + + for (i = 0; i < h->nbuckets; i++) + { + /* allow no more than 20us without a pause */ + delta_t = vlib_time_now (vm) - last_start; + if (delta_t > 20e-6) + { + vlib_process_suspend (vm, 100e-6); /* suspend for 100 us */ + last_start = vlib_time_now (vm); + accum_t += delta_t; + } + + if (i < (h->nbuckets - 3)) + { + BVT (clib_bihash_bucket) * b = &h->buckets[i + 3]; + CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); + b = &h->buckets[i + 1]; + if (b->offset) + { + BVT (clib_bihash_value) * v = + BV (clib_bihash_get_value) (h, b->offset); + CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, LOAD); + } + } + + BVT (clib_bihash_bucket) * b = &h->buckets[i]; + if (b->offset == 0) + continue; + BVT (clib_bihash_value) * v = BV (clib_bihash_get_value) (h, b->offset); + for (j = 0; j < (1 << b->log2_pages); j++) + { + for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) + { + if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) + continue; + + l2fib_entry_key_t key = {.raw = v->kvp[k].key }; + l2fib_entry_result_t result = {.raw = v->kvp[k].value }; + + if (result.fields.age_not == 0) + learn_count++; + + if (client) + { + if (PREDICT_FALSE (evt_idx >= fm->max_macs_in_event)) + { + /* event message full, send it and start a new one */ + if (q && (q->cursize < q->maxsize)) + { + mp->n_macs = htonl (evt_idx); + vl_msg_api_send_shmem (q, (u8 *) & mp); + mp = allocate_mac_evt_buf (client, cl_idx); + } + else + { + clib_warning ("MAC event to pid %d queue stuffed!" 
+ " %d MAC entries lost", client, + evt_idx); + } + evt_idx = 0; + } + + if (result.fields.lrn_evt) + { + /* copy mac entry to event msg */ + clib_memcpy (mp->mac[evt_idx].mac_addr, key.fields.mac, + 6); + mp->mac[evt_idx].is_del = 0; + mp->mac[evt_idx].sw_if_index = + htonl (result.fields.sw_if_index); + /* clear event bit and update mac entry */ + result.fields.lrn_evt = 0; + BVT (clib_bihash_kv) kv; + kv.key = key.raw; + kv.value = result.raw; + BV (clib_bihash_add_del) (&fm->mac_table, &kv, 1); + evt_idx++; + continue; /* skip aging */ + } + } + + if (event_only || result.fields.age_not) + continue; /* skip aging - static_mac alsways age_not */ + + /* start aging processing */ + u32 bd_index = key.fields.bd_index; + u32 sw_if_index = result.fields.sw_if_index; + u16 sn = l2fib_cur_seq_num (bd_index, sw_if_index).as_u16; + if (result.fields.sn.as_u16 != sn) + goto age_out; /* stale mac */ + + l2_bridge_domain_t *bd_config = + vec_elt_at_index (l2input_main.bd_configs, bd_index); + + if (bd_config->mac_age == 0) + continue; /* skip aging */ + + i16 delta = (u8) (start_time / 60) - result.fields.timestamp; + delta += delta < 0 ? 256 : 0; + + if (delta < bd_config->mac_age) + continue; /* still valid */ + + age_out: + if (client) + { + /* copy mac entry to event msg */ + clib_memcpy (mp->mac[evt_idx].mac_addr, key.fields.mac, 6); + mp->mac[evt_idx].is_del = 1; + mp->mac[evt_idx].sw_if_index = + htonl (result.fields.sw_if_index); + evt_idx++; + } + /* delete mac entry */ + BVT (clib_bihash_kv) kv; + kv.key = key.raw; + BV (clib_bihash_add_del) (&fm->mac_table, &kv, 0); + learn_count--; + } + v++; + } + } + + /* keep learn count consistent */ + l2learn_main.global_learn_count = learn_count; + + if (mp) + { + /* send any outstanding mac event message else free message buffer */ + if (evt_idx) + { + if (q && (q->cursize < q->maxsize)) + { + mp->n_macs = htonl (evt_idx); + vl_msg_api_send_shmem (q, (u8 *) & mp); + } + else + { + clib_warning ("MAC event to pid %d queue stuffed!" 
+ " %d MAC entries lost", client, evt_idx); + vl_msg_api_free (mp); + } + } + else + vl_msg_api_free (mp); + } + return delta_t + accum_t; +} + +/* Maximum f64 value */ +#define TIME_MAX (1.7976931348623157e+308) + +static uword +l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + uword event_type, *event_data = 0; + l2fib_main_t *fm = &l2fib_main; + l2learn_main_t *lm = &l2learn_main; + bool enabled = 0; + f64 start_time, next_age_scan_time = TIME_MAX; + + while (1) + { + if (lm->client_pid) + vlib_process_wait_for_event_or_clock (vm, fm->event_scan_delay); + else if (enabled) + { + f64 t = next_age_scan_time - vlib_time_now (vm); + vlib_process_wait_for_event_or_clock (vm, t); + } + else + vlib_process_wait_for_event (vm); + + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + + start_time = vlib_time_now (vm); + enum + { SCAN_MAC_AGE, SCAN_MAC_EVENT, SCAN_DISABLE } scan = SCAN_MAC_AGE; + + switch (event_type) + { + case ~0: /* timer expired */ + if (lm->client_pid != 0 && start_time < next_age_scan_time) + scan = SCAN_MAC_EVENT; + break; + + case L2_MAC_AGE_PROCESS_EVENT_START: + enabled = 1; + break; + + case L2_MAC_AGE_PROCESS_EVENT_STOP: + enabled = 0; + scan = SCAN_DISABLE; + break; + + case L2_MAC_AGE_PROCESS_EVENT_ONE_PASS: + break; + + default: + ASSERT (0); + } + + if (scan == SCAN_MAC_EVENT) + l2fib_main.evt_scan_duration = l2fib_scan (vm, start_time, 1); + else + { + if (scan == SCAN_MAC_AGE) + l2fib_main.age_scan_duration = l2fib_scan (vm, start_time, 0); + if (scan == SCAN_DISABLE) + { + l2fib_main.age_scan_duration = 0; + l2fib_main.evt_scan_duration = 0; + } + /* schedule next scan */ + if (enabled) + next_age_scan_time = start_time + L2FIB_AGE_SCAN_INTERVAL; + else + next_age_scan_time = TIME_MAX; + } + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2fib_mac_age_scanner_process_node) = { + .function = l2fib_mac_age_scanner_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "l2fib-mac-age-scanner-process", +}; +/* *INDENT-ON* */ + +clib_error_t * +l2fib_init (vlib_main_t * vm) +{ + l2fib_main_t *mp = &l2fib_main; + l2fib_entry_key_t test_key; + u8 test_mac[6]; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main (); + + /* Create the hash table */ + BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table", + L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE); + + /* verify the key constructor is good, since it is endian-sensitive */ + memset (test_mac, 0, sizeof (test_mac)); + test_mac[0] = 0x11; + test_key.raw = 0; + test_key.raw = l2fib_make_key ((u8 *) & test_mac, 0x1234); + ASSERT (test_key.fields.mac[0] == 0x11); + ASSERT (test_key.fields.bd_index == 0x1234); + + return 0; +} + +VLIB_INIT_FUNCTION (l2fib_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h new file mode 100644 index 00000000..7cc2dc5e --- /dev/null +++ b/src/vnet/l2/l2_fib.h @@ -0,0 +1,432 @@ +/* + * l2_fib.h : layer 2 forwarding table (aka mac table) + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2fib_h
+#define included_l2fib_h
+
+#include <vlib/vlib.h>
+#include <vppinfra/bihash_8_8.h>
+
+/*
+ * The size of the hash table
+ */
+#define L2FIB_NUM_BUCKETS (64 * 1024)
+#define L2FIB_MEMORY_SIZE (512<<20)
+
+/* Ager scan interval is 1 minute for aging */
+#define L2FIB_AGE_SCAN_INTERVAL (60.0)
+
+/* MAC event scan delay is 100 msec unless specified by MAC event client */
+#define L2FIB_EVENT_SCAN_DELAY_DEFAULT (0.1)
+
+/* Max MACs in an event message is 100 unless specified by MAC event client */
+#define L2FIB_EVENT_MAX_MACS_DEFAULT (100)
+
+/* MAC event learn limit is 1000 unless specified by MAC event client */
+#define L2FIB_EVENT_LEARN_LIMIT_DEFAULT (1000)
+
+typedef struct
+{
+
+  /* hash table */
+  BVT (clib_bihash) mac_table;
+
+  /* per swif vector of sequence number for interface based flush of MACs */
+  u8 *swif_seq_num;
+
+  /* last event or ager scan duration */
+  f64 evt_scan_duration;
+  f64 age_scan_duration;
+
+  /* delay between event scans, defaults to 100 msec */
+  f64 event_scan_delay;
+
+  /* max macs in an event message, defaults to 100 entries */
+  u32 max_macs_in_event;
+
+  /* convenience variables */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+} l2fib_main_t;
+
+extern l2fib_main_t l2fib_main;
+
+/*
+ * The L2fib key is the mac address and bridge domain ID
+ */
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      u16 bd_index;
+      u8 mac[6];
+    } fields;
+    struct
+    {
+      u32 w0;
+      u32 w1;
+    } words;
+    u64 raw;
+  };
+} l2fib_entry_key_t;
+
+STATIC_ASSERT_SIZEOF (l2fib_entry_key_t, 8);
+
+
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      u8 swif;
+      u8 bd;
+    };
+    u16 as_u16;
+  };
+} l2fib_seq_num_t;
+
+/*
+ * The l2fib entry results
+ */
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      u32 sw_if_index;		/* output sw_if_index (L3 intf if bvi==1) */
+
+      u8 static_mac:1;		/* static mac, no MAC move */
+      u8 age_not:1;		/* not subject to age */
+      u8 bvi:1;			/* mac is for a bridged virtual interface */
+      u8 filter:1;		/* drop packets to/from this mac */
+      u8 lrn_evt:1;		/* MAC learned to be sent in L2 MAC event */
+      u8 unused:3;
+
+      u8 timestamp;		/* timestamp for aging */
+      l2fib_seq_num_t sn;	/* bd/int seq num */
+    } fields;
+    u64 raw;
+  };
+} l2fib_entry_result_t;
+
+STATIC_ASSERT_SIZEOF (l2fib_entry_result_t, 8);
+
+/**
+ * Compute the hash for the given key and return
+ * the corresponding bucket index
+ */
+always_inline u32
+l2fib_compute_hash_bucket (l2fib_entry_key_t * key)
+{
+  u32 result;
+  u32 temp_a;
+  u32 temp_b;
+
+  result = 0xa5a5a5a5;		/* some seed */
+  temp_a = key->words.w0;
+  temp_b = key->words.w1;
+  hash_mix32 (temp_a, temp_b, result);
+
+  return result % L2FIB_NUM_BUCKETS;
+}
+
+always_inline u64
+l2fib_make_key (u8 * mac_address, u16 bd_index)
+{
+  u64 temp;
+
+  /*
+   * The mac address in memory is A:B:C:D:E:F
+   * The bd id in register is H:L
+   */
+#if CLIB_ARCH_IS_LITTLE_ENDIAN
+  /*
+   * Create the in-register key as F:E:D:C:B:A:H:L
+   * In memory the key is L:H:A:B:C:D:E:F
+   */
+  temp = *((u64 *) (mac_address)) << 16;
+  temp = (temp & ~0xffff) | (u64) (bd_index);
+#else
+  /*
+   * Create the in-register key as H:L:A:B:C:D:E:F
+   * In memory the key is H:L:A:B:C:D:E:F
+   */
+  temp = *((u64 *) (mac_address)) >> 16;
+  temp = temp | (((u64) bd_index) << 48);
+#endif
+
+  return temp;
+}
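+
+/*
+ * Editor's worked example (little-endian host, hypothetical values): for
+ * MAC 01:02:03:04:05:06 in bridge domain 0x1234, reading the address out
+ * of storage with at least 8 addressable bytes (as a packet header or a
+ * MAC-carrying u64 provides),
+ *
+ *   u64 raw = l2fib_make_key (mac_bytes, 0x1234);
+ *
+ * yields the register value 0x0605040302011234, i.e. the in-memory bytes
+ * 34 12 01 02 03 04 05 06. Overlaid with l2fib_entry_key_t this reads
+ * back as fields.bd_index == 0x1234 and fields.mac[] == 01:02:03:04:05:06,
+ * which is exactly the property l2fib_init asserts with its test key.
+ */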
+
+
+/**
+ * Lookup the entry for mac and bd_index in the mac table for 1 packet.
+ * Cached_key and cached_result are used as a one-entry cache.
+ * The function reads and updates them as needed.
+ *
+ * mac0 and bd_index0 are the keys. The entry is written to result0.
+ * If the entry was not found, result0 is set to ~0.
+ *
+ * key0 returns with the computed key, convenient if the entry needs to
+ * be updated afterward. bucket0 is always set to ~0 here; the hash
+ * bucket is not computed on this path.
+ */
+
+static_always_inline void
+l2fib_lookup_1 (BVT (clib_bihash) * mac_table,
+		l2fib_entry_key_t * cached_key,
+		l2fib_entry_result_t * cached_result,
+		u8 * mac0,
+		u16 bd_index0,
+		l2fib_entry_key_t * key0,
+		u32 * bucket0, l2fib_entry_result_t * result0)
+{
+  /* set up key */
+  key0->raw = l2fib_make_key (mac0, bd_index0);
+  *bucket0 = ~0;
+
+  if (key0->raw == cached_key->raw)
+    {
+      /* Hit in the one-entry cache */
+      result0->raw = cached_result->raw;
+    }
+  else
+    {
+      /* Do a regular mac table lookup */
+      BVT (clib_bihash_kv) kv;
+
+      kv.key = key0->raw;
+      kv.value = ~0ULL;
+      BV (clib_bihash_search_inline) (mac_table, &kv);
+      result0->raw = kv.value;
+
+      /* Update one-entry cache */
+      cached_key->raw = key0->raw;
+      cached_result->raw = result0->raw;
+    }
+}
+
+
+/**
+ * Lookup the entry for mac and bd_index in the mac table for 2 packets.
+ * The lookups for the two packets are interleaved.
+ *
+ * Cached_key and cached_result are used as a one-entry cache.
+ * The function reads and updates them as needed.
+ *
+ * mac0 and bd_index0 are the keys. The entry is written to result0.
+ * If the entry was not found, result0 is set to ~0. The same
+ * holds for mac1/bd_index1/result1.
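+ *
+ * Editor's note: callers seed the cache once per frame and reuse it
+ * across packets, e.g. (sketch; see l2fwd_node_inline for the real use):
+ *
+ *   l2fib_entry_key_t cached_key;
+ *   l2fib_entry_result_t cached_result;
+ *   cached_key.raw = ~0;     // invalidate in case the table changed
+ *   cached_result.raw = ~0;
+ *   // ... then per packet:
+ *   l2fib_lookup_1 (mac_table, &cached_key, &cached_result,
+ *                   h0->dst_address, vnet_buffer (b0)->l2.bd_index,
+ *                   &key0, &bucket0, &result0);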
+ */ +static_always_inline void +l2fib_lookup_2 (BVT (clib_bihash) * mac_table, + l2fib_entry_key_t * cached_key, + l2fib_entry_result_t * cached_result, + u8 * mac0, + u8 * mac1, + u16 bd_index0, + u16 bd_index1, + l2fib_entry_key_t * key0, + l2fib_entry_key_t * key1, + u32 * bucket0, + u32 * bucket1, + l2fib_entry_result_t * result0, + l2fib_entry_result_t * result1) +{ + /* set up key */ + key0->raw = l2fib_make_key (mac0, bd_index0); + key1->raw = l2fib_make_key (mac1, bd_index1); + + if ((key0->raw == cached_key->raw) && (key1->raw == cached_key->raw)) + { + /* Both hit in the one-entry cache */ + result0->raw = cached_result->raw; + result1->raw = cached_result->raw; + *bucket0 = ~0; + *bucket1 = ~0; + + } + else + { + BVT (clib_bihash_kv) kv0, kv1; + + /* + * Do a regular mac table lookup + * Interleave lookups for packet 0 and packet 1 + */ + kv0.key = key0->raw; + kv1.key = key1->raw; + kv0.value = ~0ULL; + kv1.value = ~0ULL; + + BV (clib_bihash_search_inline) (mac_table, &kv0); + BV (clib_bihash_search_inline) (mac_table, &kv1); + + result0->raw = kv0.value; + result1->raw = kv1.value; + + /* Update one-entry cache */ + cached_key->raw = key1->raw; + cached_result->raw = result1->raw; + } +} + +static_always_inline void +l2fib_lookup_4 (BVT (clib_bihash) * mac_table, + l2fib_entry_key_t * cached_key, + l2fib_entry_result_t * cached_result, + u8 * mac0, + u8 * mac1, + u8 * mac2, + u8 * mac3, + u16 bd_index0, + u16 bd_index1, + u16 bd_index2, + u16 bd_index3, + l2fib_entry_key_t * key0, + l2fib_entry_key_t * key1, + l2fib_entry_key_t * key2, + l2fib_entry_key_t * key3, + u32 * bucket0, + u32 * bucket1, + u32 * bucket2, + u32 * bucket3, + l2fib_entry_result_t * result0, + l2fib_entry_result_t * result1, + l2fib_entry_result_t * result2, + l2fib_entry_result_t * result3) +{ + /* set up key */ + key0->raw = l2fib_make_key (mac0, bd_index0); + key1->raw = l2fib_make_key (mac1, bd_index1); + key2->raw = l2fib_make_key (mac2, bd_index2); + key3->raw = l2fib_make_key (mac3, bd_index3); + + if ((key0->raw == cached_key->raw) && (key1->raw == cached_key->raw) && + (key2->raw == cached_key->raw) && (key3->raw == cached_key->raw)) + { + /* Both hit in the one-entry cache */ + result0->raw = cached_result->raw; + result1->raw = cached_result->raw; + result2->raw = cached_result->raw; + result3->raw = cached_result->raw; + *bucket0 = ~0; + *bucket1 = ~0; + *bucket2 = ~0; + *bucket3 = ~0; + + } + else + { + BVT (clib_bihash_kv) kv0, kv1, kv2, kv3; + + /* + * Do a regular mac table lookup + * Interleave lookups for packet 0 and packet 1 + */ + kv0.key = key0->raw; + kv1.key = key1->raw; + kv2.key = key2->raw; + kv3.key = key3->raw; + kv0.value = ~0ULL; + kv1.value = ~0ULL; + kv2.value = ~0ULL; + kv3.value = ~0ULL; + + BV (clib_bihash_search_inline) (mac_table, &kv0); + BV (clib_bihash_search_inline) (mac_table, &kv1); + BV (clib_bihash_search_inline) (mac_table, &kv2); + BV (clib_bihash_search_inline) (mac_table, &kv3); + + result0->raw = kv0.value; + result1->raw = kv1.value; + result2->raw = kv2.value; + result3->raw = kv3.value; + + /* Update one-entry cache */ + cached_key->raw = key1->raw; + cached_result->raw = result1->raw; + } +} + +void l2fib_clear_table (void); + +void +l2fib_add_entry (u64 mac, + u32 bd_index, + u32 sw_if_index, u8 static_mac, u8 drop_mac, u8 bvi_mac); + +static inline void +l2fib_add_fwd_entry (u64 mac, u32 bd_index, u32 sw_if_index, u8 static_mac, + u8 bvi_mac) +{ + l2fib_add_entry (mac, bd_index, sw_if_index, static_mac, 0, bvi_mac); +} + +static inline void 
+l2fib_add_filter_entry (u64 mac, u32 bd_index)
+{
+  l2fib_add_entry (mac, bd_index, ~0, 1, 1, 0);
+}
+
+u32 l2fib_del_entry (u64 mac, u32 bd_index);
+
+void l2fib_start_ager_scan (vlib_main_t * vm);
+
+void l2fib_flush_int_mac (vlib_main_t * vm, u32 sw_if_index);
+
+void l2fib_flush_bd_mac (vlib_main_t * vm, u32 bd_index);
+
+void l2fib_flush_all_mac (vlib_main_t * vm);
+
+void
+l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key,
+		  l2fib_entry_result_t ** l2fe_res);
+
+u8 *format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args);
+
+static_always_inline u8 *
+l2fib_swif_seq_num (u32 sw_if_index)
+{
+  l2fib_main_t *mp = &l2fib_main;
+  return vec_elt_at_index (mp->swif_seq_num, sw_if_index);
+}
+
+static_always_inline u8 *
+l2fib_valid_swif_seq_num (u32 sw_if_index)
+{
+  l2fib_main_t *mp = &l2fib_main;
+  vec_validate (mp->swif_seq_num, sw_if_index);
+  return l2fib_swif_seq_num (sw_if_index);
+}
+
+BVT (clib_bihash) * get_mac_table (void);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c
new file mode 100644
index 00000000..ed9e5ac2
--- /dev/null
+++ b/src/vnet/l2/l2_flood.c
@@ -0,0 +1,568 @@
+/*
+ * l2_flood.c : layer 2 flooding
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_bvi.h>
+#include <vnet/replication.h>
+#include <vnet/l2/l2_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+
+
+/**
+ * @file
+ * @brief Ethernet Flooding.
+ *
+ * Flooding uses the packet replication infrastructure to send a copy of the
+ * packet to each member interface. Logically the replication infrastructure
+ * expects two graph nodes: a prep node that initiates replication and sends the
+ * packet to the first destination, and a recycle node that is passed the packet
+ * after it has been transmitted.
+ *
+ * To decrease the amount of code, l2 flooding implements both functions in
+ * the same graph node. This node can tell if it is being called as the "prep"
+ * or "recycle" using replication_is_recycled().
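+ *
+ * Editor's sketch of the resulting shape (illustration, not the full node):
+ *
+ *   if (!replication_is_recycled (b0))
+ *     {
+ *       // "prep": pick the first member, set up the replication context
+ *       ctx = replication_prep (vm, b0, l2flood_node.index, 1);
+ *       ctx->feature_counter = n_members - 1;
+ *     }
+ *   else
+ *     {
+ *       // "recycle": restore the packet, step to the next member
+ *       ctx = replication_get_ctx (b0);
+ *       replication_recycle (vm, b0, is_last_member);
+ *     }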
+ */ + + +typedef struct +{ + + /* Next nodes for each feature */ + u32 feat_next_node_index[32]; + + /* next node index for the L3 input node of each ethertype */ + next_by_ethertype_t l3_next; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} l2flood_main_t; + +typedef struct +{ + u8 src[6]; + u8 dst[6]; + u32 sw_if_index; + u16 bd_index; +} l2flood_trace_t; + + +/* packet trace format function */ +static u8 * +format_l2flood_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2flood_trace_t *t = va_arg (*args, l2flood_trace_t *); + + s = format (s, "l2-flood: sw_if_index %d dst %U src %U bd_index %d", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, t->bd_index); + return s; +} + +l2flood_main_t l2flood_main; + +static vlib_node_registration_t l2flood_node; + +#define foreach_l2flood_error \ +_(L2FLOOD, "L2 flood packets") \ +_(REPL_FAIL, "L2 replication failures") \ +_(NO_MEMBERS, "L2 replication complete") \ +_(BVI_BAD_MAC, "BVI L3 mac mismatch") \ +_(BVI_ETHERTYPE, "BVI packet with unhandled ethertype") + +typedef enum +{ +#define _(sym,str) L2FLOOD_ERROR_##sym, + foreach_l2flood_error +#undef _ + L2FLOOD_N_ERROR, +} l2flood_error_t; + +static char *l2flood_error_strings[] = { +#define _(sym,string) string, + foreach_l2flood_error +#undef _ +}; + +typedef enum +{ + L2FLOOD_NEXT_L2_OUTPUT, + L2FLOOD_NEXT_DROP, + L2FLOOD_N_NEXT, +} l2flood_next_t; + +/* + * Perform flooding on one packet + * + * Due to the way BVI processing can modify the packet, the BVI interface + * (if present) must be processed last in the replication. The member vector + * is arranged so that the BVI interface is always the first element. + * Flooding walks the vector in reverse. + * + * BVI processing causes the packet to go to L3 processing. This strips the + * L2 header, which is fine because the replication infrastructure restores + * it. However L3 processing can trigger larger changes to the packet. For + * example, an ARP request could be turned into an ARP reply, an ICMP request + * could be turned into an ICMP reply. If BVI processing is not performed + * last, the modified packet would be replicated to the remaining members. 
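+ *
+ * Editor's example of the member walk: with members [BVI, A(shg 0),
+ * B(shg 5), C(shg 5)] and a packet received on C (so in_shg == 5), the
+ * reverse walk skips C (input interface) and B (same split-horizon
+ * group), replicates to A, and processes the BVI last.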
+ */ + +static_always_inline void +l2flood_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + l2flood_main_t * msm, + u64 * counter_base, + vlib_buffer_t * b0, + u32 * sw_if_index0, + l2fib_entry_key_t * key0, + u32 * bucket0, l2fib_entry_result_t * result0, u32 * next0) +{ + u16 bd_index0; + l2_bridge_domain_t *bd_config; + l2_flood_member_t *members; + i32 current_member; /* signed */ + replication_context_t *ctx; + u8 in_shg = vnet_buffer (b0)->l2.shg; + + if (!replication_is_recycled (b0)) + { + + /* Do flood "prep node" processing */ + + /* Get config for the bridge domain interface */ + bd_index0 = vnet_buffer (b0)->l2.bd_index; + bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index0); + members = bd_config->members; + + /* Find first member that passes the reflection and SHG checks */ + current_member = bd_config->flood_count - 1; + while ((current_member >= 0) && + ((members[current_member].sw_if_index == *sw_if_index0) || + (in_shg && members[current_member].shg == in_shg))) + { + current_member--; + } + + if (current_member < 0) + { + /* No members to flood to */ + *next0 = L2FLOOD_NEXT_DROP; + b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS]; + return; + } + + if ((current_member > 0) && + ((current_member > 1) || + ((members[0].sw_if_index != *sw_if_index0) && + (!in_shg || members[0].shg != in_shg)))) + { + /* If more than one member then initiate replication */ + ctx = + replication_prep (vm, b0, l2flood_node.index, 1 /* l2_packet */ ); + ctx->feature_replicas = (uword) members; + ctx->feature_counter = current_member; + } + + } + else + { + vnet_buffer_opaque_t *vnet_buff_op; + + /* Do flood "recycle node" processing */ + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL)) + { + (void) replication_recycle (vm, b0, 1 /* is_last */ ); + *next0 = L2FLOOD_NEXT_DROP; + b0->error = node->errors[L2FLOOD_ERROR_REPL_FAIL]; + return; + } + + ctx = replication_get_ctx (b0); + replication_clear_recycled (b0); + + members = (l2_flood_member_t *) (intptr_t) ctx->feature_replicas; + current_member = (i32) ctx->feature_counter - 1; + + /* Need to update input index from saved packet context */ + vnet_buff_op = (vnet_buffer_opaque_t *) ctx->vnet_buffer; + *sw_if_index0 = vnet_buff_op->sw_if_index[VLIB_RX]; + + /* Find next member that passes the reflection and SHG check */ + while ((current_member >= 0) && + ((members[current_member].sw_if_index == *sw_if_index0) || + (in_shg && members[current_member].shg == in_shg))) + { + current_member--; + } + + if (current_member < 0) + { + /* + * No more members to flood to. + * Terminate replication and drop packet. 
+ */ + + replication_recycle (vm, b0, 1 /* is_last */ ); + + *next0 = L2FLOOD_NEXT_DROP; + /* Ideally we woudn't bump a counter here, just silently complete */ + b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS]; + return; + } + + /* Restore packet and context and continue replication */ + ctx->feature_counter = current_member; + replication_recycle (vm, b0, ((current_member == 0) || /*is_last */ + ((current_member == 1) && + ((members[0].sw_if_index == + *sw_if_index0) || (in_shg + && members[0].shg == + in_shg))))); + } + + /* Forward packet to the current member */ + if (PREDICT_FALSE (members[current_member].flags & L2_FLOOD_MEMBER_BVI)) + { + /* Do BVI processing */ + u32 rc; + rc = l2_to_bvi (vm, + msm->vnet_main, + b0, + members[current_member].sw_if_index, + &msm->l3_next, next0); + + if (PREDICT_FALSE (rc)) + { + if (rc == TO_BVI_ERR_BAD_MAC) + { + b0->error = node->errors[L2FLOOD_ERROR_BVI_BAD_MAC]; + *next0 = L2FLOOD_NEXT_DROP; + } + else if (rc == TO_BVI_ERR_ETHERTYPE) + { + b0->error = node->errors[L2FLOOD_ERROR_BVI_ETHERTYPE]; + *next0 = L2FLOOD_NEXT_DROP; + } + } + } + else + { + /* Do normal L2 forwarding */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + members[current_member].sw_if_index; + *next0 = L2FLOOD_NEXT_L2_OUTPUT; + + } + +} + + +static uword +l2flood_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + l2flood_next_t next_index; + l2flood_main_t *msm = &l2flood_main; + vlib_node_t *n = vlib_get_node (vm, l2flood_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t *em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 6 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + l2fib_entry_key_t key0, key1; + l2fib_entry_result_t result0, result1; + u32 bucket0, bucket1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p2, *p3, *p4, *p5; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + /* Prefetch the buffer header for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + + /* Prefetch the replication context for the N+1 loop iteration */ + /* This depends on the buffer header above */ + replication_prefetch_ctx (p2); + replication_prefetch_ctx (p3); + + /* Prefetch the packet for the N+1 loop iteration */ + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* RX interface handles */ + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + /* process 2 pkts */ + em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 2; + + l2flood_process (vm, node, msm, + &em->counters[node_counter_base_index], b0, + &sw_if_index0, &key0, &bucket0, &result0, &next0); + + l2flood_process (vm, node, msm, + &em->counters[node_counter_base_index], b1, + &sw_if_index1, &key1, &bucket1, &result1, &next1); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + l2flood_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + ethernet_header_t *h0 = vlib_buffer_get_current (b0); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer (b0)->l2.bd_index; + clib_memcpy (t->src, h0->src_address, 6); + clib_memcpy (t->dst, h0->dst_address, 6); + } + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + l2flood_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + ethernet_header_t *h1 = vlib_buffer_get_current (b1); + t->sw_if_index = sw_if_index1; + t->bd_index = vnet_buffer (b1)->l2.bd_index; + clib_memcpy (t->src, h1->src_address, 6); + clib_memcpy (t->dst, h1->dst_address, 6); + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 sw_if_index0; + l2fib_entry_key_t key0; + l2fib_entry_result_t result0; + u32 bucket0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + /* process 1 pkt */ + em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 1; + + l2flood_process (vm, node, msm, + &em->counters[node_counter_base_index], b0, + &sw_if_index0, &key0, &bucket0, &result0, &next0); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2flood_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + ethernet_header_t *h0 = vlib_buffer_get_current (b0); + t->sw_if_index = sw_if_index0; + t->bd_index = 
vnet_buffer (b0)->l2.bd_index; + clib_memcpy (t->src, h0->src_address, 6); + clib_memcpy (t->dst, h0->dst_address, 6); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2flood_node,static) = { + .function = l2flood_node_fn, + .name = "l2-flood", + .vector_size = sizeof (u32), + .format_trace = format_l2flood_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2flood_error_strings), + .error_strings = l2flood_error_strings, + + .n_next_nodes = L2FLOOD_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2FLOOD_NEXT_L2_OUTPUT] = "l2-output", + [L2FLOOD_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (l2flood_node, l2flood_node_fn) + clib_error_t *l2flood_init (vlib_main_t * vm) +{ + l2flood_main_t *mp = &l2flood_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main (); + + /* Initialize the feature next-node indexes */ + feat_bitmap_init_next_nodes (vm, + l2flood_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names (), + mp->feat_next_node_index); + + return 0; +} + +VLIB_INIT_FUNCTION (l2flood_init); + + + +/** Add the L3 input node for this ethertype to the next nodes structure. */ +void +l2flood_register_input_type (vlib_main_t * vm, + ethernet_type_t type, u32 node_index) +{ + l2flood_main_t *mp = &l2flood_main; + u32 next_index; + + next_index = vlib_node_add_next (vm, l2flood_node.index, node_index); + + next_by_ethertype_register (&mp->l3_next, type, next_index); +} + + +/** + * Set subinterface flood enable/disable. + * The CLI format is: + * set interface l2 flood <interface> [disable] + */ +static clib_error_t * +int_flood (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index; + u32 enable; + + if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) + { + enable = 0; + } + + /* set the interface flag */ + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_FLOOD, enable); + +done: + return error; +} + +/*? + * Layer 2 flooding can be enabled and disabled on each + * interface and on each bridge-domain. Use this command to + * manage interfaces. It is enabled by default. + * + * @cliexpar + * Example of how to enable flooding: + * @cliexcmd{set interface l2 flood GigabitEthernet0/8/0} + * Example of how to disable flooding: + * @cliexcmd{set interface l2 flood GigabitEthernet0/8/0 disable} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (int_flood_cli, static) = { + .path = "set interface l2 flood", + .short_help = "set interface l2 flood <interface> [disable]", + .function = int_flood, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_flood.h b/src/vnet/l2/l2_flood.h new file mode 100644 index 00000000..acd7c905 --- /dev/null +++ b/src/vnet/l2/l2_flood.h @@ -0,0 +1,35 @@ +/* + * l2_flood.h : layer 2 flooding + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_l2flood_h +#define included_l2flood_h + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> + +void +l2flood_register_input_type (vlib_main_t * vm, + ethernet_type_t type, u32 node_index); +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_fwd.c b/src/vnet/l2/l2_fwd.c new file mode 100644 index 00000000..2bb7307c --- /dev/null +++ b/src/vnet/l2/l2_fwd.c @@ -0,0 +1,577 @@ +/* + * l2_fwd.c : layer 2 forwarding using l2fib + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/cli.h> + +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_bvi.h> +#include <vnet/l2/l2_fwd.h> +#include <vnet/l2/l2_fib.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/sparse_vec.h> + + +/** + * @file + * @brief Ethernet Forwarding. + * + * Code in this file handles forwarding Layer 2 packets. This file calls + * the FIB lookup, packet learning and the packet flooding as necessary. + * Packet is then sent to the next graph node. 
+ */ + +typedef struct +{ + + /* Hash table */ + BVT (clib_bihash) * mac_table; + + /* next node index for the L3 input node of each ethertype */ + next_by_ethertype_t l3_next; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} l2fwd_main_t; + +typedef struct +{ + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u32 sw_if_index; + u16 bd_index; +} l2fwd_trace_t; + +/* packet trace format function */ +static u8 * +format_l2fwd_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2fwd_trace_t *t = va_arg (*args, l2fwd_trace_t *); + + s = format (s, "l2-fwd: sw_if_index %d dst %U src %U bd_index %d", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, t->bd_index); + return s; +} + +l2fwd_main_t l2fwd_main; + +static vlib_node_registration_t l2fwd_node; + +#define foreach_l2fwd_error \ +_(L2FWD, "L2 forward packets") \ +_(FLOOD, "L2 forward misses") \ +_(HIT, "L2 forward hits") \ +_(BVI_BAD_MAC, "BVI L3 MAC mismatch") \ +_(BVI_ETHERTYPE, "BVI packet with unhandled ethertype") \ +_(FILTER_DROP, "Filter Mac Drop") \ +_(REFLECT_DROP, "Reflection Drop") \ +_(STALE_DROP, "Stale entry Drop") + +typedef enum +{ +#define _(sym,str) L2FWD_ERROR_##sym, + foreach_l2fwd_error +#undef _ + L2FWD_N_ERROR, +} l2fwd_error_t; + +static char *l2fwd_error_strings[] = { +#define _(sym,string) string, + foreach_l2fwd_error +#undef _ +}; + +typedef enum +{ + L2FWD_NEXT_L2_OUTPUT, + L2FWD_NEXT_FLOOD, + L2FWD_NEXT_DROP, + L2FWD_N_NEXT, +} l2fwd_next_t; + +/** Forward one packet based on the mac table lookup result. */ + +static_always_inline void +l2fwd_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + l2fwd_main_t * msm, + vlib_error_main_t * em, + vlib_buffer_t * b0, + u32 sw_if_index0, l2fib_entry_result_t * result0, u32 * next0) +{ + int try_flood = result0->raw == ~0; + int flood_error; + + if (PREDICT_FALSE (try_flood)) + { + flood_error = L2FWD_ERROR_FLOOD; + } + else + { + /* lookup hit, forward packet */ +#ifdef COUNTERS + em->counters[node_counter_base_index + L2FWD_ERROR_HIT] += 1; +#endif + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = result0->fields.sw_if_index; + *next0 = L2FWD_NEXT_L2_OUTPUT; + int l2fib_seq_num_valid = 1; + + /* check l2fib seq num for stale entries */ + if (!result0->fields.age_not) + { + l2fib_seq_num_t in_sn = {.as_u16 = vnet_buffer (b0)->l2.l2fib_sn }; + l2fib_seq_num_t expected_sn = { + .bd = in_sn.bd, + .swif = *l2fib_swif_seq_num (result0->fields.sw_if_index), + }; + l2fib_seq_num_valid = + expected_sn.as_u16 == result0->fields.sn.as_u16; + } + + if (PREDICT_FALSE (!l2fib_seq_num_valid)) + { + flood_error = L2FWD_ERROR_STALE_DROP; + try_flood = 1; + } + /* perform reflection check */ + else if (PREDICT_FALSE (sw_if_index0 == result0->fields.sw_if_index)) + { + b0->error = node->errors[L2FWD_ERROR_REFLECT_DROP]; + *next0 = L2FWD_NEXT_DROP; + } + /* perform filter check */ + else if (PREDICT_FALSE (result0->fields.filter)) + { + b0->error = node->errors[L2FWD_ERROR_FILTER_DROP]; + *next0 = L2FWD_NEXT_DROP; + } + /* perform BVI check */ + else if (PREDICT_FALSE (result0->fields.bvi)) + { + u32 rc; + rc = l2_to_bvi (vm, + msm->vnet_main, + b0, + vnet_buffer (b0)->sw_if_index[VLIB_TX], + &msm->l3_next, next0); + + if (PREDICT_FALSE (rc)) + { + if (rc == TO_BVI_ERR_BAD_MAC) + { + b0->error = node->errors[L2FWD_ERROR_BVI_BAD_MAC]; + *next0 = L2FWD_NEXT_DROP; + } + else if (rc == 
TO_BVI_ERR_ETHERTYPE) + { + b0->error = node->errors[L2FWD_ERROR_BVI_ETHERTYPE]; + *next0 = L2FWD_NEXT_DROP; + } + } + } + } + + /* flood */ + if (PREDICT_FALSE (try_flood)) + { + /* + * lookup miss, so flood + * TODO:replicate packet to each intf in bridge-domain + * For now just drop + */ + if (vnet_buffer (b0)->l2.feature_bitmap & L2INPUT_FEAT_UU_FLOOD) + { + *next0 = L2FWD_NEXT_FLOOD; + } + else + { + /* Flooding is disabled */ + b0->error = node->errors[flood_error]; + *next0 = L2FWD_NEXT_DROP; + } + } + +} + + +static_always_inline uword +l2fwd_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, int do_trace) +{ + u32 n_left_from, *from, *to_next; + l2fwd_next_t next_index; + l2fwd_main_t *msm = &l2fwd_main; + vlib_node_t *n = vlib_get_node (vm, l2fwd_node.index); + CLIB_UNUSED (u32 node_counter_base_index) = n->error_heap_index; + vlib_error_main_t *em = &vm->error_main; + l2fib_entry_key_t cached_key; + l2fib_entry_result_t cached_result; + + /* Clear the one-entry cache in case mac table was updated */ + cached_key.raw = ~0; + cached_result.raw = ~0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3; + ethernet_header_t *h0, *h1, *h2, *h3; + l2fib_entry_key_t key0, key1, key2, key3; + l2fib_entry_result_t result0, result1, result2, result3; + u32 bucket0, bucket1, bucket2, bucket3; + + /* Prefetch next iteration. 
*/
+	  {
+	    vlib_buffer_t *p4, *p5, *p6, *p7;
+
+	    p4 = vlib_get_buffer (vm, from[4]);
+	    p5 = vlib_get_buffer (vm, from[5]);
+	    p6 = vlib_get_buffer (vm, from[6]);
+	    p7 = vlib_get_buffer (vm, from[7]);
+
+	    vlib_prefetch_buffer_header (p4, LOAD);
+	    vlib_prefetch_buffer_header (p5, LOAD);
+	    vlib_prefetch_buffer_header (p6, LOAD);
+	    vlib_prefetch_buffer_header (p7, LOAD);
+
+	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+	  }
+
+	  /* speculatively enqueue b0 through b3 to the current next frame */
+	  /* bi is "buffer index", b is pointer to the buffer */
+	  to_next[0] = bi0 = from[0];
+	  to_next[1] = bi1 = from[1];
+	  to_next[2] = bi2 = from[2];
+	  to_next[3] = bi3 = from[3];
+	  from += 4;
+	  to_next += 4;
+	  n_left_from -= 4;
+	  n_left_to_next -= 4;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+	  b2 = vlib_get_buffer (vm, bi2);
+	  b3 = vlib_get_buffer (vm, bi3);
+
+	  /* RX interface handles */
+	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+	  sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+	  sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
+	  sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
+
+	  h0 = vlib_buffer_get_current (b0);
+	  h1 = vlib_buffer_get_current (b1);
+	  h2 = vlib_buffer_get_current (b2);
+	  h3 = vlib_buffer_get_current (b3);
+
+	  if (do_trace)
+	    {
+	      if (b0->flags & VLIB_BUFFER_IS_TRACED)
+		{
+		  l2fwd_trace_t *t =
+		    vlib_add_trace (vm, node, b0, sizeof (*t));
+		  t->sw_if_index = sw_if_index0;
+		  t->bd_index = vnet_buffer (b0)->l2.bd_index;
+		  clib_memcpy (t->src, h0->src_address, 6);
+		  clib_memcpy (t->dst, h0->dst_address, 6);
+		}
+	      if (b1->flags & VLIB_BUFFER_IS_TRACED)
+		{
+		  l2fwd_trace_t *t =
+		    vlib_add_trace (vm, node, b1, sizeof (*t));
+		  t->sw_if_index = sw_if_index1;
+		  t->bd_index = vnet_buffer (b1)->l2.bd_index;
+		  clib_memcpy (t->src, h1->src_address, 6);
+		  clib_memcpy (t->dst, h1->dst_address, 6);
+		}
+	      if (b2->flags & VLIB_BUFFER_IS_TRACED)
+		{
+		  l2fwd_trace_t *t =
+		    vlib_add_trace (vm, node, b2, sizeof (*t));
+		  t->sw_if_index = sw_if_index2;
+		  t->bd_index = vnet_buffer (b2)->l2.bd_index;
+		  clib_memcpy (t->src, h2->src_address, 6);
+		  clib_memcpy (t->dst, h2->dst_address, 6);
+		}
+	      if (b3->flags & VLIB_BUFFER_IS_TRACED)
+		{
+		  l2fwd_trace_t *t =
+		    vlib_add_trace (vm, node, b3, sizeof (*t));
+		  t->sw_if_index = sw_if_index3;
+		  t->bd_index = vnet_buffer (b3)->l2.bd_index;
+		  clib_memcpy (t->src, h3->src_address, 6);
+		  clib_memcpy (t->dst, h3->dst_address, 6);
+		}
+	    }
+
+	  /* process 4 pkts */
+#ifdef COUNTERS
+	  em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 4;
+#endif
+	  /* *INDENT-OFF* */
+	  l2fib_lookup_4 (msm->mac_table, &cached_key, &cached_result,
+			  h0->dst_address, h1->dst_address,
+			  h2->dst_address, h3->dst_address,
+			  vnet_buffer (b0)->l2.bd_index,
+			  vnet_buffer (b1)->l2.bd_index,
+			  vnet_buffer (b2)->l2.bd_index,
+			  vnet_buffer (b3)->l2.bd_index,
+			  &key0,	/* not used */
+			  &key1,	/* not used */
+			  &key2,	/* not used */
+			  &key3,	/* not used */
+			  &bucket0,	/* not used */
+			  &bucket1,	/* not used */
+			  &bucket2,	/* not used */
+			  &bucket3,	/* not used */
+			  &result0,
+			  &result1,
+			  &result2,
+			  &result3);
+	  /* *INDENT-ON* */
+	  l2fwd_process (vm, node, msm, em, b0, sw_if_index0, &result0,
+			 &next0);
+	  l2fwd_process (vm, node, msm, em, b1, sw_if_index1, &result1,
+			 &next1);
+	  l2fwd_process (vm, node, msm, em, b2, sw_if_index2, &result2,
+			 &next2);
+	  l2fwd_process (vm,
+
+	  /* verify speculative enqueues, maybe switch current next frame */
+	  /* if next0 through next3 all match next_index, nothing special needs to be done */
+	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, bi1, bi2, bi3,
+					   next0, next1, next2, next3);
+	}
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0;
+	  vlib_buffer_t *b0;
+	  u32 next0;
+	  u32 sw_if_index0;
+	  ethernet_header_t *h0;
+	  l2fib_entry_key_t key0;
+	  l2fib_entry_result_t result0;
+	  u32 bucket0;
+
+	  /* speculatively enqueue b0 to the current next frame */
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+
+	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+	  h0 = vlib_buffer_get_current (b0);
+
+	  if (do_trace && PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      l2fwd_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+	      t->sw_if_index = sw_if_index0;
+	      t->bd_index = vnet_buffer (b0)->l2.bd_index;
+	      clib_memcpy (t->src, h0->src_address, 6);
+	      clib_memcpy (t->dst, h0->dst_address, 6);
+	    }
+
+	  /* process 1 pkt */
+#ifdef COUNTERS
+	  em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 1;
+#endif
+	  l2fib_lookup_1 (msm->mac_table, &cached_key, &cached_result,
+			  h0->dst_address, vnet_buffer (b0)->l2.bd_index,
+			  &key0,	/* not used */
+			  &bucket0,	/* not used */
+			  &result0);
+	  l2fwd_process (vm, node, msm, em, b0, sw_if_index0, &result0,
+			 &next0);
+
+	  /* verify speculative enqueue, maybe switch current next frame */
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+static uword
+l2fwd_node_fn (vlib_main_t * vm,
+	       vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+    return l2fwd_node_inline (vm, node, frame, 1 /* do_trace */ );
+  return l2fwd_node_inline (vm, node, frame, 0 /* do_trace */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2fwd_node,static) = {
+  .function = l2fwd_node_fn,
+  .name = "l2-fwd",
+  .vector_size = sizeof (u32),
+  .format_trace = format_l2fwd_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(l2fwd_error_strings),
+  .error_strings = l2fwd_error_strings,
+
+  .n_next_nodes = L2FWD_N_NEXT,
+
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [L2FWD_NEXT_L2_OUTPUT] = "l2-output",
+    [L2FWD_NEXT_FLOOD] = "l2-flood",
+    [L2FWD_NEXT_DROP] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2fwd_node, l2fwd_node_fn)
+     clib_error_t *l2fwd_init (vlib_main_t * vm)
+{
+  l2fwd_main_t *mp = &l2fwd_main;
+
+  mp->vlib_main = vm;
+  mp->vnet_main = vnet_get_main ();
+
+  /* init the hash table ptr */
+  mp->mac_table = get_mac_table ();
+
+  /* Initialize the next nodes for each ethertype */
+  next_by_ethertype_init (&mp->l3_next);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (l2fwd_init);
+
+
+/** Add the L3 input node for this ethertype to the next nodes structure. */
+void
+l2fwd_register_input_type (vlib_main_t * vm,
+			   ethernet_type_t type, u32 node_index)
+{
+  l2fwd_main_t *mp = &l2fwd_main;
+  u32 next_index;
+
+  next_index = vlib_node_add_next (vm, l2fwd_node.index, node_index);
+
+  next_by_ethertype_register (&mp->l3_next, type, next_index);
+}
+
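+/*
+ * Illustrative registration (hypothetical caller, e.g. an L3 protocol's
+ * init path), using a hypothetical node index ip4_input_node_index to
+ * receive IPv4 packets that leave the L2 path here:
+ *
+ *   l2fwd_register_input_type (vm, ETHERNET_TYPE_IP4,
+ *                              ip4_input_node_index);
+ */
+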
+/**
+ * Set subinterface forward enable/disable.
+ * The CLI format is:
+ *    set interface l2 forward <interface> [disable]
+ */
+static clib_error_t *
+int_fwd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  u32 sw_if_index;
+  u32 enable;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  enable = 1;
+  if (unformat (input, "disable"))
+    {
+      enable = 0;
+    }
+
+  /* set the interface flag */
+  if (l2input_intf_config (sw_if_index)->xconnect)
+    {
+      l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_XCONNECT, enable);
+    }
+  else
+    {
+      l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_FWD, enable);
+    }
+
+done:
+  return error;
+}
+
+/*?
+ * Layer 2 unicast forwarding can be enabled and disabled on each
+ * interface and on each bridge-domain. Use this command to
+ * manage interfaces. It is enabled by default.
+ *
+ * @cliexpar
+ * Example of how to enable forwarding:
+ * @cliexcmd{set interface l2 forward GigabitEthernet0/8/0}
+ * Example of how to disable forwarding:
+ * @cliexcmd{set interface l2 forward GigabitEthernet0/8/0 disable}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_fwd_cli, static) = {
+  .path = "set interface l2 forward",
+  .short_help = "set interface l2 forward <interface> [disable]",
+  .function = int_fwd,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_fwd.h b/src/vnet/l2/l2_fwd.h
new file mode 100644
index 00000000..3968732d
--- /dev/null
+++ b/src/vnet/l2/l2_fwd.h
@@ -0,0 +1,36 @@
+/*
+ * l2_fwd.h : layer 2 forwarding using l2fib
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_l2fwd_h
+#define included_l2fwd_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+
+void
+l2fwd_register_input_type (vlib_main_t * vm,
+			   ethernet_type_t type, u32 node_index);
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c
new file mode 100644
index 00000000..3933dae5
--- /dev/null
+++ b/src/vnet/l2/l2_input.c
@@ -0,0 +1,1187 @@
+/*
+ * l2_input.c : layer 2 input packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/fib/fib_node.h> +#include <vnet/ethernet/arp_packet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_output.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_bvi.h> +#include <vnet/l2/l2_fib.h> +#include <vnet/l2/l2_bd.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + +/** + * @file + * @brief Interface Input Mode (Layer 2 Cross-Connect or Bridge / Layer 3). + * + * This file contains the CLI Commands that modify the input mode of an + * interface. For interfaces in a Layer 2 cross-connect, all packets + * received on one interface will be transmitted to the other. For + * interfaces in a bridge-domain, packets will be forwarded to other + * interfaces in the same bridge-domain based on destination mac address. + * For interfaces in Layer 3 mode, the packets will be routed. + */ + +/* Feature graph node names */ +static char *l2input_feat_names[] = { +#define _(sym,name) name, + foreach_l2input_feat +#undef _ +}; + +char ** +l2input_get_feat_names (void) +{ + return l2input_feat_names; +} + +u8 * +format_l2_input_features (u8 * s, va_list * args) +{ + static char *display_names[] = { +#define _(sym,name) #sym, + foreach_l2input_feat +#undef _ + }; + u32 feature_bitmap = va_arg (*args, u32); + + if (feature_bitmap == 0) + { + s = format (s, " none configured"); + return s; + } + + feature_bitmap &= ~L2INPUT_FEAT_DROP; /* Not a feature */ + int i; + for (i = L2INPUT_N_FEAT; i >= 0; i--) + if (feature_bitmap & (1 << i)) + s = format (s, "%10s (%s)\n", display_names[i], l2input_feat_names[i]); + return s; +} + +typedef struct +{ + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u32 next_index; + u32 sw_if_index; +} l2input_trace_t; + +/* packet trace format function */ +static u8 * +format_l2input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2input_trace_t *t = va_arg (*args, l2input_trace_t *); + + s = format (s, "l2-input: sw_if_index %d dst %U src %U", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src); + return s; +} + +l2input_main_t l2input_main; + +#define foreach_l2input_error \ +_(L2INPUT, "L2 input packets") \ +_(DROP, "L2 input drops") + +typedef enum +{ +#define _(sym,str) L2INPUT_ERROR_##sym, + foreach_l2input_error +#undef _ + L2INPUT_N_ERROR, +} l2input_error_t; + +static char *l2input_error_strings[] = { +#define _(sym,string) string, + foreach_l2input_error +#undef _ +}; + +typedef enum +{ /* */ + L2INPUT_NEXT_LEARN, + L2INPUT_NEXT_FWD, + L2INPUT_NEXT_DROP, + L2INPUT_N_NEXT, +} l2input_next_t; + + +static_always_inline void +classify_and_dispatch (l2input_main_t * msm, vlib_buffer_t * b0, u32 * next0) +{ + /* + * Load L2 input feature struct + * Load bridge domain struct + * Parse ethernet header to determine unicast/mcast/broadcast + * take L2 input stat + * classify packet as IP/UDP/TCP, control, other + * mask feature bitmap + * go to first node in bitmap + * Later: optimize VTM + * + * For L2XC, + * set tx sw-if-handle + */ + + u16 ethertype; + u8 protocol; + 
l2_input_config_t *config;
+  l2_bridge_domain_t *bd_config;
+  u16 bd_index0;
+  u32 feature_bitmap;
+  u32 feat_mask;
+  ethernet_header_t *h0;
+  u8 *l3h0;
+  u32 sw_if_index0;
+
+#define get_u16(addr) ( *((u16 *)(addr)) )
+
+  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+  h0 = vlib_buffer_get_current (b0);
+  l3h0 = (u8 *) h0 + vnet_buffer (b0)->l2.l2_len;
+
+  ethertype = clib_net_to_host_u16 (get_u16 (l3h0 - 2));
+  feat_mask = ~0;
+
+  /* Get config for the input interface */
+  config = vec_elt_at_index (msm->configs, sw_if_index0);
+
+  /* Save split horizon group */
+  vnet_buffer (b0)->l2.shg = config->shg;
+
+  /* determine layer2 kind for stat and mask */
+  if (PREDICT_FALSE (ethernet_address_cast (h0->dst_address)))
+    {
+      protocol = ((ip6_header_t *) l3h0)->protocol;
+
+      /* Disable bridge forwarding (flooding will execute instead if not xconnect) */
+      feat_mask &= ~(L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD);
+
+      /* Disable ARP-term for non-ARP and non-ICMP6 packets */
+      if (ethertype != ETHERNET_TYPE_ARP &&
+	  (ethertype != ETHERNET_TYPE_IP6 || protocol != IP_PROTOCOL_ICMP6))
+	feat_mask &= ~(L2INPUT_FEAT_ARP_TERM);
+
+      /*
+       * For packet from BVI - set SHG of ARP request or ICMPv6 neighbor
+       * solicitation packet from BVI to 0 so it can also flood to VXLAN
+       * tunnels or other ports with the same SHG as that of the BVI.
+       */
+      else if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] ==
+			      L2INPUT_BVI))
+	{
+	  if (ethertype == ETHERNET_TYPE_ARP)
+	    {
+	      ethernet_arp_header_t *arp0 = (ethernet_arp_header_t *) l3h0;
+	      if (arp0->opcode ==
+		  clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request))
+		vnet_buffer (b0)->l2.shg = 0;
+	    }
+	  else			/* must be ICMPv6 */
+	    {
+	      ip6_header_t *iph0 = (ip6_header_t *) l3h0;
+	      icmp6_neighbor_solicitation_or_advertisement_header_t *ndh0;
+	      ndh0 = ip6_next_header (iph0);
+	      if (ndh0->icmp.type == ICMP6_neighbor_solicitation)
+		vnet_buffer (b0)->l2.shg = 0;
+	    }
+	}
+    }
+  else
+    {
+      /*
+       * For packet from BVI - set SHG of unicast packet from BVI to 0 so it
+       * is not dropped on output to VXLAN tunnels or other ports with the
+       * same SHG as that of the BVI.
+       */
+      if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] ==
+			 L2INPUT_BVI))
+	vnet_buffer (b0)->l2.shg = 0;
+    }
+
+
+  if (config->bridge)
+    {
+      /* Do bridge-domain processing */
+      bd_index0 = config->bd_index;
+      /* save BD ID for next feature graph nodes */
+      vnet_buffer (b0)->l2.bd_index = bd_index0;
+
+      /* Get config for the bridge domain interface */
+      bd_config = vec_elt_at_index (msm->bd_configs, bd_index0);
+
+      /* Save bridge domain and interface seq_num */
+      /* *INDENT-OFF* */
+      l2fib_seq_num_t sn = {
+	.swif = *l2fib_swif_seq_num(sw_if_index0),
+	.bd = bd_config->seq_num,
+      };
+      /* *INDENT-ON* */
+      vnet_buffer (b0)->l2.l2fib_sn = sn.as_u16;
+      vnet_buffer (b0)->l2.bd_age = bd_config->mac_age;
+
+      /*
+       * Process bridge domain feature enables.
+       * To perform learning/flooding/forwarding, the corresponding bit
+       * must be enabled in both the input interface config and in the
+       * bridge domain config. In the bd_bitmap, bits for features other
+       * than learning/flooding/forwarding should always be set.
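+       *
+       * Illustrative values only: if the interface bitmap arrived here
+       * as (FWD | UU_FLOOD | FLOOD | LEARN | DROP) and the bridge domain
+       * has learning disabled, the AND below clears L2INPUT_FEAT_LEARN,
+       * so the l2-learn node is skipped for this packet.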
+       */
+      feat_mask = feat_mask & bd_config->feature_bitmap;
+    }
+  else if (config->xconnect)
+    {
+      /* Set the output interface */
+      vnet_buffer (b0)->sw_if_index[VLIB_TX] = config->output_sw_if_index;
+    }
+  else
+    feat_mask = L2INPUT_FEAT_DROP;
+
+  /* mask out features from bitmap using packet type and bd config */
+  feature_bitmap = config->feature_bitmap & feat_mask;
+
+  /* save for next feature graph nodes */
+  vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap;
+
+  /* Determine the next node */
+  *next0 = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
+					    feature_bitmap);
+}
+
+static_always_inline uword
+l2input_node_inline (vlib_main_t * vm,
+		     vlib_node_runtime_t * node, vlib_frame_t * frame,
+		     int do_trace)
+{
+  u32 n_left_from, *from, *to_next;
+  l2input_next_t next_index;
+  l2input_main_t *msm = &l2input_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;	/* number of packets to process */
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      /* get space to enqueue frame to graph node "next_index" */
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 8 && n_left_to_next >= 4)
+	{
+	  u32 bi0, bi1, bi2, bi3;
+	  vlib_buffer_t *b0, *b1, *b2, *b3;
+	  u32 next0, next1, next2, next3;
+	  u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
+
+	  /* Prefetch next iteration. */
+	  {
+	    vlib_buffer_t *p4, *p5, *p6, *p7;
+
+	    p4 = vlib_get_buffer (vm, from[4]);
+	    p5 = vlib_get_buffer (vm, from[5]);
+	    p6 = vlib_get_buffer (vm, from[6]);
+	    p7 = vlib_get_buffer (vm, from[7]);
+
+	    /* Prefetch the buffer header and packet for the next loop iteration */
+	    vlib_prefetch_buffer_header (p4, LOAD);
+	    vlib_prefetch_buffer_header (p5, LOAD);
+	    vlib_prefetch_buffer_header (p6, LOAD);
+	    vlib_prefetch_buffer_header (p7, LOAD);
+
+	    CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+	    CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+
+	    /*
+	     * Don't bother prefetching the bridge-domain config (which
+	     * depends on the input config above). Only a small number of
+	     * bridge domains are expected. Plus the structure is small
+	     * and several fit in a cache line.
+	     */
+	  }
+
+	  /* speculatively enqueue b0 through b3 to the current next frame */
+	  /* bi is "buffer index", b is pointer to the buffer */
+	  to_next[0] = bi0 = from[0];
+	  to_next[1] = bi1 = from[1];
+	  to_next[2] = bi2 = from[2];
+	  to_next[3] = bi3 = from[3];
+	  from += 4;
+	  to_next += 4;
+	  n_left_from -= 4;
+	  n_left_to_next -= 4;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+	  b2 = vlib_get_buffer (vm, bi2);
+	  b3 = vlib_get_buffer (vm, bi3);
+
+	  if (do_trace)
+	    {
+	      /* RX interface handles */
+	      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+	      sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+	      sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
+	      sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
+
+	      if (b0->flags & VLIB_BUFFER_IS_TRACED)
+		{
+		  ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+		  l2input_trace_t *t =
+		    vlib_add_trace (vm, node, b0, sizeof (*t));
+		  t->sw_if_index = sw_if_index0;
+		  clib_memcpy (t->src, h0->src_address, 6);
+		  clib_memcpy (t->dst, h0->dst_address, 6);
+		}
+	      if (b1->flags & VLIB_BUFFER_IS_TRACED)
+		{
+		  ethernet_header_t *h1 = vlib_buffer_get_current (b1);
+		  l2input_trace_t *t =
+		    vlib_add_trace (vm, node, b1, sizeof (*t));
+		  t->sw_if_index = sw_if_index1;
+		  clib_memcpy (t->src, h1->src_address, 6);
+		  clib_memcpy (t->dst, h1->dst_address, 6);
+		}
+	      if (b2->flags & VLIB_BUFFER_IS_TRACED)
+		{
+		  ethernet_header_t *h2 = vlib_buffer_get_current (b2);
+		  l2input_trace_t *t =
+		    vlib_add_trace (vm, node, b2, sizeof (*t));
+		  t->sw_if_index = sw_if_index2;
+		  clib_memcpy (t->src, h2->src_address, 6);
+		  clib_memcpy (t->dst, h2->dst_address, 6);
+		}
+	      if (b3->flags & VLIB_BUFFER_IS_TRACED)
+		{
+		  ethernet_header_t *h3 = vlib_buffer_get_current (b3);
+		  l2input_trace_t *t =
+		    vlib_add_trace (vm, node, b3, sizeof (*t));
+		  t->sw_if_index = sw_if_index3;
+		  clib_memcpy (t->src, h3->src_address, 6);
+		  clib_memcpy (t->dst, h3->dst_address, 6);
+		}
+	    }
+
+	  vlib_node_increment_counter (vm, l2input_node.index,
+				       L2INPUT_ERROR_L2INPUT, 4);
+
+	  classify_and_dispatch (msm, b0, &next0);
+	  classify_and_dispatch (msm, b1, &next1);
+	  classify_and_dispatch (msm, b2, &next2);
+	  classify_and_dispatch (msm, b3, &next3);
+
+	  /* verify speculative enqueues, maybe switch current next frame */
+	  /* if next0 through next3 all match next_index, nothing special needs to be done */
+	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, bi1, bi2, bi3,
+					   next0, next1, next2, next3);
+	}
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0;
+	  vlib_buffer_t *b0;
+	  u32 next0;
+	  u32 sw_if_index0;
+
+	  /* speculatively enqueue b0 to the current next frame */
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+
+	  if (do_trace && PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	    {
+	      ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+	      l2input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+	      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+	      t->sw_if_index = sw_if_index0;
+	      clib_memcpy (t->src, h0->src_address, 6);
+	      clib_memcpy (t->dst, h0->dst_address, 6);
+	    }
+
+	  vlib_node_increment_counter (vm, l2input_node.index,
+				       L2INPUT_ERROR_L2INPUT, 1);
+
+	  classify_and_dispatch (msm, b0, &next0);
+
+	  /* verify speculative enqueue, maybe switch current next frame */
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+static uword
+l2input_node_fn (vlib_main_t * vm,
+		 vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+    return l2input_node_inline (vm, node, frame, 1 /* do_trace */ );
+  return l2input_node_inline (vm, node, frame, 0 /* do_trace */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2input_node) = {
+  .function = l2input_node_fn,
+  .name = "l2-input",
+  .vector_size = sizeof (u32),
+  .format_trace = format_l2input_trace,
+  .format_buffer = format_ethernet_header_with_length,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(l2input_error_strings),
+  .error_strings = l2input_error_strings,
+
+  .n_next_nodes = L2INPUT_N_NEXT,
+
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [L2INPUT_NEXT_LEARN] = "l2-learn",
+    [L2INPUT_NEXT_FWD] = "l2-fwd",
+    [L2INPUT_NEXT_DROP] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2input_node, l2input_node_fn)
+     clib_error_t *l2input_init (vlib_main_t * vm)
+{
+  l2input_main_t *mp = &l2input_main;
+
+  mp->vlib_main = vm;
+  mp->vnet_main = vnet_get_main ();
+
+  /* Get packets RX'd from L2 interfaces */
+  ethernet_register_l2_input (vm, l2input_node.index);
+
+  /* Create the config vector */
+  vec_validate (mp->configs, 100);
+  /* create and zero sw interface config entries for indices 0 through 100 */
+
+  /* Initialize the feature next-node indexes */
+  feat_bitmap_init_next_nodes (vm,
+			       l2input_node.index,
+			       L2INPUT_N_FEAT,
+			       l2input_get_feat_names (),
+			       mp->feat_next_node_index);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (l2input_init);
+
+
+/** Get a pointer to the config for the given interface. */
+l2_input_config_t *
+l2input_intf_config (u32 sw_if_index)
+{
+  l2input_main_t *mp = &l2input_main;
+
+  vec_validate (mp->configs, sw_if_index);
+  return vec_elt_at_index (mp->configs, sw_if_index);
+}
+
+/** Enable (or disable) the feature in the bitmap for the given interface. */
+u32
+l2input_intf_bitmap_enable (u32 sw_if_index, u32 feature_bitmap, u32 enable)
+{
+  l2_input_config_t *config = l2input_intf_config (sw_if_index);
+
+  if (enable)
+    config->feature_bitmap |= feature_bitmap;
+  else
+    config->feature_bitmap &= ~feature_bitmap;
+
+  return config->feature_bitmap;
+}
+
+u32
+l2input_set_bridge_features (u32 bd_index, u32 feat_mask, u32 feat_value)
+{
+  l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index);
+  bd_validate (bd_config);
+  bd_config->feature_bitmap =
+    (bd_config->feature_bitmap & ~feat_mask) | feat_value;
+  return bd_config->feature_bitmap;
+}
+
+/**
+ * Set the subinterface to run in l2 or l3 mode.
+ * For L3 mode, just the sw_if_index is specified.
+ * For bridged mode, the bd id and bvi flag are also specified.
+ * For xconnect mode, the peer sw_if_index is also specified.
+ * Return 0 if ok, or non-0 if there was an error.
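+ *
+ * For example, a hypothetical caller placing an interface into bridge
+ * mode with no BVI and the default split-horizon group might invoke:
+ *
+ *   rc = set_int_l2_mode (vm, vnm, MODE_L2_BRIDGE, sw_if_index,
+ *                         bd_index, 0, 0, 0);
+ *
+ * where a non-zero rc is one of the MODE_ERROR_* codes.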
+ */ + +u32 +set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ + u32 mode, /* One of L2 modes or back to L3 mode */ + u32 sw_if_index, /* sw interface index */ + u32 bd_index, /* for bridged interface */ + u32 bvi, /* the bridged interface is the BVI */ + u32 shg, /* the bridged interface split horizon group */ + u32 xc_sw_if_index) /* peer interface for xconnect */ +{ + l2input_main_t *mp = &l2input_main; + l2output_main_t *l2om = &l2output_main; + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi; + l2_output_config_t *out_config; + l2_input_config_t *config; + l2_bridge_domain_t *bd_config; + u64 mac; + i32 l2_if_adjust = 0; + u32 slot; + + hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index); + config = l2input_intf_config (sw_if_index); + + if (config->bridge) + { + /* Interface is already in bridge mode. Undo the existing config. */ + bd_config = vec_elt_at_index (mp->bd_configs, config->bd_index); + + /* remove interface from flood vector */ + bd_remove_member (bd_config, sw_if_index); + + /* undo any BVI-related config */ + if (bd_config->bvi_sw_if_index == sw_if_index) + { + bd_config->bvi_sw_if_index = ~0; + config->bvi = 0; + + /* delete the l2fib entry for the bvi interface */ + mac = *((u64 *) hi->hw_address); + l2fib_del_entry (mac, config->bd_index); + + /* Make loop output node send packet back to ethernet-input node */ + slot = + vlib_node_add_named_next_with_slot (vm, hi->tx_node_index, + "ethernet-input", + VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); + ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); + } + + /* Clear MACs learned on the interface */ + if ((config->feature_bitmap & L2INPUT_FEAT_LEARN) || + (bd_config->feature_bitmap & L2INPUT_FEAT_LEARN)) + l2fib_flush_int_mac (vm, sw_if_index); + + l2_if_adjust--; + } + else if (config->xconnect) + { + l2_if_adjust--; + } + + /* Make sure vector is big enough */ + vec_validate_init_empty (l2om->output_node_index_vec, sw_if_index, + L2OUTPUT_NEXT_DROP); + + /* Initialize the l2-input configuration for the interface */ + if (mode == MODE_L3) + { + /* Set L2 config to BD index 0 so that if any packet accidentally + * came in on L2 path, it will be dropped in BD 0 */ + config->xconnect = 0; + config->bridge = 0; + config->shg = 0; + config->bd_index = 0; + config->feature_bitmap = L2INPUT_FEAT_DROP; + + /* Clear L2 output config */ + out_config = l2output_intf_config (sw_if_index); + memset (out_config, 0, sizeof (l2_output_config_t)); + + /* Make sure any L2-output packet to this interface now in L3 mode is + * dropped. This may happen if L2 FIB MAC entry is stale */ + l2om->output_node_index_vec[sw_if_index] = L2OUTPUT_NEXT_BAD_INTF; + } + else + { + /* Add or update l2-output node next-arc and output_node_index_vec table + * for the interface */ + l2output_create_output_node_mapping (vm, vnet_main, sw_if_index); + + if (mode == MODE_L2_BRIDGE) + { + /* + * Remove a check that the interface must be an Ethernet. + * Specifically so we can bridge to L3 tunnel interfaces. 
+	       * Here's the check:
+	       *    if (hi->hw_class_index != ethernet_hw_interface_class.index)
+	       *
+	       */
+	      if (!hi)
+		return MODE_ERROR_ETH;	/* non-ethernet */
+
+	      config->xconnect = 0;
+	      config->bridge = 1;
+	      config->bd_index = bd_index;
+	      *l2fib_valid_swif_seq_num (sw_if_index) += 1;
+
+	      /*
+	       * Enable forwarding, flooding, learning and ARP termination by default
+	       * (note that ARP term is disabled on BD feature bitmap by default)
+	       */
+	      config->feature_bitmap |= L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD |
+		L2INPUT_FEAT_FLOOD | L2INPUT_FEAT_LEARN | L2INPUT_FEAT_ARP_TERM;
+
+	      /* Make sure last-chance drop is configured */
+	      config->feature_bitmap |= L2INPUT_FEAT_DROP;
+
+	      /* Make sure xconnect is disabled */
+	      config->feature_bitmap &= ~L2INPUT_FEAT_XCONNECT;
+
+	      /* Set up bridge domain */
+	      bd_config = l2input_bd_config (bd_index);
+	      bd_validate (bd_config);
+
+	      /* TODO: think: add l2fib entry even for non-bvi interface? */
+
+	      /* Do BVI interface initializations */
+	      if (bvi)
+		{
+		  /* ensure BD has no bvi interface (or replace that one with this??) */
+		  if (bd_config->bvi_sw_if_index != ~0)
+		    {
+		      return MODE_ERROR_BVI_DEF;	/* bd already has a bvi interface */
+		    }
+		  bd_config->bvi_sw_if_index = sw_if_index;
+		  config->bvi = 1;
+
+		  /* create the l2fib entry for the bvi interface */
+		  mac = *((u64 *) hi->hw_address);
+		  l2fib_add_fwd_entry (mac, bd_index, sw_if_index, 1, 1);	/* static + bvi */
+
+		  /* Disable learning by default. no use since l2fib entry is static. */
+		  config->feature_bitmap &= ~L2INPUT_FEAT_LEARN;
+
+		  /* Make loop output node send packet to l2-input node */
+		  slot =
+		    vlib_node_add_named_next_with_slot (vm, hi->tx_node_index,
+							"l2-input",
+							VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+		  ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+		}
+
+	      /* Add interface to bridge-domain flood vector */
+	      l2_flood_member_t member = {
+		.sw_if_index = sw_if_index,
+		.flags = bvi ? L2_FLOOD_MEMBER_BVI : L2_FLOOD_MEMBER_NORMAL,
+		.shg = shg,
+	      };
+	      bd_add_member (bd_config, &member);
+
+	    }
+	  else if (mode == MODE_L2_XC)
+	    {
+	      config->xconnect = 1;
+	      config->bridge = 0;
+	      config->output_sw_if_index = xc_sw_if_index;
+
+	      /* Make sure last-chance drop is configured */
+	      config->feature_bitmap |= L2INPUT_FEAT_DROP;
+
+	      /* Make sure bridging features are disabled */
+	      config->feature_bitmap &=
+		~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD);
+
+	      config->feature_bitmap |= L2INPUT_FEAT_XCONNECT;
+	      shg = 0;		/* not used in xconnect */
+	    }
+	  else if (mode == MODE_L2_CLASSIFY)
+	    {
+	      config->xconnect = 1;
+	      config->bridge = 0;
+	      config->output_sw_if_index = xc_sw_if_index;
+
+	      /* Make sure last-chance drop is configured */
+	      config->feature_bitmap |=
+		L2INPUT_FEAT_DROP | L2INPUT_FEAT_INPUT_CLASSIFY;
+
+	      /* Make sure bridging features are disabled */
+	      config->feature_bitmap &=
+		~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD);
+	      shg = 0;		/* not used in xconnect */
+
+	      /* Ensure all packets go to ethernet-input */
+	      ethernet_set_rx_redirect (vnet_main, hi, 1);
+	    }
+
+	  /* set up split-horizon group and set output feature bit */
+	  config->shg = shg;
+	  out_config = l2output_intf_config (sw_if_index);
+	  out_config->shg = shg;
+	  out_config->feature_bitmap |= L2OUTPUT_FEAT_OUTPUT;
+
+	  /*
+	   * Test: remove this when non-IP features can be configured.
+	   * Enable a non-IP feature to test IP feature masking
+	   * config->feature_bitmap |= L2INPUT_FEAT_CTRL_PKT;
+	   */
+
+	  l2_if_adjust++;
+	}
+
+      /* Adjust count of L2 interfaces */
+      hi->l2_if_count += l2_if_adjust;
+
+      if (hi->hw_class_index == ethernet_hw_interface_class.index)
+	{
+	  if ((hi->l2_if_count == 1) && (l2_if_adjust == 1))
+	    {
+	      /* Just added first L2 interface on this port */
+
+	      /* Set promiscuous mode on the l2 interface */
+	      ethernet_set_flags (vnet_main, hi->hw_if_index,
+				  ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
+
+	      /* ensure all packets go to ethernet-input */
+	      ethernet_set_rx_redirect (vnet_main, hi, 1);
+
+	    }
+	  else if ((hi->l2_if_count == 0) && (l2_if_adjust == -1))
+	    {
+	      /* Just removed only L2 subinterface on this port */
+
+	      /* Disable promiscuous mode on the l2 interface */
+	      ethernet_set_flags (vnet_main, hi->hw_if_index, 0);
+
+	      /* Allow ip packets to go directly to ip4-input etc */
+	      ethernet_set_rx_redirect (vnet_main, hi, 0);
+	    }
+	}
+
+      /* Set up the L2/L3 flag in the interface parsing tables */
+      ethernet_sw_interface_set_l2_mode (vnm, sw_if_index, (mode != MODE_L3));
+
+      return 0;
+}
+
+/**
+ * Set subinterface in bridging mode with a bridge-domain ID.
+ * The CLI format is:
+ *    set interface l2 bridge <interface> <bd> [bvi] [split-horizon-group]
+ */
+static clib_error_t *
+int_l2_bridge (vlib_main_t * vm,
+	       unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  u32 bd_index, bd_id;
+  u32 sw_if_index;
+  u32 bvi;
+  u32 rc;
+  u32 shg;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  if (!unformat (input, "%d", &bd_id))
+    {
+      error = clib_error_return (0, "expected bridge domain ID `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  if (bd_id > L2_BD_ID_MAX)
+    {
+      error = clib_error_return (0, "bridge domain ID exceeds 16M limit",
+				 format_unformat_error, input);
+      goto done;
+    }
+  bd_index = bd_find_or_add_bd_index (&bd_main, bd_id);
+
+  /* optional bvi */
+  bvi = unformat (input, "bvi");
+
+  /* optional split horizon group */
+  shg = 0;
+  (void) unformat (input, "%d", &shg);
+
+  /* set the interface mode */
+  if ((rc =
+       set_int_l2_mode (vm, vnm, MODE_L2_BRIDGE, sw_if_index, bd_index, bvi,
+			shg, 0)))
+    {
+      if (rc == MODE_ERROR_ETH)
+	{
+	  error = clib_error_return (0, "bridged interface must be ethernet",
+				     format_unformat_error, input);
+	}
+      else if (rc == MODE_ERROR_BVI_DEF)
+	{
+	  error =
+	    clib_error_return (0, "bridge-domain already has a bvi interface",
+			       format_unformat_error, input);
+	}
+      else
+	{
+	  error = clib_error_return (0, "invalid configuration for interface",
+				     format_unformat_error, input);
+	}
+      goto done;
+    }
+
+done:
+  return error;
+}
+
+/*?
+ * Use this command to put an interface into a Layer 2 bridge domain. If a
+ * bridge-domain with the provided bridge-domain-id does not exist, it
+ * will be created. Interfaces in a bridge-domain forward packets to
+ * other interfaces in the same bridge-domain based on destination mac
+ * address. To remove an interface from the Layer 2 bridge domain,
+ * put the interface in a different mode, for example Layer 3 mode.
+ *
+ * Optionally, an interface can be added to a Layer 2 bridge-domain as
+ * a Bridged Virtual Interface (bvi). Only one interface in a Layer 2
+ * bridge-domain can be a bvi.
+ *
+ * Optionally, a split-horizon group can also be specified.
+ * This defaults to 0 if not specified.
+ *
+ * @cliexpar
+ * Example of how to configure a Layer 2 bridge-domain with three
+ * interfaces (where 200 is the bridge-domain-id):
+ * @cliexcmd{set interface l2 bridge GigabitEthernet0/8/0.200 200}
+ * This interface is added as a BVI interface:
+ * @cliexcmd{set interface l2 bridge GigabitEthernet0/9/0.200 200 bvi}
+ * This interface also has a split-horizon group of 1 specified:
+ * @cliexcmd{set interface l2 bridge GigabitEthernet0/a/0.200 200 1}
+ * Example of how to remove an interface from a Layer2 bridge-domain:
+ * @cliexcmd{set interface l3 GigabitEthernet0/a/0.200}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_bridge_cli, static) = {
+  .path = "set interface l2 bridge",
+  .short_help = "set interface l2 bridge <interface> <bridge-domain-id> [bvi] [shg]",
+  .function = int_l2_bridge,
+};
+/* *INDENT-ON* */
+
+/**
+ * Set subinterface in xconnect mode with another interface.
+ * The CLI format is:
+ *    set interface l2 xconnect <interface> <peer interface>
+ */
+static clib_error_t *
+int_l2_xc (vlib_main_t * vm,
+	   unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  u32 sw_if_index;
+  u32 xc_sw_if_index;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  if (!unformat_user
+      (input, unformat_vnet_sw_interface, vnm, &xc_sw_if_index))
+    {
+      error = clib_error_return (0, "unknown peer interface `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  /* set the interface mode */
+  if (set_int_l2_mode
+      (vm, vnm, MODE_L2_XC, sw_if_index, 0, 0, 0, xc_sw_if_index))
+    {
+      error = clib_error_return (0, "invalid configuration for interface",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+done:
+  return error;
+}
+
+/*?
+ * Use this command to put an interface into Layer 2 cross-connect mode.
+ * Both interfaces must be in this mode for bi-directional traffic. All
+ * packets received on one interface will be transmitted to the other.
+ * To remove the Layer 2 cross-connect, put the interface in a different
+ * mode, for example Layer 3 mode.
+ *
+ * @cliexpar
+ * Example of how to configure a Layer2 cross-connect between two interfaces:
+ * @cliexcmd{set interface l2 xconnect GigabitEthernet0/8/0.300 GigabitEthernet0/9/0.300}
+ * @cliexcmd{set interface l2 xconnect GigabitEthernet0/9/0.300 GigabitEthernet0/8/0.300}
+ * Example of how to remove a Layer2 cross-connect:
+ * @cliexcmd{set interface l3 GigabitEthernet0/8/0.300}
+ * @cliexcmd{set interface l3 GigabitEthernet0/9/0.300}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_xc_cli, static) = {
+  .path = "set interface l2 xconnect",
+  .short_help = "set interface l2 xconnect <interface> <peer interface>",
+  .function = int_l2_xc,
+};
+/* *INDENT-ON* */
+
+/**
+ * Set subinterface in L3 mode.
+ * The CLI format is:
+ *    set interface l3 <interface>
+ */
+static clib_error_t *
+int_l3 (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  u32 sw_if_index;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+  /* set the interface mode */
+  if (set_int_l2_mode (vm, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0))
+    {
+      error = clib_error_return (0, "invalid configuration for interface",
+				 format_unformat_error, input);
+      goto done;
+    }
+
+done:
+  return error;
+}
+
+/*?
+ * Modify the packet processing mode of the interface to Layer 3, which
+ * implies packets will be routed. This is the default mode of an interface.
+ * Use this command to remove an interface from a Layer 2 cross-connect or a
+ * Layer 2 bridge.
+ *
+ * @cliexpar
+ * Example of how to set the mode of an interface to Layer 3:
+ * @cliexcmd{set interface l3 GigabitEthernet0/8/0.200}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l3_cli, static) = {
+  .path = "set interface l3",
+  .short_help = "set interface l3 <interface>",
+  .function = int_l3,
+};
+/* *INDENT-ON* */
+
+/**
+ * Show interface mode.
+ * The CLI format is:
+ *    show mode [<if-name1> <if-name2> ...]
+ */
+static clib_error_t *
+show_int_mode (vlib_main_t * vm,
+	       unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  char *mode;
+  u8 *args;
+  vnet_interface_main_t *im = &vnm->interface_main;
+
+  vnet_sw_interface_t *si, *sis = 0;
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      u32 sw_if_index;
+
+      /* See if user wants to show specific interface */
+      if (unformat
+	  (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+	{
+	  si = pool_elt_at_index (im->sw_interfaces, sw_if_index);
+	  vec_add1 (sis, si[0]);
+	}
+      else
+	{
+	  error = clib_error_return (0, "unknown input `%U'",
+				     format_unformat_error, input);
+	  goto done;
+	}
+    }
+
+  if (vec_len (sis) == 0)	/* Get all interfaces */
+    {
+      /* Gather interfaces. */
+      sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
+      _vec_len (sis) = 0;
+      /* *INDENT-OFF* */
+      pool_foreach (si, im->sw_interfaces, ({ vec_add1 (sis, si[0]); }));
+      /* *INDENT-ON* */
+    }
+
+  vec_foreach (si, sis)
+  {
+    l2_input_config_t *config = l2input_intf_config (si->sw_if_index);
+    if (config->bridge)
+      {
+	u32 bd_id;
+	mode = "l2 bridge";
+	bd_id = l2input_main.bd_configs[config->bd_index].bd_id;
+
+	args = format (0, "bd_id %d%s%d", bd_id,
+		       config->bvi ? " bvi shg " : " shg ", config->shg);
+      }
+    else if (config->xconnect)
+      {
+	mode = "l2 xconnect";
+	args = format (0, "%U",
+		       format_vnet_sw_if_index_name,
+		       vnm, config->output_sw_if_index);
+      }
+    else
+      {
+	mode = "l3";
+	args = format (0, " ");
+      }
+    vlib_cli_output (vm, "%s %U %v\n",
+		     mode,
+		     format_vnet_sw_if_index_name,
+		     vnm, si->sw_if_index, args);
+    vec_free (args);
+  }
+
+done:
+  vec_free (sis);
+
+  return error;
+}
+
+/*?
+ * Show the packet processing mode (Layer 2 cross-connect, Layer 2 bridge,
+ * Layer 3 routed) of all interfaces and sub-interfaces, or limit the
+ * output to just the provided list of interfaces and sub-interfaces.
+ * The output shows the mode, the interface, and if the interface is
+ * a member of a bridge, the bridge-domain-id and the split horizon group (shg).
+ *
+ * @cliexpar
+ * Example of displaying the mode of all interfaces:
+ * @cliexstart{show mode}
+ * l3 local0
+ * l3 GigabitEthernet0/8/0
+ * l3 GigabitEthernet0/9/0
+ * l3 GigabitEthernet0/a/0
+ * l2 bridge GigabitEthernet0/8/0.200 bd_id 200 shg 0
+ * l2 bridge GigabitEthernet0/9/0.200 bd_id 200 shg 0
+ * l2 bridge GigabitEthernet0/a/0.200 bd_id 200 shg 0
+ * l2 xconnect GigabitEthernet0/8/0.300 GigabitEthernet0/9/0.300
+ * l2 xconnect GigabitEthernet0/9/0.300 GigabitEthernet0/8/0.300
+ * @cliexend
+ * Example of displaying the mode of a selected list of interfaces:
+ * @cliexstart{show mode GigabitEthernet0/8/0 GigabitEthernet0/8/0.200}
+ * l3 GigabitEthernet0/8/0
+ * l2 bridge GigabitEthernet0/8/0.200 bd_id 200 shg 0
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_l2_mode, static) = {
+  .path = "show mode",
+  .short_help = "show mode [<if-name1> <if-name2> ...]",
+  .function = show_int_mode,
+};
+/* *INDENT-ON* */
+
+#define foreach_l2_init_function	\
+_(feat_bitmap_drop_init)		\
+_(l2fib_init)				\
+_(l2_input_classify_init)		\
+_(l2bd_init)				\
+_(l2fwd_init)				\
+_(l2_inacl_init)			\
+_(l2input_init)				\
+_(l2_vtr_init)				\
+_(l2_invtr_init)			\
+_(l2_efp_filter_init)			\
+_(l2learn_init)				\
+_(l2flood_init)				\
+_(l2_outacl_init)			\
+_(l2output_init)			\
+_(l2_patch_init)			\
+_(l2_xcrw_init)
+
+clib_error_t *
+l2_init (vlib_main_t * vm)
+{
+  clib_error_t *error;
+
+#define _(a) do {                                                       \
+    if ((error = vlib_call_init_function (vm, a))) return error; }     \
+while (0);
+  foreach_l2_init_function;
+#undef _
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_init);
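+
+/*
+ * The _() macro above expands each entry of foreach_l2_init_function
+ * to (illustrative expansion of the first entry):
+ *
+ *   do {
+ *     if ((error = vlib_call_init_function (vm, feat_bitmap_drop_init)))
+ *       return error;
+ *   } while (0);
+ */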
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h
new file mode 100644
index 00000000..e8a6c776
--- /dev/null
+++ b/src/vnet/l2/l2_input.h
@@ -0,0 +1,289 @@
+/*
+ * l2_input.h : layer 2 input packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_l2_input_h
+#define included_vnet_l2_input_h
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip.h>
+
+/* Per-subinterface L2 feature configuration */
+
+typedef struct
+{
+
+  union
+  {
+    u16 bd_index;		/* bridge domain index */
+    u32 output_sw_if_index;	/* for xconnect */
+  };
+
+  /* Interface mode. If both are 0, this interface is in L3 mode */
+  u8 xconnect;
+  u8 bridge;
+
+  /* this is the bvi interface for the bridge-domain */
+  u8 bvi;
+
+  /* config for which input features are configured on this interface */
+  u32 feature_bitmap;
+
+  /* some of these flags are also in the feature bitmap */
+  u8 learn_enable;
+  u8 fwd_enable;
+  u8 flood_enable;
+
+  /* split horizon group */
+  u8 shg;
+
+} l2_input_config_t;
+
+
+typedef struct
+{
+
+  /* Next nodes for the feature bitmap */
+  u32 feat_next_node_index[32];
+
+  /* config vector indexed by sw_if_index */
+  l2_input_config_t *configs;
+
+  /* bridge domain config vector indexed by bd_index */
+  l2_bridge_domain_t *bd_configs;
+
+  /* convenience variables */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+} l2input_main_t;
+
+extern l2input_main_t l2input_main;
+
+extern vlib_node_registration_t l2input_node;
+
+static_always_inline l2_bridge_domain_t *
+l2input_bd_config_from_index (l2input_main_t * l2im, u32 bd_index)
+{
+  l2_bridge_domain_t *bd_config;
+
+  bd_config = vec_elt_at_index (l2im->bd_configs, bd_index);
+  return bd_is_valid (bd_config) ? bd_config : NULL;
+}
+
+static_always_inline l2_bridge_domain_t *
+l2input_bd_config (u32 bd_index)
+{
+  l2input_main_t *mp = &l2input_main;
+  l2_bridge_domain_t *bd_config;
+
+  vec_validate (mp->bd_configs, bd_index);
+  bd_config = vec_elt_at_index (mp->bd_configs, bd_index);
+  return bd_config;
+}
+
+/* L2 input indication that the packet came from the BVI, using -2 */
+#define L2INPUT_BVI ((u32) (~0-1))
+
+/* L2 input features */
+
+/* Mappings from feature ID to graph node name in reverse order */
+#define foreach_l2input_feat \
+ _(DROP, "feature-bitmap-drop") \
+ _(XCONNECT, "l2-output") \
+ _(FLOOD, "l2-flood") \
+ _(ARP_TERM, "arp-term-l2bd") \
+ _(UU_FLOOD, "l2-flood") \
+ _(FWD, "l2-fwd") \
+ _(RW, "l2-rw") \
+ _(LEARN, "l2-learn") \
+ _(VTR, "l2-input-vtr") \
+ _(VPATH, "vpath-input-l2") \
+ _(ACL, "l2-input-acl") \
+ _(POLICER_CLAS, "l2-policer-classify") \
+ _(INPUT_CLASSIFY, "l2-input-classify") \
+ _(SPAN, "span-l2-input")
+
+/* Feature bitmap positions */
+typedef enum
+{
+#define _(sym,str) L2INPUT_FEAT_##sym##_BIT,
+  foreach_l2input_feat
+#undef _
+  L2INPUT_N_FEAT
+} l2input_feat_t;
+
+STATIC_ASSERT (L2INPUT_N_FEAT <= 32, "too many l2 input features");
+
+/* Feature bit masks */
+typedef enum
+{
+#define _(sym,str) L2INPUT_FEAT_##sym = (1<<L2INPUT_FEAT_##sym##_BIT),
+  foreach_l2input_feat
+#undef _
+  L2INPUT_VALID_MASK =
+#define _(sym,str) L2INPUT_FEAT_##sym |
+  foreach_l2input_feat
+#undef _
+  0
+} l2input_feat_masks_t;
+
+STATIC_ASSERT ((u64) L2INPUT_VALID_MASK == (1ull << L2INPUT_N_FEAT) - 1, "");
+
+/** Return an array of strings containing graph node names of each feature */
+char **l2input_get_feat_names (void);
+
+/* arg0 - u32 feature_bitmap */
+u8 *format_l2_input_features (u8 * s, va_list * args);
+
+static_always_inline u8
+bd_feature_flood (l2_bridge_domain_t * bd_config)
+{
+  return ((bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD) ==
+	  L2INPUT_FEAT_FLOOD);
+}
+
+static_always_inline u8
+bd_feature_uu_flood (l2_bridge_domain_t * bd_config)
+{
+  return ((bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD) ==
+	  L2INPUT_FEAT_UU_FLOOD);
+}
+
+static_always_inline u8
+bd_feature_forward (l2_bridge_domain_t * bd_config)
+{
+  return ((bd_config->feature_bitmap & L2INPUT_FEAT_FWD) == L2INPUT_FEAT_FWD);
+}
+
+static_always_inline u8
+bd_feature_learn (l2_bridge_domain_t * bd_config)
+{
+  return ((bd_config->feature_bitmap & L2INPUT_FEAT_LEARN) ==
+	  L2INPUT_FEAT_LEARN);
+}
+
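+/*
+ * Illustrative use of the predicates above (hypothetical caller):
+ *
+ *   l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index);
+ *   if (bd_feature_learn (bd_config) && bd_feature_forward (bd_config))
+ *     ... both learning and forwarding are enabled on this BD ...
+ */
+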
+static_always_inline u8
+bd_feature_arp_term (l2_bridge_domain_t * bd_config)
+{
+  return ((bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM) ==
+	  L2INPUT_FEAT_ARP_TERM);
+}
+
+/** Masks for eliminating features that do not apply to a packet */
+
+/** Get a pointer to the config for the given interface */
+l2_input_config_t *l2input_intf_config (u32 sw_if_index);
+
+/* Enable (or disable) the feature in the bitmap for the given interface */
+u32 l2input_intf_bitmap_enable (u32 sw_if_index,
+				u32 feature_bitmap, u32 enable);
+
+/* Set or clear feature flags for a bridge domain */
+u32 l2input_set_bridge_features (u32 bd_index, u32 feat_mask, u32 feat_value);
+
+
+#define MODE_L3 0
+#define MODE_L2_BRIDGE 1
+#define MODE_L2_XC 2
+#define MODE_L2_CLASSIFY 3
+
+#define MODE_ERROR_ETH 1
+#define MODE_ERROR_BVI_DEF 2
+
+u32 set_int_l2_mode (vlib_main_t * vm,
+		     vnet_main_t * vnet_main,
+		     u32 mode,
+		     u32 sw_if_index,
+		     u32 bd_index, u32 bvi, u32 shg, u32 xc_sw_if_index);
+
+static inline void
+vnet_update_l2_len (vlib_buffer_t * b)
+{
+  ethernet_header_t *eth;
+  u16 ethertype;
+  u8 vlan_count = 0;
+
+  /* point at current l2 hdr */
+  eth = vlib_buffer_get_current (b);
+
+  /*
+   * l2-output pays no attention to this
+   * but the tag push/pop code on an l2 subif needs it.
+   *
+   * Determine l2 header len, check for up to 2 vlans
+   */
+  vnet_buffer (b)->l2.l2_len = sizeof (ethernet_header_t);
+  ethertype = clib_net_to_host_u16 (eth->type);
+  if (ethernet_frame_is_tagged (ethertype))
+    {
+      ethernet_vlan_header_t *vlan;
+      vnet_buffer (b)->l2.l2_len += sizeof (*vlan);
+      vlan_count = 1;
+      vlan = (void *) (eth + 1);
+      ethertype = clib_net_to_host_u16 (vlan->type);
+      if (ethertype == ETHERNET_TYPE_VLAN)
+	{
+	  vnet_buffer (b)->l2.l2_len += sizeof (*vlan);
+	  vlan_count = 2;
+	}
+    }
+  ethernet_buffer_set_vlan_count (b, vlan_count);
+}
+
+/*
+ * Compute the flow hash of an ethernet packet, using the 5-tuple hash if
+ * the L3 packet is ip4 or ip6. Otherwise hash on smac/dmac/etype.
+ * The vlib buffer current pointer is expected to be at the ethernet header
+ * and vnet l2.l2_len is expected to be set up already.
+ */
+static inline u32
+vnet_l2_compute_flow_hash (vlib_buffer_t * b)
+{
+  ethernet_header_t *eh = vlib_buffer_get_current (b);
+  u8 *l3h = (u8 *) eh + vnet_buffer (b)->l2.l2_len;
+  u16 ethertype = clib_net_to_host_u16 (*(u16 *) (l3h - 2));
+
+  if (ethertype == ETHERNET_TYPE_IP4)
+    return ip4_compute_flow_hash ((ip4_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
+  else if (ethertype == ETHERNET_TYPE_IP6)
+    return ip6_compute_flow_hash ((ip6_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
+  else
+    {
+      u32 a, b, c;
+      u32 *ap = (u32 *) & eh->dst_address[2];
+      u32 *bp = (u32 *) & eh->src_address[2];
+      a = *ap;
+      b = *bp;
+      c = ethertype;
+      hash_v3_mix32 (a, b, c);
+      hash_v3_finalize32 (a, b, c);
+      return c;
+    }
+}
+
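+/*
+ * Illustrative flow-hash use (hypothetical caller, e.g. selecting a
+ * member link in a load-balanced L2 output path):
+ *
+ *   vnet_update_l2_len (b0);   ... sets vnet_buffer(b0)->l2.l2_len ...
+ *   hash = vnet_l2_compute_flow_hash (b0);
+ *   member = hash % n_members;
+ */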
+#endif
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_input_acl.c b/src/vnet/l2/l2_input_acl.c
new file mode 100644
index 00000000..84030888
--- /dev/null
+++ b/src/vnet/l2/l2_input_acl.c
@@ -0,0 +1,431 @@
+/*
+ * l2_input_acl.c : layer 2 input acl processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+
+typedef struct
+{
+
+  /* Next nodes for each feature */
+  u32 feat_next_node_index[32];
+
+  /* convenience variables */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+} l2_inacl_main_t;
+
+typedef struct
+{
+  u32 sw_if_index;
+  u32 next_index;
+  u32 table_index;
+  u32 offset;
+} l2_inacl_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_inacl_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  l2_inacl_trace_t *t = va_arg (*args, l2_inacl_trace_t *);
+
+  s = format (s, "INACL: sw_if_index %d, next_index %d, table %d, offset %d",
+	      t->sw_if_index, t->next_index, t->table_index, t->offset);
+  return s;
+}
+
+l2_inacl_main_t l2_inacl_main;
+
+static vlib_node_registration_t l2_inacl_node;
+
+#define foreach_l2_inacl_error                  \
+_(NONE, "valid input ACL packets")              \
+_(MISS, "input ACL misses")                     \
+_(HIT, "input ACL hits")                        \
+_(CHAIN_HIT, "input ACL hits after chain walk") \
+_(TABLE_MISS, "input ACL table-miss drops")     \
+_(SESSION_DENY, "input ACL session deny drops")
+
+
+typedef enum
+{
+#define _(sym,str) L2_INACL_ERROR_##sym,
+  foreach_l2_inacl_error
+#undef _
+  L2_INACL_N_ERROR,
+} l2_inacl_error_t;
+
+static char *l2_inacl_error_strings[] = {
+#define _(sym,string) string,
+  foreach_l2_inacl_error
+#undef _
+};
+
+static uword
+l2_inacl_node_fn (vlib_main_t * vm,
+		  vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, *to_next;
+  acl_next_index_t next_index;
+  l2_inacl_main_t *msm = &l2_inacl_main;
+  input_acl_main_t *am = &input_acl_main;
+  vnet_classify_main_t *vcm = am->vnet_classify_main;
+  input_acl_table_id_t tid = INPUT_ACL_TABLE_L2;
+  f64 now = vlib_time_now (vm);
+  u32 hits = 0;
+  u32 misses = 0;
+  u32 chain_hits = 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;	/* number of packets to process */
+  next_index = node->cached_next_index;
+
+  /* First pass: compute hashes */
+  while (n_left_from > 2)
+    {
+      vlib_buffer_t *b0, *b1;
+      u32 bi0, bi1;
+      u8 *h0, *h1;
+      u32 sw_if_index0, sw_if_index1;
+      u32 table_index0, table_index1;
+      vnet_classify_table_t *t0, *t1;
+
+      /* prefetch next iteration */
+      {
+	vlib_buffer_t *p1, *p2;
+
+	p1 = vlib_get_buffer (vm, from[1]);
+	p2 = vlib_get_buffer (vm, from[2]);
+
+	vlib_prefetch_buffer_header (p1, STORE);
+	CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+	vlib_prefetch_buffer_header (p2, STORE);
+	CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+      }
+
+      bi0 = from[0];
+      b0 = vlib_get_buffer (vm, bi0);
+
+      bi1 =
from[1]; + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + table_index0 = + am->classify_table_index_by_sw_if_index[tid][sw_if_index0]; + + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + table_index1 = + am->classify_table_index_by_sw_if_index[tid][sw_if_index1]; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + + t1 = pool_elt_at_index (vcm->tables, table_index1); + + if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA) + h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset; + else + h0 = b0->data; + + vnet_buffer (b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash); + + if (t1->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA) + h1 = (void *) vlib_buffer_get_current (b1) + t1->current_data_offset; + else + h1 = b1->data; + + vnet_buffer (b1)->l2_classify.hash = + vnet_classify_hash_packet (t1, (u8 *) h1); + + vnet_classify_prefetch_bucket (t1, vnet_buffer (b1)->l2_classify.hash); + + vnet_buffer (b0)->l2_classify.table_index = table_index0; + + vnet_buffer (b1)->l2_classify.table_index = table_index1; + + from += 2; + n_left_from -= 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t *b0; + u32 bi0; + u8 *h0; + u32 sw_if_index0; + u32 table_index0; + vnet_classify_table_t *t0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + table_index0 = + am->classify_table_index_by_sw_if_index[tid][sw_if_index0]; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + + if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA) + h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset; + else + h0 = b0->data; + + vnet_buffer (b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_buffer (b0)->l2_classify.table_index = table_index0; + vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash); + + from++; + n_left_from--; + } + + next_index = node->cached_next_index; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Not enough load/store slots to dual loop... 
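+ * (each packet may require a chain walk across several classifier
+ * tables; see the while (1) loop further below)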
*/ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = ACL_NEXT_INDEX_DENY; + u32 table_index0; + vnet_classify_table_t *t0; + vnet_classify_entry_t *e0; + u64 hash0; + u8 *h0; + u8 error0; + + /* Stride 3 seems to work best */ + if (PREDICT_TRUE (n_left_from > 3)) + { + vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]); + vnet_classify_table_t *tp1; + u32 table_index1; + u64 phash1; + + table_index1 = vnet_buffer (p1)->l2_classify.table_index; + + if (PREDICT_TRUE (table_index1 != ~0)) + { + tp1 = pool_elt_at_index (vcm->tables, table_index1); + phash1 = vnet_buffer (p1)->l2_classify.hash; + vnet_classify_prefetch_entry (tp1, phash1); + } + } + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + table_index0 = vnet_buffer (b0)->l2_classify.table_index; + e0 = 0; + t0 = 0; + + vnet_buffer (b0)->l2_classify.opaque_index = ~0; + + /* Determine the next node */ + next0 = vnet_l2_feature_next (b0, msm->feat_next_node_index, + L2INPUT_FEAT_ACL); + + if (PREDICT_TRUE (table_index0 != ~0)) + { + hash0 = vnet_buffer (b0)->l2_classify.hash; + t0 = pool_elt_at_index (vcm->tables, table_index0); + + if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA) + h0 = + (void *) vlib_buffer_get_current (b0) + + t0->current_data_offset; + else + h0 = b0->data; + + e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now); + if (e0) + { + vnet_buffer (b0)->l2_classify.opaque_index + = e0->opaque_index; + vlib_buffer_advance (b0, e0->advance); + + next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT) ? + e0->next_index : next0; + + hits++; + + error0 = (next0 == ACL_NEXT_INDEX_DENY) ? + L2_INACL_ERROR_SESSION_DENY : L2_INACL_ERROR_NONE; + b0->error = node->errors[error0]; + } + else + { + while (1) + { + if (PREDICT_TRUE (t0->next_table_index != ~0)) + t0 = pool_elt_at_index (vcm->tables, + t0->next_table_index); + else + { + next0 = + (t0->miss_next_index < + ACL_NEXT_INDEX_N_NEXT) ? t0->miss_next_index : + next0; + + misses++; + + error0 = (next0 == ACL_NEXT_INDEX_DENY) ? + L2_INACL_ERROR_TABLE_MISS : L2_INACL_ERROR_NONE; + b0->error = node->errors[error0]; + break; + } + + if (t0->current_data_flag == + CLASSIFY_FLAG_USE_CURR_DATA) + h0 = + (void *) vlib_buffer_get_current (b0) + + t0->current_data_offset; + else + h0 = b0->data; + + hash0 = vnet_classify_hash_packet (t0, (u8 *) h0); + e0 = vnet_classify_find_entry + (t0, (u8 *) h0, hash0, now); + if (e0) + { + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT) ? + e0->next_index : next0; + hits++; + chain_hits++; + + error0 = (next0 == ACL_NEXT_INDEX_DENY) ? + L2_INACL_ERROR_SESSION_DENY : L2_INACL_ERROR_NONE; + b0->error = node->errors[error0]; + break; + } + } + } + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_inacl_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + t->next_index = next0; + t->table_index = t0 ? t0 - vcm->tables : ~0; + t->offset = (t0 && e0) ? 
vnet_classify_get_offset (t0, e0) : ~0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + L2_INACL_ERROR_MISS, misses); + vlib_node_increment_counter (vm, node->node_index, + L2_INACL_ERROR_HIT, hits); + vlib_node_increment_counter (vm, node->node_index, + L2_INACL_ERROR_CHAIN_HIT, chain_hits); + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2_inacl_node,static) = { + .function = l2_inacl_node_fn, + .name = "l2-input-acl", + .vector_size = sizeof (u32), + .format_trace = format_l2_inacl_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_inacl_error_strings), + .error_strings = l2_inacl_error_strings, + + .n_next_nodes = ACL_NEXT_INDEX_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [ACL_NEXT_INDEX_DENY] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (l2_inacl_node, l2_inacl_node_fn) + clib_error_t *l2_inacl_init (vlib_main_t * vm) +{ + l2_inacl_main_t *mp = &l2_inacl_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main (); + + /* Initialize the feature next-node indexes */ + feat_bitmap_init_next_nodes (vm, + l2_inacl_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names (), + mp->feat_next_node_index); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_inacl_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_input_classify.c b/src/vnet/l2/l2_input_classify.c new file mode 100644 index 00000000..ee8042a0 --- /dev/null +++ b/src/vnet/l2/l2_input_classify.c @@ -0,0 +1,662 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * l2_classify.c + */ + +#include <vnet/l2/l2_classify.h> +#include <vnet/api_errno.h> + +/** + * @file + * @brief L2 input classifier. + * + * @sa @ref vnet/vnet/classify/vnet_classify.c + * @sa @ref vnet/vnet/classify/vnet_classify.h + */ + +/** + * @brief l2_input_classifier packet trace record. + */ +typedef struct +{ + /** interface handle for the ith packet */ + u32 sw_if_index; + /** graph arc index selected for this packet */ + u32 next_index; + /** classifier table which provided the final result */ + u32 table_index; + /** offset in classifier heap of the corresponding session */ + u32 session_offset; +} l2_input_classify_trace_t; + +/** + * @brief vlib node runtime. + */ +typedef struct +{ + /** use-case independent main object pointer */ + vnet_classify_main_t *vcm; + /** l2 input classifier main object pointer */ + l2_input_classify_main_t *l2cm; +} l2_input_classify_runtime_t; + +/** Packet trace format function. 
*/ +static u8 * +format_l2_input_classify_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_input_classify_trace_t *t = va_arg (*args, l2_input_classify_trace_t *); + + s = format (s, "l2-classify: sw_if_index %d, table %d, offset %x, next %d", + t->sw_if_index, t->table_index, t->session_offset, + t->next_index); + return s; +} + +/** l2 input classifier main data structure. */ +l2_input_classify_main_t l2_input_classify_main; + +vlib_node_registration_t l2_input_classify_node; + +#define foreach_l2_input_classify_error \ +_(MISS, "Classify misses") \ +_(HIT, "Classify hits") \ +_(CHAIN_HIT, "Classify hits after chain walk") \ +_(DROP, "L2 Classify Drops") + +typedef enum +{ +#define _(sym,str) L2_INPUT_CLASSIFY_ERROR_##sym, + foreach_l2_input_classify_error +#undef _ + L2_INPUT_CLASSIFY_N_ERROR, +} l2_input_classify_error_t; + +static char *l2_input_classify_error_strings[] = { +#define _(sym,string) string, + foreach_l2_input_classify_error +#undef _ +}; + +/** + * @brief l2 input classifier node. + * @node l2-input-classify + * + * This is the l2 input classifier dispatch node + * + * @param vm vlib_main_t corresponding to the current thread. + * @param node vlib_node_runtime_t data for this node. + * @param frame vlib_frame_t whose contents should be dispatched. + * + * @par Graph mechanics: buffer metadata, next index usage + * + * @em Uses: + * - <code>(l2_input_classify_runtime_t *) + * rt->classify_table_index_by_sw_if_index</code> + * - Head of the per-interface, per-protocol classifier table chain + * for a specific interface. + * - @c ~0 => send pkts to the next feature in the L2 feature chain. + * - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code> + * - Indicates the @c sw_if_index value of the interface that the + * packet was received on. 
+ * - <code>vnet_buffer(b0)->l2.feature_bitmap</code> + * - Used to steer packets across l2 features enabled on the interface + * - <code>(vnet_classify_entry_t) e0->next_index</code> + * - Used to steer traffic when the classifier hits on a session + * - <code>(vnet_classify_entry_t) e0->advance</code> + * - Signed quantity applied via <code>vlib_buffer_advance</code> + * when the classifier hits on a session + * - <code>(vnet_classify_table_t) t0->miss_next_index</code> + * - Used to steer traffic when the classifier misses + * + * @em Sets: + * - <code>vnet_buffer (b0)->l2_classify.table_index</code> + * - Classifier table index of the first classifier table in + * the classifier table chain + * - <code>vnet_buffer (b0)->l2_classify.hash</code> + * - Bounded-index extensible hash corresponding to the + * masked fields in the current packet + * - <code>vnet_buffer (b0)->l2.feature_bitmap</code> + * - Used to steer packets across l2 features enabled on the interface + * - <code>vnet_buffer (b0)->l2_classify.opaque_index</code> + * - Copied from the classifier session object upon classifier hit + * + * @em Counters: + * - <code>L2_INPUT_CLASSIFY_ERROR_MISS</code> Classifier misses + * - <code>L2_INPUT_CLASSIFY_ERROR_HIT</code> Classifier hits + * - <code>L2_INPUT_CLASSIFY_ERROR_CHAIN_HIT</code> + * Classifier hits in other than the first table + */ + +static uword +l2_input_classify_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + l2_input_classify_next_t next_index; + l2_input_classify_main_t *cm = &l2_input_classify_main; + vnet_classify_main_t *vcm = cm->vnet_classify_main; + l2_input_classify_runtime_t *rt = + (l2_input_classify_runtime_t *) node->runtime_data; + u32 hits = 0; + u32 misses = 0; + u32 chain_hits = 0; + f64 now; + u32 n_next_nodes; + + n_next_nodes = node->n_next_nodes; + + now = vlib_time_now (vm); + + n_left_from = frame->n_vectors; + from = vlib_frame_vector_args (frame); + + /* First pass: compute hash */ + + while (n_left_from > 2) + { + vlib_buffer_t *b0, *b1; + u32 bi0, bi1; + ethernet_header_t *h0, *h1; + u32 sw_if_index0, sw_if_index1; + u16 type0, type1; + int type_index0, type_index1; + vnet_classify_table_t *t0, *t1; + u32 table_index0, table_index1; + u64 hash0, hash1; + + + /* prefetch next iteration */ + { + vlib_buffer_t *p1, *p2; + + p1 = vlib_get_buffer (vm, from[1]); + p2 = vlib_get_buffer (vm, from[2]); + + vlib_prefetch_buffer_header (p1, STORE); + CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + bi1 = from[1]; + b1 = vlib_get_buffer (vm, bi1); + h1 = vlib_buffer_get_current (b1); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + vnet_buffer (b0)->l2_classify.table_index = ~0; + + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + vnet_buffer (b1)->l2_classify.table_index = ~0; + + /* Select classifier table based on ethertype */ + type0 = clib_net_to_host_u16 (h0->type); + type1 = clib_net_to_host_u16 (h1->type); + + type_index0 = (type0 == ETHERNET_TYPE_IP4) + ? L2_INPUT_CLASSIFY_TABLE_IP4 : L2_INPUT_CLASSIFY_TABLE_OTHER; + type_index0 = (type0 == ETHERNET_TYPE_IP6) + ? L2_INPUT_CLASSIFY_TABLE_IP6 : type_index0; + + type_index1 = (type1 == ETHERNET_TYPE_IP4) + ? 
L2_INPUT_CLASSIFY_TABLE_IP4 : L2_INPUT_CLASSIFY_TABLE_OTHER; + type_index1 = (type1 == ETHERNET_TYPE_IP6) + ? L2_INPUT_CLASSIFY_TABLE_IP6 : type_index1; + + vnet_buffer (b0)->l2_classify.table_index = + table_index0 = + rt->l2cm->classify_table_index_by_sw_if_index + [type_index0][sw_if_index0]; + + if (table_index0 != ~0) + { + t0 = pool_elt_at_index (vcm->tables, table_index0); + + vnet_buffer (b0)->l2_classify.hash = hash0 = + vnet_classify_hash_packet (t0, (u8 *) h0); + vnet_classify_prefetch_bucket (t0, hash0); + } + + vnet_buffer (b1)->l2_classify.table_index = + table_index1 = + rt->l2cm->classify_table_index_by_sw_if_index + [type_index1][sw_if_index1]; + + if (table_index1 != ~0) + { + t1 = pool_elt_at_index (vcm->tables, table_index1); + + vnet_buffer (b1)->l2_classify.hash = hash1 = + vnet_classify_hash_packet (t1, (u8 *) h1); + vnet_classify_prefetch_bucket (t1, hash1); + } + + from += 2; + n_left_from -= 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t *b0; + u32 bi0; + ethernet_header_t *h0; + u32 sw_if_index0; + u16 type0; + u32 type_index0; + vnet_classify_table_t *t0; + u32 table_index0; + u64 hash0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + vnet_buffer (b0)->l2_classify.table_index = ~0; + + /* Select classifier table based on ethertype */ + type0 = clib_net_to_host_u16 (h0->type); + + type_index0 = (type0 == ETHERNET_TYPE_IP4) + ? L2_INPUT_CLASSIFY_TABLE_IP4 : L2_INPUT_CLASSIFY_TABLE_OTHER; + type_index0 = (type0 == ETHERNET_TYPE_IP6) + ? L2_INPUT_CLASSIFY_TABLE_IP6 : type_index0; + + vnet_buffer (b0)->l2_classify.table_index = + table_index0 = rt->l2cm->classify_table_index_by_sw_if_index + [type_index0][sw_if_index0]; + + if (table_index0 != ~0) + { + t0 = pool_elt_at_index (vcm->tables, table_index0); + + vnet_buffer (b0)->l2_classify.hash = hash0 = + vnet_classify_hash_packet (t0, (u8 *) h0); + vnet_classify_prefetch_bucket (t0, hash0); + } + from++; + n_left_from--; + } + + next_index = node->cached_next_index; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Not enough load/store slots to dual loop... */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = ~0; /* next l2 input feature, please... */ + ethernet_header_t *h0; + u32 table_index0; + u64 hash0; + vnet_classify_table_t *t0; + vnet_classify_entry_t *e0; + + if (PREDICT_TRUE (n_left_from > 2)) + { + vlib_buffer_t *p2 = vlib_get_buffer (vm, from[2]); + u64 phash2; + u32 table_index2; + vnet_classify_table_t *tp2; + + /* + * Prefetch table entry two ahead. Buffer / data + * were prefetched above... 
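+ * (buffer headers and packet data, in the first-pass hash loop, which also
+ * prefetched the classifier buckets), so only the classifier entry itself
+ * needs prefetching here.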
+ */ + table_index2 = vnet_buffer (p2)->l2_classify.table_index; + + if (PREDICT_TRUE (table_index2 != ~0)) + { + tp2 = pool_elt_at_index (vcm->tables, table_index2); + phash2 = vnet_buffer (p2)->l2_classify.hash; + vnet_classify_prefetch_entry (tp2, phash2); + } + } + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + table_index0 = vnet_buffer (b0)->l2_classify.table_index; + e0 = 0; + vnet_buffer (b0)->l2_classify.opaque_index = ~0; + + if (PREDICT_TRUE (table_index0 != ~0)) + { + hash0 = vnet_buffer (b0)->l2_classify.hash; + t0 = pool_elt_at_index (vcm->tables, table_index0); + + e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now); + if (e0) + { + vnet_buffer (b0)->l2_classify.opaque_index + = e0->opaque_index; + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < n_next_nodes) ? + e0->next_index : next0; + hits++; + } + else + { + while (1) + { + if (t0->next_table_index != ~0) + t0 = pool_elt_at_index (vcm->tables, + t0->next_table_index); + else + { + next0 = (t0->miss_next_index < n_next_nodes) ? + t0->miss_next_index : next0; + misses++; + break; + } + + hash0 = vnet_classify_hash_packet (t0, (u8 *) h0); + e0 = + vnet_classify_find_entry (t0, (u8 *) h0, hash0, now); + if (e0) + { + vnet_buffer (b0)->l2_classify.opaque_index + = e0->opaque_index; + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < n_next_nodes) ? + e0->next_index : next0; + hits++; + chain_hits++; + break; + } + } + } + } + + if (PREDICT_FALSE (next0 == 0)) + b0->error = node->errors[L2_INPUT_CLASSIFY_ERROR_DROP]; + + /* Determine the next node and remove ourself from bitmap */ + if (PREDICT_TRUE (next0 == ~0)) + next0 = vnet_l2_feature_next (b0, cm->l2_inp_feat_next, + L2INPUT_FEAT_INPUT_CLASSIFY); + else + vnet_buffer (b0)->l2.feature_bitmap &= + ~L2INPUT_FEAT_INPUT_CLASSIFY; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_input_classify_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + t->table_index = table_index0; + t->next_index = next0; + t->session_offset = e0 ? 
vnet_classify_get_offset (t0, e0) : 0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + L2_INPUT_CLASSIFY_ERROR_MISS, misses); + vlib_node_increment_counter (vm, node->node_index, + L2_INPUT_CLASSIFY_ERROR_HIT, hits); + vlib_node_increment_counter (vm, node->node_index, + L2_INPUT_CLASSIFY_ERROR_CHAIN_HIT, chain_hits); + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2_input_classify_node) = { + .function = l2_input_classify_node_fn, + .name = "l2-input-classify", + .vector_size = sizeof (u32), + .format_trace = format_l2_input_classify_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_input_classify_error_strings), + .error_strings = l2_input_classify_error_strings, + + .runtime_data_bytes = sizeof (l2_input_classify_runtime_t), + + .n_next_nodes = L2_INPUT_CLASSIFY_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_INPUT_CLASSIFY_NEXT_DROP] = "error-drop", + [L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT] = "ethernet-input-not-l2", + [L2_INPUT_CLASSIFY_NEXT_IP4_INPUT] = "ip4-input", + [L2_INPUT_CLASSIFY_NEXT_IP6_INPUT] = "ip6-input", + [L2_INPUT_CLASSIFY_NEXT_LI] = "li-hit", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (l2_input_classify_node, + l2_input_classify_node_fn); + +/** l2 input classifier feature initialization. */ +clib_error_t * +l2_input_classify_init (vlib_main_t * vm) +{ + l2_input_classify_main_t *cm = &l2_input_classify_main; + l2_input_classify_runtime_t *rt; + + rt = vlib_node_get_runtime_data (vm, l2_input_classify_node.index); + + cm->vlib_main = vm; + cm->vnet_main = vnet_get_main (); + cm->vnet_classify_main = &vnet_classify_main; + + /* Initialize the feature next-node indexes */ + feat_bitmap_init_next_nodes (vm, + l2_input_classify_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names (), + cm->l2_inp_feat_next); + rt->l2cm = cm; + rt->vcm = cm->vnet_classify_main; + + return 0; +} + +VLIB_INIT_FUNCTION (l2_input_classify_init); + +clib_error_t * +l2_input_classify_worker_init (vlib_main_t * vm) +{ + l2_input_classify_main_t *cm = &l2_input_classify_main; + l2_input_classify_runtime_t *rt; + + rt = vlib_node_get_runtime_data (vm, l2_input_classify_node.index); + + rt->l2cm = cm; + rt->vcm = cm->vnet_classify_main; + + return 0; +} + +VLIB_WORKER_INIT_FUNCTION (l2_input_classify_worker_init); + +/** Enable/disable l2 input classification on a specific interface. */ +void +vnet_l2_input_classify_enable_disable (u32 sw_if_index, int enable_disable) +{ + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_INPUT_CLASSIFY, + (u32) enable_disable); +} + +/** @brief Set l2 per-protocol, per-interface input classification tables. + * + * @param sw_if_index interface handle + * @param ip4_table_index ip4 classification table index, or ~0 + * @param ip6_table_index ip6 classification table index, or ~0 + * @param other_table_index non-ip4, non-ip6 classification table index, + * or ~0 + * @returns 0 on success, VNET_API_ERROR_NO_SUCH_TABLE, TABLE2, TABLE3 + * if the indicated (non-~0) table does not exist.
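+ *
+ * A usage sketch (parameter names as above; pass ~0 to leave a protocol
+ * unclassified):
+ * @code
+ * int rv = vnet_l2_input_classify_set_tables (sw_if_index,
+ *                                             ip4_table_index, ~0, ~0);
+ * @endcode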
+ */ + +int +vnet_l2_input_classify_set_tables (u32 sw_if_index, + u32 ip4_table_index, + u32 ip6_table_index, u32 other_table_index) +{ + l2_input_classify_main_t *cm = &l2_input_classify_main; + vnet_classify_main_t *vcm = cm->vnet_classify_main; + + /* Assume that we've validated sw_if_index in the API layer */ + + if (ip4_table_index != ~0 && + pool_is_free_index (vcm->tables, ip4_table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE; + + if (ip6_table_index != ~0 && + pool_is_free_index (vcm->tables, ip6_table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE2; + + if (other_table_index != ~0 && + pool_is_free_index (vcm->tables, other_table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE3; + + vec_validate + (cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4], + sw_if_index); + + vec_validate + (cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6], + sw_if_index); + + vec_validate + (cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER], + sw_if_index); + + cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4] + [sw_if_index] = ip4_table_index; + + cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6] + [sw_if_index] = ip6_table_index; + + cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER] + [sw_if_index] = other_table_index; + + return 0; +} + +static clib_error_t * +int_l2_input_classify_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + u32 sw_if_index = ~0; + u32 ip4_table_index = ~0; + u32 ip6_table_index = ~0; + u32 other_table_index = ~0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "intfc %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else if (unformat (input, "ip4-table %d", &ip4_table_index)) + ; + else if (unformat (input, "ip6-table %d", &ip6_table_index)) + ; + else if (unformat (input, "other-table %d", &other_table_index)) + ; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "interface must be specified"); + + + if (ip4_table_index == ~0 && ip6_table_index == ~0 + && other_table_index == ~0) + { + vlib_cli_output (vm, "L2 classification disabled"); + vnet_l2_input_classify_enable_disable (sw_if_index, 0 /* enable */ ); + return 0; + } + + rv = vnet_l2_input_classify_set_tables (sw_if_index, ip4_table_index, + ip6_table_index, other_table_index); + switch (rv) + { + case 0: + vnet_l2_input_classify_enable_disable (sw_if_index, 1 /* enable */ ); + break; + + default: + return clib_error_return (0, "vnet_l2_input_classify_set_tables: %d", + rv); + break; + } + + return 0; +} + +/*? + * Configure l2 input classification. + * + * @cliexpar + * @cliexstart{set interface l2 input classify intfc <interface-name> [ip4-table <index>] [ip6-table <index>] [other-table <index>]} + * @cliexend + * @todo This is incomplete. This needs a detailed description and a + * practical example. 
+ ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (int_l2_input_classify_cli, static) = { + .path = "set interface l2 input classify", + .short_help = + "set interface l2 input classify intfc <interface-name> [ip4-table <n>]\n" + " [ip6-table <n>] [other-table <n>]", + .function = int_l2_input_classify_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_input_vtr.c b/src/vnet/l2/l2_input_vtr.c new file mode 100644 index 00000000..9470752f --- /dev/null +++ b/src/vnet/l2/l2_input_vtr.c @@ -0,0 +1,369 @@ +/* + * l2_input_vtr.c : layer 2 input vlan tag rewrite processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_vtr.h> +#include <vnet/l2/l2_input_vtr.h> +#include <vnet/l2/l2_output.h> + +#include <vppinfra/error.h> +#include <vppinfra/cache.h> + + +typedef struct +{ + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u8 raw[12]; /* raw data (vlans) */ + u32 sw_if_index; +} l2_invtr_trace_t; + +/* packet trace format function */ +static u8 * +format_l2_invtr_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_invtr_trace_t *t = va_arg (*args, l2_invtr_trace_t *); + + s = format (s, "l2-input-vtr: sw_if_index %d dst %U src %U data " + "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4], + t->raw[5], t->raw[6], t->raw[7], t->raw[8], t->raw[9], + t->raw[10], t->raw[11]); + return s; +} + +l2_invtr_main_t l2_invtr_main; + +static vlib_node_registration_t l2_invtr_node; + +#define foreach_l2_invtr_error \ +_(L2_INVTR, "L2 inverter packets") \ +_(DROP, "L2 input tag rewrite drops") + +typedef enum +{ +#define _(sym,str) L2_INVTR_ERROR_##sym, + foreach_l2_invtr_error +#undef _ + L2_INVTR_N_ERROR, +} l2_invtr_error_t; + +static char *l2_invtr_error_strings[] = { +#define _(sym,string) string, + foreach_l2_invtr_error +#undef _ +}; + +typedef enum +{ + L2_INVTR_NEXT_DROP, + L2_INVTR_N_NEXT, +} l2_invtr_next_t; + + +static uword +l2_invtr_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + l2_invtr_next_t next_index; + l2_invtr_main_t *msm = &l2_invtr_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, 
next_index, to_next, n_left_to_next); + + while (n_left_from >= 6 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3, *p4, *p5; + u32 sw_if_index2, sw_if_index3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + + /* + * Prefetch the input config for the N+1 loop iteration + * This depends on the buffer header above + */ + sw_if_index2 = vnet_buffer (p2)->sw_if_index[VLIB_RX]; + sw_if_index3 = vnet_buffer (p3)->sw_if_index[VLIB_RX]; + CLIB_PREFETCH (vec_elt_at_index + (l2output_main.configs, sw_if_index2), + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (vec_elt_at_index + (l2output_main.configs, sw_if_index3), + CLIB_CACHE_LINE_BYTES, LOAD); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* RX interface handles */ + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + /* Determine the next node */ + next0 = vnet_l2_feature_next (b0, msm->feat_next_node_index, + L2INPUT_FEAT_VTR); + next1 = vnet_l2_feature_next (b1, msm->feat_next_node_index, + L2INPUT_FEAT_VTR); + + l2_output_config_t *config0; + l2_output_config_t *config1; + config0 = vec_elt_at_index (l2output_main.configs, sw_if_index0); + config1 = vec_elt_at_index (l2output_main.configs, sw_if_index1); + + if (PREDICT_FALSE (config0->out_vtr_flag)) + { + if (config0->output_vtr.push_and_pop_bytes) + { + /* perform the tag rewrite on two packets */ + if (l2_vtr_process (b0, &config0->input_vtr)) + { + /* Drop packet */ + next0 = L2_INVTR_NEXT_DROP; + b0->error = node->errors[L2_INVTR_ERROR_DROP]; + } + } + else if (config0->output_pbb_vtr.push_and_pop_bytes) + { + if (l2_pbb_process (b0, &(config0->input_pbb_vtr))) + { + /* Drop packet */ + next0 = L2_INVTR_NEXT_DROP; + b0->error = node->errors[L2_INVTR_ERROR_DROP]; + } + } + } + if (PREDICT_FALSE (config1->out_vtr_flag)) + { + if (config1->output_vtr.push_and_pop_bytes) + { + if (l2_vtr_process (b1, &config1->input_vtr)) + { + /* Drop packet */ + next1 = L2_INVTR_NEXT_DROP; + b1->error = node->errors[L2_INVTR_ERROR_DROP]; + } + } + else if (config1->output_pbb_vtr.push_and_pop_bytes) + { + if (l2_pbb_process (b1, &(config1->input_pbb_vtr))) + { + /* Drop packet */ + next1 = L2_INVTR_NEXT_DROP; + b1->error = node->errors[L2_INVTR_ERROR_DROP]; + } + } + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2_invtr_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + ethernet_header_t *h0 = vlib_buffer_get_current (b0); + t->sw_if_index = sw_if_index0; + clib_memcpy (t->src, h0->src_address, 6); + clib_memcpy (t->dst, h0->dst_address, 6); + clib_memcpy (t->raw, &h0->type, sizeof (t->raw)); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + l2_invtr_trace_t *t = + 
vlib_add_trace (vm, node, b1, sizeof (*t)); + ethernet_header_t *h1 = vlib_buffer_get_current (b1); + t->sw_if_index = sw_if_index1; + clib_memcpy (t->src, h1->src_address, 6); + clib_memcpy (t->dst, h1->dst_address, 6); + clib_memcpy (t->raw, &h1->type, sizeof (t->raw)); + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 sw_if_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + /* Determine the next node */ + next0 = vnet_l2_feature_next (b0, msm->feat_next_node_index, + L2INPUT_FEAT_VTR); + + l2_output_config_t *config0; + config0 = vec_elt_at_index (l2output_main.configs, sw_if_index0); + + if (PREDICT_FALSE (config0->out_vtr_flag)) + { + if (config0->output_vtr.push_and_pop_bytes) + { + /* perform the tag rewrite on one packet */ + if (l2_vtr_process (b0, &config0->input_vtr)) + { + /* Drop packet */ + next0 = L2_INVTR_NEXT_DROP; + b0->error = node->errors[L2_INVTR_ERROR_DROP]; + } + } + else if (config0->output_pbb_vtr.push_and_pop_bytes) + { + if (l2_pbb_process (b0, &(config0->input_pbb_vtr))) + { + /* Drop packet */ + next0 = L2_INVTR_NEXT_DROP; + b0->error = node->errors[L2_INVTR_ERROR_DROP]; + } + } + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_invtr_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + ethernet_header_t *h0 = vlib_buffer_get_current (b0); + t->sw_if_index = sw_if_index0; + clib_memcpy (t->src, h0->src_address, 6); + clib_memcpy (t->dst, h0->dst_address, 6); + clib_memcpy (t->raw, &h0->type, sizeof (t->raw)); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2_invtr_node,static) = { + .function = l2_invtr_node_fn, + .name = "l2-input-vtr", + .vector_size = sizeof (u32), + .format_trace = format_l2_invtr_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_invtr_error_strings), + .error_strings = l2_invtr_error_strings, + + .n_next_nodes = L2_INVTR_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_INVTR_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (l2_invtr_node, l2_invtr_node_fn) + clib_error_t *l2_invtr_init (vlib_main_t * vm) +{ + l2_invtr_main_t *mp = &l2_invtr_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main (); + + /* Initialize the feature next-node indexes */ + feat_bitmap_init_next_nodes (vm, + l2_invtr_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names (), + mp->feat_next_node_index); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_invtr_init); + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_input_vtr.h b/src/vnet/l2/l2_input_vtr.h new file mode 100644 index 00000000..f248669e ---
/dev/null +++ b/src/vnet/l2/l2_input_vtr.h @@ -0,0 +1,54 @@ +/* + * l2_input_vtr.h : layer 2 input vlan tag rewrite processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_l2_input_vtr_h +#define included_vnet_l2_input_vtr_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_vtr.h> + + +typedef struct +{ + + /* + * The input vtr data is located in l2_output_config_t because + * the same config data is used for the egress EFP Filter check. + */ + + /* Next nodes for each feature */ + u32 feat_next_node_index[32]; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} l2_invtr_main_t; + +extern l2_invtr_main_t l2_invtr_main; + +#endif /* included_vnet_l2_input_vtr_h */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c new file mode 100644 index 00000000..fddab824 --- /dev/null +++ b/src/vnet/l2/l2_learn.c @@ -0,0 +1,596 @@ +/* + * l2_learn.c : layer 2 learning using l2fib + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/cli.h> + +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_fib.h> +#include <vnet/l2/l2_learn.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> + +/** + * @file + * @brief Ethernet Bridge Learning. + * + * Populate the mac table with entries mapping the packet's source mac + bridge + * domain ID to the input sw_if_index. + * + * Note that learning and forwarding are separate graph nodes. This means that + * for a set of packets, all learning is performed first, then all packets are + * forwarded. The forwarding is done based on the end-state of the mac table, + * instead of the state after each packet. Thus the forwarding results could + * differ in certain cases (mac move tests), but this is not expected to cause + * problems in real-world networks. It is much simpler to separate learning + * and forwarding into separate nodes.
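+ *
+ * As a sketch (field names taken from the l2fib key/result structures used
+ * below; exact bit layout elided), each learned mac becomes a bihash
+ * key/value pair:
+ * @code
+ * key.fields    = { mac, bd_index }
+ * result.fields = { sw_if_index, timestamp, sn,
+ *                   static_mac / filter / age_not / lrn_evt flags }
+ * @endcode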
+ */ + + +typedef struct +{ + u8 src[6]; + u8 dst[6]; + u32 sw_if_index; + u16 bd_index; +} l2learn_trace_t; + + +/* packet trace format function */ +static u8 * +format_l2learn_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2learn_trace_t *t = va_arg (*args, l2learn_trace_t *); + + s = format (s, "l2-learn: sw_if_index %d dst %U src %U bd_index %d", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, t->bd_index); + return s; +} + +static vlib_node_registration_t l2learn_node; + +#define foreach_l2learn_error \ +_(L2LEARN, "L2 learn packets") \ +_(MISS, "L2 learn misses") \ +_(MAC_MOVE, "L2 mac moves") \ +_(MAC_MOVE_VIOLATE, "L2 mac move violations") \ +_(LIMIT, "L2 not learned due to limit") \ +_(HIT_UPDATE, "L2 learn hit updates") \ +_(FILTER_DROP, "L2 filter mac drops") + +typedef enum +{ +#define _(sym,str) L2LEARN_ERROR_##sym, + foreach_l2learn_error +#undef _ + L2LEARN_N_ERROR, +} l2learn_error_t; + +static char *l2learn_error_strings[] = { +#define _(sym,string) string, + foreach_l2learn_error +#undef _ +}; + +typedef enum +{ + L2LEARN_NEXT_L2FWD, + L2LEARN_NEXT_DROP, + L2LEARN_N_NEXT, +} l2learn_next_t; + + +/** Perform learning on one packet based on the mac table lookup result. */ + +static_always_inline void +l2learn_process (vlib_node_runtime_t * node, + l2learn_main_t * msm, + u64 * counter_base, + vlib_buffer_t * b0, + u32 sw_if_index0, + l2fib_entry_key_t * key0, + l2fib_entry_key_t * cached_key, + u32 * count, + l2fib_entry_result_t * result0, u32 * next0, u8 timestamp) +{ + /* Set up the default next node (typically L2FWD) */ + *next0 = vnet_l2_feature_next (b0, msm->feat_next_node_index, + L2INPUT_FEAT_LEARN); + + /* Check mac table lookup result */ + if (PREDICT_TRUE (result0->fields.sw_if_index == sw_if_index0)) + { + /* Entry in L2FIB with matching sw_if_index matched - normal fast path */ + u32 dtime = timestamp - result0->fields.timestamp; + u32 dsn = result0->fields.sn.as_u16 - vnet_buffer (b0)->l2.l2fib_sn; + u32 check = (dtime && vnet_buffer (b0)->l2.bd_age) || dsn; + + if (PREDICT_TRUE (check == 0)) + return; /* MAC entry up to date */ + if (result0->fields.age_not) + return; /* Static MAC always age_not */ + if (msm->global_learn_count > msm->global_learn_limit) + return; /* Above learn limit - do not update */ + + /* Limit updates per l2-learn node call to avoid prolonged update burst + * as dtime advance over 1 minute mark, unless more than 1 min behind + * or SN obsolete */ + if ((*count > 2) && (dtime == 1) && (dsn == 0)) + return; + + counter_base[L2LEARN_ERROR_HIT_UPDATE] += 1; + *count += 1; + } + else if (result0->raw == ~0) + { + /* Entry not in L2FIB - add it */ + counter_base[L2LEARN_ERROR_MISS] += 1; + + if (msm->global_learn_count >= msm->global_learn_limit) + { + /* + * Global limit reached. Do not learn the mac but forward the packet. + * In the future, limits could also be per-interface or bridge-domain. 
+ */ + counter_base[L2LEARN_ERROR_LIMIT] += 1; + return; + } + + /* Do not learn if mac is 0 */ + l2fib_entry_key_t key = *key0; + key.fields.bd_index = 0; + if (key.raw == 0) + return; + + /* It is ok to learn */ + msm->global_learn_count++; + result0->raw = 0; /* clear all fields */ + result0->fields.sw_if_index = sw_if_index0; + result0->fields.lrn_evt = (msm->client_pid != 0); + } + else + { + /* Entry in L2FIB with different sw_if_index - mac move or filter */ + if (result0->fields.filter) + { + ASSERT (result0->fields.sw_if_index == ~0); + /* drop packet because lookup matched a filter mac entry */ + b0->error = node->errors[L2LEARN_ERROR_FILTER_DROP]; + *next0 = L2LEARN_NEXT_DROP; + return; + } + + if (result0->fields.static_mac) + { + /* + * Don't overwrite a static mac + * TODO: Check violation policy. For now drop the packet + */ + b0->error = node->errors[L2LEARN_ERROR_MAC_MOVE_VIOLATE]; + *next0 = L2LEARN_NEXT_DROP; + return; + } + + /* + * TODO: may want to rate limit mac moves + * TODO: check global/bridge domain/interface learn limits + */ + result0->fields.sw_if_index = sw_if_index0; + if (result0->fields.age_not) /* The mac was provisioned */ + { + msm->global_learn_count++; + result0->fields.age_not = 0; + } + result0->fields.lrn_evt = (msm->client_pid != 0); + counter_base[L2LEARN_ERROR_MAC_MOVE] += 1; + } + + /* Update the entry */ + result0->fields.timestamp = timestamp; + result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; + + BVT (clib_bihash_kv) kv; + kv.key = key0->raw; + kv.value = result0->raw; + BV (clib_bihash_add_del) (msm->mac_table, &kv, 1 /* is_add */ ); + + /* Invalidate the cache */ + cached_key->raw = ~0; +} + + +static_always_inline uword +l2learn_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, int do_trace) +{ + u32 n_left_from, *from, *to_next; + l2learn_next_t next_index; + l2learn_main_t *msm = &l2learn_main; + vlib_node_t *n = vlib_get_node (vm, l2learn_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t *em = &vm->error_main; + l2fib_entry_key_t cached_key; + l2fib_entry_result_t cached_result; + u8 timestamp = (u8) (vlib_time_now (vm) / 60); + u32 count = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + /* Clear the one-entry cache in case mac table was updated */ + cached_key.raw = ~0; + cached_result.raw = ~0; /* warning be gone */ + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3; + ethernet_header_t *h0, *h1, *h2, *h3; + l2fib_entry_key_t key0, key1, key2, key3; + l2fib_entry_result_t result0, result1, result2, result3; + u32 bucket0, bucket1, bucket2, bucket3; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 through b3 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + /* RX interface handles */ + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX]; + sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX]; + + /* Process 4 x pkts */ + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + h2 = vlib_buffer_get_current (b2); + h3 = vlib_buffer_get_current (b3); + + if (do_trace) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2learn_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer (b0)->l2.bd_index; + clib_memcpy (t->src, h0->src_address, 6); + clib_memcpy (t->dst, h0->dst_address, 6); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + l2learn_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->bd_index = vnet_buffer (b1)->l2.bd_index; + clib_memcpy (t->src, h1->src_address, 6); + clib_memcpy (t->dst, h1->dst_address, 6); + } + if (b2->flags & VLIB_BUFFER_IS_TRACED) + { + l2learn_trace_t *t = + vlib_add_trace (vm, node, b2, sizeof (*t)); + t->sw_if_index = sw_if_index2; + t->bd_index = vnet_buffer (b2)->l2.bd_index; + clib_memcpy (t->src, h2->src_address, 6); + clib_memcpy (t->dst, h2->dst_address, 6); + } + if (b3->flags & VLIB_BUFFER_IS_TRACED) + { + l2learn_trace_t *t = + vlib_add_trace (vm, node, b3, sizeof (*t)); + t->sw_if_index = sw_if_index3; + t->bd_index = vnet_buffer (b3)->l2.bd_index; + clib_memcpy (t->src, h3->src_address, 6); + clib_memcpy (t->dst, h3->dst_address, 6); + } + } + + /* process 4 pkts */ + vlib_node_increment_counter (vm, l2learn_node.index, + L2LEARN_ERROR_L2LEARN, 4); + + l2fib_lookup_4 (msm->mac_table, &cached_key, &cached_result, + h0->src_address, + h1->src_address, + h2->src_address, + h3->src_address, + vnet_buffer (b0)->l2.bd_index, + vnet_buffer (b1)->l2.bd_index, + vnet_buffer (b2)->l2.bd_index, + vnet_buffer (b3)->l2.bd_index, + &key0, &key1, &key2, &key3, + &bucket0, &bucket1, &bucket2, &bucket3, + &result0, &result1, &result2, &result3); + + l2learn_process (node, msm, &em->counters[node_counter_base_index], + b0, sw_if_index0, &key0, &cached_key, + &count, &result0, &next0, timestamp); + + l2learn_process (node, msm, &em->counters[node_counter_base_index], + b1, sw_if_index1, &key1, &cached_key, + &count, &result1, &next1, timestamp); + + l2learn_process (node, msm, &em->counters[node_counter_base_index], + b2,
sw_if_index2, &key2, &cached_key, + &count, &result2, &next2, timestamp); + + l2learn_process (node, msm, &em->counters[node_counter_base_index], + b3, sw_if_index3, &key3, &cached_key, + &count, &result3, &next3, timestamp); + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 sw_if_index0; + ethernet_header_t *h0; + l2fib_entry_key_t key0; + l2fib_entry_result_t result0; + u32 bucket0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + h0 = vlib_buffer_get_current (b0); + + if (do_trace && PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + l2learn_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->bd_index = vnet_buffer (b0)->l2.bd_index; + clib_memcpy (t->src, h0->src_address, 6); + clib_memcpy (t->dst, h0->dst_address, 6); + } + + /* process 1 pkt */ + vlib_node_increment_counter (vm, l2learn_node.index, + L2LEARN_ERROR_L2LEARN, 1); + + + l2fib_lookup_1 (msm->mac_table, &cached_key, &cached_result, + h0->src_address, vnet_buffer (b0)->l2.bd_index, + &key0, &bucket0, &result0); + + l2learn_process (node, msm, &em->counters[node_counter_base_index], + b0, sw_if_index0, &key0, &cached_key, + &count, &result0, &next0, timestamp); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static uword +l2learn_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + return l2learn_node_inline (vm, node, frame, 1 /* do_trace */ ); + return l2learn_node_inline (vm, node, frame, 0 /* do_trace */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2learn_node,static) = { + .function = l2learn_node_fn, + .name = "l2-learn", + .vector_size = sizeof (u32), + .format_trace = format_l2learn_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2learn_error_strings), + .error_strings = l2learn_error_strings, + + .n_next_nodes = L2LEARN_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2LEARN_NEXT_DROP] = "error-drop", + [L2LEARN_NEXT_L2FWD] = "l2-fwd", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (l2learn_node, l2learn_node_fn) + clib_error_t *l2learn_init (vlib_main_t * vm) +{ + l2learn_main_t *mp = &l2learn_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main (); + + /* Initialize the feature next-node indexes */ + feat_bitmap_init_next_nodes (vm, + l2learn_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names (), + mp->feat_next_node_index); + + /* init the hash table ptr */ + mp->mac_table = get_mac_table (); + + /* + * Set the default number of dynamically learned macs to the number + * of buckets. 
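+ * The default, L2LEARN_DEFAULT_LIMIT, is defined in l2_learn.h as
+ * (L2FIB_NUM_BUCKETS * 64).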
*/ + mp->global_learn_limit = L2LEARN_DEFAULT_LIMIT; + + return 0; +} + +VLIB_INIT_FUNCTION (l2learn_init); + + +/** + * Set subinterface learn enable/disable. + * The CLI format is: + * set interface l2 learn <interface> [disable] + */ +static clib_error_t * +int_learn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index; + u32 enable; + + if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + enable = 1; + if (unformat (input, "disable")) + { + enable = 0; + } + + /* set the interface flag */ + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_LEARN, enable); + +done: + return error; +} + +/*? + * Layer 2 learning can be enabled and disabled on each + * interface and on each bridge-domain. Use this command to + * manage interfaces. It is enabled by default. + * + * @cliexpar + * Example of how to enable learning: + * @cliexcmd{set interface l2 learn GigabitEthernet0/8/0} + * Example of how to disable learning: + * @cliexcmd{set interface l2 learn GigabitEthernet0/8/0 disable} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (int_learn_cli, static) = { + .path = "set interface l2 learn", + .short_help = "set interface l2 learn <interface> [disable]", + .function = int_learn, +}; +/* *INDENT-ON* */ + + +static clib_error_t * +l2learn_config (vlib_main_t * vm, unformat_input_t * input) +{ + l2learn_main_t *mp = &l2learn_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "limit %d", &mp->global_learn_limit)) + ; + + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + return 0; +} + +VLIB_CONFIG_FUNCTION (l2learn_config, "l2learn"); + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_learn.h b/src/vnet/l2/l2_learn.h new file mode 100644 index 00000000..3aaf48e2 --- /dev/null +++ b/src/vnet/l2/l2_learn.h @@ -0,0 +1,70 @@ +/* + * l2_learn.h : layer 2 learning using l2fib + * + * Copyright (c) 2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
*/ + +#ifndef included_l2learn_h +#define included_l2learn_h + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> + + +typedef struct +{ + + /* Hash table */ + BVT (clib_bihash) * mac_table; + + /* number of dynamically learned mac entries */ + u32 global_learn_count; + + /* maximum number of dynamically learned mac entries */ + u32 global_learn_limit; + + /* client waiting for L2 MAC events for learned and aged MACs */ + u32 client_pid; + u32 client_index; + + /* Next nodes for each feature */ + u32 feat_next_node_index[32]; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} l2learn_main_t; + +#define L2LEARN_DEFAULT_LIMIT (L2FIB_NUM_BUCKETS * 64) + +l2learn_main_t l2learn_main; + +extern vlib_node_registration_t l2fib_mac_age_scanner_process_node; + +typedef enum +{ + L2_MAC_AGE_PROCESS_EVENT_START = 1, + L2_MAC_AGE_PROCESS_EVENT_STOP = 2, + L2_MAC_AGE_PROCESS_EVENT_ONE_PASS = 3, +} l2_mac_age_process_event_t; + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c new file mode 100644 index 00000000..500fc5d0 --- /dev/null +++ b/src/vnet/l2/l2_output.c @@ -0,0 +1,710 @@ +/* + * l2_output.c : layer 2 output packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
*/ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/cli.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_output.h> + + +/* Feature graph node names */ +static char *l2output_feat_names[] = { +#define _(sym,name) name, + foreach_l2output_feat +#undef _ +}; + +char ** +l2output_get_feat_names (void) +{ + return l2output_feat_names; +} + +u8 * +format_l2_output_features (u8 * s, va_list * args) +{ + static char *display_names[] = { +#define _(sym,name) #sym, + foreach_l2output_feat +#undef _ + }; + u32 feature_bitmap = va_arg (*args, u32); + + if (feature_bitmap == 0) + { + s = format (s, " none configured"); + return s; + } + + int i; + for (i = L2OUTPUT_N_FEAT - 1; i >= 0; i--) + if (feature_bitmap & (1 << i)) + s = format (s, "%10s (%s)\n", display_names[i], l2output_feat_names[i]); + return s; +} + +l2output_main_t l2output_main; + +typedef struct +{ + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u32 sw_if_index; + u8 raw[12]; /* raw data */ +} l2output_trace_t; + +/* packet trace format function */ +static u8 * +format_l2output_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2output_trace_t *t = va_arg (*args, l2output_trace_t *); + + s = format (s, "l2-output: sw_if_index %d dst %U src %U data " + "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src, + t->raw[0], t->raw[1], t->raw[2], t->raw[3], t->raw[4], + t->raw[5], t->raw[6], t->raw[7], t->raw[8], t->raw[9], + t->raw[10], t->raw[11]); + + return s; +} + + +static char *l2output_error_strings[] = { +#define _(sym,string) string, + foreach_l2output_error +#undef _ +}; + +/** + * Check for split horizon violations. + * Return 0 if split horizon check passes, otherwise return non-zero. + * Packets should not be transmitted out an interface with the same + * split-horizon group as the input interface, except if the @c shg is 0 + * in which case the check always passes. + */ +static_always_inline u32 +split_horizon_violation (u8 shg1, u8 shg2) +{ + if (PREDICT_TRUE (shg1 == 0)) + { + return 0; + } + else + { + return shg1 == shg2; + } +} + +/** Determine the next L2 node based on the output feature bitmap */ +static_always_inline void +l2_output_dispatch (vlib_buffer_t * b0, vlib_node_runtime_t * node, + u32 * cached_sw_if_index, u32 * cached_next_index, + u32 sw_if_index, u32 feature_bitmap, u32 * next0) +{ + /* + * The output feature bitmap always has at least the L2 output bit set + * for a normal L2 interface (or 0 if the interface is changed from L2 + * to L3 mode). So if the feature bitmap is 0 or just has the L2 output + * bit set, we know there are no more features and can just output packets + * on the interface. Otherwise, get the index of the next feature node. + */ + if (PREDICT_FALSE ((feature_bitmap & ~L2OUTPUT_FEAT_OUTPUT) != 0)) + { + /* Save bitmap for the next feature graph nodes */ + vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap; + + /* Determine the next node */ + *next0 = + feat_bitmap_get_next_node_index (l2output_main.l2_out_feat_next, + feature_bitmap); + } + else + { + /* + * There are no features.
Send packet to TX node for sw_if_index0 + * This is a little tricky in that the output interface next node indexes + * are not precomputed at init time. + */ + + if (sw_if_index == *cached_sw_if_index) + { + /* We hit in the one-entry cache. Use it. */ + *next0 = *cached_next_index; + } + else + { + /* Look up the output TX node for the sw_if_index */ + *next0 = vec_elt (l2output_main.output_node_index_vec, sw_if_index); + + if (PREDICT_FALSE (*next0 == L2OUTPUT_NEXT_DROP)) + b0->error = node->errors[L2OUTPUT_ERROR_MAPPING_DROP]; + + /* Update the one-entry cache */ + *cached_sw_if_index = sw_if_index; + *cached_next_index = *next0; + } + } +} + +static_always_inline void +l2output_vtr (vlib_node_runtime_t * node, l2_output_config_t * config, + u32 feature_bitmap, vlib_buffer_t * b, u32 * next) +{ + if (PREDICT_FALSE (config->out_vtr_flag)) + { + /* Perform pre-vtr EFP filter check if configured */ + if (config->output_vtr.push_and_pop_bytes) + { + /* + * Perform output vlan tag rewrite and the pre-vtr EFP filter check. + * The EFP Filter only needs to be run if there is an output VTR + * configured. The flag for the post-vtr EFP Filter node is used + * to trigger the pre-vtr check as well. + */ + u32 failed1 = (feature_bitmap & L2OUTPUT_FEAT_EFP_FILTER) + && (l2_efp_filter_process (b, &(config->input_vtr))); + u32 failed2 = l2_vtr_process (b, &(config->output_vtr)); + + if (PREDICT_FALSE (failed1 | failed2)) + { + *next = L2OUTPUT_NEXT_DROP; + if (failed2) + { + b->error = node->errors[L2OUTPUT_ERROR_VTR_DROP]; + } + if (failed1) + { + b->error = node->errors[L2OUTPUT_ERROR_EFP_DROP]; + } + } + } + // perform the PBB rewrite + else if (config->output_pbb_vtr.push_and_pop_bytes) + { + u32 failed = l2_pbb_process (b, &(config->output_pbb_vtr)); + if (PREDICT_FALSE (failed)) + { + *next = L2OUTPUT_NEXT_DROP; + b->error = node->errors[L2OUTPUT_ERROR_VTR_DROP]; + } + } + } +} + + +static_always_inline uword +l2output_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, int do_trace) +{ + u32 n_left_from, *from, *to_next; + l2output_next_t next_index; + l2output_main_t *msm = &l2output_main; + u32 cached_sw_if_index; + u32 cached_next_index; + + /* Invalidate cache */ + cached_sw_if_index = ~0; + cached_next_index = ~0; /* warning be gone */ + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3; + ethernet_header_t *h0, *h1, *h2, *h3; + l2_output_config_t *config0, *config1, *config2, *config3; + u32 feature_bitmap0, feature_bitmap1; + u32 feature_bitmap2, feature_bitmap3; + + /* Prefetch next iteration. 
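+ * Only the buffer headers are prefetched here; this node mostly reads
+ * buffer metadata (the VLIB_TX sw_if_index and l2.shg), touching packet
+ * data only for vtr rewrites and tracing.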
*/
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+ }
+
+ /* speculatively enqueue b0 through b3 to the current next frame */
+ /* bi is "buffer index", b is pointer to the buffer */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ /* TX interface handles */
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_TX];
+ sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_TX];
+
+ vlib_node_increment_counter (vm, l2output_node.index,
+ L2OUTPUT_ERROR_L2OUTPUT, 4);
+
+ /* Get config for the output interface */
+ config0 = vec_elt_at_index (msm->configs, sw_if_index0);
+ config1 = vec_elt_at_index (msm->configs, sw_if_index1);
+ config2 = vec_elt_at_index (msm->configs, sw_if_index2);
+ config3 = vec_elt_at_index (msm->configs, sw_if_index3);
+
+ /*
+ * Get features from the config
+ * TODO: mask out any non-applicable features
+ */
+ feature_bitmap0 = config0->feature_bitmap;
+ feature_bitmap1 = config1->feature_bitmap;
+ feature_bitmap2 = config2->feature_bitmap;
+ feature_bitmap3 = config3->feature_bitmap;
+
+ /* Determine next node */
+ l2_output_dispatch (b0, node, &cached_sw_if_index,
+ &cached_next_index, sw_if_index0,
+ feature_bitmap0, &next0);
+ l2_output_dispatch (b1, node, &cached_sw_if_index,
+ &cached_next_index, sw_if_index1,
+ feature_bitmap1, &next1);
+ l2_output_dispatch (b2, node, &cached_sw_if_index,
+ &cached_next_index, sw_if_index2,
+ feature_bitmap2, &next2);
+ l2_output_dispatch (b3, node, &cached_sw_if_index,
+ &cached_next_index, sw_if_index3,
+ feature_bitmap3, &next3);
+
+ l2output_vtr (node, config0, feature_bitmap0, b0, &next0);
+ l2output_vtr (node, config1, feature_bitmap1, b1, &next1);
+ l2output_vtr (node, config2, feature_bitmap2, b2, &next2);
+ l2output_vtr (node, config3, feature_bitmap3, b3, &next3);
+
+ if (do_trace)
+ {
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+ h2 = vlib_buffer_get_current (b2);
+ h3 = vlib_buffer_get_current (b3);
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ clib_memcpy (t->raw, &h1->type, sizeof (t->raw));
+ }
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b2, sizeof (*t));
+ t->sw_if_index = sw_if_index2;
+ clib_memcpy (t->src, h2->src_address, 6);
+ clib_memcpy (t->dst, h2->dst_address, 6);
+ clib_memcpy (t->raw, &h2->type, sizeof (t->raw));
+ }
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b3, sizeof (*t));
+ t->sw_if_index = sw_if_index3;
+ clib_memcpy (t->src, h3->src_address, 6);
+ clib_memcpy (t->dst, h3->dst_address, 6);
+ clib_memcpy (t->raw, &h3->type, sizeof (t->raw));
+ }
+ }
+
+ /*
+ * Perform the split horizon check
+ * The check can only fail for non-zero shg's
+ */
+ if (PREDICT_FALSE (config0->shg + config1->shg +
+ config2->shg + config3->shg))
+ {
+ /* any of the four checks might fail; check each of them */
+ if (split_horizon_violation
+ (config0->shg, vnet_buffer (b0)->l2.shg))
+ {
+ next0 = L2OUTPUT_NEXT_DROP;
+ b0->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ if (split_horizon_violation
+ (config1->shg, vnet_buffer (b1)->l2.shg))
+ {
+ next1 = L2OUTPUT_NEXT_DROP;
+ b1->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ if (split_horizon_violation
+ (config2->shg, vnet_buffer (b2)->l2.shg))
+ {
+ next2 = L2OUTPUT_NEXT_DROP;
+ b2->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ if (split_horizon_violation
+ (config3->shg, vnet_buffer (b3)->l2.shg))
+ {
+ next3 = L2OUTPUT_NEXT_DROP;
+ b3->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ethernet_header_t *h0;
+ l2_output_config_t *config0;
+ u32 feature_bitmap0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ vlib_node_increment_counter (vm, l2output_node.index,
+ L2OUTPUT_ERROR_L2OUTPUT, 1);
+
+ /* Get config for the output interface */
+ config0 = vec_elt_at_index (msm->configs, sw_if_index0);
+
+ /*
+ * Get features from the config
+ * TODO: mask out any non-applicable features
+ */
+ feature_bitmap0 = config0->feature_bitmap;
+
+ /* Determine next node */
+ l2_output_dispatch (b0, node, &cached_sw_if_index,
+ &cached_next_index, sw_if_index0,
+ feature_bitmap0, &next0);
+
+ l2output_vtr (node, config0, feature_bitmap0, b0, &next0);
+
+ if (do_trace && PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ l2output_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ h0 = vlib_buffer_get_current (b0);
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ clib_memcpy (t->raw, &h0->type, sizeof (t->raw));
+ }
+
+ /* Perform the split horizon check */
+ if (PREDICT_FALSE
+ (split_horizon_violation
+ (config0->shg, vnet_buffer (b0)->l2.shg)))
+ {
+ next0 = L2OUTPUT_NEXT_DROP;
+ b0->error = node->errors[L2OUTPUT_ERROR_SHG_DROP];
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+l2output_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ return l2output_node_inline (vm, node, frame, 1 /* do_trace */ );
+ return l2output_node_inline (vm, node, frame, 0 /* do_trace */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2output_node) = {
+ .function = l2output_node_fn,
+ .name = "l2-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2output_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2output_error_strings),
+ .error_strings = l2output_error_strings,
+
+ .n_next_nodes = L2OUTPUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2OUTPUT_NEXT_DROP] = "error-drop",
+ [L2OUTPUT_NEXT_BAD_INTF] = "l2-output-bad-intf",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2output_node, l2output_node_fn);
+/* *INDENT-ON* */
+
+
+#define foreach_l2output_bad_intf_error \
+_(DROP, "L2 output to interface not in L2 mode or deleted")
+
+static char *l2output_bad_intf_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2output_bad_intf_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) L2OUTPUT_BAD_INTF_ERROR_##sym,
+ foreach_l2output_bad_intf_error
+#undef _
+ L2OUTPUT_BAD_INTF_N_ERROR,
+} l2output_bad_intf_error_t;
+
+
+/**
+ * Output node for interfaces/tunnels which were in L2 mode but were changed
+ * to L3 mode or possibly deleted thereafter. On changing the forwarding mode
+ * of any tunnel/interface from L2 to L3, its entry in the l2_output_main table
+ * next_nodes.output_node_index_vec[sw_if_index] MUST be set to the value of
+ * L2OUTPUT_NEXT_BAD_INTF. Thus, if there are stale entries in the L2FIB for
+ * this sw_if_index, l2-output will send packets for this sw_if_index to the
+ * l2-output-bad-intf node, which just sets up the proper drop reason before
+ * sending packets to the error-drop node to drop them. This way, stale L2FIB
+ * entries for deleted tunnels won't cause packet or memory corruption. 
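+ * Conceptually, a mode-change handler would do (sketch; the actual call
+ * site lives elsewhere in the L2 code):
+ *   l2output_main.output_node_index_vec[sw_if_index] = L2OUTPUT_NEXT_BAD_INTF;
+ * so that subsequent lookups resolve here instead of the stale TX node.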
+ */ +static vlib_node_registration_t l2output_bad_intf_node; + +static uword +l2output_bad_intf_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + l2output_next_t next_index = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b0->error = node->errors[L2OUTPUT_BAD_INTF_ERROR_DROP]; + b1->error = node->errors[L2OUTPUT_BAD_INTF_ERROR_DROP]; + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + b0->error = node->errors[L2OUTPUT_BAD_INTF_ERROR_DROP]; + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2output_bad_intf_node,static) = { + .function = l2output_bad_intf_node_fn, + .name = "l2-output-bad-intf", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2output_bad_intf_error_strings), + .error_strings = l2output_bad_intf_error_strings, + + .n_next_nodes = 1, + + /* edit / add dispositions here */ + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (l2output_bad_intf_node, l2output_bad_intf_node_fn); +/* *INDENT-ON* */ + +static clib_error_t * +l2output_init (vlib_main_t * vm) +{ + l2output_main_t *mp = &l2output_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main (); + + /* Create the config vector */ + vec_validate (mp->configs, 100); + /* Until we hook up the CLI config, just create 100 sw interface entries and zero them */ + + /* Initialize the feature next-node indexes */ + feat_bitmap_init_next_nodes (vm, + l2output_node.index, + L2OUTPUT_N_FEAT, + l2output_get_feat_names (), + mp->l2_out_feat_next); + + /* Initialize the output node mapping table */ + vec_validate_init_empty (mp->output_node_index_vec, 100, + L2OUTPUT_NEXT_DROP); + + return 0; +} + +VLIB_INIT_FUNCTION (l2output_init); + + +/** Create a mapping in the next node mapping table for the given sw_if_index. */ +void +l2output_create_output_node_mapping (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, u32 sw_if_index) +{ + vnet_hw_interface_t *hw0 = + vnet_get_sup_hw_interface (vnet_main, sw_if_index); + + /* dynamically create graph node arc */ + u32 next = vlib_node_add_next (vlib_main, l2output_node.index, + hw0->output_node_index); + l2output_main.output_node_index_vec[sw_if_index] = next; +} + +/* Get a pointer to the config for the given interface */ +l2_output_config_t * +l2output_intf_config (u32 sw_if_index) +{ + l2output_main_t *mp = &l2output_main; + + vec_validate (mp->configs, sw_if_index); + return vec_elt_at_index (mp->configs, sw_if_index); +} + +/** Enable (or disable) the feature in the bitmap for the given interface. 
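+    For example (values illustrative), enabling SPAN output on sw_if_index 2:
+      l2output_intf_bitmap_enable (2, L2OUTPUT_FEAT_SPAN, 1);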
*/ +void +l2output_intf_bitmap_enable (u32 sw_if_index, u32 feature_bitmap, u32 enable) +{ + l2output_main_t *mp = &l2output_main; + l2_output_config_t *config; + + vec_validate (mp->configs, sw_if_index); + config = vec_elt_at_index (mp->configs, sw_if_index); + + if (enable) + { + config->feature_bitmap |= feature_bitmap; + } + else + { + config->feature_bitmap &= ~feature_bitmap; + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_output.h b/src/vnet/l2/l2_output.h new file mode 100644 index 00000000..1a73fdf9 --- /dev/null +++ b/src/vnet/l2/l2_output.h @@ -0,0 +1,174 @@ +/* + * l2_output.h : layer 2 output packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_l2_output_h +#define included_vnet_l2_output_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_vtr.h> + + +/* The L2 output feature configuration, a per-interface struct */ +typedef struct +{ + + u32 feature_bitmap; + + /* + * vlan tag rewrite for ingress and egress + * ingress vtr is located here because the same config data is used for + * the egress EFP filter check + */ + vtr_config_t input_vtr; + vtr_config_t output_vtr; + ptr_config_t input_pbb_vtr; + ptr_config_t output_pbb_vtr; + + /* some of these flags may get integrated into the feature bitmap */ + u8 fwd_enable; + u8 flood_enable; + + /* split horizon group */ + u8 shg; + + /* flag for output vtr operation */ + u8 out_vtr_flag; + +} l2_output_config_t; + +typedef struct +{ + /* + * vector of output next node index, indexed by sw_if_index. + * used when all output features have been executed and the + * next nodes are the interface output nodes. + */ + u32 *output_node_index_vec; + + /* + * array of next node index for each output feature, indexed + * by l2output_feat_t. Used to determine next feature node. 
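+ * Entry i holds the vlib next-node index of the graph node that
+ * implements feature bit i. 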
+ */ + u32 l2_out_feat_next[32]; + + /* config vector indexed by sw_if_index */ + l2_output_config_t *configs; + + /* Convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} l2output_main_t; + +l2output_main_t l2output_main; + +extern vlib_node_registration_t l2output_node; + +/* L2 output features */ + +/* Mappings from feature ID to graph node name in reverse order */ +#define foreach_l2output_feat \ + _(OUTPUT, "interface-output") \ + _(SPAN, "span-l2-output") \ + _(CFM, "feature-bitmap-drop") \ + _(QOS, "feature-bitmap-drop") \ + _(ACL, "l2-output-acl") \ + _(L2PT, "feature-bitmap-drop") \ + _(EFP_FILTER, "l2-efp-filter") \ + _(IPIW, "feature-bitmap-drop") \ + _(STP_BLOCKED, "feature-bitmap-drop") \ + _(LINESTATUS_DOWN, "feature-bitmap-drop") \ + _(OUTPUT_CLASSIFY, "l2-output-classify") \ + _(XCRW, "l2-xcrw") + +/* Feature bitmap positions */ +typedef enum +{ +#define _(sym,str) L2OUTPUT_FEAT_##sym##_BIT, + foreach_l2output_feat +#undef _ + L2OUTPUT_N_FEAT, +} l2output_feat_t; + +STATIC_ASSERT (L2OUTPUT_N_FEAT <= 32, "too many l2 output features"); + +/* Feature bit masks */ +typedef enum +{ +#define _(sym,str) L2OUTPUT_FEAT_##sym = (1<<L2OUTPUT_FEAT_##sym##_BIT), + foreach_l2output_feat +#undef _ +} l2output_feat_masks_t; + +#define foreach_l2output_error \ +_(L2OUTPUT, "L2 output packets") \ +_(EFP_DROP, "L2 EFP filter pre-rewrite drops") \ +_(VTR_DROP, "L2 output tag rewrite drops") \ +_(SHG_DROP, "L2 split horizon drops") \ +_(DROP, "L2 output drops") \ +_(MAPPING_DROP, "L2 Output interface not valid") + +typedef enum +{ + L2OUTPUT_NEXT_DROP, + L2OUTPUT_NEXT_BAD_INTF, + L2OUTPUT_N_NEXT, +} l2output_next_t; + +typedef enum +{ +#define _(sym,str) L2OUTPUT_ERROR_##sym, + foreach_l2output_error +#undef _ + L2OUTPUT_N_ERROR, +} l2output_error_t; + +/* Return an array of strings containing graph node names of each feature */ +char **l2output_get_feat_names (void); + +/* arg0 - u32 feature_bitmap */ +u8 *format_l2_output_features (u8 * s, va_list * args); + +/** + * The next set of functions is for use by output feature graph nodes. + * When the last bit has been cleared from the output feature bitmap, + * the next node is the output graph node for the TX sw_if_index. + * These functions help the feature nodes get that node index. + */ + +/* Create a mapping to the output graph node for the given sw_if_index */ +void l2output_create_output_node_mapping (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index); + +/** Get a pointer to the config for the given interface */ +l2_output_config_t *l2output_intf_config (u32 sw_if_index); + +/** Enable (or disable) the feature in the bitmap for the given interface */ +void l2output_intf_bitmap_enable (u32 sw_if_index, + u32 feature_bitmap, u32 enable); + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_output_acl.c b/src/vnet/l2/l2_output_acl.c new file mode 100644 index 00000000..7d051326 --- /dev/null +++ b/src/vnet/l2/l2_output_acl.c @@ -0,0 +1,341 @@ +/* + * l2_output_acl.c : layer 2 output acl processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vlib/cli.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/l2/l2_output.h> + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + + +typedef struct +{ + /* Next nodes for L2 output features */ + u32 l2_out_feat_next[32]; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} l2_outacl_main_t; + + + +typedef struct +{ + /* per-pkt trace data */ + u8 src[6]; + u8 dst[6]; + u32 next_index; + u32 sw_if_index; +} l2_outacl_trace_t; + +/* packet trace format function */ +static u8 * +format_l2_outacl_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_outacl_trace_t *t = va_arg (*args, l2_outacl_trace_t *); + + s = format (s, "l2-output-acl: sw_if_index %d dst %U src %U", + t->sw_if_index, + format_ethernet_address, t->dst, + format_ethernet_address, t->src); + return s; +} + +l2_outacl_main_t l2_outacl_main; + +static vlib_node_registration_t l2_outacl_node; + +#define foreach_l2_outacl_error \ +_(L2_OUTACL, "L2 output ACL packets") \ +_(DROP, "L2 output drops") + +typedef enum +{ +#define _(sym,str) L2_OUTACL_ERROR_##sym, + foreach_l2_outacl_error +#undef _ + L2_OUTACL_N_ERROR, +} l2_outacl_error_t; + +static char *l2_outacl_error_strings[] = { +#define _(sym,string) string, + foreach_l2_outacl_error +#undef _ +}; + +typedef enum +{ + L2_OUTACL_NEXT_DROP, + L2_OUTACL_N_NEXT, +} l2_outacl_next_t; + + + +static uword +l2_outacl_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + l2_outacl_next_t next_index; + l2_outacl_main_t *msm = &l2_outacl_main; + vlib_node_t *n = vlib_get_node (vm, l2_outacl_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t *em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + /* get space to enqueue frame to graph node "next_index" */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (0 && n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ethernet_header_t *h0, *h1; + + /* Prefetch next iteration. 
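+ Note: this dual-packet path is compiled out by the "0 &&" in the
+ enclosing while condition; only the single-packet loop below runs. 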
*/ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + /* bi is "buffer index", b is pointer to the buffer */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* TX interface handles */ + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX]; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2_outacl_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + clib_memcpy (t->src, h0->src_address, 6); + clib_memcpy (t->dst, h0->dst_address, 6); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + l2_outacl_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + clib_memcpy (t->src, h1->src_address, 6); + clib_memcpy (t->dst, h1->dst_address, 6); + } + } + + em->counters[node_counter_base_index + L2_OUTACL_ERROR_L2_OUTACL] += + 2; + + /* add core loop code here */ + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 sw_if_index0; + ethernet_header_t *h0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_outacl_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + clib_memcpy (t->src, h0->src_address, 6); + clib_memcpy (t->dst, h0->dst_address, 6); + } + + em->counters[node_counter_base_index + L2_OUTACL_ERROR_L2_OUTACL] += + 1; + + /* + * L2_OUTACL code + * Dummy for now, just go to next feature node + */ + + /* Determine next node */ + next0 = vnet_l2_feature_next (b0, msm->l2_out_feat_next, + L2OUTPUT_FEAT_ACL); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2_outacl_node,static) = { + .function = l2_outacl_node_fn, + .name = "l2-output-acl", + .vector_size = sizeof (u32), + .format_trace = format_l2_outacl_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2_outacl_error_strings), + .error_strings = l2_outacl_error_strings, + + .n_next_nodes = L2_OUTACL_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_OUTACL_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ 
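+
+/*
+ * A minimal sketch (not part of the original file) of the hand-off that
+ * output feature nodes such as this one perform when done with a packet:
+ *
+ *   next0 = vnet_l2_feature_next (b0, msm->l2_out_feat_next,
+ *                                 L2OUTPUT_FEAT_ACL);
+ *
+ * This clears the feature's bit in vnet_buffer (b0)->l2.feature_bitmap and
+ * maps the remaining bitmap to a next-node index through the
+ * l2_out_feat_next table that l2_outacl_init populates below.
+ */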
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_outacl_node, l2_outacl_node_fn)
+ clib_error_t *l2_outacl_init (vlib_main_t * vm)
+{
+ l2_outacl_main_t *mp = &l2_outacl_main;
+
+ mp->vlib_main = vm;
+ mp->vnet_main = vnet_get_main ();
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_outacl_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ mp->l2_out_feat_next);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_outacl_init);
+
+#if 0
+/** @todo maybe someone will add output ACLs in the future.
+ * Set subinterface outacl enable/disable.
+ * The CLI format is:
+ * set interface acl output <interface> [disable]
+ */
+static clib_error_t *
+int_l2_outacl (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ u32 enable;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ enable = 1;
+ if (unformat (input, "disable"))
+ {
+ enable = 0;
+ }
+
+ /* set the interface flag */
+ l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_ACL, enable);
+
+done:
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_outacl_cli, static) = {
+ .path = "set interface acl output",
+ .short_help = "set interface acl output <interface> [disable]",
+ .function = int_l2_outacl,
+};
+/* *INDENT-ON* */
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_output_classify.c b/src/vnet/l2/l2_output_classify.c
new file mode 100644
index 00000000..a49abec2
--- /dev/null
+++ b/src/vnet/l2/l2_output_classify.c
@@ -0,0 +1,654 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/l2/l2_classify.h>
+#include <vnet/api_errno.h>
+
+/**
+ * @file
+ * @brief Layer 2 Output Classifier.
+ *
+ * @sa @ref vnet/vnet/classify/vnet_classify.c
+ * @sa @ref vnet/vnet/classify/vnet_classify.h
+ */
+
+typedef struct
+{
+ /** interface handle for the ith packet */
+ u32 sw_if_index;
+ /** graph arc index selected for this packet */
+ u32 next_index;
+ /** classifier table which provided the final result */
+ u32 table_index;
+ /** offset in classifier heap of the corresponding session */
+ u32 session_offset;
+} l2_output_classify_trace_t;
+
+typedef struct
+{
+ /** use-case independent main object pointer */
+ vnet_classify_main_t *vcm;
+ /** l2 output classifier main object pointer */
+ l2_output_classify_main_t *l2cm;
+} l2_output_classify_runtime_t;
+
+/** Packet trace format function. 
*/
+static u8 *
+format_l2_output_classify_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_output_classify_trace_t *t =
+ va_arg (*args, l2_output_classify_trace_t *);
+
+ s = format (s, "l2-classify: sw_if_index %d, table %d, offset %x, next %d",
+ t->sw_if_index, t->table_index, t->session_offset,
+ t->next_index);
+ return s;
+}
+
+/** l2 output classifier main data structure. */
+l2_output_classify_main_t l2_output_classify_main;
+
+vlib_node_registration_t l2_output_classify_node;
+
+#define foreach_l2_output_classify_error \
+_(MISS, "Classify misses") \
+_(HIT, "Classify hits") \
+_(CHAIN_HIT, "Classify hits after chain walk") \
+_(DROP, "L2 Classify Drops")
+
+typedef enum
+{
+#define _(sym,str) L2_OUTPUT_CLASSIFY_ERROR_##sym,
+ foreach_l2_output_classify_error
+#undef _
+ L2_OUTPUT_CLASSIFY_N_ERROR,
+} l2_output_classify_error_t;
+
+static char *l2_output_classify_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_output_classify_error
+#undef _
+};
+
+/**
+ * @brief l2 output classifier node.
+ * @node l2-output-classify
+ *
+ * This is the l2 output classifier dispatch node.
+ *
+ * @param vm vlib_main_t corresponding to the current thread.
+ * @param node vlib_node_runtime_t data for this node.
+ * @param frame vlib_frame_t whose contents should be dispatched.
+ *
+ * @par Graph mechanics: buffer metadata, next index usage
+ *
+ * @em Uses:
+ * - <code>(l2_output_classify_runtime_t *)
+ * rt->classify_table_index_by_sw_if_index</code>
+ * Head of the per-interface, per-protocol classifier table chain
+ * for a specific interface. ~0 => send pkts to the next
+ * feature in the L2 feature chain.
+ * - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
+ * - Indicates the @c sw_if_index value of the interface that the
+ * packet is to be transmitted on. 
+ * - <code>vnet_buffer (b0)->l2.feature_bitmap</code> + * - Used to steer packets across l2 features enabled on the interface + * - <code>(vnet_classify_entry_t) e0->next_index</code> + * - Used to steer traffic when the classifier hits on a session + * - <code>(vnet_classify_entry_t) e0->advance</code> + * - Signed quantity applied via <code>vlib_buffer_advance</code> + * when the classifier hits on a session + * - <code>(vnet_classify_table_t) t0->miss_next_index</code> + * - Used to steer traffic when the classifier misses + * + * @em Sets: + * - <code>vnet_buffer (b0)->l2_classify.table_index</code> + * - Classifier table index of the first classifier table in + * the classifier table chain + * - <code>vnet_buffer (b0)->l2_classify.hash</code> + * - Bounded-index extensible hash corresponding to the + * masked fields in the current packet + * - <code>vnet_buffer (b0)->l2.feature_bitmap</code> + * - Used to steer packets across l2 features enabled on the interface + * - <code>vnet_buffer (b0)->l2_classify.opaque_index</code> + * - Copied from the classifier session object upon classifier hit + * + * @em Counters: + * - <code>L2_OUTPUT_CLASSIFY_ERROR_MISS</code> Classifier misses + * - <code>L2_OUTPUT_CLASSIFY_ERROR_HIT</code> Classifier hits + * - <code>L2_OUTPUT_CLASSIFY_ERROR_CHAIN_HIT</code> + * Classifier hits in other than the first table + */ + +static uword +l2_output_classify_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + l2_output_classify_next_t next_index; + l2_output_classify_main_t *cm = &l2_output_classify_main; + vnet_classify_main_t *vcm = cm->vnet_classify_main; + l2_output_classify_runtime_t *rt = + (l2_output_classify_runtime_t *) node->runtime_data; + u32 hits = 0; + u32 misses = 0; + u32 chain_hits = 0; + f64 now; + u32 n_next_nodes; + u32 sw_if_index0; + + n_next_nodes = node->n_next_nodes; + + now = vlib_time_now (vm); + + n_left_from = frame->n_vectors; + from = vlib_frame_vector_args (frame); + + /* First pass: compute hash */ + + while (n_left_from > 2) + { + vlib_buffer_t *b0, *b1; + u32 bi0, bi1; + ethernet_header_t *h0, *h1; + u32 sw_if_index0, sw_if_index1; + u16 type0, type1; + int type_index0, type_index1; + vnet_classify_table_t *t0, *t1; + u32 table_index0, table_index1; + u64 hash0, hash1; + + + /* prefetch next iteration */ + { + vlib_buffer_t *p1, *p2; + + p1 = vlib_get_buffer (vm, from[1]); + p2 = vlib_get_buffer (vm, from[2]); + + vlib_prefetch_buffer_header (p1, STORE); + CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + bi1 = from[1]; + b1 = vlib_get_buffer (vm, bi1); + h1 = vlib_buffer_get_current (b1); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + vnet_buffer (b0)->l2_classify.table_index = ~0; + + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX]; + vnet_buffer (b1)->l2_classify.table_index = ~0; + + /* Select classifier table based on ethertype */ + type0 = clib_net_to_host_u16 (h0->type); + type1 = clib_net_to_host_u16 (h1->type); + + type_index0 = (type0 == ETHERNET_TYPE_IP4) + ? L2_OUTPUT_CLASSIFY_TABLE_IP4 : L2_OUTPUT_CLASSIFY_TABLE_OTHER; + type_index0 = (type0 == ETHERNET_TYPE_IP6) + ? L2_OUTPUT_CLASSIFY_TABLE_IP6 : type_index0; + + type_index1 = (type1 == ETHERNET_TYPE_IP4) + ? 
L2_OUTPUT_CLASSIFY_TABLE_IP4 : L2_OUTPUT_CLASSIFY_TABLE_OTHER; + type_index1 = (type1 == ETHERNET_TYPE_IP6) + ? L2_OUTPUT_CLASSIFY_TABLE_IP6 : type_index1; + + vnet_buffer (b0)->l2_classify.table_index = + table_index0 = + rt->l2cm->classify_table_index_by_sw_if_index + [type_index0][sw_if_index0]; + + if (table_index0 != ~0) + { + t0 = pool_elt_at_index (vcm->tables, table_index0); + + vnet_buffer (b0)->l2_classify.hash = hash0 = + vnet_classify_hash_packet (t0, (u8 *) h0); + vnet_classify_prefetch_bucket (t0, hash0); + } + + vnet_buffer (b1)->l2_classify.table_index = + table_index1 = + rt->l2cm->classify_table_index_by_sw_if_index + [type_index1][sw_if_index1]; + + if (table_index1 != ~0) + { + t1 = pool_elt_at_index (vcm->tables, table_index1); + + vnet_buffer (b1)->l2_classify.hash = hash1 = + vnet_classify_hash_packet (t1, (u8 *) h1); + vnet_classify_prefetch_bucket (t1, hash1); + } + + from += 2; + n_left_from -= 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t *b0; + u32 bi0; + ethernet_header_t *h0; + u16 type0; + u32 type_index0; + vnet_classify_table_t *t0; + u32 table_index0; + u64 hash0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + vnet_buffer (b0)->l2_classify.table_index = ~0; + + /* Select classifier table based on ethertype */ + type0 = clib_net_to_host_u16 (h0->type); + + type_index0 = (type0 == ETHERNET_TYPE_IP4) + ? L2_OUTPUT_CLASSIFY_TABLE_IP4 : L2_OUTPUT_CLASSIFY_TABLE_OTHER; + type_index0 = (type0 == ETHERNET_TYPE_IP6) + ? L2_OUTPUT_CLASSIFY_TABLE_IP6 : type_index0; + + vnet_buffer (b0)->l2_classify.table_index = + table_index0 = rt->l2cm->classify_table_index_by_sw_if_index + [type_index0][sw_if_index0]; + + if (table_index0 != ~0) + { + t0 = pool_elt_at_index (vcm->tables, table_index0); + + vnet_buffer (b0)->l2_classify.hash = hash0 = + vnet_classify_hash_packet (t0, (u8 *) h0); + vnet_classify_prefetch_bucket (t0, hash0); + } + from++; + n_left_from--; + } + + next_index = node->cached_next_index; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Not enough load/store slots to dual loop... */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = ~0; + ethernet_header_t *h0; + u32 table_index0; + u64 hash0; + vnet_classify_table_t *t0; + vnet_classify_entry_t *e0; + + if (PREDICT_TRUE (n_left_from > 2)) + { + vlib_buffer_t *p2 = vlib_get_buffer (vm, from[2]); + u64 phash2; + u32 table_index2; + vnet_classify_table_t *tp2; + + /* + * Prefetch table entry two ahead. Buffer / data + * were prefetched above... 
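+ * The first pass already stored the table index and hash in the
+ * buffer metadata, so only the classifier bucket/entry itself
+ * remains to be fetched. 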
+ */
+ table_index2 = vnet_buffer (p2)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index2 != ~0))
+ {
+ tp2 = pool_elt_at_index (vcm->tables, table_index2);
+ phash2 = vnet_buffer (p2)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp2, phash2);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+ table_index0 = vnet_buffer (b0)->l2_classify.table_index;
+ e0 = 0;
+ vnet_buffer (b0)->l2_classify.opaque_index = ~0;
+
+ if (PREDICT_TRUE (table_index0 != ~0))
+ {
+ hash0 = vnet_buffer (b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ }
+ else
+ {
+ while (1)
+ {
+ if (t0->next_table_index != ~0)
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 = (t0->miss_next_index < n_next_nodes) ?
+ t0->miss_next_index : next0;
+ misses++;
+ break;
+ }
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 =
+ vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ chain_hits++;
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE (next0 == 0))
+ b0->error = node->errors[L2_OUTPUT_CLASSIFY_ERROR_DROP];
+
+ /* Determine the next node and remove ourselves from the bitmap */
+ if (PREDICT_FALSE (next0 == ~0))
+ next0 = vnet_l2_feature_next (b0, cm->l2_out_feat_next,
+ L2OUTPUT_FEAT_OUTPUT_CLASSIFY);
+ else
+ vnet_buffer (b0)->l2.feature_bitmap &=
+ ~L2OUTPUT_FEAT_OUTPUT_CLASSIFY;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2_output_classify_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ t->table_index = table_index0;
+ t->next_index = next0;
+ t->session_offset = e0 ? vnet_classify_get_offset (t0, e0) : 0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_OUTPUT_CLASSIFY_ERROR_MISS, misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_OUTPUT_CLASSIFY_ERROR_HIT, hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ L2_OUTPUT_CLASSIFY_ERROR_CHAIN_HIT,
+ chain_hits);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_output_classify_node) = {
+ .function = l2_output_classify_node_fn,
+ .name = "l2-output-classify",
+ .vector_size = sizeof (u32),
+ .format_trace = format_l2_output_classify_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(l2_output_classify_error_strings),
+ .error_strings = l2_output_classify_error_strings,
+
+ .runtime_data_bytes = sizeof (l2_output_classify_runtime_t),
+
+ .n_next_nodes = L2_OUTPUT_CLASSIFY_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [L2_OUTPUT_CLASSIFY_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_output_classify_node,
+ l2_output_classify_node_fn);
+
+/** l2 output classifier feature initialization. */
+clib_error_t *
+l2_output_classify_init (vlib_main_t * vm)
+{
+ l2_output_classify_main_t *cm = &l2_output_classify_main;
+ l2_output_classify_runtime_t *rt;
+
+ rt = vlib_node_get_runtime_data (vm, l2_output_classify_node.index);
+
+ cm->vlib_main = vm;
+ cm->vnet_main = vnet_get_main ();
+ cm->vnet_classify_main = &vnet_classify_main;
+
+ /* Initialize the feature next-node indexes */
+ feat_bitmap_init_next_nodes (vm,
+ l2_output_classify_node.index,
+ L2OUTPUT_N_FEAT,
+ l2output_get_feat_names (),
+ cm->l2_out_feat_next);
+ rt->l2cm = cm;
+ rt->vcm = cm->vnet_classify_main;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_output_classify_init);
+
+clib_error_t *
+l2_output_classify_worker_init (vlib_main_t * vm)
+{
+ l2_output_classify_main_t *cm = &l2_output_classify_main;
+ l2_output_classify_runtime_t *rt;
+
+ rt = vlib_node_get_runtime_data (vm, l2_output_classify_node.index);
+
+ rt->l2cm = cm;
+ rt->vcm = cm->vnet_classify_main;
+
+ return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (l2_output_classify_worker_init);
+
+/** Enable/disable l2 output classification on a specific interface. */
+void
+vnet_l2_output_classify_enable_disable (u32 sw_if_index, int enable_disable)
+{
+
+ l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_OUTPUT_CLASSIFY,
+ (u32) enable_disable);
+}
+
+/** @brief Set l2 per-protocol, per-interface output classification tables.
+ *
+ * @param sw_if_index interface handle
+ * @param ip4_table_index ip4 classification table index, or ~0
+ * @param ip6_table_index ip6 classification table index, or ~0
+ * @param other_table_index non-ip4, non-ip6 classification table index,
+ * or ~0
+ * @returns 0 on success, VNET_API_ERROR_NO_SUCH_TABLE, TABLE2, TABLE3
+ * if the indicated (non-~0) table does not exist. 
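+ *
+ * A hypothetical call (table indices illustrative) that classifies only
+ * ip4 packets on sw_if_index 5:
+ *   rv = vnet_l2_output_classify_set_tables (5, ip4_table_index, ~0, ~0);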
+ */ + +int +vnet_l2_output_classify_set_tables (u32 sw_if_index, + u32 ip4_table_index, + u32 ip6_table_index, + u32 other_table_index) +{ + l2_output_classify_main_t *cm = &l2_output_classify_main; + vnet_classify_main_t *vcm = cm->vnet_classify_main; + + /* Assume that we've validated sw_if_index in the API layer */ + + if (ip4_table_index != ~0 && + pool_is_free_index (vcm->tables, ip4_table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE; + + if (ip6_table_index != ~0 && + pool_is_free_index (vcm->tables, ip6_table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE2; + + if (other_table_index != ~0 && + pool_is_free_index (vcm->tables, other_table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE3; + + vec_validate + (cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP4], + sw_if_index); + + vec_validate + (cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP6], + sw_if_index); + + vec_validate + (cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_OTHER], + sw_if_index); + + cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP4] + [sw_if_index] = ip4_table_index; + + cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP6] + [sw_if_index] = ip6_table_index; + + cm->classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_OTHER] + [sw_if_index] = other_table_index; + + return 0; +} + +static clib_error_t * +int_l2_output_classify_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + u32 sw_if_index = ~0; + u32 ip4_table_index = ~0; + u32 ip6_table_index = ~0; + u32 other_table_index = ~0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "intfc %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else if (unformat (input, "ip4-table %d", &ip4_table_index)) + ; + else if (unformat (input, "ip6-table %d", &ip6_table_index)) + ; + else if (unformat (input, "other-table %d", &other_table_index)) + ; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "interface must be specified"); + + + if (ip4_table_index == ~0 && ip6_table_index == ~0 + && other_table_index == ~0) + { + vlib_cli_output (vm, "L2 classification disabled"); + vnet_l2_output_classify_enable_disable (sw_if_index, 0 /* enable */ ); + return 0; + } + + rv = vnet_l2_output_classify_set_tables (sw_if_index, ip4_table_index, + ip6_table_index, + other_table_index); + switch (rv) + { + case 0: + vnet_l2_output_classify_enable_disable (sw_if_index, 1 /* enable */ ); + break; + + default: + return clib_error_return (0, "vnet_l2_output_classify_set_tables: %d", + rv); + break; + } + + return 0; +} + +/*? + * Configure Layer 2 output classification. + * + * @cliexpar + * @cliexstart{set interface l2 output classify intfc <interface-name> [ip4-table <index>] [ip6-table <index>] [other-table <index>]} + * @cliexend + * @todo This is incomplete. This needs a detailed description and a + * practical example. 
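+ * A possible invocation (interface name and table index are illustrative):
+ * @cliexcmd{set interface l2 output classify intfc GigabitEthernet0/8/0 ip4-table 5}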
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (int_l2_output_classify_cli, static) = {
+ .path = "set interface l2 output classify",
+ .short_help =
+ "set interface l2 output classify intfc <interface-name> [ip4-table <n>]\n"
+ " [ip6-table <n>] [other-table <n>]",
+ .function = int_l2_output_classify_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c
new file mode 100644
index 00000000..ff3d2f3a
--- /dev/null
+++ b/src/vnet/l2/l2_patch.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/feature/feature.h>
+#include <vppinfra/error.h>
+
+typedef struct
+{
+ /* vector of dispositions, indexed by rx_sw_if_index */
+ u32 *tx_next_by_rx_sw_if_index;
+ u32 *tx_sw_if_index_by_rx_sw_if_index;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} l2_patch_main_t;
+
+typedef struct
+{
+ u32 rx_sw_if_index;
+ u32 tx_sw_if_index;
+} l2_patch_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_l2_patch_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ l2_patch_trace_t *t = va_arg (*args, l2_patch_trace_t *);
+
+ s = format (s, "L2_PATCH: rx %d tx %d", t->rx_sw_if_index,
+ t->tx_sw_if_index);
+ return s;
+}
+
+l2_patch_main_t l2_patch_main;
+
+static vlib_node_registration_t l2_patch_node;
+
+#define foreach_l2_patch_error \
+_(PATCHED, "L2 patch packets") \
+_(DROPPED, "L2 patch misconfigured drops")
+
+typedef enum
+{
+#define _(sym,str) L2_PATCH_ERROR_##sym,
+ foreach_l2_patch_error
+#undef _
+ L2_PATCH_N_ERROR,
+} l2_patch_error_t;
+
+static char *l2_patch_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2_patch_error
+#undef _
+};
+
+typedef enum
+{
+ L2_PATCH_NEXT_DROP,
+ L2_PATCH_N_NEXT,
+} l2_patch_next_t;
+
+static uword
+l2_patch_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2_patch_next_t next_index;
+ l2_patch_main_t *l2pm = &l2_patch_main;
+ vlib_node_t *n = vlib_get_node (vm, l2_patch_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+
+ /* Prefetch next iteration. 
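+ Only the buffer headers are needed; as noted below, the
+ patch path never inspects the packet data. 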
*/ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + /* So stupid / simple, we don't need to prefetch data */ + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0); + ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0); + ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index1] != ~0); + ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1] != ~0); + + next0 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index0]; + next1 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index1]; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0]; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = + l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1]; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2_patch_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->rx_sw_if_index = sw_if_index0; + t->tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0]; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + l2_patch_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->rx_sw_if_index = sw_if_index1; + t->tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1]; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 sw_if_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0); + ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0); + + next0 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index0]; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0]; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + l2_patch_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->rx_sw_if_index = sw_if_index0; + t->tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0]; + } + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + em->counters[node_counter_base_index + L2_PATCH_ERROR_PATCHED] += + frame->n_vectors; + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2_patch_node, static) = { + .function = l2_patch_node_fn, + .name = "l2-patch", + .vector_size = sizeof (u32), + .format_trace = format_l2_patch_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = 
ARRAY_LEN(l2_patch_error_strings), + .error_strings = l2_patch_error_strings, + + .n_next_nodes = L2_PATCH_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2_PATCH_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (l2_patch_node, l2_patch_node_fn) + int vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, + int is_add) +{ + l2_patch_main_t *l2pm = &l2_patch_main; + vnet_hw_interface_t *rxhi, *txhi; + u32 tx_next_index; + + /* + * We assume that the API msg handler has used 2x VALIDATE_SW_IF_INDEX + * macros... + */ + + rxhi = vnet_get_sup_hw_interface (l2pm->vnet_main, rx_sw_if_index); + + /* Make sure caller didn't pass a vlan subif, etc. */ + if (rxhi->sw_if_index != rx_sw_if_index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + txhi = vnet_get_sup_hw_interface (l2pm->vnet_main, tx_sw_if_index); + if (txhi->sw_if_index != tx_sw_if_index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX_2; + + if (is_add) + { + tx_next_index = vlib_node_add_next (l2pm->vlib_main, + l2_patch_node.index, + txhi->output_node_index); + + vec_validate_init_empty (l2pm->tx_next_by_rx_sw_if_index, + rx_sw_if_index, ~0); + + l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = tx_next_index; + vec_validate_init_empty (l2pm->tx_sw_if_index_by_rx_sw_if_index, + rx_sw_if_index, ~0); + l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index] + = txhi->sw_if_index; + + ethernet_set_flags (l2pm->vnet_main, rxhi->hw_if_index, + ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); + + vnet_feature_enable_disable ("device-input", "l2-patch", + rxhi->hw_if_index, 1, 0, 0); + } + else + { + ethernet_set_flags (l2pm->vnet_main, rxhi->hw_if_index, + 0 /* disable promiscuous mode */ ); + + vnet_feature_enable_disable ("device-input", "l2-patch", + rxhi->hw_if_index, 0, 0, 0); + if (vec_len (l2pm->tx_next_by_rx_sw_if_index) > rx_sw_if_index) + { + l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = ~0; + l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index] = ~0; + } + } + + return 0; +} + +static clib_error_t * +test_patch_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + l2_patch_main_t *l2pm = &l2_patch_main; + unformat_input_t _line_input, *line_input = &_line_input; + u32 rx_sw_if_index, tx_sw_if_index; + int rv; + int rx_set = 0; + int tx_set = 0; + int is_add = 1; + clib_error_t *error = NULL; + + /* Get a line of input. 
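+ unformat_line_input buffers everything up to the end of the line
+ so the rx/tx/del keywords can be parsed from line_input below. 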
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "rx %U", unformat_vnet_sw_interface, + l2pm->vnet_main, &rx_sw_if_index)) + rx_set = 1; + else if (unformat (line_input, "tx %U", unformat_vnet_sw_interface, + l2pm->vnet_main, &tx_sw_if_index)) + tx_set = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else + break; + } + + if (rx_set == 0) + { + error = clib_error_return (0, "rx interface not set"); + goto done; + } + + if (tx_set == 0) + { + error = clib_error_return (0, "tx interface not set"); + goto done; + } + + rv = vnet_l2_patch_add_del (rx_sw_if_index, tx_sw_if_index, is_add); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + error = clib_error_return (0, "rx interface not a physical port"); + goto done; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX_2: + error = clib_error_return (0, "tx interface not a physical port"); + goto done; + + default: + error = clib_error_return + (0, "WARNING: vnet_l2_patch_add_del returned %d", rv); + goto done; + } + + +done: + unformat_free (line_input); + + return error; +} + +/*? + * Create or delete a Layer 2 patch. + * + * @cliexpar + * @cliexstart{test l2patch rx <intfc> tx <intfc> [del]} + * @cliexend + * @todo This is incomplete. This needs a detailed description and a + * practical example. +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (test_patch_command, static) = { + .path = "test l2patch", + .short_help = "test l2patch rx <intfc> tx <intfc> [del]", + .function = test_patch_command_fn, +}; +/* *INDENT-ON* */ + +/** Display the contents of the l2patch table. */ +static clib_error_t * +show_l2patch (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + l2_patch_main_t *l2pm = &l2_patch_main; + u32 rx_sw_if_index; + u32 no_entries = 1; + + ASSERT (vec_len (l2pm->tx_next_by_rx_sw_if_index) == + vec_len (l2pm->tx_sw_if_index_by_rx_sw_if_index)); + + for (rx_sw_if_index = 0; + rx_sw_if_index < vec_len (l2pm->tx_sw_if_index_by_rx_sw_if_index); + rx_sw_if_index++) + { + u32 tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index[rx_sw_if_index]; + if (tx_sw_if_index != ~0) + { + no_entries = 0; + vlib_cli_output (vm, "%26U -> %U", + format_vnet_sw_if_index_name, + l2pm->vnet_main, rx_sw_if_index, + format_vnet_sw_if_index_name, + l2pm->vnet_main, tx_sw_if_index); + } + } + + if (no_entries) + vlib_cli_output (vm, "no l2patch entries"); + + return 0; +} + +/*? + * Show Layer 2 patch entries. + * + * @cliexpar + * @cliexstart{show l2patch} + * @cliexend + * @todo This is incomplete. This needs a detailed description and a + * practical example. +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_l2patch_cli, static) = { + .path = "show l2patch", + .short_help = "Show l2 interface cross-connect entries", + .function = show_l2patch, +}; +/* *INDENT-ON* */ + +clib_error_t * +l2_patch_init (vlib_main_t * vm) +{ + l2_patch_main_t *mp = &l2_patch_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main (); + + return 0; +} + +VLIB_INIT_FUNCTION (l2_patch_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_rw.c b/src/vnet/l2/l2_rw.c new file mode 100644 index 00000000..fec04774 --- /dev/null +++ b/src/vnet/l2/l2_rw.c @@ -0,0 +1,710 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_rw.h>
+
+/**
+ * @file
+ * @brief Layer 2 Rewrite.
+ *
+ * The Layer 2 Rewrite node uses classify tables to match packets. Then, using
+ * the provisioned mask and value, it modifies the packet header.
+ */
+
+
+l2_rw_main_t l2_rw_main;
+
+vlib_node_registration_t l2_rw_node;
+
+typedef struct
+{
+  u32 sw_if_index;
+  u32 classify_table_index;
+  u32 rewrite_entry_index;
+} l2_rw_trace_t;
+
+static u8 *
+format_l2_rw_entry (u8 * s, va_list * args)
+{
+  l2_rw_entry_t *e = va_arg (*args, l2_rw_entry_t *);
+  l2_rw_main_t *rw = &l2_rw_main;
+  s = format (s, "%d - mask:%U value:%U\n",
+              e - rw->entries,
+              format_hex_bytes, e->mask,
+              e->rewrite_n_vectors * sizeof (u32x4), format_hex_bytes,
+              e->value, e->rewrite_n_vectors * sizeof (u32x4));
+  s =
+    format (s, " hits:%d skip_bytes:%d", e->hit_count,
+            e->skip_n_vectors * sizeof (u32x4));
+  return s;
+}
+
+static u8 *
+format_l2_rw_config (u8 * s, va_list * args)
+{
+  l2_rw_config_t *c = va_arg (*args, l2_rw_config_t *);
+  return format (s, "table-index:%d miss-index:%d",
+                 c->table_index, c->miss_index);
+}
+
+/* packet trace format function */
+static u8 *
+format_l2_rw_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  l2_rw_trace_t *t = va_arg (*args, l2_rw_trace_t *);
+  return format (s, "l2-rw: sw_if_index %d, table %d, entry %d",
+                 t->sw_if_index, t->classify_table_index,
+                 t->rewrite_entry_index);
+}
+
+always_inline l2_rw_config_t *
+l2_rw_get_config (u32 sw_if_index)
+{
+  l2_rw_main_t *rw = &l2_rw_main;
+  if (PREDICT_FALSE (!clib_bitmap_get (rw->configs_bitmap, sw_if_index)))
+    {
+      vec_validate (rw->configs, sw_if_index);
+      rw->configs[sw_if_index].table_index = ~0;
+      rw->configs[sw_if_index].miss_index = ~0;
+      rw->configs_bitmap =
+        clib_bitmap_set (rw->configs_bitmap, sw_if_index, 1);
+    }
+  return &rw->configs[sw_if_index];
+}
+
+static_always_inline void
+l2_rw_rewrite (l2_rw_entry_t * rwe, u8 * h)
+{
+  if (U32X4_ALIGNED (h))
+    {
+      u32x4 *d = ((u32x4 *) h) + rwe->skip_n_vectors;
+      switch (rwe->rewrite_n_vectors)
+        {
+        case 5:
+          d[4] = (d[4] & ~rwe->mask[4]) | rwe->value[4];
+          /* FALLTHROUGH */
+        case 4:
+          d[3] = (d[3] & ~rwe->mask[3]) | rwe->value[3];
+          /* FALLTHROUGH */
+        case 3:
+          d[2] = (d[2] & ~rwe->mask[2]) | rwe->value[2];
+          /* FALLTHROUGH */
+        case 2:
+          d[1] = (d[1] & ~rwe->mask[1]) | rwe->value[1];
+          /* FALLTHROUGH */
+        case 1:
+          d[0] = (d[0] & ~rwe->mask[0]) | rwe->value[0];
+          break;
+        default:
+          abort ();
+        }
+    }
+  else
+    {
+      u64 *d = ((u64 *) h) + rwe->skip_n_vectors * 2;
+      switch (rwe->rewrite_n_vectors)
+        {
+        case 5:
+          d[8] =
+            (d[8] & ~(((u64 *) rwe->mask)[8])) | (((u64 *) rwe->value)[8]);
+          d[9] =
+            (d[9] & ~(((u64 *) rwe->mask)[9])) | (((u64 *) rwe->value)[9]);
+          /* FALLTHROUGH */
+        case 4:
+          d[6] =
+            (d[6] & ~(((u64 *) rwe->mask)[6])) | (((u64 *) rwe->value)[6]);
+          d[7] =
+            (d[7] & ~(((u64 *) rwe->mask)[7])) | (((u64 *) rwe->value)[7]);
+          /* FALLTHROUGH */
+        case 3:
+          d[4] =
+            (d[4] & ~(((u64 *) rwe->mask)[4])) | (((u64 *) rwe->value)[4]);
+          d[5] =
+            (d[5] & ~(((u64 *) rwe->mask)[5])) | (((u64 *) rwe->value)[5]);
+          /* FALLTHROUGH */
+        case 2:
+          d[2] =
+            (d[2] & ~(((u64 *) rwe->mask)[2])) | (((u64 *) rwe->value)[2]);
+          d[3] =
+            (d[3] & ~(((u64 *) rwe->mask)[3])) | (((u64 *) rwe->value)[3]);
+          /* FALLTHROUGH */
+        case 1:
+          d[0] =
+            (d[0] & ~(((u64 *) rwe->mask)[0])) | (((u64 *) rwe->value)[0]);
+          d[1] =
+            (d[1] & ~(((u64 *) rwe->mask)[1])) | (((u64 *) rwe->value)[1]);
+          break;
+        default:
+          abort ();
+        }
+    }
+}
+
+static uword
+l2_rw_node_fn (vlib_main_t * vm,
+               vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  l2_rw_main_t *rw = &l2_rw_main;
+  u32 n_left_from, *from, *to_next, next_index;
+  vnet_classify_main_t *vcm = &vnet_classify_main;
+  f64 now = vlib_time_now (vlib_get_main ());
+  u32 prefetch_size = 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;       /* number of packets to process */
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      /* get space to enqueue frame to graph node "next_index" */
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          u32 bi0, next0, sw_if_index0, rwe_index0;
+          u32 bi1, next1, sw_if_index1, rwe_index1;
+          vlib_buffer_t *b0, *b1;
+          ethernet_header_t *h0, *h1;
+          l2_rw_config_t *config0, *config1;
+          u64 hash0, hash1;
+          vnet_classify_table_t *t0, *t1;
+          vnet_classify_entry_t *e0, *e1;
+          l2_rw_entry_t *rwe0, *rwe1;
+
+          {
+            vlib_buffer_t *p2, *p3;
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+            CLIB_PREFETCH (vlib_buffer_get_current (p2), prefetch_size, LOAD);
+            CLIB_PREFETCH (vlib_buffer_get_current (p3), prefetch_size, LOAD);
+          }
+
+          bi0 = from[0];
+          bi1 = from[1];
+          to_next[0] = bi0;
+          to_next[1] = bi1;
+          from += 2;
+          to_next += 2;
+          n_left_from -= 2;
+          n_left_to_next -= 2;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+          h0 = vlib_buffer_get_current (b0);
+          h1 = vlib_buffer_get_current (b1);
+
+          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+          sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+          config0 = l2_rw_get_config (sw_if_index0);    /*TODO: check sw_if_index0 value */
+          config1 = l2_rw_get_config (sw_if_index1);    /*TODO: check sw_if_index1 value */
+          t0 = pool_elt_at_index (vcm->tables, config0->table_index);
+          t1 = pool_elt_at_index (vcm->tables, config1->table_index);
+          prefetch_size =
+            (t1->skip_n_vectors + t1->match_n_vectors) * sizeof (u32x4);
+
+          hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+          hash1 = vnet_classify_hash_packet (t1, (u8 *) h1);
+          e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+          e1 = vnet_classify_find_entry (t1, (u8 *) h1, hash1, now);
+
+          while (!e0 && (t0->next_table_index != ~0))
+            {
+              t0 = pool_elt_at_index (vcm->tables, t0->next_table_index);
+              hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+              e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+            }
+
+          while (!e1 && (t1->next_table_index != ~0))
+            {
+              t1 = pool_elt_at_index (vcm->tables, t1->next_table_index);
+              hash1 = vnet_classify_hash_packet (t1, (u8 *) h1);
+              e1 = vnet_classify_find_entry (t1, (u8 *) h1, hash1, now);
+            }
+
+          rwe_index0 = e0 ? e0->opaque_index : config0->miss_index;
+          rwe_index1 = e1 ?
e1->opaque_index : config1->miss_index; + + if (rwe_index0 != ~0) + { + rwe0 = pool_elt_at_index (rw->entries, rwe_index0); + l2_rw_rewrite (rwe0, (u8 *) h0); + } + if (rwe_index1 != ~0) + { + rwe1 = pool_elt_at_index (rw->entries, rwe_index1); + l2_rw_rewrite (rwe1, (u8 *) h1); + } + + if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_rw_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->classify_table_index = config0->table_index; + t->rewrite_entry_index = rwe_index0; + } + + if (PREDICT_FALSE ((b1->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_rw_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->classify_table_index = config1->table_index; + t->rewrite_entry_index = rwe_index1; + } + + /* Update feature bitmap and get next feature index */ + next0 = vnet_l2_feature_next (b0, rw->feat_next_node_index, + L2INPUT_FEAT_RW); + next1 = vnet_l2_feature_next (b1, rw->feat_next_node_index, + L2INPUT_FEAT_RW); + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0, sw_if_index0, rwe_index0; + vlib_buffer_t *b0; + ethernet_header_t *h0; + l2_rw_config_t *config0; + u64 hash0; + vnet_classify_table_t *t0; + vnet_classify_entry_t *e0; + l2_rw_entry_t *rwe0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + config0 = l2_rw_get_config (sw_if_index0); /*TODO: check sw_if_index0 value */ + t0 = pool_elt_at_index (vcm->tables, config0->table_index); + + hash0 = vnet_classify_hash_packet (t0, (u8 *) h0); + e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now); + + while (!e0 && (t0->next_table_index != ~0)) + { + t0 = pool_elt_at_index (vcm->tables, t0->next_table_index); + hash0 = vnet_classify_hash_packet (t0, (u8 *) h0); + e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now); + } + + rwe_index0 = e0 ? 
e0->opaque_index : config0->miss_index;
+
+          if (rwe_index0 != ~0)
+            {
+              rwe0 = pool_elt_at_index (rw->entries, rwe_index0);
+              l2_rw_rewrite (rwe0, (u8 *) h0);
+            }
+
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              l2_rw_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->sw_if_index = sw_if_index0;
+              t->classify_table_index = config0->table_index;
+              t->rewrite_entry_index = rwe_index0;
+            }
+
+          /* Update feature bitmap and get next feature index */
+          next0 = vnet_l2_feature_next (b0, rw->feat_next_node_index,
+                                        L2INPUT_FEAT_RW);
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+int
+l2_rw_mod_entry (u32 * index,
+                 u8 * mask, u8 * value, u32 len, u32 skip, u8 is_del)
+{
+  l2_rw_main_t *rw = &l2_rw_main;
+  l2_rw_entry_t *e = 0;
+  if (*index != ~0)
+    {
+      if (pool_is_free_index (rw->entries, *index))
+        {
+          return -1;
+        }
+      e = pool_elt_at_index (rw->entries, *index);
+    }
+  else
+    {
+      pool_get (rw->entries, e);
+      *index = e - rw->entries;
+    }
+
+  if (!e)
+    return -1;
+
+  if (is_del)
+    {
+      pool_put (rw->entries, e);
+      return 0;
+    }
+
+  e->skip_n_vectors = skip / sizeof (u32x4);
+  skip -= e->skip_n_vectors * sizeof (u32x4);
+  e->rewrite_n_vectors = (skip + len - 1) / sizeof (u32x4) + 1;
+  vec_alloc_aligned (e->mask, e->rewrite_n_vectors, sizeof (u32x4));
+  memset (e->mask, 0, e->rewrite_n_vectors * sizeof (u32x4));
+  vec_alloc_aligned (e->value, e->rewrite_n_vectors, sizeof (u32x4));
+  memset (e->value, 0, e->rewrite_n_vectors * sizeof (u32x4));
+
+  clib_memcpy (((u8 *) e->value) + skip, value, len);
+  clib_memcpy (((u8 *) e->mask) + skip, mask, len);
+
+  int i;
+  for (i = 0; i < e->rewrite_n_vectors; i++)
+    {
+      e->value[i] &= e->mask[i];
+    }
+
+  return 0;
+}
+
+static clib_error_t *
+l2_rw_entry_cli_fn (vlib_main_t * vm,
+                    unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  u32 index = ~0;
+  u8 *mask = 0;
+  u8 *value = 0;
+  u32 skip = 0;
+  u8 del = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "index %d", &index))
+        ;
+      else if (unformat (input, "mask %U", unformat_hex_string, &mask))
+        ;
+      else if (unformat (input, "value %U", unformat_hex_string, &value))
+        ;
+      else if (unformat (input, "skip %d", &skip))
+        ;
+      else if (unformat (input, "del"))
+        del = 1;
+      else
+        break;
+    }
+
+  if (!mask || !value)
+    return clib_error_return (0, "Unspecified mask or value");
+
+  if (vec_len (mask) != vec_len (value))
+    return clib_error_return (0, "Mask and value lengths must be identical");
+
+  int ret;
+  if ((ret =
+       l2_rw_mod_entry (&index, mask, value, vec_len (mask), skip, del)))
+    return clib_error_return (0, "Could not add entry");
+
+  return 0;
+}
+
+/*?
+ * The Layer 2 Rewrite node uses classify tables to match packets. Then, using
+ * the provisioned mask and value, it modifies the packet header.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
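+ *
+ * A minimal illustrative invocation (the values are hypothetical): the mask
+ * selects the source MAC bytes of the Ethernet header and the value rewrites
+ * them to aa:bb:cc:dd:ee:ff on matching packets:
+ * @cliexcmd{l2 rewrite entry mask 000000000000ffffffffffff value 000000000000aabbccddeeff}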
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_entry_cli, static) = {
+  .path = "l2 rewrite entry",
+  .short_help =
+  "l2 rewrite entry [index <index>] [mask <hex-mask>] [value <hex-value>] [skip <n_bytes>] [del]",
+  .function = l2_rw_entry_cli_fn,
+};
+/* *INDENT-ON* */
+
+int
+l2_rw_interface_set_table (u32 sw_if_index, u32 table_index, u32 miss_index)
+{
+  l2_rw_config_t *c = l2_rw_get_config (sw_if_index);
+  l2_rw_main_t *rw = &l2_rw_main;
+
+  c->table_index = table_index;
+  c->miss_index = miss_index;
+  u32 feature_bitmap = (table_index == ~0) ? 0 : L2INPUT_FEAT_RW;
+
+  l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_RW, feature_bitmap);
+
+  if (c->table_index == ~0)
+    clib_bitmap_set (rw->configs_bitmap, sw_if_index, 0);
+
+  return 0;
+}
+
+static clib_error_t *
+l2_rw_interface_cli_fn (vlib_main_t * vm,
+                        unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 table_index = ~0;
+  u32 sw_if_index = ~0;
+  u32 miss_index = ~0;
+
+  if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index);
+    }
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "table %d", &table_index))
+        ;
+      else if (unformat (input, "miss-index %d", &miss_index))
+        ;
+      else
+        break;
+    }
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0,
+                              "You must specify an interface 'iface <interface>'",
+                              format_unformat_error, input);
+  int ret;
+  if ((ret =
+       l2_rw_interface_set_table (sw_if_index, table_index, miss_index)))
+    return clib_error_return (0, "l2_rw_interface_set_table returned %d",
+                              ret);
+
+  return 0;
+}
+
+/*?
+ * The Layer 2 Rewrite node uses classify tables to match packets. Then, using
+ * the provisioned mask and value, it modifies the packet header.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_interface_cli, static) = {
+  .path = "set interface l2 rewrite",
+  .short_help =
+  "set interface l2 rewrite <interface> [table <table index>] [miss-index <entry-index>]",
+  .function = l2_rw_interface_cli_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+l2_rw_show_interfaces_cli_fn (vlib_main_t * vm,
+                              unformat_input_t * input,
+                              vlib_cli_command_t * cmd)
+{
+  l2_rw_main_t *rw = &l2_rw_main;
+  if (clib_bitmap_count_set_bits (rw->configs_bitmap) == 0)
+    vlib_cli_output (vm, "No interface is currently using l2 rewrite\n");
+
+  uword i;
+  /* *INDENT-OFF* */
+  clib_bitmap_foreach(i, rw->configs_bitmap, {
+    vlib_cli_output (vm, "sw_if_index:%d %U\n", i, format_l2_rw_config, &rw->configs[i]);
+  });
+  /* *INDENT-ON* */
+  return 0;
+}
+
+/*?
+ * The Layer 2 Rewrite node uses classify tables to match packets. Then, using
+ * the provisioned mask and value, it modifies the packet header.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
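+ *
+ * This command lists, per interface, the classify table index and miss
+ * entry configured with 'set interface l2 rewrite', for example:
+ * @cliexcmd{show l2 rewrite interfaces}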
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_show_interfaces_cli, static) = {
+  .path = "show l2 rewrite interfaces",
+  .short_help =
+  "show l2 rewrite interfaces",
+  .function = l2_rw_show_interfaces_cli_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+l2_rw_show_entries_cli_fn (vlib_main_t * vm,
+                           unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  l2_rw_main_t *rw = &l2_rw_main;
+  l2_rw_entry_t *e;
+  if (pool_elts (rw->entries) == 0)
+    vlib_cli_output (vm, "No entries\n");
+
+  /* *INDENT-OFF* */
+  pool_foreach(e, rw->entries, {
+    vlib_cli_output (vm, "%U\n", format_l2_rw_entry, e);
+  });
+  /* *INDENT-ON* */
+  return 0;
+}
+
+/*?
+ * The Layer 2 Rewrite node uses classify tables to match packets. Then, using
+ * the provisioned mask and value, it modifies the packet header.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2_rw_show_entries_cli, static) = {
+  .path = "show l2 rewrite entries",
+  .short_help =
+  "show l2 rewrite entries",
+  .function = l2_rw_show_entries_cli_fn,
+};
+/* *INDENT-ON* */
+
+int
+l2_rw_enable_disable (u32 bridge_domain, u8 disable)
+{
+  u32 mask = L2INPUT_FEAT_RW;
+  l2input_set_bridge_features (bridge_domain, mask, disable ? 0 : mask);
+  return 0;
+}
+
+static clib_error_t *
+l2_rw_set_cli_fn (vlib_main_t * vm,
+                  unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  u32 bridge_domain;
+  u8 disable = 0;
+
+  if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT ||
+      !unformat (input, "%d", &bridge_domain))
+    {
+      return clib_error_return (0, "You must specify a bridge domain");
+    }
+
+  if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT &&
+      unformat (input, "disable"))
+    {
+      disable = 1;
+    }
+
+  if (l2_rw_enable_disable (bridge_domain, disable))
+    return clib_error_return (0, "Could not enable or disable rewrite");
+
+  return 0;
+}
+
+/*?
+ * The Layer 2 Rewrite node uses classify tables to match packets. Then, using
+ * the provisioned mask and value, it modifies the packet header.
+ *
+ * @cliexpar
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
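+ *
+ * An illustrative invocation (the bridge-domain id 200 is hypothetical):
+ * @cliexcmd{set bridge-domain rewrite 200}
+ * @cliexcmd{set bridge-domain rewrite 200 disable}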
+?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (l2_rw_set_cli, static) = { + .path = "set bridge-domain rewrite", + .short_help = + "set bridge-domain rewrite <bridge-domain> [disable]", + .function = l2_rw_set_cli_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +l2_rw_init (vlib_main_t * vm) +{ + l2_rw_main_t *rw = &l2_rw_main; + rw->configs = 0; + rw->entries = 0; + clib_bitmap_alloc (rw->configs_bitmap, 1); + feat_bitmap_init_next_nodes (vm, + l2_rw_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names (), + rw->feat_next_node_index); + return 0; +} + +VLIB_INIT_FUNCTION (l2_rw_init); + +enum +{ + L2_RW_NEXT_DROP, + L2_RW_N_NEXT, +}; + +#define foreach_l2_rw_error \ +_(UNKNOWN, "Unknown error") + +typedef enum +{ +#define _(sym,str) L2_RW_ERROR_##sym, + foreach_l2_rw_error +#undef _ + L2_RW_N_ERROR, +} l2_rw_error_t; + +static char *l2_rw_error_strings[] = { +#define _(sym,string) string, + foreach_l2_rw_error +#undef _ +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2_rw_node) = { + .function = l2_rw_node_fn, + .name = "l2-rw", + .vector_size = sizeof (u32), + .format_trace = format_l2_rw_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(l2_rw_error_strings), + .error_strings = l2_rw_error_strings, + .runtime_data_bytes = 0, + .n_next_nodes = L2_RW_N_NEXT, + .next_nodes = { [L2_RW_NEXT_DROP] = "error-drop"}, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (l2_rw_node, l2_rw_node_fn) +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_rw.h b/src/vnet/l2/l2_rw.h new file mode 100644 index 00000000..49aa25fb --- /dev/null +++ b/src/vnet/l2/l2_rw.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * l2_rw is based on vnet classifier and provides a way + * to modify packets matching a given table. + * + * Tables must be created using vnet's classify features. + * Entries contained within these tables must have their + * opaque index set to the rewrite entry created with l2_rw_mod_entry. 
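+ *
+ * A minimal illustrative sequence (hypothetical variables, error handling
+ * omitted): create a rewrite entry, set its index as the opaque index of
+ * the matching classify entries, then attach the table to an interface:
+ *
+ *    u32 entry_index = ~0;
+ *    l2_rw_mod_entry (&entry_index, mask, value, len, 0, 0);
+ *    // ... set entry_index as the opaque_index of the classify entries ...
+ *    l2_rw_interface_set_table (sw_if_index, table_index, ~0);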
+ */
+
+#ifndef L2_RW_H_
+#define L2_RW_H_
+
+#include <vnet/l2/l2_input.h>
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct _l2_rw_entry {
+  u16 skip_n_vectors;
+  u16 rewrite_n_vectors;
+  u64 hit_count;
+  u32x4 *mask;
+  u32x4 *value;
+}) l2_rw_entry_t;
+/* *INDENT-ON* */
+
+/* l2_rw configuration for one interface */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct _l2_rw_config {
+  u32 table_index; /* Which classify table to use */
+  u32 miss_index; /* Rewrite entry to use if table does not match */
+}) l2_rw_config_t;
+/* *INDENT-ON* */
+
+typedef struct
+{
+  /* Next feature node indexes */
+  u32 feat_next_node_index[32];
+
+  /* A pool of entries */
+  l2_rw_entry_t *entries;
+
+  /* Config vector indexed by sw_if_index */
+  l2_rw_config_t *configs;
+  uword *configs_bitmap;
+} l2_rw_main_t;
+
+extern l2_rw_main_t l2_rw_main;
+
+/*
+ * Specifies which classify table and miss_index should be used
+ * with the given interface.
+ * Use special values ~0 in order to un-set table_index
+ * or miss_index.
+ * l2_rw feature is automatically enabled for the interface
+ * when table_index or miss_index is not ~0.
+ * returns 0 on success and something else on error.
+ */
+int l2_rw_interface_set_table (u32 sw_if_index,
+                               u32 table_index, u32 miss_index);
+
+/*
+ * Creates, modifies or deletes a rewrite entry.
+ * If *index != ~0, modifies an existing entry (or simply
+ * deletes it if is_del is set).
+ * If *index == ~0, creates a new entry and the created
+ * entry index is stored in *index (Does nothing if is_del
+ * is set).
+ * returns 0 on success and something else on error.
+ */
+int l2_rw_mod_entry (u32 * index,
+                     u8 * mask, u8 * value, u32 len, u32 skip, u8 is_del);
+
+#endif /* L2_RW_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_vtr.c b/src/vnet/l2/l2_vtr.c
new file mode 100644
index 00000000..02a68991
--- /dev/null
+++ b/src/vnet/l2/l2_vtr.c
@@ -0,0 +1,831 @@
+/*
+ * l2_vtr.c : layer 2 vlan tag rewrite configuration
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/l2/l2_input_vtr.h>
+#include <vnet/l2/l2_output.h>
+
+#include <vppinfra/error.h>
+#include <vlib/cli.h>
+
+/**
+ * @file
+ * @brief Ethernet VLAN Tag Rewrite.
+ *
+ * VLAN tag rewrite provides the ability to change the VLAN tags on a packet.
+ * Existing tags can be popped, new tags can be pushed, and existing tags can
+ * be swapped with new tags. The rewrite feature is attached to a subinterface
+ * as input and output operations. The input operation is explicitly configured.
+ * The output operation is the symmetric opposite and is automatically derived
+ * from the input operation.
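+ *
+ * For example, if the input operation is "pop 1", the derived output
+ * operation pushes one tag back on, using the tag from the subinterface's
+ * own encapsulation, so traffic leaves the interface with the tagging it
+ * arrived with.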
+ */ + +/** Just a placeholder; ensures file is not eliminated by linker. */ +clib_error_t * +l2_vtr_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (l2_vtr_init); + +u32 +l2pbb_configure (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, u32 sw_if_index, u32 vtr_op, + u8 * b_dmac, u8 * b_smac, + u16 b_vlanid, u32 i_sid, u16 vlan_outer_tag) +{ + u32 error = 0; + u32 enable = 0; + + l2_output_config_t *config = 0; + vnet_hw_interface_t *hi; + hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index); + + if (!hi) + { + error = VNET_API_ERROR_INVALID_INTERFACE; + goto done; + } + + // Config for this interface should be already initialized + ptr_config_t *in_config; + ptr_config_t *out_config; + config = vec_elt_at_index (l2output_main.configs, sw_if_index); + in_config = &(config->input_pbb_vtr); + out_config = &(config->output_pbb_vtr); + + in_config->pop_bytes = 0; + in_config->push_bytes = 0; + out_config->pop_bytes = 0; + out_config->push_bytes = 0; + enable = (vtr_op != L2_VTR_DISABLED); + + if (!enable) + goto done; + + if (vtr_op == L2_VTR_POP_2) + { + in_config->pop_bytes = sizeof (ethernet_pbb_header_packed_t); + } + else if (vtr_op == L2_VTR_PUSH_2) + { + clib_memcpy (in_config->macs_tags.b_dst_address, b_dmac, + sizeof (in_config->macs_tags.b_dst_address)); + clib_memcpy (in_config->macs_tags.b_src_address, b_smac, + sizeof (in_config->macs_tags.b_src_address)); + in_config->macs_tags.b_type = + clib_net_to_host_u16 (ETHERNET_TYPE_DOT1AD); + in_config->macs_tags.priority_dei_id = + clib_net_to_host_u16 (b_vlanid & 0xFFF); + in_config->macs_tags.i_type = + clib_net_to_host_u16 (ETHERNET_TYPE_DOT1AH); + in_config->macs_tags.priority_dei_uca_res_sid = + clib_net_to_host_u32 (i_sid & 0xFFFFF); + in_config->push_bytes = sizeof (ethernet_pbb_header_packed_t); + } + else if (vtr_op == L2_VTR_TRANSLATE_2_2) + { + /* TODO after PoC */ + } + + /* + * Construct the output tag-rewrite config + * + * The push/pop values are always reversed + */ + out_config->raw_data = in_config->raw_data; + out_config->pop_bytes = in_config->push_bytes; + out_config->push_bytes = in_config->pop_bytes; + +done: + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_VTR, enable); + if (config) + config->out_vtr_flag = (u8) enable; + + /* output vtr enable is checked explicitly in l2_output */ + return error; +} + +/** + * Configure vtag tag rewrite on the given interface. 
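+ * vtr_op is one of the l2_vtr_op_t values; push_dot1q selects dot1q
+ * (rather than dot1ad) as the ethertype of the first pushed tag.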
+ * Return 1 if there is an error, 0 if ok
+ */
+u32
+l2vtr_configure (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 sw_if_index, u32 vtr_op, u32 push_dot1q,	/* ethertype of first pushed tag is dot1q/dot1ad */
+                 u32 vtr_tag1,  /* first pushed tag */
+                 u32 vtr_tag2)  /* second pushed tag */
+{
+  vnet_hw_interface_t *hi;
+  vnet_sw_interface_t *si;
+  u32 hw_no_tags;
+  u32 error = 0;
+  l2_output_config_t *config;
+  vtr_config_t *in_config;
+  vtr_config_t *out_config;
+  u32 enable;
+  u32 push_inner_et;
+  u32 push_outer_et;
+  u32 cfg_tags;
+
+  hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
+  if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
+    {
+      error = VNET_API_ERROR_INVALID_INTERFACE;	/* non-ethernet interface */
+      goto done;
+    }
+
+  /* Init the config for this interface */
+  vec_validate (l2output_main.configs, sw_if_index);
+  config = vec_elt_at_index (l2output_main.configs, sw_if_index);
+  in_config = &(config->input_vtr);
+  out_config = &(config->output_vtr);
+  in_config->raw_tags = 0;
+  out_config->raw_tags = 0;
+
+  /* Get the configured tags for the interface */
+  si = vnet_get_sw_interface (vnet_main, sw_if_index);
+  hw_no_tags = (si->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+
+  /* Construct the input tag-rewrite config */
+
+  push_outer_et =
+    clib_net_to_host_u16 (push_dot1q ? ETHERNET_TYPE_VLAN :
+                          ETHERNET_TYPE_DOT1AD);
+  push_inner_et = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN);
+  vtr_tag1 = clib_net_to_host_u16 (vtr_tag1);
+  vtr_tag2 = clib_net_to_host_u16 (vtr_tag2);
+
+  /* Determine number of vlan tags with explicitly configured values */
+  cfg_tags = 0;
+  if (hw_no_tags || si->sub.eth.flags.no_tags)
+    {
+      cfg_tags = 0;
+    }
+  else if (si->sub.eth.flags.one_tag)
+    {
+      cfg_tags = 1;
+      if (si->sub.eth.flags.outer_vlan_id_any)
+        {
+          cfg_tags = 0;
+        }
+    }
+  else if (si->sub.eth.flags.two_tags)
+    {
+      cfg_tags = 2;
+      if (si->sub.eth.flags.inner_vlan_id_any)
+        {
+          cfg_tags = 1;
+        }
+      if (si->sub.eth.flags.outer_vlan_id_any)
+        {
+          cfg_tags = 0;
+        }
+    }
+
+  switch (vtr_op)
+    {
+    case L2_VTR_DISABLED:
+      in_config->push_and_pop_bytes = 0;
+      break;
+
+    case L2_VTR_POP_1:
+      if (cfg_tags < 1)
+        {
+          /* Need one or two tags */
+          error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT;
+          goto done;
+        }
+      in_config->pop_bytes = 4;
+      in_config->push_bytes = 0;
+      break;
+
+    case L2_VTR_POP_2:
+      if (cfg_tags < 2)
+        {
+          error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT;	/* Need two tags */
+          goto done;
+        }
+      in_config->pop_bytes = 8;
+      in_config->push_bytes = 0;
+      break;
+
+    case L2_VTR_PUSH_1:
+      in_config->pop_bytes = 0;
+      in_config->push_bytes = 4;
+      in_config->tags[1].priority_cfi_and_id = vtr_tag1;
+      in_config->tags[1].type = push_outer_et;
+      break;
+
+    case L2_VTR_PUSH_2:
+      in_config->pop_bytes = 0;
+      in_config->push_bytes = 8;
+      in_config->tags[0].priority_cfi_and_id = vtr_tag1;
+      in_config->tags[0].type = push_outer_et;
+      in_config->tags[1].priority_cfi_and_id = vtr_tag2;
+      in_config->tags[1].type = push_inner_et;
+      break;
+
+    case L2_VTR_TRANSLATE_1_1:
+      if (cfg_tags < 1)
+        {
+          error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT;	/* Need one or two tags */
+          goto done;
+        }
+      in_config->pop_bytes = 4;
+      in_config->push_bytes = 4;
+      in_config->tags[1].priority_cfi_and_id = vtr_tag1;
+      in_config->tags[1].type = push_outer_et;
+      break;
+
+    case L2_VTR_TRANSLATE_1_2:
+      if (cfg_tags < 1)
+        {
+          error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT;	/* Need one or two tags */
+          goto done;
+        }
+      in_config->pop_bytes = 4;
+      in_config->push_bytes = 8;
+      in_config->tags[0].priority_cfi_and_id =
vtr_tag1; + in_config->tags[0].type = push_outer_et; + in_config->tags[1].priority_cfi_and_id = vtr_tag2; + in_config->tags[1].type = push_inner_et; + break; + + case L2_VTR_TRANSLATE_2_1: + if (cfg_tags < 2) + { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; /* Need two tags */ + goto done; + } + in_config->pop_bytes = 8; + in_config->push_bytes = 4; + in_config->tags[1].priority_cfi_and_id = vtr_tag1; + in_config->tags[1].type = push_outer_et; + break; + + case L2_VTR_TRANSLATE_2_2: + if (cfg_tags < 2) + { + error = VNET_API_ERROR_INVALID_VLAN_TAG_COUNT; /* Need two tags */ + goto done; + } + in_config->pop_bytes = 8; + in_config->push_bytes = 8; + in_config->tags[0].priority_cfi_and_id = vtr_tag1; + in_config->tags[0].type = push_outer_et; + in_config->tags[1].priority_cfi_and_id = vtr_tag2; + in_config->tags[1].type = push_inner_et; + break; + } + + /* + * Construct the output tag-rewrite config + * + * The push/pop values are always reversed + */ + out_config->push_bytes = in_config->pop_bytes; + out_config->pop_bytes = in_config->push_bytes; + + /* Any pushed tags are derived from the subinterface config */ + push_outer_et = + clib_net_to_host_u16 (si->sub.eth.flags.dot1ad ? ETHERNET_TYPE_DOT1AD : + ETHERNET_TYPE_VLAN); + push_inner_et = clib_net_to_host_u16 (ETHERNET_TYPE_VLAN); + vtr_tag1 = clib_net_to_host_u16 (si->sub.eth.outer_vlan_id); + vtr_tag2 = clib_net_to_host_u16 (si->sub.eth.inner_vlan_id); + + if (out_config->push_bytes == 4) + { + out_config->tags[1].priority_cfi_and_id = vtr_tag1; + out_config->tags[1].type = push_outer_et; + } + else if (out_config->push_bytes == 8) + { + out_config->tags[0].priority_cfi_and_id = vtr_tag1; + out_config->tags[0].type = push_outer_et; + out_config->tags[1].priority_cfi_and_id = vtr_tag2; + out_config->tags[1].type = push_inner_et; + } + + /* set the interface enable flags */ + enable = (vtr_op != L2_VTR_DISABLED); + config->out_vtr_flag = (u8) enable; + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_VTR, enable); + /* output vtr enable is checked explicitly in l2_output */ + +done: + return error; +} + +/** + * Get vtag tag rewrite on the given interface. 
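+ * The returned values mirror the arguments accepted by l2vtr_configure.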
+ * Return 1 if there is an error, 0 if ok + */ +u32 +l2vtr_get (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 sw_if_index, u32 * vtr_op, u32 * push_dot1q, /* ethertype of first pushed tag is dot1q/dot1ad */ + u32 * vtr_tag1, /* first pushed tag */ + u32 * vtr_tag2) /* second pushed tag */ +{ + vnet_hw_interface_t *hi; + u32 error = 0; + vtr_config_t *in_config; + + if (!vtr_op || !push_dot1q || !vtr_tag1 || !vtr_tag2) + { + clib_warning ("invalid arguments"); + error = VNET_API_ERROR_INVALID_ARGUMENT; + goto done; + } + + *vtr_op = L2_VTR_DISABLED; + *vtr_tag1 = 0; + *vtr_tag2 = 0; + *push_dot1q = 0; + + hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index); + if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index)) + { + /* non-ethernet interface */ + goto done; + } + + if (sw_if_index >= vec_len (l2output_main.configs)) + { + /* no specific config (return disabled) */ + goto done; + } + + /* Get the config for this interface */ + in_config = + &(vec_elt_at_index (l2output_main.configs, sw_if_index)->input_vtr); + + /* DISABLED */ + if (in_config->push_and_pop_bytes == 0) + { + goto done; + } + + /* find out vtr_op */ + switch (in_config->pop_bytes) + { + case 0: + switch (in_config->push_bytes) + { + case 0: + /* DISABLED */ + goto done; + case 4: + *vtr_op = L2_VTR_PUSH_1; + *vtr_tag1 = + clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = + (ETHERNET_TYPE_VLAN == + clib_host_to_net_u16 (in_config->tags[1].type)); + break; + case 8: + *vtr_op = L2_VTR_PUSH_2; + *vtr_tag1 = + clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id); + *vtr_tag2 = + clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = + (ETHERNET_TYPE_VLAN == + clib_host_to_net_u16 (in_config->tags[0].type)); + break; + default: + clib_warning ("invalid push_bytes count: %d", + in_config->push_bytes); + error = VNET_API_ERROR_UNEXPECTED_INTF_STATE; + goto done; + } + break; + + case 4: + switch (in_config->push_bytes) + { + case 0: + *vtr_op = L2_VTR_POP_1; + break; + case 4: + *vtr_op = L2_VTR_TRANSLATE_1_1; + *vtr_tag1 = + clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = + (ETHERNET_TYPE_VLAN == + clib_host_to_net_u16 (in_config->tags[1].type)); + break; + case 8: + *vtr_op = L2_VTR_TRANSLATE_1_2; + *vtr_tag1 = + clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id); + *vtr_tag2 = + clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = + (ETHERNET_TYPE_VLAN == + clib_host_to_net_u16 (in_config->tags[0].type)); + break; + default: + clib_warning ("invalid push_bytes count: %d", + in_config->push_bytes); + error = VNET_API_ERROR_UNEXPECTED_INTF_STATE; + goto done; + } + break; + + case 8: + switch (in_config->push_bytes) + { + case 0: + *vtr_op = L2_VTR_POP_2; + break; + case 4: + *vtr_op = L2_VTR_TRANSLATE_2_1; + *vtr_tag1 = + clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = + (ETHERNET_TYPE_VLAN == + clib_host_to_net_u16 (in_config->tags[1].type)); + break; + case 8: + *vtr_op = L2_VTR_TRANSLATE_2_2; + *vtr_tag1 = + clib_host_to_net_u16 (in_config->tags[0].priority_cfi_and_id); + *vtr_tag2 = + clib_host_to_net_u16 (in_config->tags[1].priority_cfi_and_id); + *push_dot1q = + (ETHERNET_TYPE_VLAN == + clib_host_to_net_u16 (in_config->tags[0].type)); + break; + default: + clib_warning ("invalid push_bytes count: %d", + in_config->push_bytes); + error = VNET_API_ERROR_UNEXPECTED_INTF_STATE; + goto done; + } + break; + + default: + clib_warning ("invalid 
pop_bytes count: %d", in_config->pop_bytes);
+      error = VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+      goto done;
+    }
+
+done:
+  return error;
+}
+
+/**
+ * Set subinterface vtr enable/disable.
+ * The CLI format is:
+ *    set interface l2 tag-rewrite <interface> [disable | pop 1 | pop 2 | push {dot1q|dot1ad} <tag> [<tag>]]
+ *
+ *  "push" can also be replaced by "translate-{1|2}-{1|2}"
+ */
+static clib_error_t *
+int_l2_vtr (vlib_main_t * vm,
+            unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  u32 sw_if_index;
+  u32 vtr_op;
+  u32 push_dot1q = 0;
+  u32 tag1 = 0, tag2 = 0;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  vtr_op = L2_VTR_DISABLED;
+
+  if (unformat (input, "disable"))
+    {
+      vtr_op = L2_VTR_DISABLED;
+    }
+  else if (unformat (input, "pop 1"))
+    {
+      vtr_op = L2_VTR_POP_1;
+    }
+  else if (unformat (input, "pop 2"))
+    {
+      vtr_op = L2_VTR_POP_2;
+
+    }
+  else if (unformat (input, "push dot1q %d %d", &tag1, &tag2))
+    {
+      vtr_op = L2_VTR_PUSH_2;
+      push_dot1q = 1;
+    }
+  else if (unformat (input, "push dot1ad %d %d", &tag1, &tag2))
+    {
+      vtr_op = L2_VTR_PUSH_2;
+
+    }
+  else if (unformat (input, "push dot1q %d", &tag1))
+    {
+      vtr_op = L2_VTR_PUSH_1;
+      push_dot1q = 1;
+    }
+  else if (unformat (input, "push dot1ad %d", &tag1))
+    {
+      vtr_op = L2_VTR_PUSH_1;
+
+    }
+  else if (unformat (input, "translate 1-1 dot1q %d", &tag1))
+    {
+      vtr_op = L2_VTR_TRANSLATE_1_1;
+      push_dot1q = 1;
+    }
+  else if (unformat (input, "translate 1-1 dot1ad %d", &tag1))
+    {
+      vtr_op = L2_VTR_TRANSLATE_1_1;
+
+    }
+  else if (unformat (input, "translate 2-1 dot1q %d", &tag1))
+    {
+      vtr_op = L2_VTR_TRANSLATE_2_1;
+      push_dot1q = 1;
+    }
+  else if (unformat (input, "translate 2-1 dot1ad %d", &tag1))
+    {
+      vtr_op = L2_VTR_TRANSLATE_2_1;
+
+    }
+  else if (unformat (input, "translate 2-2 dot1q %d %d", &tag1, &tag2))
+    {
+      vtr_op = L2_VTR_TRANSLATE_2_2;
+      push_dot1q = 1;
+    }
+  else if (unformat (input, "translate 2-2 dot1ad %d %d", &tag1, &tag2))
+    {
+      vtr_op = L2_VTR_TRANSLATE_2_2;
+
+    }
+  else if (unformat (input, "translate 1-2 dot1q %d %d", &tag1, &tag2))
+    {
+      vtr_op = L2_VTR_TRANSLATE_1_2;
+      push_dot1q = 1;
+    }
+  else if (unformat (input, "translate 1-2 dot1ad %d %d", &tag1, &tag2))
+    {
+      vtr_op = L2_VTR_TRANSLATE_1_2;
+
+    }
+  else
+    {
+      error =
+        clib_error_return (0,
+                           "expecting [disable | pop 1 | pop 2 | push {dot1q|dot1ad} <tag> [<tag>]\n"
+                           " | translate {1|2}-{1|2} {dot1q|dot1ad} <tag> [<tag>]] but got `%U'",
+                           format_unformat_error, input);
+      goto done;
+    }
+
+  if (l2vtr_configure (vm, vnm, sw_if_index, vtr_op, push_dot1q, tag1, tag2))
+    {
+      error =
+        clib_error_return (0,
+                           "vlan tag rewrite is not compatible with interface");
+      goto done;
+    }
+
+done:
+  return error;
+}
+
+/*?
+ * VLAN tag rewrite provides the ability to change the VLAN tags on a packet.
+ * Existing tags can be popped, new tags can be pushed, and existing tags can
+ * be swapped with new tags. The rewrite feature is attached to a subinterface
+ * as input and output operations. The input operation is explicitly configured.
+ * The output operation is the symmetric opposite and is automatically derived
+ * from the input operation.
+ *
+ * <b>POP:</b> For pop operations, the subinterface encapsulation (the vlan
+ * tags specified when it was created) must have at least the number of popped
+ * tags. e.g.
the \"pop 2\" operation would be rejected on a single-vlan interface. + * The output tag-rewrite operation for pops is to push the specified number of + * vlan tags onto the packet. The pushed tag values are the ones in the + * subinterface encapsulation. + * + * <b>PUSH:</b> For push operations, the ethertype is also specified. The + * output tag-rewrite operation for pushes is to pop the same number of tags + * off the packet. If the packet doesn't have enough tags it is dropped. + * + * + * @cliexpar + * @parblock + * By default a subinterface has no tag-rewrite. To return a subinterface to + * this state use: + * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 disable} + * + * To pop vlan tags off packets received from a subinterface, use: + * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 pop 1} + * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 pop 2} + * + * To push one or two vlan tags onto packets received from an interface, use: + * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 push dot1q 100} + * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 push dot1ad 100 150} + * + * Tags can also be translated, which is basically a combination of a pop and push. + * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 translate 1-1 dot1ad 100} + * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 translate 2-2 dot1ad 100 150} + * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 translate 1-2 dot1q 100} + * @cliexcmd{set interface l2 tag-rewrite GigabitEthernet0/8/0.200 translate 2-1 dot1q 100 150} + * + * To display the VLAN Tag settings, show the associate bridge-domain: + * @cliexstart{show bridge-domain 200 detail} + * ID Index Learning U-Forwrd UU-Flood Flooding ARP-Term BVI-Intf + * 200 1 on on on on off N/A + * + * Interface Index SHG BVI VLAN-Tag-Rewrite + * GigabitEthernet0/8/0.200 5 0 - trans-1-1 dot1ad 100 + * GigabitEthernet0/9/0.200 4 0 - none + * GigabitEthernet0/a/0.200 6 0 - none + * @cliexend + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (int_l2_vtr_cli, static) = { + .path = "set interface l2 tag-rewrite", + .short_help = "set interface l2 tag-rewrite <interface> [disable | pop {1|2} | push {dot1q|dot1ad} <tag> <tag>]", + .function = int_l2_vtr, +}; +/* *INDENT-ON* */ + +/** + * Get pbb tag rewrite on the given interface. 
+ * Return 1 if there is an error, 0 if ok + */ +u32 +l2pbb_get (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 sw_if_index, + u32 * vtr_op, u16 * outer_tag, ethernet_header_t * eth_hdr, + u16 * b_vlanid, u32 * i_sid) +{ + u32 error = 1; + ptr_config_t *in_config; + + if (!vtr_op || !outer_tag || !b_vlanid || !i_sid) + { + clib_warning ("invalid arguments"); + error = VNET_API_ERROR_INVALID_ARGUMENT; + goto done; + } + + *vtr_op = L2_VTR_DISABLED; + *outer_tag = 0; + *b_vlanid = 0; + *i_sid = 0; + + if (sw_if_index >= vec_len (l2output_main.configs)) + { + /* no specific config (return disabled) */ + goto done; + } + + /* Get the config for this interface */ + in_config = + &(vec_elt_at_index (l2output_main.configs, sw_if_index)->input_pbb_vtr); + + if (in_config->push_and_pop_bytes == 0) + { + /* DISABLED */ + goto done; + } + else + { + if (in_config->pop_bytes && in_config->push_bytes) + *vtr_op = L2_VTR_TRANSLATE_2_1; + else if (in_config->pop_bytes) + *vtr_op = L2_VTR_POP_2; + else if (in_config->push_bytes) + *vtr_op = L2_VTR_PUSH_2; + + clib_memcpy (ð_hdr->dst_address, in_config->macs_tags.b_dst_address, + sizeof (eth_hdr->dst_address)); + clib_memcpy (ð_hdr->src_address, in_config->macs_tags.b_src_address, + sizeof (eth_hdr->src_address)); + + *b_vlanid = + clib_host_to_net_u16 (in_config->macs_tags.priority_dei_id) & 0xFFF; + *i_sid = + clib_host_to_net_u32 (in_config->macs_tags. + priority_dei_uca_res_sid) & 0xFFFFF; + error = 0; + } +done: + return error; +} + +/** + * Set subinterface pbb vtr enable/disable. + * The CLI format is: + * set interface l2 pbb-tag-rewrite <interface> [disable | pop | push | translate_pbb_stag <outer_tag> dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]] + */ +static clib_error_t * +int_l2_pbb_vtr (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index, tmp; + u32 vtr_op = L2_VTR_DISABLED; + u32 outer_tag = 0; + u8 dmac[6]; + u8 smac[6]; + u8 dmac_set = 0, smac_set = 0; + u16 b_vlanid = 0; + u32 s_id = ~0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat_user + (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + ; + else if (unformat (input, "disable")) + vtr_op = L2_VTR_DISABLED; + else if (vtr_op == L2_VTR_DISABLED && unformat (input, "pop")) + vtr_op = L2_VTR_POP_2; + else if (vtr_op == L2_VTR_DISABLED && unformat (input, "push")) + vtr_op = L2_VTR_PUSH_2; + else if (vtr_op == L2_VTR_DISABLED + && unformat (input, "translate_pbb_stag %d", &outer_tag)) + vtr_op = L2_VTR_TRANSLATE_2_1; + else if (unformat (input, "dmac %U", unformat_ethernet_address, dmac)) + dmac_set = 1; + else if (unformat (input, "smac %U", unformat_ethernet_address, smac)) + smac_set = 1; + else if (unformat (input, "b_vlanid %d", &tmp)) + b_vlanid = tmp; + else if (unformat (input, "s_id %d", &s_id)) + ; + else + { + error = clib_error_return (0, + "expecting [disable | pop | push | translate_pbb_stag <outer_tag>\n" + "dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]]"); + goto done; + } + } + + if ((vtr_op == L2_VTR_PUSH_2 || vtr_op == L2_VTR_TRANSLATE_2_1) + && (!dmac_set || !smac_set || s_id == ~0)) + { + error = clib_error_return (0, + "expecting dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]"); + goto done; + } + + if (l2pbb_configure + (vm, vnm, sw_if_index, vtr_op, dmac, smac, b_vlanid, s_id, outer_tag)) + { + error = + clib_error_return (0, + "pbb tag rewrite is not compatible with interface"); + 
goto done; + } + +done: + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (int_l2_pbb_vtr_cli, static) = { + .path = "set interface l2 pbb-tag-rewrite", + .short_help = "set interface l2 pbb-tag-rewrite <interface> [disable | pop | push | translate_pbb_stag <outer_tag> dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]]", + .function = int_l2_pbb_vtr, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_vtr.h b/src/vnet/l2/l2_vtr.h new file mode 100644 index 00000000..0aea618e --- /dev/null +++ b/src/vnet/l2/l2_vtr.h @@ -0,0 +1,281 @@ +/* + * l2_vtr.h : layer 2 vlan tag rewrite processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_l2_vtr_h +#define included_vnet_l2_vtr_h + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/l2/l2_vtr.h> + +/* VTR config options for API and CLI support */ +typedef enum +{ + L2_VTR_DISABLED, + L2_VTR_PUSH_1, + L2_VTR_PUSH_2, + L2_VTR_POP_1, + L2_VTR_POP_2, + L2_VTR_TRANSLATE_1_1, + L2_VTR_TRANSLATE_1_2, + L2_VTR_TRANSLATE_2_1, + L2_VTR_TRANSLATE_2_2 +} l2_vtr_op_t; + +/** + * Per-interface vlan tag rewrite configuration + * There will be one instance of this struct for each sw_if_index + * for both input vtr and output vtr + */ +typedef struct +{ + union + { + /* + * Up to two vlan tags to push. + * if there is only one vlan tag to push, it is in tags[1]. + */ + ethernet_vlan_header_tv_t tags[2]; + u64 raw_tags; + }; + + union + { + struct + { + u8 push_bytes; /* number of bytes to push for up to 2 vlans (0,4,8) */ + u8 pop_bytes; /* number of bytes to pop for up to 2 vlans (0,4,8) */ + }; + u16 push_and_pop_bytes; /* if 0 then the feature is disabled */ + }; +} vtr_config_t; + + +/** + * Perform the configured tag rewrite on the packet. + * Return 0 if ok, 1 if packet should be dropped (e.g. 
tried to pop
+ * too many tags)
+ */
+always_inline u32
+l2_vtr_process (vlib_buffer_t * b0, vtr_config_t * config)
+{
+  u64 temp_8;
+  u32 temp_4;
+  u8 *eth;
+
+  eth = vlib_buffer_get_current (b0);
+
+  /* copy the 12B dmac and smac to a temporary location */
+  temp_8 = *((u64 *) eth);
+  temp_4 = *((u32 *) (eth + 8));
+
+  /* adjust for popped tags */
+  eth += config->pop_bytes;
+
+  /* if not enough tags to pop then drop packet */
+  if (PREDICT_FALSE ((vnet_buffer (b0)->l2.l2_len - 12) < config->pop_bytes))
+    {
+      return 1;
+    }
+
+  /* copy the 2 new tags to the start of the packet */
+  *((u64 *) (eth + 12 - 8)) = config->raw_tags;
+
+  /* TODO: set cos bits */
+
+  /* adjust for pushed tags: */
+  eth -= config->push_bytes;
+
+  /* copy the 12 dmac and smac back to the packet */
+  *((u64 *) eth) = temp_8;
+  *((u32 *) (eth + 8)) = temp_4;
+
+  /* Update l2_len */
+  vnet_buffer (b0)->l2.l2_len +=
+    (word) config->push_bytes - (word) config->pop_bytes;
+
+  /* Update vlan tag count */
+  ethernet_buffer_adjust_vlan_count_by_bytes (b0,
+                                              (word) config->push_bytes -
+                                              (word) config->pop_bytes);
+
+  /* Update packet len */
+  vlib_buffer_advance (b0,
+                       (word) config->pop_bytes - (word) config->push_bytes);
+
+  return 0;
+}
+
+
+/*
+ * Perform the egress pre-vlan tag rewrite EFP Filter check.
+ * The post-vlan tag rewrite check is a separate graph node.
+ *
+ * This check ensures that a packet being output to an interface
+ * (before output vtr is performed) has vlan tags that match those
+ * on a packet received from that interface (after vtr has been performed).
+ * This means verifying that any tags pushed by input vtr are present
+ * on the packet.
+ *
+ * Return 0 if ok, 1 if packet should be dropped.
+ * This function should be passed the input vtr config for the interface.
+ */
+always_inline u8
+l2_efp_filter_process (vlib_buffer_t * b0, vtr_config_t * in_config)
+{
+  u8 *eth;
+  u64 packet_tags;
+  u64 tag_mask;
+
+  eth = vlib_buffer_get_current (b0);
+
+  /*
+   * If there are 2 tags pushed, they must match config->tags[0] and
+   * config->tags[1].
+   * If there is one tag pushed, it must match config->tags[1].
+   * If there are 0 tags pushed, the check passes.
+   */
+
+  /* mask for two vlan id and ethertypes, no cos bits */
+  tag_mask = clib_net_to_host_u64 (0xFFFF0FFFFFFF0FFF);
+  /* mask for one vlan id and ethertype, no cos bits */
+  tag_mask =
+    (in_config->push_bytes ==
+     4) ? clib_net_to_host_u64 (0xFFFF0FFF) : tag_mask;
+  /* mask for always match */
+  tag_mask = (in_config->push_bytes == 0) ? 0 : tag_mask;
+
+  /*
+   * Read 8B from the packet, getting the proper set of vlan tags
+   * For 0 push bytes, the address doesn't matter since the mask
+   * clears the data to 0.
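+   *
+   * Illustration (one pushed dot1q tag, push_bytes == 4): the 8-byte read
+   * at (eth + 8) ends with the outer TPID and TCI; the 0xFFFF0FFF mask
+   * keeps the TPID and the 12-bit VLAN ID while ignoring the PCP/DEI bits
+   * and the unrelated MAC bytes at the start of the window.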
+ */ + packet_tags = *((u64 *) (eth + 4 + in_config->push_bytes)); + + /* Check if the packet tags match the configured tags */ + return (packet_tags & tag_mask) != in_config->raw_tags; +} + +typedef struct +{ + union + { + ethernet_pbb_header_t macs_tags; + struct + { + u64 data1; + u64 data2; + u16 data3; + u32 data4; + } raw_data; + }; + union + { + struct + { + u8 push_bytes; /* number of bytes to push pbb tags */ + u8 pop_bytes; /* number of bytes to pop pbb tags */ + }; + u16 push_and_pop_bytes; /* if 0 then the feature is disabled */ + }; +} ptr_config_t; + +always_inline u32 +l2_pbb_process (vlib_buffer_t * b0, ptr_config_t * config) +{ + u8 *eth = vlib_buffer_get_current (b0); + + if (config->pop_bytes > 0) + { + ethernet_pbb_header_packed_t *ph = (ethernet_pbb_header_packed_t *) eth; + + // drop packet without PBB header or with wrong I-tag or B-tag + if (clib_net_to_host_u16 (ph->priority_dei_id) != + clib_net_to_host_u16 (config->macs_tags.priority_dei_id) + || clib_net_to_host_u32 (ph->priority_dei_uca_res_sid) != + clib_net_to_host_u32 (config->macs_tags.priority_dei_uca_res_sid)) + return 1; + + eth += config->pop_bytes; + } + + if (config->push_bytes > 0) + { + eth -= config->push_bytes; + // copy the B-DA (6B), B-SA (6B), B-TAG (4B), I-TAG (6B) + *((u64 *) eth) = config->raw_data.data1; + *((u64 *) (eth + 8)) = config->raw_data.data2; + *((u16 *) (eth + 16)) = config->raw_data.data3; + *((u32 *) (eth + 18)) = config->raw_data.data4; + } + + /* Update l2_len */ + vnet_buffer (b0)->l2.l2_len += + (word) config->push_bytes - (word) config->pop_bytes; + /* Update packet len */ + vlib_buffer_advance (b0, + (word) config->pop_bytes - (word) config->push_bytes); + + return 0; +} + +u32 l2pbb_configure (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, u32 sw_if_index, u32 vtr_op, + u8 * b_dmac, u8 * b_smac, + u16 b_vlanid, u32 i_sid, u16 vlan_outer_tag); + +/** + * Configure vtag tag rewrite on the given interface. + * Return 1 if there is an error, 0 if ok + */ +u32 l2vtr_configure (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index, + u32 vtr_op, u32 push_dot1q, u32 vtr_tag1, u32 vtr_tag2); + +/** + * Get vtag tag rewrite on the given interface. + * Return 1 if there is an error, 0 if ok + */ +u32 l2vtr_get (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index, + u32 * vtr_op, + u32 * push_dot1q, u32 * vtr_tag1, u32 * vtr_tag2); + +/** + * Get pbb tag rewrite on the given interface. + * Return 1 if there is an error, 0 if ok + */ +u32 l2pbb_get (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index, + u32 * vtr_op, + u16 * outer_tag, + ethernet_header_t * eth_hdr, u16 * b_vlanid, u32 * i_sid); + +#endif /* included_vnet_l2_vtr_h */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_xcrw.c b/src/vnet/l2/l2_xcrw.c new file mode 100644 index 00000000..d08a5d8f --- /dev/null +++ b/src/vnet/l2/l2_xcrw.c @@ -0,0 +1,607 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/l2/l2_xcrw.h> + +/** + * @file + * General L2 / L3 cross-connect, used to set up + * "L2 interface <--> your-favorite-tunnel-encap" tunnels. + * + * We set up a typical L2 cross-connect or (future) bridge + * to hook L2 interface(s) up to the L3 stack in arbitrary ways. + * + * Each l2_xcrw adjacency specifies 3 things: + * + * 1. The next graph node (presumably in the L3 stack) to + * process the (L2 -> L3) packet + * + * 2. A new value for vnet_buffer(b)->sw_if_index[VLIB_TX] + * (i.e. a lookup FIB index), + * + * 3. A rewrite string to apply. + * + * Example: to cross-connect an L2 interface or (future) bridge + * to an mpls-o-gre tunnel, set up the L2 rewrite string as shown in + * mpls_gre_rewrite, and use "mpls-post-rewrite" to fix the + * GRE IP header checksum and length fields. + */ + +typedef struct +{ + u32 next_index; + u32 tx_fib_index; +} l2_xcrw_trace_t; + +/* packet trace format function */ +static u8 * +format_l2_xcrw_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_xcrw_trace_t *t = va_arg (*args, l2_xcrw_trace_t *); + + s = format (s, "L2_XCRW: next index %d tx_fib_index %d", + t->next_index, t->tx_fib_index); + return s; +} + +l2_xcrw_main_t l2_xcrw_main; + +static vlib_node_registration_t l2_xcrw_node; + +static char *l2_xcrw_error_strings[] = { +#define _(sym,string) string, + foreach_l2_xcrw_error +#undef _ +}; + +static uword +l2_xcrw_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + l2_xcrw_next_t next_index; + l2_xcrw_main_t *xcm = &l2_xcrw_main; + vlib_node_t *n = vlib_get_node (vm, l2_xcrw_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t *em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + l2_xcrw_adjacency_t *adj0, *adj1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + adj0 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index0); + adj1 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index1); + + next0 = adj0->rewrite_header.next_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + adj0->rewrite_header.sw_if_index; + + next1 = adj1->rewrite_header.next_index; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = + adj1->rewrite_header.sw_if_index; + + em->counters[node_counter_base_index + next1]++; + + if (PREDICT_TRUE (next0 > 0)) + { + u8 *h0 = vlib_buffer_get_current (b0); + vnet_rewrite_one_header (adj0[0], h0, + adj0->rewrite_header.data_bytes); + vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); + em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++; + } + + if (PREDICT_TRUE (next1 > 0)) + { + u8 *h1 = vlib_buffer_get_current (b1); + vnet_rewrite_one_header (adj1[0], h1, + adj1->rewrite_header.data_bytes); + vlib_buffer_advance (b1, -adj1->rewrite_header.data_bytes); + em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++; + } + + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_xcrw_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->tx_fib_index = adj0->rewrite_header.sw_if_index; + } + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_xcrw_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->next_index = next1; + t->tx_fib_index = adj1->rewrite_header.sw_if_index; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 sw_if_index0; + l2_xcrw_adjacency_t *adj0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + adj0 = vec_elt_at_index (xcm->adj_by_sw_if_index, sw_if_index0); + + next0 = adj0->rewrite_header.next_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + adj0->rewrite_header.sw_if_index; + + if (PREDICT_TRUE (next0 > 0)) + { + u8 *h0 = vlib_buffer_get_current (b0); + vnet_rewrite_one_header (adj0[0], h0, + adj0->rewrite_header.data_bytes); + vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); + em->counters[node_counter_base_index + L2_XCRW_ERROR_FWD]++; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2_xcrw_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + 
+              t->next_index = next0;
+              t->tx_fib_index = adj0->rewrite_header.sw_if_index;
+            }
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2_xcrw_node, static) = {
+  .function = l2_xcrw_node_fn,
+  .name = "l2-xcrw",
+  .vector_size = sizeof (u32),
+  .format_trace = format_l2_xcrw_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(l2_xcrw_error_strings),
+  .error_strings = l2_xcrw_error_strings,
+
+  .n_next_nodes = L2_XCRW_N_NEXT,
+
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [L2_XCRW_NEXT_DROP] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_xcrw_node, l2_xcrw_node_fn)
+
+clib_error_t *
+l2_xcrw_init (vlib_main_t * vm)
+{
+  l2_xcrw_main_t *mp = &l2_xcrw_main;
+
+  mp->vlib_main = vm;
+  mp->vnet_main = vnet_get_main ();
+  mp->tunnel_index_by_l2_sw_if_index = hash_create (0, sizeof (uword));
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (l2_xcrw_init);
+
+static uword
+dummy_interface_tx (vlib_main_t * vm,
+                    vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  clib_warning ("you shouldn't be here, leaking buffers...");
+  return frame->n_vectors;
+}
+
+static u8 *
+format_xcrw_name (u8 * s, va_list * args)
+{
+  u32 dev_instance = va_arg (*args, u32);
+  return format (s, "xcrw%d", dev_instance);
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (xcrw_device_class,static) = {
+  .name = "Xcrw",
+  .format_device_name = format_xcrw_name,
+  .tx_function = dummy_interface_tx,
+};
+/* *INDENT-ON* */
+
+/* Create a sham tunnel interface and return its sw_if_index */
+static u32
+create_xcrw_interface (vlib_main_t * vm)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  static u32 instance;
+  u8 address[6];
+  u32 hw_if_index;
+  vnet_hw_interface_t *hi;
+  u32 sw_if_index;
+
+  /* mac address doesn't really matter */
+  memset (address, 0, sizeof (address));
+  address[2] = 0x12;
+
+  /* can return an error iff phy != 0, so ignore the return value */
+  (void) ethernet_register_interface
+    (vnm, xcrw_device_class.index, instance++, address, &hw_if_index,
+     /* flag change */ 0);
+
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
+  sw_if_index = hi->sw_if_index;
+  vnet_sw_interface_set_flags (vnm, sw_if_index,
+                               VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+  /* Output to the sham tunnel invokes the encap node */
+  hi->output_node_index = l2_xcrw_node.index;
+
+  return sw_if_index;
+}
+
+int
+vnet_configure_l2_xcrw (vlib_main_t * vm, vnet_main_t * vnm,
+                        u32 l2_sw_if_index, u32 tx_fib_index,
+                        u8 * rewrite, u32 next_node_index, int is_add)
+{
+  l2_xcrw_main_t *xcm = &l2_xcrw_main;
+  l2_xcrw_adjacency_t *a;
+  l2_xcrw_tunnel_t *t;
+  uword *p;
+
+  if (is_add)
+    {
+      pool_get (xcm->tunnels, t);
+
+      /* No interface allocated? Do it. Otherwise, set admin up */
+      if (t->tunnel_sw_if_index == 0)
+        t->tunnel_sw_if_index = create_xcrw_interface (vm);
+      else
+        vnet_sw_interface_set_flags (vnm, t->tunnel_sw_if_index,
+                                     VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+      t->l2_sw_if_index = l2_sw_if_index;
+
+      vec_validate (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+
+      a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+      memset (a, 0, sizeof (*a));
+
+      a->rewrite_header.sw_if_index = tx_fib_index;
+
+      /*
+       * Add or find a dynamic disposition for the successor node,
+       * e.g. so we can ship pkts to mpls_post_rewrite...
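+       *
+       * vlib_node_add_next adds (or finds) the arc from l2-xcrw to
+       * next_node_index and returns its slot; that slot is the
+       * per-adjacency next_index the forwarding loop above
+       * dispatches on.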
+       */
+      a->rewrite_header.next_index =
+        vlib_node_add_next (vm, l2_xcrw_node.index, next_node_index);
+
+      if (vec_len (rewrite))
+        vnet_rewrite_set_data (a[0], rewrite, vec_len (rewrite));
+
+      set_int_l2_mode (vm, vnm, MODE_L2_XC, t->l2_sw_if_index, 0, 0, 0,
+                       t->tunnel_sw_if_index);
+      hash_set (xcm->tunnel_index_by_l2_sw_if_index,
+                t->l2_sw_if_index, t - xcm->tunnels);
+      return 0;
+    }
+  else
+    {
+      p = hash_get (xcm->tunnel_index_by_l2_sw_if_index, l2_sw_if_index);
+      if (p == 0)
+        return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+      t = pool_elt_at_index (xcm->tunnels, p[0]);
+
+      a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+      /* Reset adj to drop traffic */
+      memset (a, 0, sizeof (*a));
+
+      set_int_l2_mode (vm, vnm, MODE_L3, t->l2_sw_if_index, 0, 0, 0, 0);
+
+      vnet_sw_interface_set_flags (vnm, t->tunnel_sw_if_index, 0 /* down */ );
+
+      hash_unset (xcm->tunnel_index_by_l2_sw_if_index, l2_sw_if_index);
+      pool_put (xcm->tunnels, t);
+    }
+  return 0;
+}
+
+static clib_error_t *
+set_l2_xcrw_command_fn (vlib_main_t * vm,
+                        unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  int is_add = 1;
+  int is_ipv6 = 0;              /* for fib id -> fib index mapping */
+  u32 tx_fib_id = ~0;
+  u32 tx_fib_index = ~0;
+  u32 next_node_index = ~0;
+  u32 l2_sw_if_index;
+  u8 *rw = 0;
+  vnet_main_t *vnm = vnet_get_main ();
+  int rv;
+  clib_error_t *error = NULL;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  if (!unformat (line_input, "%U",
+                 unformat_vnet_sw_interface, vnm, &l2_sw_if_index))
+    {
+      error = clib_error_return (0, "unknown input '%U'",
+                                 format_unformat_error, line_input);
+      goto done;
+    }
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "next %U",
+                    unformat_vlib_node, vm, &next_node_index))
+        ;
+      else if (unformat (line_input, "tx-fib-id %d", &tx_fib_id))
+        ;
+      else if (unformat (line_input, "del"))
+        is_add = 0;
+      else if (unformat (line_input, "ipv6"))
+        is_ipv6 = 1;
+      else if (unformat (line_input, "rw %U", unformat_hex_string, &rw))
+        ;
+      else
+        break;
+    }
+
+  if (next_node_index == ~0)
+    {
+      error = clib_error_return (0, "next node not specified");
+      goto done;
+    }
+
+  if (tx_fib_id != ~0)
+    {
+      uword *p;
+
+      if (is_ipv6)
+        p = hash_get (ip6_main.fib_index_by_table_id, tx_fib_id);
+      else
+        p = hash_get (ip4_main.fib_index_by_table_id, tx_fib_id);
+
+      if (p == 0)
+        {
+          error =
+            clib_error_return (0, "nonexistent tx_fib_id %d", tx_fib_id);
+          goto done;
+        }
+
+      tx_fib_index = p[0];
+    }
+
+  rv = vnet_configure_l2_xcrw (vm, vnm, l2_sw_if_index, tx_fib_index,
+                               rw, next_node_index, is_add);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+      error = clib_error_return (0, "%U not cross-connected",
+                                 format_vnet_sw_if_index_name,
+                                 vnm, l2_sw_if_index);
+      goto done;
+
+    default:
+      error = clib_error_return (0, "vnet_configure_l2_xcrw returned %d", rv);
+      goto done;
+    }
+
+done:
+  vec_free (rw);
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*?
+ * Add or delete a Layer 2 to Layer 3 rewrite cross-connect. This is
+ * used to hook Layer 2 interface(s) up to the Layer 3 stack in
+ * arbitrary ways. For example, cross-connect an L2 interface or
+ * (future) bridge to an mpls-o-gre tunnel. Set up the L2 rewrite
+ * string as shown in mpls_gre_rewrite, and use \"mpls-post-rewrite\"
+ * to fix the GRE IP header checksum and length fields.
+ *
+ * @cliexpar
+ * @todo This is incomplete.
+ * This needs a detailed description and a practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = {
+  .path = "set interface l2 xcrw",
+  .short_help =
+  "set interface l2 xcrw <interface> next <node-name>\n"
+  "    [del] [tx-fib-id <id>] [ipv6] rw <hex-bytes>",
+  .function = set_l2_xcrw_command_fn,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_l2xcrw (u8 * s, va_list * args)
+{
+  vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+  l2_xcrw_tunnel_t *t = va_arg (*args, l2_xcrw_tunnel_t *);
+  l2_xcrw_main_t *xcm = &l2_xcrw_main;
+  vlib_main_t *vm = vlib_get_main ();
+  l2_xcrw_adjacency_t *a;
+  u8 *rewrite_string;
+
+  if (t == 0)
+    {
+      s = format (s, "%-25s%s", "L2 interface", "Tunnel Details");
+      return s;
+    }
+
+  s = format (s, "%-25U %U ",
+              format_vnet_sw_if_index_name, vnm, t->l2_sw_if_index,
+              format_vnet_sw_if_index_name, vnm, t->tunnel_sw_if_index);
+
+  a = vec_elt_at_index (xcm->adj_by_sw_if_index, t->l2_sw_if_index);
+
+  s = format (s, "next %U ",
+              format_vlib_next_node_name, vm, l2_xcrw_node.index,
+              a->rewrite_header.next_index);
+
+  if (a->rewrite_header.sw_if_index != ~0)
+    s = format (s, "tx fib index %d ", a->rewrite_header.sw_if_index);
+
+  if (a->rewrite_header.data_bytes)
+    {
+      rewrite_string = (u8 *) (a + 1);
+      rewrite_string -= a->rewrite_header.data_bytes;
+      s = format (s, "rewrite data: %U ",
+                  format_hex_bytes, rewrite_string,
+                  a->rewrite_header.data_bytes);
+    }
+
+  s = format (s, "\n");
+
+  return s;
+}
+
+static clib_error_t *
+show_l2xcrw_command_fn (vlib_main_t * vm,
+                        unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  l2_xcrw_main_t *xcm = &l2_xcrw_main;
+  l2_xcrw_tunnel_t *t;
+
+  if (pool_elts (xcm->tunnels) == 0)
+    {
+      vlib_cli_output (vm, "No L2 / L3 rewrite cross-connects configured");
+      return 0;
+    }
+
+  vlib_cli_output (vm, "%U", format_l2xcrw, 0, 0);
+
+  /* *INDENT-OFF* */
+  pool_foreach (t, xcm->tunnels,
+  ({
+    vlib_cli_output (vm, "%U", format_l2xcrw, vnm, t);
+  }));
+  /* *INDENT-ON* */
+
+  return 0;
+}
+
+/*?
+ * Display Layer 2 to Layer 3 rewrite cross-connects. This is used to
+ * hook Layer 2 interface(s) up to the Layer 3 stack in arbitrary ways.
+ *
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_l2xcrw_command, static) = {
+  .path = "show l2xcrw",
+  .short_help = "show l2xcrw",
+  .function = show_l2xcrw_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_xcrw.h b/src/vnet/l2/l2_xcrw.h
new file mode 100644
index 00000000..ca80aae9
--- /dev/null
+++ b/src/vnet/l2/l2_xcrw.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_l2_xcrw_h__
+#define __included_l2_xcrw_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/api_errno.h>
+#include <vnet/ethernet/ethernet.h>
+
+typedef struct
+{
+  /*
+   * Let: rewrite_header.sw_if_index = tx_fib_index or ~0.
+   *      rewrite_header.next_index = L2_XCRW_NEXT_XXX
+   */
+  vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE);
+} l2_xcrw_adjacency_t;
+
+typedef struct
+{
+  /* L2 interface */
+  u32 l2_sw_if_index;
+
+  /* Tunnel interface */
+  u32 tunnel_sw_if_index;       /* This field remains set in freed pool elts */
+
+} l2_xcrw_tunnel_t;
+
+typedef struct
+{
+  u32 cached_next_index;
+
+  /* Vector of cross-connect rewrites */
+  l2_xcrw_adjacency_t *adj_by_sw_if_index;
+
+  /* Pool of xcrw tunnels */
+  l2_xcrw_tunnel_t *tunnels;
+
+  /* Tunnel index by L2 sw_if_index */
+  uword *tunnel_index_by_l2_sw_if_index;
+
+  /* convenience variables */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+} l2_xcrw_main_t;
+
+typedef enum
+{
+  L2_XCRW_NEXT_DROP,
+  L2_XCRW_N_NEXT,
+} l2_xcrw_next_t;
+
+#define foreach_l2_xcrw_error                   \
+_(DROP, "Packets dropped")                      \
+_(FWD, "Packets forwarded")
+
+typedef enum
+{
+#define _(sym,str) L2_XCRW_ERROR_##sym,
+  foreach_l2_xcrw_error
+#undef _
+  L2_XCRW_N_ERROR,
+} l2_xcrw_error_t;
+
+#endif /* __included_l2_xcrw_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
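For orientation, here is a minimal sketch of driving the cross-connect API
above from C instead of the CLI, roughly equivalent to
"set interface l2 xcrw <interface> next ip4-lookup tx-fib-id 0". It is an
editor's illustration under stated assumptions, not part of this patch: the
sw_if_index value, the choice of the ip4-lookup node, and the function name
l2_xcrw_example_setup are all hypothetical, and vnet_configure_l2_xcrw is
declared locally because l2_xcrw.h does not export it.

/* a minimal usage sketch, assuming an existing L2 interface */
#include <vnet/l2/l2_xcrw.h>
#include <vnet/ip/ip.h>

/* defined (non-static) in l2_xcrw.c, but not declared in l2_xcrw.h */
extern int vnet_configure_l2_xcrw (vlib_main_t * vm, vnet_main_t * vnm,
                                   u32 l2_sw_if_index, u32 tx_fib_index,
                                   u8 * rewrite, u32 next_node_index,
                                   int is_add);

static clib_error_t *
l2_xcrw_example_setup (vlib_main_t * vm)
{
  vnet_main_t *vnm = vnet_get_main ();
  u32 l2_sw_if_index = 1;       /* assumption: an existing L2 interface */
  u8 *rw = 0;                   /* empty rewrite: retarget the lookup only */
  vlib_node_t *next_node;
  uword *p;
  u32 tx_fib_index;
  int rv;

  /* Resolve the successor graph node by name, as "next <node>" does */
  next_node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup");
  if (next_node == 0)
    return clib_error_return (0, "ip4-lookup node not found");

  /* Map IPv4 fib id 0 to a fib index, as the CLI handler does */
  p = hash_get (ip4_main.fib_index_by_table_id, 0);
  if (p == 0)
    return clib_error_return (0, "fib id 0 not found");
  tx_fib_index = p[0];

  rv = vnet_configure_l2_xcrw (vm, vnm, l2_sw_if_index, tx_fib_index,
                               rw, next_node->index, 1 /* is_add */ );
  if (rv != 0)
    return clib_error_return (0, "vnet_configure_l2_xcrw returned %d", rv);

  return 0;
}

Deletion mirrors the CLI's "del" flag: call vnet_configure_l2_xcrw again
with is_add = 0, which resets the adjacency to drop, returns the interface
to L3 mode, and puts the tunnel back in the pool.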