From 405e41b50e336dccfdeeafae93bf4453774ecfec Mon Sep 17 00:00:00 2001
From: John Lo
Date: Sat, 23 Apr 2016 15:14:12 -0400
Subject: Improve mechanism for using loopback interface as BVI for BDs

When loopback interface is configured as BVI, instead of changing its
output node from loopN-output to l2-input, the loopN-output node is now
kept while its next tx node is changed from ethernet-input to l2-input.
The packet setup previously done in bvi_to_l2 as part of l2-input is now
performed in the loop output node.

This change adds an extra node in the BVI output path but provides the
following improvements:
1. IP address/route created on loopback prior to it being configured as
   BVI will still work properly. The requirement to (re)configure
   IP/route on loopback after it is configured as BVI is removed.
2. The output stats for loopback interfaces are always provided
   irrespective of their BVI configuration.
3. The loopback-BVI output stats can be batch updated outside the packet
   loop in output node, instead of per packet update in l2-input node,
   making l2-input node more efficient for BVI packets.
4. Restore original node property as implemented in node.c function
   vlib_node_add_next_with_slot() where next node indices stored in next
   slots of each node will remain unique.
5. Packet trace for BVI output includes loopN output node which provides
   useful packet data.

Change-Id: I7f5bc72ef953a367363a179088210596881f9e73 Signed-off-by: John Lo --- vlib/vlib/node.c | 9 ++++--- vnet/vnet/ethernet/interface.c | 48 +++++++++++++++++++++++++++-------- vnet/vnet/interface_funcs.h | 2 ++ vnet/vnet/l2/l2_bd.h | 3 --- vnet/vnet/l2/l2_bvi.h | 29 --------------------- vnet/vnet/l2/l2_input.c | 57 ++++++++++++++++-------------------------- vnet/vnet/l2/l2_input.h | 2 ++ 7 files changed, 68 insertions(+), 82 deletions(-) diff --git a/vlib/vlib/node.c b/vlib/vlib/node.c index 1df786f0c89..7378701ab4d 100644 --- a/vlib/vlib/node.c +++ b/vlib/vlib/node.c @@ -177,9 +177,10 @@ vlib_node_add_next_with_slot (vlib_main_t * vm, if ((p = hash_get (node->next_slot_by_node, next_node_index))) { - /* Next already exists: use it if slot not specified or the same. */ - if ((slot == ~0) || (slot == p[0])) - return p[0]; + /* Next already exists: slot must match. */ + if (slot != ~0) + ASSERT (slot == p[0]); + return p[0]; } if (slot == ~0) @@ -189,7 +190,7 @@ vlib_node_add_next_with_slot (vlib_main_t * vm, vec_validate (node->n_vectors_by_next_node, slot); node->next_nodes[slot] = next_node_index; - if (!p) hash_set (node->next_slot_by_node, next_node_index, slot); + hash_set (node->next_slot_by_node, next_node_index, slot); vlib_node_runtime_update (vm, node_index, slot); diff --git a/vnet/vnet/ethernet/interface.c b/vnet/vnet/ethernet/interface.c index bb8c75364c8..6bc9f281a15 100644 --- a/vnet/vnet/ethernet/interface.c +++ b/vnet/vnet/ethernet/interface.c @@ -41,6 +41,7 @@ #include #include #include +#include static uword ethernet_set_rewrite (vnet_main_t * vnm, u32 sw_if_index, @@ -255,9 +256,7 @@ ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags) return (u32)~0; } -#define VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT VNET_INTERFACE_TX_N_NEXT - -/* Echo packets back to ethernet input. */ +/* Echo packets back to ethernet/l2-input. 
*/ static uword simulated_ethernet_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -265,9 +264,22 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, { u32 n_left_from, n_left_to_next, n_copy, * from, * to_next; u32 next_index = VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT; - u32 i; + u32 i, next_node_index, bvi_flag, sw_if_index; + u32 n_pkts = 0, n_bytes = 0; + u32 cpu_index = vm->cpu_index; + vnet_main_t * vnm = vnet_get_main(); + vnet_interface_main_t * im = &vnm->interface_main; + vlib_node_main_t * nm = &vm->node_main; + vlib_node_t *loop_node; vlib_buffer_t * b; + // check tx node index, it is ethernet-input on loopback create + // but can be changed to l2-input if loopback is configured as + // BVI of a BD (Bridge Domain). + loop_node = vec_elt (nm->nodes, node->node_index); + next_node_index = loop_node->next_nodes[next_index]; + bvi_flag = (next_node_index == l2input_node.index)? 1 : 0; + n_left_from = frame->n_vectors; from = vlib_frame_args (frame); @@ -280,16 +292,32 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, clib_memcpy (to_next, from, n_copy * sizeof (from[0])); n_left_to_next -= n_copy; n_left_from -= n_copy; - for (i = 0; i < n_copy; i++) + i = 0; + b = vlib_get_buffer (vm, from[i]); + sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX]; + while (1) { - b = vlib_get_buffer (vm, from[i]); - /* Set up RX and TX indices as if received from a real driver */ - vnet_buffer (b)->sw_if_index[VLIB_RX] = - vnet_buffer (b)->sw_if_index[VLIB_TX]; - vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~0; + // Set up RX and TX indices as if received from a real driver + // unless loopback is used as a BVI. 
For BVI case, leave TX index + // and update l2_len in packet as required for l2 forwarding path + vnet_buffer (b)->sw_if_index[VLIB_RX] = sw_if_index; + if (bvi_flag) vnet_update_l2_len(b); + else vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~0; + + i++; + n_pkts++; + n_bytes += vlib_buffer_length_in_chain (vm, b); + + if (i < n_copy) b = vlib_get_buffer (vm, from[i]); + else break; } vlib_put_next_frame (vm, node, next_index, n_left_to_next); + + /* increment TX interface stat */ + vlib_increment_combined_counter ( + im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, + cpu_index, sw_if_index, n_pkts, n_bytes); } return n_left_from; diff --git a/vnet/vnet/interface_funcs.h b/vnet/vnet/interface_funcs.h index ab89b94efe2..9d6fe48e23c 100644 --- a/vnet/vnet/interface_funcs.h +++ b/vnet/vnet/interface_funcs.h @@ -204,6 +204,8 @@ typedef enum { VNET_INTERFACE_TX_N_NEXT, } vnet_interface_tx_next_t; +#define VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT VNET_INTERFACE_TX_N_NEXT + typedef enum { VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN, VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DELETED, diff --git a/vnet/vnet/l2/l2_bd.h b/vnet/vnet/l2/l2_bd.h index 9ff0a864c59..034f2b39875 100644 --- a/vnet/vnet/l2/l2_bd.h +++ b/vnet/vnet/l2/l2_bd.h @@ -59,9 +59,6 @@ typedef struct { // set to ~0 if there is no BVI u32 bvi_sw_if_index; - // output node index for bvi interface before it was changed to l2-input - u32 saved_bvi_output_node_index; - // bridge domain id, not to be confused with bd_index u32 bd_id; diff --git a/vnet/vnet/l2/l2_bvi.h b/vnet/vnet/l2/l2_bvi.h index ca5673373fb..b3b20d63104 100644 --- a/vnet/vnet/l2/l2_bvi.h +++ b/vnet/vnet/l2/l2_bvi.h @@ -86,35 +86,6 @@ l2_to_bvi (vlib_main_t * vlib_main, return TO_BVI_ERR_OK; } - -// Prepare a packet that was sent to the BVI interface for L2 processing. 
- -static_always_inline void -bvi_to_l2 (vlib_main_t * vlib_main, - vnet_main_t * vnet_main, - u32 cpu_index, - vlib_buffer_t * b0, - u32 bvi_sw_if_index) -{ - // Set the input interface to be the BVI interface - vnet_buffer(b0)->sw_if_index[VLIB_RX] = bvi_sw_if_index; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = ~0; - - // Update l2_len in packet which is expected by l2 path, - // including l2 tag push/pop code on output - vnet_update_l2_len(b0); - - // increment BVI TX interface stat - vlib_increment_combined_counter - (vnet_main->interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_TX, - cpu_index, - bvi_sw_if_index, - 1, - vlib_buffer_length_in_chain (vlib_main, b0)); -} - - void l2bvi_register_input_type (vlib_main_t * vm, ethernet_type_t type, diff --git a/vnet/vnet/l2/l2_input.c b/vnet/vnet/l2/l2_input.c index ee8f788f60c..870aff68794 100644 --- a/vnet/vnet/l2/l2_input.c +++ b/vnet/vnet/l2/l2_input.c @@ -34,9 +34,6 @@ #include #include -extern clib_error_t * -vnet_per_buffer_interface_output_hw_interface_add_del ( - vnet_main_t * vnm, u32 hw_if_index, u32 is_create); // Feature graph node names static char * l2input_feat_names[] = { @@ -74,8 +71,6 @@ static u8 * format_l2input_trace (u8 * s, va_list * args) l2input_main_t l2input_main; -static vlib_node_registration_t l2input_node; - #define foreach_l2input_error \ _(L2INPUT, "L2 input packets") \ _(DROP, "L2 input drops") @@ -135,7 +130,6 @@ classify_and_dispatch (vlib_main_t * vm, ethernet_header_t * h0; u8 * l3h0; u32 sw_if_index0; - u8 bvi_flg = 0; #define get_u32(addr) ( *((u32 *)(addr)) ) #define get_u16(addr) ( *((u16 *)(addr)) ) @@ -143,19 +137,6 @@ classify_and_dispatch (vlib_main_t * vm, #define STATS_IF_LAYER2_MCAST_INPUT_CNT 1 #define STATS_IF_LAYER2_BCAST_INPUT_CNT 2 - // Check for from-BVI processing - // When we come from ethernet-input, TX is ~0 - if (PREDICT_FALSE (vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0)) { - // Set up for a from-bvi packet - bvi_to_l2 (vm, - msm->vnet_main, - 
cpu_index, - b0, - vnet_buffer(b0)->sw_if_index[VLIB_TX]); - bvi_flg = 1; - } - - // The RX interface can be changed by bvi_to_l2() sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; h0 = vlib_buffer_get_current (b0); @@ -217,8 +198,8 @@ classify_and_dispatch (vlib_main_t * vm, // Get config for the input interface config = vec_elt_at_index(msm->configs, sw_if_index0); - // Save split horizon group, use 0 for BVI to make sure not dropped - vnet_buffer(b0)->l2.shg = bvi_flg ? 0 : config->shg; + // Save split horizon group + vnet_buffer(b0)->l2.shg = config->shg; if (config->xconnect) { // Set the output interface @@ -226,6 +207,13 @@ classify_and_dispatch (vlib_main_t * vm, } else { + // Check for from-BVI processing, TX is non-~0 if from BVI loopback + // Set SHG for BVI packets to 0 so it is not dropped for VXLAN tunnels + if (PREDICT_FALSE (vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0)) { + vnet_buffer(b0)->sw_if_index[VLIB_TX] = ~0; + vnet_buffer(b0)->l2.shg = 0; + } + // Do bridge-domain processing bd_index0 = config->bd_index; // save BD ID for next feature graph nodes @@ -424,7 +412,7 @@ l2input_node_fn (vlib_main_t * vm, } -VLIB_REGISTER_NODE (l2input_node,static) = { +VLIB_REGISTER_NODE (l2input_node) = { .function = l2input_node_fn, .name = "l2-input", .vector_size = sizeof (u32), @@ -536,6 +524,7 @@ u32 set_int_l2_mode (vlib_main_t * vm, l2_flood_member_t member; u64 mac; i32 l2_if_adjust = 0; + u32 slot; hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index); @@ -554,16 +543,15 @@ u32 set_int_l2_mode (vlib_main_t * vm, bd_config->bvi_sw_if_index = ~0; config->bvi = 0; - // restore output node - hi->output_node_index = bd_config->saved_bvi_output_node_index; - // delete the l2fib entry for the bvi interface mac = *((u64 *)hi->hw_address); l2fib_del_entry (mac, config->bd_index); - // Let interface-output node know that the output node index changed - vnet_per_buffer_interface_output_hw_interface_add_del( - vnet_main, hi->hw_if_index, 0); + // Make loop 
output node send packet back to ethernet-input node + slot = vlib_node_add_named_next_with_slot ( + vm, hi->tx_node_index, "ethernet-input", + VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); + ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); } l2_if_adjust--; } else if (config->xconnect) { @@ -636,10 +624,6 @@ u32 set_int_l2_mode (vlib_main_t * vm, bd_config->bvi_sw_if_index = sw_if_index; config->bvi = 1; - // make BVI outputs go to l2-input - bd_config->saved_bvi_output_node_index = hi->output_node_index; - hi->output_node_index = l2input_node.index; - // create the l2fib entry for the bvi interface mac = *((u64 *)hi->hw_address); l2fib_add_entry (mac, bd_index, sw_if_index, 1, 0, 1); // static + bvi @@ -647,10 +631,11 @@ u32 set_int_l2_mode (vlib_main_t * vm, // Disable learning by default. no use since l2fib entry is static. config->feature_bitmap &= ~L2INPUT_FEAT_LEARN; - // Let interface-output node know that the output node index changed - // so output can be sent via BVI to BD - vnet_per_buffer_interface_output_hw_interface_add_del( - vnet_main, hi->hw_if_index, 0); + // Make loop output node send packet to l2-input node + slot = vlib_node_add_named_next_with_slot ( + vm, hi->tx_node_index, "l2-input", + VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); + ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); } // Add interface to bridge-domain flood vector diff --git a/vnet/vnet/l2/l2_input.h b/vnet/vnet/l2/l2_input.h index a1cd03743c1..1e5c1f0abc8 100644 --- a/vnet/vnet/l2/l2_input.h +++ b/vnet/vnet/l2/l2_input.h @@ -72,6 +72,8 @@ typedef struct { extern l2input_main_t l2input_main; +extern vlib_node_registration_t l2input_node; + static_always_inline l2_bridge_domain_t * l2input_bd_config_from_index (l2input_main_t * l2im, u32 bd_index) { -- cgit 1.2.3-korg