From 47a3d9975fa3af7a7537b565d6511dadc0df61fb Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 29 Sep 2020 15:38:51 +0000 Subject: l2: input performance Type: improvement - cache the values from the BD on the input config to avoid loading - avoid the short write long read on the sequence number - use vlib_buffer_enqueue_to_next Signed-off-by: Neale Ranns Change-Id: I33442b9104b457e4c638d26e9ad3bc965687a0bc --- src/vnet/CMakeLists.txt | 3 +- src/vnet/dpo/dvr_dpo.c | 3 +- src/vnet/ethernet/interface.c | 48 ++-- src/vnet/interface.c | 13 -- src/vnet/interface_cli.c | 23 +- src/vnet/interface_format.c | 2 +- src/vnet/l2/l2_api.c | 4 +- src/vnet/l2/l2_arp_term.c | 2 +- src/vnet/l2/l2_bd.c | 48 +++- src/vnet/l2/l2_bd.h | 11 +- src/vnet/l2/l2_fib.c | 44 ++-- src/vnet/l2/l2_fib.h | 57 +++-- src/vnet/l2/l2_fwd.c | 15 +- src/vnet/l2/l2_input.c | 501 +++++++++-------------------------------- src/vnet/l2/l2_input.h | 75 ++++++- src/vnet/l2/l2_input_node.c | 401 +++++++++++++++++++++++++++++++++ src/vnet/l2/l2_learn.c | 4 +- 17 files changed, 724 insertions(+), 530 deletions(-) create mode 100644 src/vnet/l2/l2_input_node.c (limited to 'src') diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index 0bc55f9580b..3ae20c8b8f7 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -176,6 +176,7 @@ list(APPEND VNET_SOURCES l2/l2_flood.c l2/l2_fwd.c l2/l2_input.c + l2/l2_input_node.c l2/l2_input_vtr.c l2/l2_learn.c l2/l2_output.c @@ -196,7 +197,7 @@ list(APPEND VNET_MULTIARCH_SOURCES l2/l2_patch.c l2/l2_in_out_feat_arc.c l2/l2_input_classify.c - l2/l2_input.c + l2/l2_input_node.c l2/l2_output_classify.c l2/l2_flood.c l2/l2_uu_fwd.c diff --git a/src/vnet/dpo/dvr_dpo.c b/src/vnet/dpo/dvr_dpo.c index 6dd58108091..533ce4b39f3 100644 --- a/src/vnet/dpo/dvr_dpo.c +++ b/src/vnet/dpo/dvr_dpo.c @@ -110,7 +110,8 @@ dvr_dpo_add_or_lock (u32 sw_if_index, config = l2input_intf_config (sw_if_index); - if (config->bridge || config->xconnect) + if
(l2_input_is_bridge(config) || + l2_input_is_xconnect(config)) { dd->dd_reinject = DVR_REINJECT_L2; } diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c index 7b11fdad8b1..b09e2b02256 100644 --- a/src/vnet/ethernet/interface.c +++ b/src/vnet/ethernet/interface.c @@ -496,10 +496,10 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, /* Ordinarily, this is the only config lookup. */ config = l2input_intf_config (vnet_buffer (b[0])->sw_if_index[VLIB_TX]); - next_index = - config->bridge ? VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT : - VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT; - new_tx_sw_if_index = config->bvi ? L2INPUT_BVI : ~0; + next_index = (l2_input_is_bridge (config) ? + VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT : + VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); + new_tx_sw_if_index = l2_input_is_bvi (config) ? L2INPUT_BVI : ~0; new_rx_sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX]; while (n_left_from >= 4) @@ -579,10 +579,10 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, { config = l2input_intf_config (vnet_buffer (b[0])->sw_if_index[VLIB_TX]); - next_index = - config->bridge ? VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT : - VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT; - new_tx_sw_if_index = config->bvi ? L2INPUT_BVI : ~0; + next_index = (l2_input_is_bridge (config) ? + VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT : + VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); + new_tx_sw_if_index = l2_input_is_bvi (config) ? L2INPUT_BVI : ~0; new_rx_sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX]; } next[0] = next_index; @@ -602,11 +602,11 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, { config = l2input_intf_config (vnet_buffer (b[1])->sw_if_index[VLIB_TX]); - next_index = - config->bridge ? VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT : - VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT; + next_index = (l2_input_is_bridge (config) ? 
+ VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT : + VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); new_rx_sw_if_index = vnet_buffer (b[1])->sw_if_index[VLIB_TX]; - new_tx_sw_if_index = config->bvi ? L2INPUT_BVI : ~0; + new_tx_sw_if_index = l2_input_is_bvi (config) ? L2INPUT_BVI : ~0; } next[1] = next_index; vnet_buffer (b[1])->sw_if_index[VLIB_RX] = new_rx_sw_if_index; @@ -625,11 +625,11 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, { config = l2input_intf_config (vnet_buffer (b[2])->sw_if_index[VLIB_TX]); - next_index = - config->bridge ? VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT : - VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT; + next_index = (l2_input_is_bridge (config) ? + VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT : + VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); new_rx_sw_if_index = vnet_buffer (b[2])->sw_if_index[VLIB_TX]; - new_tx_sw_if_index = config->bvi ? L2INPUT_BVI : ~0; + new_tx_sw_if_index = l2_input_is_bvi (config) ? L2INPUT_BVI : ~0; } next[2] = next_index; vnet_buffer (b[2])->sw_if_index[VLIB_RX] = new_rx_sw_if_index; @@ -648,11 +648,11 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, { config = l2input_intf_config (vnet_buffer (b[3])->sw_if_index[VLIB_TX]); - next_index = - config->bridge ? VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT : - VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT; + next_index = (l2_input_is_bridge (config) ? + VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT : + VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); new_rx_sw_if_index = vnet_buffer (b[3])->sw_if_index[VLIB_TX]; - new_tx_sw_if_index = config->bvi ? L2INPUT_BVI : ~0; + new_tx_sw_if_index = l2_input_is_bvi (config) ? L2INPUT_BVI : ~0; } next[3] = next_index; vnet_buffer (b[3])->sw_if_index[VLIB_RX] = new_rx_sw_if_index; @@ -676,10 +676,10 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, { config = l2input_intf_config (vnet_buffer (b[0])->sw_if_index[VLIB_TX]); - next_index = - config->bridge ? 
VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT : - VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT; - new_tx_sw_if_index = config->bvi ? L2INPUT_BVI : ~0; + next_index = (l2_input_is_bridge (config) ? + VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT : + VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); + new_tx_sw_if_index = l2_input_is_bvi (config) ? L2INPUT_BVI : ~0; new_rx_sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX]; } next[0] = next_index; diff --git a/src/vnet/interface.c b/src/vnet/interface.c index 18c7696e9a4..ad8bde3457a 100644 --- a/src/vnet/interface.c +++ b/src/vnet/interface.c @@ -42,7 +42,6 @@ #include #include #include -#include typedef enum vnet_interface_helper_flags_t_ { @@ -636,18 +635,6 @@ vnet_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index) pool_elt_at_index (im->sw_interfaces, sw_if_index); /* Check if the interface has config and is removed from L2 BD or XConnect */ - vlib_main_t *vm = vlib_get_main (); - l2_input_config_t *config; - if (sw_if_index < vec_len (l2input_main.configs)) - { - config = vec_elt_at_index (l2input_main.configs, sw_if_index); - if (config->xconnect) - set_int_l2_mode (vm, vnm, MODE_L3, config->output_sw_if_index, 0, - L2_BD_PORT_TYPE_NORMAL, 0, 0); - if (config->xconnect || config->bridge) - set_int_l2_mode (vm, vnm, MODE_L3, sw_if_index, 0, - L2_BD_PORT_TYPE_NORMAL, 0, 0); - } vnet_clear_sw_interface_tag (vnm, sw_if_index); /* Bring down interface in case it is up. 
*/ diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index 28f24ae3977..55433fe8314 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -342,14 +342,7 @@ show_sw_interfaces (vlib_main_t * vm, if (show_features) { vnet_interface_features_show (vm, sw_if_index, verbose); - - l2_input_config_t *l2_input = l2input_intf_config (sw_if_index); - u32 fb = l2_input->feature_bitmap; - /* intf input features are masked by bridge domain */ - if (l2_input->bridge) - fb &= l2input_bd_config (l2_input->bd_index)->feature_bitmap; - vlib_cli_output (vm, "\nl2-input:\n%U", format_l2_input_features, fb, - 1); + vlib_cli_output (vm, "%U", format_l2_input_features, sw_if_index, 1); l2_output_config_t *l2_output = l2output_intf_config (sw_if_index); vlib_cli_output (vm, "\nl2-output:"); @@ -448,19 +441,7 @@ show_sw_interfaces (vlib_main_t * vm, (si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? "up" : "dn"); /* Display any L2 info */ - l2_input_config_t *l2_input = l2input_intf_config (si->sw_if_index); - if (l2_input->bridge) - { - bd_main_t *bdm = &bd_main; - u32 bd_id = l2input_main.bd_configs[l2_input->bd_index].bd_id; - vlib_cli_output (vm, " L2 bridge bd-id %d idx %d shg %d %s", - bd_id, bd_find_index (bdm, bd_id), l2_input->shg, - l2_input->bvi ? 
"bvi" : " "); - } - else if (l2_input->xconnect) - vlib_cli_output (vm, " L2 xconnect %U", - format_vnet_sw_if_index_name, vnm, - l2_input->output_sw_if_index); + vlib_cli_output (vm, "%U", format_l2_input, si->sw_if_index); /* *INDENT-OFF* */ /* Display any IP4 addressing info */ diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c index 2b691a66071..5f995b8a34f 100644 --- a/src/vnet/interface_format.c +++ b/src/vnet/interface_format.c @@ -555,7 +555,7 @@ format_vnet_buffer_opaque (u8 * s, va_list * args) s = format (s, "l2.feature_bitmap_input: %U, L2.feature_bitmap_output: %U", - format_l2_input_features, o->l2.feature_bitmap, 0, + format_l2_input_feature_bitmap, o->l2.feature_bitmap, 0, format_l2_output_features, o->l2.feature_bitmap, 0); vec_add1 (s, '\n'); diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index d9bc47ab2e8..c8690c6ccae 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -115,7 +115,7 @@ vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp) pool_foreach (swif, im->sw_interfaces, ({ config = vec_elt_at_index (l2im->configs, swif->sw_if_index); - if (config->xconnect) + if (l2_input_is_xconnect(config)) send_l2_xconnect_details (reg, mp->context, swif->sw_if_index, config->output_sw_if_index); })); @@ -236,7 +236,7 @@ vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp) { l2_input_config_t *config; config = vec_elt_at_index (l2im->configs, sw_if_index); - if (config->bridge == 0) + if (!l2_input_is_bridge (config)) { rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; goto bad_sw_if_index; diff --git a/src/vnet/l2/l2_arp_term.c b/src/vnet/l2/l2_arp_term.c index 982fd9f05ea..fd21bc433e5 100644 --- a/src/vnet/l2/l2_arp_term.c +++ b/src/vnet/l2/l2_arp_term.c @@ -377,7 +377,7 @@ arp_term_l2bd (vlib_main_t * vm, /* For BVI, need to use l2-fwd node to send ARP reply as l2-output node cannot output packet to BVI properly */ cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0); - if (PREDICT_FALSE 
(cfg0->bvi)) + if (PREDICT_FALSE (l2_input_is_bvi (cfg0))) { vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD; vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0; diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c index 94852c90769..a3acc4b6d7e 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -243,6 +243,43 @@ l2bd_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (l2bd_init); +l2_bridge_domain_t * +bd_get (u32 bd_index) +{ + if (bd_index < vec_len (l2input_main.bd_configs)) + return (vec_elt_at_index (l2input_main.bd_configs, bd_index)); + return (NULL); +} + +u32 +bd_input_walk (u32 bd_index, bd_input_walk_fn_t fn, void *data) +{ + l2_flood_member_t *member; + l2_bridge_domain_t *bd; + u32 sw_if_index; + + sw_if_index = ~0; + bd = bd_get (bd_index); + + ASSERT (bd); + + vec_foreach (member, bd->members) + { + if (WALK_STOP == fn (bd_index, member->sw_if_index)) + { + sw_if_index = member->sw_if_index; + break; + } + } + + return (sw_if_index); +} + +static void +b2_input_recache (u32 bd_index) +{ + bd_input_walk (bd_index, l2input_recache, NULL); +} /** Set the learn/forward/flood flags for the bridge domain. 
@@ -290,6 +327,8 @@ bd_set_flags (vlib_main_t * vm, u32 bd_index, bd_flags_t flags, u32 enable) bd_config->feature_bitmap &= ~feature_bitmap; } + b2_input_recache (bd_index); + return bd_config->feature_bitmap; } @@ -305,6 +344,7 @@ bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age) vec_validate (l2input_main.bd_configs, bd_index); bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index); bd_config->mac_age = age; + b2_input_recache (bd_index); /* check if there is at least one bd with mac aging enabled */ vec_foreach (bd_config, l2input_main.bd_configs) @@ -1093,8 +1133,8 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) else as = format (as, "off"); vlib_cli_output (vm, - "%=8d %=7d %=4d %=9v %=9s %=9s %=11U %=9s %=9s %=9s %=11U", - bd_config->bd_id, bd_index, bd_config->seq_num, as, + "%=8d %=7d %=9v %=9s %=9s %=11U %=9s %=9s %=9s %=11U", + bd_config->bd_id, bd_index, as, bd_config->feature_bitmap & L2INPUT_FEAT_LEARN ? "on" : "off", bd_config->feature_bitmap & L2INPUT_FEAT_FWD ? 
@@ -1108,6 +1148,8 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) "on" : "off", format_vnet_sw_if_index_name_with_NA, vnm, bd_config->bvi_sw_if_index); + vlib_cli_output (vm, "%U", format_l2_input_feature_bitmap, + bd_config->feature_bitmap, 0); vec_reset_length (as); if (detail || intf) @@ -1118,7 +1160,7 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { l2_flood_member_t *member = vec_elt_at_index (bd_config->members, i); - u8 swif_seq_num = *l2fib_swif_seq_num (member->sw_if_index); + u8 swif_seq_num = l2_input_seq_num (member->sw_if_index); u32 vtr_opr, dot1q, tag1, tag2; if (i == 0) { diff --git a/src/vnet/l2/l2_bd.h b/src/vnet/l2/l2_bd.h index 35c0e0bf0a1..943358b4e15 100644 --- a/src/vnet/l2/l2_bd.h +++ b/src/vnet/l2/l2_bd.h @@ -139,7 +139,6 @@ bd_is_valid (l2_bridge_domain_t * bd_config) /* Init bridge domain if not done already */ void bd_validate (l2_bridge_domain_t * bd_config); - void bd_add_member (l2_bridge_domain_t * bd_config, l2_flood_member_t * member); @@ -201,6 +200,16 @@ bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id) return bd_index; } +/** + * \brief Walk all the input interfaces in the BD + */ +typedef walk_rc_t (*bd_input_walk_fn_t) (u32 bd_index, u32 sw_if_index); + +u32 bd_input_walk (u32 bd_index, bd_input_walk_fn_t fn, void *data); + +l2_bridge_domain_t *bd_get (u32 bd_index); +l2_bridge_domain_t *bd_get_by_table_id (u32 table_id); + u32 bd_add_del_ip_mac (u32 bd_index, ip46_type_t type, const ip46_address_t * ip_addr, diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index 4b7fdcb28b5..bb4d61404a2 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -146,6 +146,26 @@ l2fib_table_dump (u32 bd_index, *l2fe_res = ctx.l2fe_res; } +void +l2_fib_extract_seq_num (l2fib_seq_num_t sn, u8 * bd_sn, u8 * if_sn) +{ + *bd_sn = sn >> 8; + *if_sn = sn & 0xff; +} + +u8 * +format_l2_fib_seq_num (u8 * s, va_list * a) +{ + l2fib_seq_num_t sn = va_arg (*a, int); + u8
bd_sn, if_sn; + + l2_fib_extract_seq_num (sn, &bd_sn, &if_sn); + + s = format (s, "%3d/%-3d", bd_sn, if_sn); + + return (s); +} + typedef struct l2fib_show_walk_ctx_t_ { u8 first_entry; @@ -207,12 +227,12 @@ l2fib_show_walk_cb (BVT (clib_bihash_kv) * kvp, void *arg) } vlib_cli_output (ctx->vm, - "%=19U%=7d%=7d %3d/%-3d%=9v%=7s%=7s%=5s%=30U", + "%=19U%=7d%=7d %U%=9v%=7s%=7s%=5s%=30U", format_ethernet_address, key.fields.mac, key.fields.bd_index, result.fields.sw_if_index == ~0 ? -1 : result.fields.sw_if_index, - result.fields.sn.bd, result.fields.sn.swif, s, + format_l2_fib_seq_num, result.fields.sn, s, l2fib_entry_result_is_set_STATIC (&result) ? "*" : "-", l2fib_entry_result_is_set_FILTER (&result) ? "*" : "-", l2fib_entry_result_is_set_BVI (&result) ? "*" : "-", @@ -404,16 +424,13 @@ VLIB_CLI_COMMAND (clear_l2fib_cli, static) = { }; /* *INDENT-ON* */ -static inline l2fib_seq_num_t +static l2fib_seq_num_t l2fib_cur_seq_num (u32 bd_index, u32 sw_if_index) { l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index); - /* *INDENT-OFF* */ - return (l2fib_seq_num_t) { - .swif = *l2fib_swif_seq_num (sw_if_index), - .bd = bd_config->seq_num, - }; - /* *INDENT-ON* */ + + return l2_fib_mk_seq_num (bd_config->seq_num, + l2_input_seq_num (sw_if_index)); } /** @@ -835,7 +852,7 @@ l2fib_start_ager_scan (vlib_main_t * vm) void l2fib_flush_int_mac (vlib_main_t * vm, u32 sw_if_index) { - *l2fib_swif_seq_num (sw_if_index) += 1; + l2_input_seq_num_inc (sw_if_index); l2fib_start_ager_scan (vm); } @@ -989,7 +1006,8 @@ clib_error_t * l2fib_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) { l2_input_config_t *config = l2input_intf_config (sw_if_index); - if ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0 && config->bridge) + if ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0 && + l2_input_is_bridge (config)) l2fib_flush_int_mac (vnm->vlib_main, sw_if_index); return 0; } @@ -1136,8 +1154,8 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) /* start 
aging processing */ u32 bd_index = key.fields.bd_index; u32 sw_if_index = result.fields.sw_if_index; - u16 sn = l2fib_cur_seq_num (bd_index, sw_if_index).as_u16; - if (result.fields.sn.as_u16 != sn) + u16 sn = l2fib_cur_seq_num (bd_index, sw_if_index); + if (result.fields.sn != sn) goto age_out; /* stale mac */ l2_bridge_domain_t *bd_config = diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h index 41ef4ab3461..7f7cd761e20 100644 --- a/src/vnet/l2/l2_fib.h +++ b/src/vnet/l2/l2_fib.h @@ -54,9 +54,6 @@ typedef struct /* hash table initialized */ u8 mac_table_initialized; - /* per swif vector of sequence number for interface based flush of MACs */ - u8 *swif_seq_num; - /* last event or ager scan duration */ f64 evt_scan_duration; f64 age_scan_duration; @@ -97,19 +94,36 @@ typedef struct STATIC_ASSERT_SIZEOF (l2fib_entry_key_t, 8); +/** + * A combined representation of the sequence number associated + * with the interface and the BD. + * The BD is in higher bits, the interface in the lower bits, but + * the order is not important. 
+ * + * It's convenient to represent this as an union of two u8s, + * but then in the DP one is forced to do short writes, followed + * by long reads, which is a sure thing for a stall + */ +typedef u16 l2fib_seq_num_t; -typedef struct +static_always_inline l2fib_seq_num_t +l2_fib_mk_seq_num (u8 bd_sn, u8 if_sn) { - union - { - struct - { - u8 swif; - u8 bd; - }; - u16 as_u16; - }; -} l2fib_seq_num_t; + return (((u16) bd_sn) << 8) | if_sn; +} + +static_always_inline l2fib_seq_num_t +l2_fib_update_seq_num (l2fib_seq_num_t sn, u8 if_sn) +{ + sn &= 0xff00; + sn |= if_sn; + + return (sn); +} + +extern void l2_fib_extract_seq_num (l2fib_seq_num_t sn, u8 * bd_sn, + u8 * if_sn); +extern u8 *format_l2_fib_seq_num (u8 * s, va_list * a); /** * Flags associated with an L2 Fib Entry @@ -459,21 +473,6 @@ l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key, u8 *format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args); -static_always_inline u8 * -l2fib_swif_seq_num (u32 sw_if_index) -{ - l2fib_main_t *mp = &l2fib_main; - return vec_elt_at_index (mp->swif_seq_num, sw_if_index); -} - -static_always_inline u8 * -l2fib_valid_swif_seq_num (u32 sw_if_index) -{ - l2fib_main_t *mp = &l2fib_main; - vec_validate (mp->swif_seq_num, sw_if_index); - return l2fib_swif_seq_num (sw_if_index); -} - BVT (clib_bihash) * get_mac_table (void); #endif diff --git a/src/vnet/l2/l2_fwd.c b/src/vnet/l2/l2_fwd.c index a1674e09b77..e7accc29878 100644 --- a/src/vnet/l2/l2_fwd.c +++ b/src/vnet/l2/l2_fwd.c @@ -156,13 +156,12 @@ l2fwd_process (vlib_main_t * vm, /* check l2fib seq num for stale entries */ if (!l2fib_entry_result_is_set_AGE_NOT (result0)) { - l2fib_seq_num_t in_sn = {.as_u16 = vnet_buffer (b0)->l2.l2fib_sn }; - l2fib_seq_num_t expected_sn = { - .bd = in_sn.bd, - .swif = *l2fib_swif_seq_num (result0->fields.sw_if_index), - }; - l2fib_seq_num_valid = - expected_sn.as_u16 == result0->fields.sn.as_u16; + l2fib_seq_num_t in_sn = vnet_buffer (b0)->l2.l2fib_sn; + l2fib_seq_num_t 
expected_sn = l2_fib_update_seq_num (in_sn, + l2_input_seq_num + (result0->fields.sw_if_index)); + + l2fib_seq_num_valid = expected_sn == result0->fields.sn; } if (PREDICT_FALSE (!l2fib_seq_num_valid)) @@ -505,7 +504,7 @@ int_fwd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) } /* set the interface flag */ - if (l2input_intf_config (sw_if_index)->xconnect) + if (l2_input_is_xconnect (l2input_intf_config (sw_if_index))) { l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_XCONNECT, enable); } diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index 5e73faa28eb..9dc452e1558 100644 --- a/src/vnet/l2/l2_input.c +++ b/src/vnet/l2/l2_input.c @@ -48,8 +48,6 @@ * For interfaces in Layer 3 mode, the packets will be routed. */ -#ifndef CLIB_MARCH_VARIANT - /* Feature graph node names */ static char *l2input_feat_names[] = { #define _(sym,name) name, @@ -64,7 +62,7 @@ l2input_get_feat_names (void) } u8 * -format_l2_input_features (u8 * s, va_list * args) +format_l2_input_feature_bitmap (u8 * s, va_list * args) { static char *display_names[] = { #define _(sym,name) #sym, @@ -95,394 +93,51 @@ format_l2_input_features (u8 * s, va_list * args) } return s; } -#endif /* CLIB_MARCH_VARIANT */ - -typedef struct -{ - /* per-pkt trace data */ - u8 dst_and_src[12]; - u32 next_index; - u32 sw_if_index; -} l2input_trace_t; - -/* packet trace format function */ -static u8 * -format_l2input_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - l2input_trace_t *t = va_arg (*args, l2input_trace_t *); - - s = format (s, "l2-input: sw_if_index %d dst %U src %U", - t->sw_if_index, - format_ethernet_address, t->dst_and_src, - format_ethernet_address, t->dst_and_src + 6); - return s; -} - -extern l2input_main_t l2input_main; - -#ifndef CLIB_MARCH_VARIANT -l2input_main_t l2input_main; -#endif /* CLIB_MARCH_VARIANT */ - -#define foreach_l2input_error \ -_(L2INPUT, "L2
input packets") \ -_(DROP, "L2 input drops") - -typedef enum -{ -#define _(sym,str) L2INPUT_ERROR_##sym, - foreach_l2input_error -#undef _ - L2INPUT_N_ERROR, -} l2input_error_t; -static char *l2input_error_strings[] = { -#define _(sym,string) string, - foreach_l2input_error -#undef _ -}; - -typedef enum -{ /* */ - L2INPUT_NEXT_LEARN, - L2INPUT_NEXT_FWD, - L2INPUT_NEXT_DROP, - L2INPUT_N_NEXT, -} l2input_next_t; - - -static_always_inline void -classify_and_dispatch (l2input_main_t * msm, vlib_buffer_t * b0, u32 * next0) +u8 * +format_l2_input_features (u8 * s, va_list * args) { - /* - * Load L2 input feature struct - * Load bridge domain struct - * Parse ethernet header to determine unicast/mcast/broadcast - * take L2 input stat - * classify packet as IP/UDP/TCP, control, other - * mask feature bitmap - * go to first node in bitmap - * Later: optimize VTM - * - * For L2XC, - * set tx sw-if-handle - */ - - u32 feat_mask = ~0; - u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - ethernet_header_t *h0 = vlib_buffer_get_current (b0); - - /* Get config for the input interface */ - l2_input_config_t *config = vec_elt_at_index (msm->configs, sw_if_index0); - - /* Save split horizon group */ - vnet_buffer (b0)->l2.shg = config->shg; - - /* determine layer2 kind for stat and mask */ - if (PREDICT_FALSE (ethernet_address_cast (h0->dst_address))) - { - u8 *l3h0 = (u8 *) h0 + vnet_buffer (b0)->l2.l2_len; - -#define get_u16(addr) ( *((u16 *)(addr)) ) - u16 ethertype = clib_net_to_host_u16 (get_u16 (l3h0 - 2)); - u8 protocol = ((ip6_header_t *) l3h0)->protocol; - - /* Disable bridge forwarding (flooding will execute instead if not xconnect) */ - feat_mask &= ~(L2INPUT_FEAT_FWD | - L2INPUT_FEAT_UU_FLOOD | - L2INPUT_FEAT_UU_FWD | L2INPUT_FEAT_GBP_FWD); - - if (ethertype != ETHERNET_TYPE_ARP) - feat_mask &= ~(L2INPUT_FEAT_ARP_UFWD); - - /* Disable ARP-term for non-ARP and non-ICMP6 packet */ - if (ethertype != ETHERNET_TYPE_ARP && - (ethertype != ETHERNET_TYPE_IP6 || 
protocol != IP_PROTOCOL_ICMP6)) - feat_mask &= ~(L2INPUT_FEAT_ARP_TERM); - /* - * For packet from BVI - set SHG of ARP request or ICMPv6 neighbor - * solicitation packet from BVI to 0 so it can also flood to VXLAN - * tunnels or other ports with the same SHG as that of the BVI. - */ - else if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] == - L2INPUT_BVI)) - { - if (ethertype == ETHERNET_TYPE_ARP) - { - ethernet_arp_header_t *arp0 = (ethernet_arp_header_t *) l3h0; - if (arp0->opcode == - clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request)) - vnet_buffer (b0)->l2.shg = 0; - } - else /* must be ICMPv6 */ - { - ip6_header_t *iph0 = (ip6_header_t *) l3h0; - icmp6_neighbor_solicitation_or_advertisement_header_t *ndh0; - ndh0 = ip6_next_header (iph0); - if (ndh0->icmp.type == ICMP6_neighbor_solicitation) - vnet_buffer (b0)->l2.shg = 0; - } - } - } - else - { - /* - * For packet from BVI - set SHG of unicast packet from BVI to 0 so it - * is not dropped on output to VXLAN tunnels or other ports with the - * same SHG as that of the BVI. - */ - if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] == - L2INPUT_BVI)) - vnet_buffer (b0)->l2.shg = 0; - } - - - if (config->bridge) - { - /* Do bridge-domain processing */ - u16 bd_index0 = config->bd_index; - /* save BD ID for next feature graph nodes */ - vnet_buffer (b0)->l2.bd_index = bd_index0; - - /* Get config for the bridge domain interface */ - l2_bridge_domain_t *bd_config = - vec_elt_at_index (msm->bd_configs, bd_index0); - - /* Save bridge domain and interface seq_num */ - /* *INDENT-OFF* */ - l2fib_seq_num_t sn = { - .swif = *l2fib_swif_seq_num(sw_if_index0), - .bd = bd_config->seq_num, - }; - /* *INDENT-ON* */ - vnet_buffer (b0)->l2.l2fib_sn = sn.as_u16;; - vnet_buffer (b0)->l2.bd_age = bd_config->mac_age; + u32 sw_if_index = va_arg (*args, u32); + u32 verbose = va_arg (*args, u32); - /* - * Process bridge domain feature enables. 
- * To perform learning/flooding/forwarding, the corresponding bit - * must be enabled in both the input interface config and in the - * bridge domain config. In the bd_bitmap, bits for features other - * than learning/flooding/forwarding should always be set. - */ - feat_mask = feat_mask & bd_config->feature_bitmap; - } - else if (config->xconnect) - { - /* Set the output interface */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = config->output_sw_if_index; - } - else - feat_mask = L2INPUT_FEAT_DROP; + l2_input_config_t *l2_input = l2input_intf_config (sw_if_index); + u32 fb = l2_input->feature_bitmap; - /* mask out features from bitmap using packet type and bd config */ - u32 feature_bitmap = config->feature_bitmap & feat_mask; + /* intf input features are masked by bridge domain */ + if (l2_input_is_bridge (l2_input)) + fb &= l2_input->bd_feature_bitmap; - /* save for next feature graph nodes */ - vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap; + s = + format (s, "\nl2-input:\n%U", format_l2_input_feature_bitmap, fb, + verbose); - /* Determine the next node */ - *next0 = feat_bitmap_get_next_node_index (msm->feat_next_node_index, - feature_bitmap); + return (s); } -static_always_inline uword -l2input_node_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame, - int do_trace) +u8 * +format_l2_input (u8 * s, va_list * args) { - u32 n_left_from, *from, *to_next; - l2input_next_t next_index; - l2input_main_t *msm = &l2input_main; - vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; /* number of packets to process */ - next_index = node->cached_next_index; - vlib_get_buffers (vm, from, bufs, n_left_from); + u32 sw_if_index = va_arg (*args, u32); + l2_input_config_t *l2_input = l2input_intf_config (sw_if_index); - while (n_left_from > 0) + /* intf input features are masked by bridge domain */ + if (l2_input_is_bridge (l2_input)) { - u32 n_left_to_next; - - /* get 
space to enqueue frame to graph node "next_index" */ - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + bd_main_t *bdm = &bd_main; + u32 bd_id = l2input_main.bd_configs[l2_input->bd_index].bd_id; - while (n_left_from >= 8 && n_left_to_next >= 4) - { - u32 next0, next1, next2, next3; - u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3; - - /* Prefetch next iteration. */ - { - - /* Prefetch the buffer header and packet for the N+2 loop iteration */ - vlib_prefetch_buffer_header (b[4], LOAD); - vlib_prefetch_buffer_header (b[5], LOAD); - vlib_prefetch_buffer_header (b[6], LOAD); - vlib_prefetch_buffer_header (b[7], LOAD); - - CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (b[6]->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (b[7]->data, CLIB_CACHE_LINE_BYTES, STORE); - - /* - * Don't bother prefetching the bridge-domain config (which - * depends on the input config above). Only a small number of - * bridge domains are expected. Plus the structure is small - * and several fit in a cache line. 
- */ - } - - /* speculatively enqueue b0 and b1 to the current next frame */ - /* bi is "buffer index", b is pointer to the buffer */ - - if (do_trace) - { - /* RX interface handles */ - sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; - sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_RX]; - sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_RX]; - sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_RX]; - - if (b[0]->flags & VLIB_BUFFER_IS_TRACED) - { - ethernet_header_t *h0 = vlib_buffer_get_current (b[0]); - l2input_trace_t *t = - vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->sw_if_index = sw_if_index0; - clib_memcpy_fast (t->dst_and_src, h0->dst_address, - sizeof (h0->dst_address) + - sizeof (h0->src_address)); - } - if (b[1]->flags & VLIB_BUFFER_IS_TRACED) - { - ethernet_header_t *h1 = vlib_buffer_get_current (b[1]); - l2input_trace_t *t = - vlib_add_trace (vm, node, b[1], sizeof (*t)); - t->sw_if_index = sw_if_index1; - clib_memcpy_fast (t->dst_and_src, h1->dst_address, - sizeof (h1->dst_address) + - sizeof (h1->src_address)); - } - if (b[2]->flags & VLIB_BUFFER_IS_TRACED) - { - ethernet_header_t *h2 = vlib_buffer_get_current (b[2]); - l2input_trace_t *t = - vlib_add_trace (vm, node, b[2], sizeof (*t)); - t->sw_if_index = sw_if_index2; - clib_memcpy_fast (t->dst_and_src, h2->dst_address, - sizeof (h2->dst_address) + - sizeof (h2->src_address)); - } - if (b[3]->flags & VLIB_BUFFER_IS_TRACED) - { - ethernet_header_t *h3 = vlib_buffer_get_current (b[3]); - l2input_trace_t *t = - vlib_add_trace (vm, node, b[3], sizeof (*t)); - t->sw_if_index = sw_if_index3; - clib_memcpy_fast (t->dst_and_src, h3->dst_address, - sizeof (h3->dst_address) + - sizeof (h3->src_address)); - } - } - - classify_and_dispatch (msm, b[0], &next0); - classify_and_dispatch (msm, b[1], &next1); - //show the better performance when clib_memcpy_fast is put here. 
- clib_memcpy_fast (to_next, from, sizeof (from[0]) * 4); - to_next += 4; - classify_and_dispatch (msm, b[2], &next2); - classify_and_dispatch (msm, b[3], &next3); - b += 4; - n_left_from -= 4; - n_left_to_next -= 4; - - /* verify speculative enqueues, maybe switch current next frame */ - /* if next0==next1==next_index then nothing special needs to be done */ - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, - to_next, n_left_to_next, - from[0], from[1], from[2], from[3], - next0, next1, next2, next3); - from += 4; - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 next0; - u32 sw_if_index0; - - /* speculatively enqueue b0 to the current next frame */ - - if (do_trace && PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) - { - ethernet_header_t *h0 = vlib_buffer_get_current (b[0]); - l2input_trace_t *t = - vlib_add_trace (vm, node, b[0], sizeof (*t)); - sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; - t->sw_if_index = sw_if_index0; - clib_memcpy_fast (t->dst_and_src, h0->dst_address, - sizeof (h0->dst_address) + - sizeof (h0->src_address)); - } - - classify_and_dispatch (msm, b[0], &next0); - b += 1; - to_next[0] = from[0]; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - from[0], next0); - from += 1; - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); + s = format (s, " L2 bridge bd-id %d idx %d shg %d %s", + bd_id, bd_find_index (bdm, bd_id), l2_input->shg, + l2_input_is_bvi (l2_input) ? 
"bvi" : " "); } + else if (l2_input_is_xconnect (l2_input)) + s = format (s, " L2 xconnect %U", + format_vnet_sw_if_index_name, vnet_get_main (), + l2_input->output_sw_if_index); - vlib_node_increment_counter (vm, l2input_node.index, - L2INPUT_ERROR_L2INPUT, frame->n_vectors); - - return frame->n_vectors; + return (s); } -VLIB_NODE_FN (l2input_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - return l2input_node_inline (vm, node, frame, 1 /* do_trace */ ); - return l2input_node_inline (vm, node, frame, 0 /* do_trace */ ); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (l2input_node) = { - .name = "l2-input", - .vector_size = sizeof (u32), - .format_trace = format_l2input_trace, - .format_buffer = format_ethernet_header_with_length, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(l2input_error_strings), - .error_strings = l2input_error_strings, - - .n_next_nodes = L2INPUT_N_NEXT, - - /* edit / add dispositions here */ - .next_nodes = { - [L2INPUT_NEXT_LEARN] = "l2-learn", - [L2INPUT_NEXT_FWD] = "l2-fwd", - [L2INPUT_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -#ifndef CLIB_MARCH_VARIANT clib_error_t * l2input_init (vlib_main_t * vm) { @@ -494,10 +149,6 @@ l2input_init (vlib_main_t * vm) /* Get packets RX'd from L2 interfaces */ ethernet_register_l2_input (vm, l2input_node.index); - /* Create the config vector */ - vec_validate (mp->configs, 100); - /* create 100 sw interface entries and zero them */ - /* Initialize the feature next-node indexes */ feat_bitmap_init_next_nodes (vm, l2input_node.index, @@ -555,7 +206,7 @@ l2input_interface_mac_change (u32 sw_if_index, intf_config = l2input_intf_config (sw_if_index); - if (intf_config->bridge && intf_config->bvi) + if (l2_input_is_bridge (intf_config) && l2_input_is_bvi (intf_config)) { /* delete and re-add l2fib entry for the bvi interface */ l2fib_del_entry (old_address, intf_config->bd_index, 
sw_if_index); @@ -567,6 +218,32 @@ l2input_interface_mac_change (u32 sw_if_index, } } +walk_rc_t +l2input_recache (u32 bd_index, u32 sw_if_index) +{ + l2_input_config_t *input; + l2_bridge_domain_t *bd; + + bd = bd_get (bd_index); + input = l2input_intf_config (sw_if_index); + + input->bd_mac_age = bd->mac_age; + input->bd_seq_num = bd->seq_num; + input->bd_feature_bitmap = bd->feature_bitmap; + + return (WALK_CONTINUE); +} + +void +l2_input_seq_num_inc (u32 sw_if_index) +{ + l2_input_config_t *input; + + input = vec_elt_at_index (l2input_main.configs, sw_if_index); + + input->seq_num++; +} + /** * Set the subinterface to run in l2 or l3 mode. * For L3 mode, just the sw_if_index is specified. @@ -584,7 +261,6 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ u32 shg, /* the bridged interface split horizon group */ u32 xc_sw_if_index) /* peer interface for xconnect */ { - l2input_main_t *mp = &l2input_main; l2output_main_t *l2om = &l2output_main; vnet_main_t *vnm = vnet_get_main (); vnet_hw_interface_t *hi; @@ -600,10 +276,10 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ if (l2fib_main.mac_table_initialized == 0) l2fib_table_init (); - if (config->bridge) + if (l2_input_is_bridge (config)) { /* Interface is already in bridge mode. Undo the existing config. 
*/ - bd_config = vec_elt_at_index (mp->bd_configs, config->bd_index); + bd_config = bd_get (config->bd_index); /* remove interface from flood vector */ bd_remove_member (bd_config, sw_if_index); @@ -614,7 +290,7 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ vnet_sw_interface_t *si; bd_config->bvi_sw_if_index = ~0; - config->bvi = 0; + config->flags &= ~L2_INPUT_FLAG_BVI; /* delete the l2fib entry for the bvi interface */ l2fib_del_entry (hi->hw_address, config->bd_index, sw_if_index); @@ -634,9 +310,10 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ (bd_config->feature_bitmap & L2INPUT_FEAT_LEARN)) l2fib_flush_int_mac (vm, sw_if_index); + bd_input_walk (config->bd_index, l2input_recache, NULL); l2_if_adjust--; } - else if (config->xconnect) + else if (l2_input_is_xconnect (config)) { l2_if_adjust--; } @@ -650,8 +327,7 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ { /* Set L2 config to BD index 0 so that if any packet accidentally * came in on L2 path, it will be dropped in BD 0 */ - config->xconnect = 0; - config->bridge = 0; + config->flags = L2_INPUT_FLAG_NONE; config->shg = 0; config->bd_index = 0; config->feature_bitmap = L2INPUT_FEAT_DROP; @@ -684,10 +360,9 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ if (!hi) return MODE_ERROR_ETH; /* non-ethernet */ - config->xconnect = 0; - config->bridge = 1; + config->flags = L2_INPUT_FLAG_BRIDGE; config->bd_index = bd_index; - *l2fib_valid_swif_seq_num (sw_if_index) += 1; + l2_input_seq_num_inc (sw_if_index); /* * Enable forwarding, flooding, learning and ARP termination by default @@ -724,7 +399,7 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ return MODE_ERROR_BVI_DEF; /* bd already has a bvi interface */ } bd_config->bvi_sw_if_index = sw_if_index; - config->bvi = 1; + config->flags |= L2_INPUT_FLAG_BVI; /* create the l2fib entry for the bvi interface */ l2fib_add_entry (hi->hw_address, bd_index, 
sw_if_index, @@ -763,8 +438,7 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ } else if (mode == MODE_L2_XC) { - config->xconnect = 1; - config->bridge = 0; + config->flags = L2_INPUT_FLAG_XCONNECT; config->output_sw_if_index = xc_sw_if_index; /* Make sure last-chance drop is configured */ @@ -779,8 +453,7 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ } else if (mode == MODE_L2_CLASSIFY) { - config->xconnect = 1; - config->bridge = 0; + config->flags = L2_INPUT_FLAG_XCONNECT; config->output_sw_if_index = xc_sw_if_index; /* Make sure last-chance drop is configured */ @@ -806,6 +479,8 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ */ l2_if_adjust++; + + bd_input_walk (bd_index, l2input_recache, NULL); } /* Adjust count of L2 interfaces */ @@ -841,7 +516,31 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ return 0; } -#endif /* CLIB_MARCH_VARIANT */ + +static clib_error_t * +l2_input_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) +{ + if (!is_add) + { + vlib_main_t *vm = vlib_get_main (); + l2_input_config_t *config; + + if (sw_if_index < vec_len (l2input_main.configs)) + { + config = vec_elt_at_index (l2input_main.configs, sw_if_index); + if (l2_input_is_xconnect (config)) + set_int_l2_mode (vm, vnm, MODE_L3, config->output_sw_if_index, 0, + L2_BD_PORT_TYPE_NORMAL, 0, 0); + if (l2_input_is_xconnect (config) || l2_input_is_bridge (config)) + set_int_l2_mode (vm, vnm, MODE_L3, sw_if_index, 0, + L2_BD_PORT_TYPE_NORMAL, 0, 0); + } + } + + return (NULL); +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (l2_input_interface_add_del); /** * Set subinterface in bridging mode with a bridge-domain ID. 
@@ -1119,16 +818,16 @@ show_int_mode (vlib_main_t * vm, vec_foreach (si, sis) { l2_input_config_t *config = l2input_intf_config (si->sw_if_index); - if (config->bridge) + if (l2_input_is_bridge (config)) { u32 bd_id; mode = "l2 bridge"; bd_id = l2input_main.bd_configs[config->bd_index].bd_id; - args = format (0, "bd_id %d%s%d", bd_id, - config->bvi ? " bvi shg " : " shg ", config->shg); + args = format (0, "bd_id %d%s shg %d", bd_id, + l2_input_is_bvi (config) ? " bvi" : "", config->shg); } - else if (config->xconnect) + else if (l2_input_is_xconnect (config)) { mode = "l2 xconnect"; args = format (0, "%U", @@ -1204,7 +903,6 @@ _(l2output_init) \ _(l2_patch_init) \ _(l2_xcrw_init) -#ifndef CLIB_MARCH_VARIANT clib_error_t * l2_init (vlib_main_t * vm) { @@ -1219,7 +917,6 @@ while (0); } VLIB_INIT_FUNCTION (l2_init); -#endif /* CLIB_MARCH_VARIANT */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h index 0e1e98f883d..9a59d3521d0 100644 --- a/src/vnet/l2/l2_input.h +++ b/src/vnet/l2/l2_input.h @@ -25,32 +25,57 @@ #include #include -/* Per-subinterface L2 feature configuration */ +/* l2 connection type */ +typedef enum l2_input_flags_t_ +{ + /* NONE implies L3 mode. 
*/ + L2_INPUT_FLAG_NONE = 0, + L2_INPUT_FLAG_XCONNECT = (1 << 0), + L2_INPUT_FLAG_BRIDGE = (1 << 1), + L2_INPUT_FLAG_BVI = (1 << 2), +} __clib_packed l2_input_flags_t; +/* Per-subinterface L2 feature configuration */ typedef struct { + u8 __force_u64_alignement[0] __attribute__ ((aligned (8))); union { - u16 bd_index; /* bridge domain id */ - u32 output_sw_if_index; /* for xconnect */ + /* bridge domain id and values cached from the BD */ + struct + { + u16 bd_index; + u8 bd_seq_num; + u8 bd_mac_age; + }; + /* for xconnect */ + u32 output_sw_if_index; }; /* config for which input features are configured on this interface */ u32 feature_bitmap; + /* config for which input features are configured on this interface's + * BD - this is cached from the BD struct */ + u32 bd_feature_bitmap; + /* split horizon group */ u8 shg; - /* Interface mode. If both are 0, this interface is in L3 mode */ - u8 xconnect; - u8 bridge; + /* Interface sequence number */ + u8 seq_num; - /* this is the bvi interface for the bridge-domain */ - u8 bvi; + /* Flags describing this interface */ + l2_input_flags_t flags; + /* A wee bit of spare space */ + u8 __pad; } l2_input_config_t; +/* Ensure a struct is an even multiple of 8 bytes, + * so they do not straddle cache lines */ +STATIC_ASSERT_SIZEOF (l2_input_config_t, 2 * sizeof (u64)); typedef struct { @@ -157,7 +182,9 @@ STATIC_ASSERT ((u64) L2INPUT_VALID_MASK == (1ull << L2INPUT_N_FEAT) - 1, ""); char **l2input_get_feat_names (void); /* arg0 - u32 feature_bitmap, arg1 - u32 verbose */ +u8 *format_l2_input_feature_bitmap (u8 * s, va_list * args); u8 *format_l2_input_features (u8 * s, va_list * args); +u8 *format_l2_input (u8 * s, va_list * args); static_always_inline u8 bd_feature_flood (l2_bridge_domain_t * bd_config) @@ -200,6 +227,35 @@ bd_feature_arp_ufwd (l2_bridge_domain_t * bd_config) L2INPUT_FEAT_ARP_UFWD); } +static inline bool +l2_input_is_bridge (const l2_input_config_t * input) +{ + return (input->flags & L2_INPUT_FLAG_BRIDGE); +} + 
+static inline bool +l2_input_is_xconnect (const l2_input_config_t * input) +{ + return (input->flags & L2_INPUT_FLAG_XCONNECT); +} + +static inline bool +l2_input_is_bvi (const l2_input_config_t * input) +{ + return (input->flags & L2_INPUT_FLAG_BVI); +} + +static_always_inline u8 +l2_input_seq_num (u32 sw_if_index) +{ + l2_input_config_t *input; + + input = vec_elt_at_index (l2input_main.configs, sw_if_index); + + return input->seq_num; +} + + /** Masks for eliminating features that do not apply to a packet */ /** Get a pointer to the config for the given interface */ @@ -217,6 +273,9 @@ void l2input_interface_mac_change (u32 sw_if_index, const u8 * old_address, const u8 * new_address); +void l2_input_seq_num_inc (u32 sw_if_index); +walk_rc_t l2input_recache (u32 bd_index, u32 sw_if_index); + #define MODE_L3 0 #define MODE_L2_BRIDGE 1 #define MODE_L2_XC 2 diff --git a/src/vnet/l2/l2_input_node.c b/src/vnet/l2/l2_input_node.c new file mode 100644 index 00000000000..898e16ab347 --- /dev/null +++ b/src/vnet/l2/l2_input_node.c @@ -0,0 +1,401 @@ +/* + * l2_input_node.c : layer 2 input packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/** + * @file + * @brief Interface Input Mode (Layer 2 Cross-Connect or Bridge / Layer 3). + * + * This file contains the CLI Commands that modify the input mode of an + * interface. For interfaces in a Layer 2 cross-connect, all packets + * received on one interface will be transmitted to the other. For + * interfaces in a bridge-domain, packets will be forwarded to other + * interfaces in the same bridge-domain based on destination mac address. + * For interfaces in Layer 3 mode, the packets will be routed. + */ + +typedef struct +{ + /* per-pkt trace data */ + u8 dst_and_src[12]; + u32 next_index; + u32 sw_if_index; + u32 feat_mask; +} l2input_trace_t; + +/* packet trace format function */ +static u8 * +format_l2input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2input_trace_t *t = va_arg (*args, l2input_trace_t *); + + s = format (s, "l2-input: sw_if_index %d dst %U src %U [%U]", + t->sw_if_index, + format_ethernet_address, t->dst_and_src, + format_ethernet_address, t->dst_and_src + 6, + format_l2_input_feature_bitmap, t->feat_mask, 0); + return s; +} + +extern l2input_main_t l2input_main; + +#ifndef CLIB_MARCH_VARIANT +l2input_main_t l2input_main; +#endif /* CLIB_MARCH_VARIANT */ + +#define foreach_l2input_error \ +_(L2INPUT, "L2 input packets") \ +_(DROP, "L2 input drops") + +typedef enum +{ +#define _(sym,str) L2INPUT_ERROR_##sym, + foreach_l2input_error +#undef _ + L2INPUT_N_ERROR, +} l2input_error_t; + +static char *l2input_error_strings[] = { +#define _(sym,string) string, + foreach_l2input_error +#undef _ +}; + +typedef enum +{ /* */ + L2INPUT_NEXT_LEARN, + L2INPUT_NEXT_FWD, + L2INPUT_NEXT_DROP, + L2INPUT_N_NEXT, +} 
l2input_next_t; + +static_always_inline void +classify_and_dispatch (l2input_main_t * msm, vlib_buffer_t * b0, u16 * next0) +{ + /* + * Load L2 input feature struct + * Load bridge domain struct + * Parse ethernet header to determine unicast/mcast/broadcast + * take L2 input stat + * classify packet as IP/UDP/TCP, control, other + * mask feature bitmap + * go to first node in bitmap + * Later: optimize VTM + * + * For L2XC, + * set tx sw-if-handle + */ + + u32 feat_mask = ~0; + u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + ethernet_header_t *h0 = vlib_buffer_get_current (b0); + + /* Get config for the input interface */ + l2_input_config_t *config = vec_elt_at_index (msm->configs, sw_if_index0); + + /* Save split horizon group */ + vnet_buffer (b0)->l2.shg = config->shg; + + /* determine layer2 kind for stat and mask */ + if (PREDICT_FALSE (ethernet_address_cast (h0->dst_address))) + { + u8 *l3h0 = (u8 *) h0 + vnet_buffer (b0)->l2.l2_len; + +#define get_u16(addr) ( *((u16 *)(addr)) ) + u16 ethertype = clib_net_to_host_u16 (get_u16 (l3h0 - 2)); + u8 protocol = ((ip6_header_t *) l3h0)->protocol; + + /* Disable bridge forwarding (flooding will execute instead if not xconnect) */ + feat_mask &= ~(L2INPUT_FEAT_FWD | + L2INPUT_FEAT_UU_FLOOD | + L2INPUT_FEAT_UU_FWD | L2INPUT_FEAT_GBP_FWD); + + if (ethertype != ETHERNET_TYPE_ARP) + feat_mask &= ~(L2INPUT_FEAT_ARP_UFWD); + + /* Disable ARP-term for non-ARP and non-ICMP6 packet */ + if (ethertype != ETHERNET_TYPE_ARP && + (ethertype != ETHERNET_TYPE_IP6 || protocol != IP_PROTOCOL_ICMP6)) + feat_mask &= ~(L2INPUT_FEAT_ARP_TERM); + /* + * For packet from BVI - set SHG of ARP request or ICMPv6 neighbor + * solicitation packet from BVI to 0 so it can also flood to VXLAN + * tunnels or other ports with the same SHG as that of the BVI. 
+ */ + else if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] == + L2INPUT_BVI)) + { + if (ethertype == ETHERNET_TYPE_ARP) + { + ethernet_arp_header_t *arp0 = (ethernet_arp_header_t *) l3h0; + if (arp0->opcode == + clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request)) + vnet_buffer (b0)->l2.shg = 0; + } + else /* must be ICMPv6 */ + { + ip6_header_t *iph0 = (ip6_header_t *) l3h0; + icmp6_neighbor_solicitation_or_advertisement_header_t *ndh0; + ndh0 = ip6_next_header (iph0); + if (ndh0->icmp.type == ICMP6_neighbor_solicitation) + vnet_buffer (b0)->l2.shg = 0; + } + } + } + else + { + /* + * For packet from BVI - set SHG of unicast packet from BVI to 0 so it + * is not dropped on output to VXLAN tunnels or other ports with the + * same SHG as that of the BVI. + */ + if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] == + L2INPUT_BVI)) + vnet_buffer (b0)->l2.shg = 0; + } + + + if (l2_input_is_bridge (config)) + { + /* Do bridge-domain processing */ + /* save BD ID for next feature graph nodes */ + vnet_buffer (b0)->l2.bd_index = config->bd_index; + + /* Save bridge domain and interface seq_num */ + vnet_buffer (b0)->l2.l2fib_sn = l2_fib_mk_seq_num + (config->bd_seq_num, config->seq_num); + vnet_buffer (b0)->l2.bd_age = config->bd_mac_age; + + /* + * Process bridge domain feature enables. + * To perform learning/flooding/forwarding, the corresponding bit + * must be enabled in both the input interface config and in the + * bridge domain config. In the bd_bitmap, bits for features other + * than learning/flooding/forwarding should always be set. 
+ */ + feat_mask = feat_mask & config->bd_feature_bitmap; + } + else if (l2_input_is_xconnect (config)) + { + /* Set the output interface */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = config->output_sw_if_index; + } + else + feat_mask = L2INPUT_FEAT_DROP; + + /* mask out features from bitmap using packet type and bd config */ + u32 feature_bitmap = config->feature_bitmap & feat_mask; + + /* save for next feature graph nodes */ + vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap; + + /* Determine the next node */ + *next0 = feat_bitmap_get_next_node_index (msm->feat_next_node_index, + feature_bitmap); +} + +static_always_inline uword +l2input_node_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame, + int do_trace) +{ + u32 n_left, *from; + l2input_next_t next_index; + l2input_main_t *msm = &l2input_main; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next = nexts; + + from = vlib_frame_vector_args (frame); + n_left = frame->n_vectors; /* number of packets to process */ + next_index = node->cached_next_index; + + vlib_get_buffers (vm, from, bufs, n_left); + + while (n_left > 0) + { + while (n_left >= 8) + { + u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3; + + /* Prefetch next iteration. 
*/ + { + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (b[4], LOAD); + vlib_prefetch_buffer_header (b[5], LOAD); + vlib_prefetch_buffer_header (b[6], LOAD); + vlib_prefetch_buffer_header (b[7], LOAD); + + CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (b[6]->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (b[7]->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + classify_and_dispatch (msm, b[0], &next[0]); + classify_and_dispatch (msm, b[1], &next[1]); + classify_and_dispatch (msm, b[2], &next[2]); + classify_and_dispatch (msm, b[3], &next[3]); + + if (do_trace) + { + /* RX interface handles */ + sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_RX]; + sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_RX]; + sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_RX]; + + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + { + ethernet_header_t *h0 = vlib_buffer_get_current (b[0]); + l2input_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->feat_mask = vnet_buffer (b[0])->l2.feature_bitmap; + clib_memcpy_fast (t->dst_and_src, h0->dst_address, + sizeof (h0->dst_address) + + sizeof (h0->src_address)); + } + if (b[1]->flags & VLIB_BUFFER_IS_TRACED) + { + ethernet_header_t *h1 = vlib_buffer_get_current (b[1]); + l2input_trace_t *t = + vlib_add_trace (vm, node, b[1], sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->feat_mask = vnet_buffer (b[1])->l2.feature_bitmap; + clib_memcpy_fast (t->dst_and_src, h1->dst_address, + sizeof (h1->dst_address) + + sizeof (h1->src_address)); + } + if (b[2]->flags & VLIB_BUFFER_IS_TRACED) + { + ethernet_header_t *h2 = vlib_buffer_get_current (b[2]); + l2input_trace_t *t = + vlib_add_trace (vm, node, b[2], sizeof (*t)); + t->sw_if_index = sw_if_index2; + t->feat_mask = vnet_buffer 
(b[2])->l2.feature_bitmap; + clib_memcpy_fast (t->dst_and_src, h2->dst_address, + sizeof (h2->dst_address) + + sizeof (h2->src_address)); + } + if (b[3]->flags & VLIB_BUFFER_IS_TRACED) + { + ethernet_header_t *h3 = vlib_buffer_get_current (b[3]); + l2input_trace_t *t = + vlib_add_trace (vm, node, b[3], sizeof (*t)); + t->sw_if_index = sw_if_index3; + t->feat_mask = vnet_buffer (b[3])->l2.feature_bitmap; + clib_memcpy_fast (t->dst_and_src, h3->dst_address, + sizeof (h3->dst_address) + + sizeof (h3->src_address)); + } + } + + b += 4; + n_left -= 4; + next += 4; + } + + while (n_left > 0) + { + classify_and_dispatch (msm, b[0], &next[0]); + + if (do_trace && PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) + { + ethernet_header_t *h0 = vlib_buffer_get_current (b[0]); + l2input_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; + t->feat_mask = vnet_buffer (b[0])->l2.feature_bitmap; + clib_memcpy_fast (t->dst_and_src, h0->dst_address, + sizeof (h0->dst_address) + + sizeof (h0->src_address)); + } + + b += 1; + next += 1; + n_left -= 1; + } + } + + vlib_node_increment_counter (vm, l2input_node.index, + L2INPUT_ERROR_L2INPUT, frame->n_vectors); + + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + + return frame->n_vectors; +} + +VLIB_NODE_FN (l2input_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + return l2input_node_inline (vm, node, frame, 1 /* do_trace */ ); + return l2input_node_inline (vm, node, frame, 0 /* do_trace */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2input_node) = { + .name = "l2-input", + .vector_size = sizeof (u32), + .format_trace = format_l2input_trace, + .format_buffer = format_ethernet_header_with_length, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2input_error_strings), + .error_strings = l2input_error_strings, + + .n_next_nodes = 
L2INPUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2INPUT_NEXT_LEARN] = "l2-learn", + [L2INPUT_NEXT_FWD] = "l2-fwd", + [L2INPUT_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c index 3f5b48bfa99..6f2201b3ade 100644 --- a/src/vnet/l2/l2_learn.c +++ b/src/vnet/l2/l2_learn.c @@ -128,7 +128,7 @@ l2learn_process (vlib_node_runtime_t * node, { /* Entry in L2FIB with matching sw_if_index matched - normal fast path */ u32 dtime = timestamp - result0->fields.timestamp; - u32 dsn = result0->fields.sn.as_u16 - vnet_buffer (b0)->l2.l2fib_sn; + u32 dsn = (result0->fields.sn - vnet_buffer (b0)->l2.l2fib_sn); u32 check = (dtime && vnet_buffer (b0)->l2.bd_age) || dsn; if (PREDICT_TRUE (check == 0)) @@ -224,7 +224,7 @@ l2learn_process (vlib_node_runtime_t * node, /* Update the entry */ result0->fields.timestamp = timestamp; - result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; + result0->fields.sn = vnet_buffer (b0)->l2.l2fib_sn; BVT (clib_bihash_kv) kv; kv.key = key0->raw; -- cgit 1.2.3-korg