Diffstat (limited to 'src/vnet')
72 files changed, 3837 insertions, 1498 deletions
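The diff that follows adds multihop support to the BFD-over-UDP code: the session-management functions in bfd_api.h gain a leading `bool multihop` argument, a multihop session is keyed with `sw_if_index == ~0` and uses the dedicated multihop UDP ports, and binary-API clients opt in via the new `bfd_udp_enable_multihop` message (which sets `bfd_main.multihop_enabled`). As a minimal illustration only (not part of the patch; the wrapper name and the timer/multiplier values are invented for the example), a control-plane caller using the new `bfd_udp_add_session()` signature might create an unauthenticated multihop session like this:

    /* Sketch, assuming the new bfd_udp_add_session() prototype from
     * src/vnet/bfd/bfd_api.h in this patch.  multihop = true means no
     * interface binding, so sw_if_index is passed as ~0. */
    #include <vnet/bfd/bfd_api.h>

    static vnet_api_error_t
    example_add_multihop_session (const ip46_address_t *local,
                                  const ip46_address_t *peer)
    {
      return bfd_udp_add_session (true /* multihop */, ~0 /* sw_if_index */,
                                  local, peer,
                                  100000 /* desired_min_tx_usec */,
                                  100000 /* required_min_rx_usec */,
                                  3 /* detect_mult */,
                                  0 /* is_authenticated */,
                                  0 /* conf_key_id */, 0 /* bfd_key_id */);
    }

Per the updated CLI short help in bfd_cli.c, the equivalent debug-CLI form is "bfd udp session add multihop local-addr <local-address> peer-addr <peer-address> desired-min-tx <usec> required-min-rx <usec> detect-mult <n>"; binary-API clients that instead signal multihop by sending sw_if_index == ~0 must first issue bfd_udp_enable_multihop.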
diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api index d3b3ed21a26..cf14455f391 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -359,6 +359,16 @@ autoreply define bfd_udp_auth_deactivate bool is_delayed; }; +/** \brief BFD UDP - enable multihop support + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +autoreply define bfd_udp_enable_multihop +{ + u32 client_index; + u32 context; +}; + /* must be compatible with bfd_error_t */ counters bfd_udp { none { diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c index 816e71081ff..bccf58ba4bb 100644 --- a/src/vnet/bfd/bfd_api.c +++ b/src/vnet/bfd/bfd_api.c @@ -46,8 +46,24 @@ pub_sub_handler (bfd_events, BFD_EVENTS); ip_address_decode(&mp->local_addr, &local_addr); \ ip_address_decode(&mp->peer_addr, &peer_addr); -#define BFD_UDP_API_PARAM_FROM_MP(mp) \ - clib_net_to_host_u32 (mp->sw_if_index), &local_addr, &peer_addr +#define BFD_UDP_API_PARAM_IS_MH(mp) \ + bfd_main.multihop_enabled && (mp->sw_if_index == ~0) + +#define BFD_UDP_API_PARAM_FROM_MP(mp) \ + BFD_UDP_API_PARAM_IS_MH (mp) ? true : false, \ + BFD_UDP_API_PARAM_IS_MH (mp) ? ~0 : \ + clib_net_to_host_u32 (mp->sw_if_index), \ + &local_addr, &peer_addr + +#define COND_VALIDATE_SW_IF_INDEX(mp) \ + do \ + { \ + if (!(bfd_main.multihop_enabled && mp->sw_if_index == ~0)) \ + { \ + VALIDATE_SW_IF_INDEX (mp) \ + } \ + } \ + while (0); static void vl_api_bfd_udp_add_t_handler (vl_api_bfd_udp_add_t * mp) @@ -55,7 +71,7 @@ vl_api_bfd_udp_add_t_handler (vl_api_bfd_udp_add_t * mp) vl_api_bfd_udp_add_reply_t *rmp; int rv; - VALIDATE_SW_IF_INDEX (mp); + COND_VALIDATE_SW_IF_INDEX (mp); BFD_UDP_API_PARAM_COMMON_CODE; @@ -76,7 +92,7 @@ vl_api_bfd_udp_upd_t_handler (vl_api_bfd_udp_add_t *mp) vl_api_bfd_udp_upd_reply_t *rmp; int rv; - VALIDATE_SW_IF_INDEX (mp); + COND_VALIDATE_SW_IF_INDEX (mp); BFD_UDP_API_PARAM_COMMON_CODE; @@ -97,7 +113,7 @@ vl_api_bfd_udp_mod_t_handler (vl_api_bfd_udp_mod_t * mp) vl_api_bfd_udp_mod_reply_t *rmp; int rv; - VALIDATE_SW_IF_INDEX (mp); + COND_VALIDATE_SW_IF_INDEX (mp); BFD_UDP_API_PARAM_COMMON_CODE; @@ -116,7 +132,7 @@ vl_api_bfd_udp_del_t_handler (vl_api_bfd_udp_del_t * mp) vl_api_bfd_udp_del_reply_t *rmp; int rv; - VALIDATE_SW_IF_INDEX (mp); + COND_VALIDATE_SW_IF_INDEX (mp); BFD_UDP_API_PARAM_COMMON_CODE; @@ -143,7 +159,14 @@ send_bfd_udp_session_details (vl_api_registration_t * reg, u32 context, mp->state = clib_host_to_net_u32 (bs->local_state); bfd_udp_session_t *bus = &bs->udp; bfd_udp_key_t *key = &bus->key; - mp->sw_if_index = clib_host_to_net_u32 (key->sw_if_index); + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + mp->sw_if_index = ~0; + } + else + { + mp->sw_if_index = clib_host_to_net_u32 (key->sw_if_index); + } if ((!bs->auth.is_delayed && bs->auth.curr_key) || (bs->auth.is_delayed && bs->auth.next_key)) { @@ -186,7 +209,14 @@ send_bfd_udp_session_event (vl_api_registration_t *reg, u32 pid, mp->state = clib_host_to_net_u32 (bs->local_state); bfd_udp_session_t *bus = &bs->udp; bfd_udp_key_t *key = &bus->key; - mp->sw_if_index = clib_host_to_net_u32 (key->sw_if_index); + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + mp->sw_if_index = ~0; + } + else + { + mp->sw_if_index = clib_host_to_net_u32 (key->sw_if_index); + } if ((!bs->auth.is_delayed && bs->auth.curr_key) || (bs->auth.is_delayed && bs->auth.next_key)) { @@ -315,7 +345,7 @@ vl_api_bfd_udp_auth_activate_t_handler (vl_api_bfd_udp_auth_activate_t * mp) vl_api_bfd_udp_auth_activate_reply_t *rmp; int rv; - 
VALIDATE_SW_IF_INDEX (mp); + COND_VALIDATE_SW_IF_INDEX (mp); BFD_UDP_API_PARAM_COMMON_CODE; @@ -334,7 +364,7 @@ vl_api_bfd_udp_auth_deactivate_t_handler (vl_api_bfd_udp_auth_deactivate_t * vl_api_bfd_udp_auth_deactivate_reply_t *rmp; int rv; - VALIDATE_SW_IF_INDEX (mp); + COND_VALIDATE_SW_IF_INDEX (mp); BFD_UDP_API_PARAM_COMMON_CODE; @@ -423,6 +453,17 @@ vl_api_bfd_udp_get_echo_source_t_handler (vl_api_bfd_udp_get_echo_source_t * })) } +static void +vl_api_bfd_udp_enable_multihop_t_handler (vl_api_bfd_udp_enable_multihop_t *mp) +{ + vl_api_bfd_udp_enable_multihop_reply_t *rmp; + int rv = 0; + + bfd_main.multihop_enabled = true; + + REPLY_MACRO (VL_API_BFD_UDP_ENABLE_MULTIHOP_REPLY); +} + #include <vnet/bfd/bfd.api.c> static clib_error_t * bfd_api_hookup (vlib_main_t * vm) diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h index f051e6b679c..16501fcd272 100644 --- a/src/vnet/bfd/bfd_api.h +++ b/src/vnet/bfd/bfd_api.h @@ -37,44 +37,49 @@ typedef enum /** * @brief create a new bfd session */ -vnet_api_error_t -bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - u32 desired_min_tx_usec, u32 required_min_rx_usec, - u8 detect_mult, u8 is_authenticated, u32 conf_key_id, - u8 bfd_key_id); +vnet_api_error_t bfd_udp_add_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, + u32 desired_min_tx_usec, + u32 required_min_rx_usec, u8 detect_mult, + u8 is_authenticated, u32 conf_key_id, + u8 bfd_key_id); /** - * @brief create a new or modify and existing bfd session + * @brief create a new or modify an existing bfd session */ -vnet_api_error_t -bfd_udp_upd_session (u32 sw_if_index, const ip46_address_t *local_addr, - const ip46_address_t *peer_addr, u32 desired_min_tx_usec, - u32 required_min_rx_usec, u8 detect_mult, - u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id); +vnet_api_error_t bfd_udp_upd_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, + u32 desired_min_tx_usec, + u32 required_min_rx_usec, u8 detect_mult, + u8 is_authenticated, u32 conf_key_id, + u8 bfd_key_id); /** * @brief modify existing session */ -vnet_api_error_t -bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - u32 desired_min_tx_usec, u32 required_min_rx_usec, - u8 detect_mult); +vnet_api_error_t bfd_udp_mod_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, + u32 desired_min_tx_usec, + u32 required_min_rx_usec, + u8 detect_mult); /** * @brief delete existing session */ -vnet_api_error_t bfd_udp_del_session (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr); +vnet_api_error_t bfd_udp_del_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr); /** * @brief set session admin down/up */ -vnet_api_error_t bfd_udp_session_set_flags (vlib_main_t * vm, u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, +vnet_api_error_t bfd_udp_session_set_flags (vlib_main_t *vm, bool multihop, + u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u8 admin_up_down); /** @@ -91,18 +96,18 @@ vnet_api_error_t bfd_auth_del_key (u32 conf_key_id); /** * @brief activate authentication for existing session */ -vnet_api_error_t bfd_udp_auth_activate (u32 sw_if_index, - const ip46_address_t 
* local_addr, - const ip46_address_t * peer_addr, +vnet_api_error_t bfd_udp_auth_activate (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u32 conf_key_id, u8 bfd_key_id, u8 is_delayed); /** * @brief deactivate authentication for existing session */ -vnet_api_error_t bfd_udp_auth_deactivate (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, +vnet_api_error_t bfd_udp_auth_deactivate (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u8 is_delayed); /** diff --git a/src/vnet/bfd/bfd_cli.c b/src/vnet/bfd/bfd_cli.c index 33942bb89e6..194c62b507c 100644 --- a/src/vnet/bfd/bfd_cli.c +++ b/src/vnet/bfd/bfd_cli.c @@ -26,11 +26,39 @@ #include <vnet/bfd/bfd_api.h> #include <vnet/bfd/bfd_main.h> +#define BFD_MULTIHOP_CLI_CHECK \ + do \ + { \ + multihop = have_multihop; \ + if (multihop) \ + { \ + sw_if_index = ~0; \ + } \ + if (multihop && have_sw_if_index) \ + { \ + ret = clib_error_return ( \ + 0, "Incompatible parameter combination, " \ + "interface cannot be specified when multihop is enabled"); \ + goto out; \ + } \ + if (!multihop && !have_sw_if_index) \ + { \ + ret = \ + clib_error_return (0, "Incompatible parameter combination, " \ + "interface must be set if not multihop"); \ + goto out; \ + } \ + } \ + while (0); + static u8 * format_bfd_session_cli (u8 * s, va_list * args) { vlib_main_t *vm = va_arg (*args, vlib_main_t *); bfd_session_t *bs = va_arg (*args, bfd_session_t *); + s = format (s, "%10s %-32s %20s\n", "", "Hop Type", + bfd_hop_type_string (bs->hop_type)); + switch (bs->transport) { case BFD_TRANSPORT_UDP4: @@ -52,6 +80,8 @@ format_bfd_session_cli (u8 * s, va_list * args) bfd_diag_code_string (bs->remote_diag)); s = format (s, "%10s %-32s %20u %20u\n", "", "Detect multiplier", bs->local_detect_mult, bs->remote_detect_mult); + s = format (s, "%10s %-32s %20llu\n", "", "Detection Time (usec)", + bfd_nsec_to_usec (bs->detection_time_nsec)); s = format (s, "%10s %-32s %20u %20llu\n", "", "Required Min Rx Interval (usec)", bs->config_required_min_rx_usec, bs->remote_min_rx_usec); @@ -363,6 +393,7 @@ VLIB_CLI_COMMAND (bfd_cli_key_del_command, static) = { #define DETECT_MULT_STR "detect-mult" #define ADMIN_STR "admin" #define DELAYED_STR "delayed" +#define MULTIHOP_STR "multihop" static const unsigned mandatory = 1; static const unsigned optional = 0; @@ -401,7 +432,8 @@ bfd_cli_udp_session_add (vlib_main_t * vm, unformat_input_t * input, clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; #define foreach_bfd_cli_udp_session_add_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + F (bool, multihop, MULTIHOP_STR, optional, "%_") \ + F (u32, sw_if_index, INTERFACE_STR, optional, "%U", \ unformat_vnet_sw_interface, &vnet_main) \ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ bfd_cli_unformat_ip46_address) \ @@ -433,6 +465,7 @@ bfd_cli_udp_session_add (vlib_main_t * vm, unformat_input_t * input, } foreach_bfd_cli_udp_session_add_cli_param (CHECK_MANDATORY); + BFD_MULTIHOP_CLI_CHECK if (1 == have_conf_key_id + have_bfd_key_id) { @@ -456,11 +489,9 @@ bfd_cli_udp_session_add (vlib_main_t * vm, unformat_input_t * input, goto out; } - vnet_api_error_t rv = - bfd_udp_add_session (sw_if_index, &local_addr, &peer_addr, desired_min_tx, - required_min_rx, - detect_mult, have_conf_key_id, conf_key_id, - bfd_key_id); + vnet_api_error_t rv = bfd_udp_add_session ( + multihop, 
sw_if_index, &local_addr, &peer_addr, desired_min_tx, + required_min_rx, detect_mult, have_conf_key_id, conf_key_id, bfd_key_id); if (rv) { ret = @@ -477,16 +508,16 @@ out: VLIB_CLI_COMMAND (bfd_cli_udp_session_add_command, static) = { .path = "bfd udp session add", .short_help = "bfd udp session add" - " interface <interface>" - " local-addr <local-address>" - " peer-addr <peer-address>" - " desired-min-tx <desired min tx interval>" - " required-min-rx <required min rx interval>" - " detect-mult <detect multiplier> " - "[" - " conf-key-id <config key ID>" - " bfd-key-id <BFD key ID>" - "]", + " <multihop | interface <interface>>" + " local-addr <local-address>" + " peer-addr <peer-address>" + " desired-min-tx <desired min tx interval>" + " required-min-rx <required min rx interval>" + " detect-mult <detect multiplier> " + "[" + " conf-key-id <config key ID>" + " bfd-key-id <BFD key ID>" + "]", .function = bfd_cli_udp_session_add, }; @@ -497,7 +528,8 @@ bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input, clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; #define foreach_bfd_cli_udp_session_mod_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + F (bool, multihop, MULTIHOP_STR, optional, "%_") \ + F (u32, sw_if_index, INTERFACE_STR, optional, "%U", \ unformat_vnet_sw_interface, &vnet_main) \ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ bfd_cli_unformat_ip46_address) \ @@ -527,6 +559,7 @@ bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input, } foreach_bfd_cli_udp_session_mod_cli_param (CHECK_MANDATORY); + BFD_MULTIHOP_CLI_CHECK if (detect_mult > 255) { @@ -536,7 +569,7 @@ bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input, } vnet_api_error_t rv = - bfd_udp_mod_session (sw_if_index, &local_addr, &peer_addr, + bfd_udp_mod_session (multihop, sw_if_index, &local_addr, &peer_addr, desired_min_tx, required_min_rx, detect_mult); if (rv) { @@ -553,13 +586,13 @@ out: VLIB_CLI_COMMAND (bfd_cli_udp_session_mod_command, static) = { .path = "bfd udp session mod", - .short_help = "bfd udp session mod interface" - " <interface> local-addr" - " <local-address> peer-addr" - " <peer-address> desired-min-tx" - " <desired min tx interval> required-min-rx" - " <required min rx interval> detect-mult" - " <detect multiplier> ", + .short_help = "bfd udp session mod " + " <multihop | interface <interface>>" + " <local-address> peer-addr" + " <peer-address> desired-min-tx" + " <desired min tx interval> required-min-rx" + " <required min rx interval> detect-mult" + " <detect multiplier> ", .function = bfd_cli_udp_session_mod, }; @@ -570,7 +603,8 @@ bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input, clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; #define foreach_bfd_cli_udp_session_del_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + F (bool, multihop, MULTIHOP_STR, optional, "%_") \ + F (u32, sw_if_index, INTERFACE_STR, optional, "%U", \ unformat_vnet_sw_interface, &vnet_main) \ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ bfd_cli_unformat_ip46_address) \ @@ -597,9 +631,10 @@ bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input, } foreach_bfd_cli_udp_session_del_cli_param (CHECK_MANDATORY); + BFD_MULTIHOP_CLI_CHECK vnet_api_error_t rv = - bfd_udp_del_session (sw_if_index, &local_addr, &peer_addr); + bfd_udp_del_session (multihop, sw_if_index, &local_addr, &peer_addr); if (rv) { ret 
= @@ -615,10 +650,10 @@ out: VLIB_CLI_COMMAND (bfd_cli_udp_session_del_command, static) = { .path = "bfd udp session del", - .short_help = "bfd udp session del interface" - " <interface> local-addr" - " <local-address> peer-addr" - "<peer-address> ", + .short_help = "bfd udp session del <multihop |" + " interface <interface>> local-addr" + " <local-address> peer-addr" + "<peer-address> ", .function = bfd_cli_udp_session_del, }; @@ -629,7 +664,8 @@ bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input, clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; #define foreach_bfd_cli_udp_session_set_flags_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + F (bool, multihop, MULTIHOP_STR, optional, "%_") \ + F (u32, sw_if_index, INTERFACE_STR, optional, "%U", \ unformat_vnet_sw_interface, &vnet_main) \ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ bfd_cli_unformat_ip46_address) \ @@ -658,6 +694,7 @@ bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input, } foreach_bfd_cli_udp_session_set_flags_cli_param (CHECK_MANDATORY); + BFD_MULTIHOP_CLI_CHECK u8 admin_up_down; static const char up[] = "up"; @@ -677,9 +714,8 @@ bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input, ADMIN_STR, admin_up_down_token); goto out; } - vnet_api_error_t rv = - bfd_udp_session_set_flags (vm, sw_if_index, &local_addr, - &peer_addr, admin_up_down); + vnet_api_error_t rv = bfd_udp_session_set_flags ( + vm, multihop, sw_if_index, &local_addr, &peer_addr, admin_up_down); if (rv) { ret = @@ -696,10 +732,10 @@ out: VLIB_CLI_COMMAND (bfd_cli_udp_session_set_flags_command, static) = { .path = "bfd udp session set-flags", .short_help = "bfd udp session set-flags" - " interface <interface>" - " local-addr <local-address>" - " peer-addr <peer-address>" - " admin <up|down>", + " <multihop | interface <interface>>" + " local-addr <local-address>" + " peer-addr <peer-address>" + " admin <up|down>", .function = bfd_cli_udp_session_set_flags, }; @@ -711,7 +747,8 @@ bfd_cli_udp_session_auth_activate (vlib_main_t * vm, clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; #define foreach_bfd_cli_udp_session_auth_activate_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + F (bool, multihop, MULTIHOP_STR, optional, "%_") \ + F (u32, sw_if_index, INTERFACE_STR, optional, "%U", \ unformat_vnet_sw_interface, &vnet_main) \ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ bfd_cli_unformat_ip46_address) \ @@ -741,6 +778,7 @@ bfd_cli_udp_session_auth_activate (vlib_main_t * vm, } foreach_bfd_cli_udp_session_auth_activate_cli_param (CHECK_MANDATORY); + BFD_MULTIHOP_CLI_CHECK u8 is_delayed = 0; if (have_delayed_token) @@ -773,8 +811,8 @@ bfd_cli_udp_session_auth_activate (vlib_main_t * vm, } vnet_api_error_t rv = - bfd_udp_auth_activate (sw_if_index, &local_addr, &peer_addr, conf_key_id, - bfd_key_id, is_delayed); + bfd_udp_auth_activate (multihop, sw_if_index, &local_addr, &peer_addr, + conf_key_id, bfd_key_id, is_delayed); if (rv) { ret = @@ -791,12 +829,12 @@ out: VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_activate_command, static) = { .path = "bfd udp session auth activate", .short_help = "bfd udp session auth activate" - " interface <interface>" - " local-addr <local-address>" - " peer-addr <peer-address>" - " conf-key-id <config key ID>" - " bfd-key-id <BFD key ID>" - " [ delayed <yes|no> ]", + " <multihop | interface <interface>>" + " 
local-addr <local-address>" + " peer-addr <peer-address>" + " conf-key-id <config key ID>" + " bfd-key-id <BFD key ID>" + " [ delayed <yes|no> ]", .function = bfd_cli_udp_session_auth_activate, }; @@ -807,7 +845,8 @@ bfd_cli_udp_session_auth_deactivate (vlib_main_t *vm, unformat_input_t *input, clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; #define foreach_bfd_cli_udp_session_auth_deactivate_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + F (bool, multihop, MULTIHOP_STR, optional, "%_") \ + F (u32, sw_if_index, INTERFACE_STR, optional, "%U", \ unformat_vnet_sw_interface, &vnet_main) \ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ bfd_cli_unformat_ip46_address) \ @@ -835,6 +874,7 @@ bfd_cli_udp_session_auth_deactivate (vlib_main_t *vm, unformat_input_t *input, } foreach_bfd_cli_udp_session_auth_deactivate_cli_param (CHECK_MANDATORY); + BFD_MULTIHOP_CLI_CHECK u8 is_delayed = 0; if (have_delayed_token) @@ -858,8 +898,8 @@ bfd_cli_udp_session_auth_deactivate (vlib_main_t *vm, unformat_input_t *input, } } - vnet_api_error_t rv = bfd_udp_auth_deactivate (sw_if_index, &local_addr, - &peer_addr, is_delayed); + vnet_api_error_t rv = bfd_udp_auth_deactivate ( + multihop, sw_if_index, &local_addr, &peer_addr, is_delayed); if (rv) { ret = clib_error_return ( @@ -875,10 +915,10 @@ out: VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, static) = { .path = "bfd udp session auth deactivate", .short_help = "bfd udp session auth deactivate" - " interface <interface>" - " local-addr <local-address>" - " peer-addr <peer-address>" - "[ delayed <yes|no> ]", + " <multihop | interface <interface>>" + " local-addr <local-address>" + " peer-addr <peer-address>" + "[ delayed <yes|no> ]", .function = bfd_cli_udp_session_auth_deactivate, }; diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 1423da91158..4ad0a16830f 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -30,6 +30,20 @@ #include <vlib/log.h> #include <vnet/crypto/crypto.h> +const char * +bfd_hop_type_string (bfd_hop_type_e hoptype) +{ + switch (hoptype) + { +#define F(x) \ + case BFD_HOP_TYPE_##x: \ + return "BFD_HOP_TYPE_" #x; + foreach_bfd_hop (F) +#undef F + } + return "UNKNOWN"; +} + static void bfd_validate_counters (bfd_main_t *bm) { @@ -1353,6 +1367,8 @@ VLIB_REGISTER_NODE (bfd_process_node, static) = [BFD_TX_IP6_REWRITE] = "ip6-rewrite", [BFD_TX_IP4_MIDCHAIN] = "ip4-midchain", [BFD_TX_IP6_MIDCHAIN] = "ip6-midchain", + [BFD_TX_IP4_LOOKUP] = "ip4-lookup", + [BFD_TX_IP6_LOOKUP] = "ip6-lookup", } }; // clang-format on @@ -2049,29 +2065,29 @@ u8 * format_bfd_session (u8 * s, va_list * args) { const bfd_session_t *bs = va_arg (*args, bfd_session_t *); - s = format (s, "bs_idx=%u local-state=%s remote-state=%s\n" - "local-discriminator=%u remote-discriminator=%u\n" - "local-diag=%s echo-active=%s\n" - "desired-min-tx=%u required-min-rx=%u\n" - "required-min-echo-rx=%u detect-mult=%u\n" - "remote-min-rx=%u remote-min-echo-rx=%u\n" - "remote-demand=%s poll-state=%s\n" - "auth: local-seq-num=%u remote-seq-num=%u\n" - " is-delayed=%s\n" - " curr-key=%U\n" - " next-key=%U", - bs->bs_idx, bfd_state_string (bs->local_state), - bfd_state_string (bs->remote_state), bs->local_discr, - bs->remote_discr, bfd_diag_code_string (bs->local_diag), - (bs->echo ? 
"yes" : "no"), bs->config_desired_min_tx_usec, - bs->config_required_min_rx_usec, 1, bs->local_detect_mult, - bs->remote_min_rx_usec, bs->remote_min_echo_rx_usec, - (bs->remote_demand ? "yes" : "no"), - bfd_poll_state_string (bs->poll_state), - bs->auth.local_seq_number, bs->auth.remote_seq_number, - (bs->auth.is_delayed ? "yes" : "no"), - format_bfd_auth_key, bs->auth.curr_key, format_bfd_auth_key, - bs->auth.next_key); + s = format ( + s, + "bs_idx=%u hop-type=%s local-state=%s remote-state=%s\n" + "local-discriminator=%u remote-discriminator=%u\n" + "local-diag=%s echo-active=%s\n" + "desired-min-tx=%u required-min-rx=%u\n" + "required-min-echo-rx=%u detect-mult=%u\n" + "remote-min-rx=%u remote-min-echo-rx=%u\n" + "remote-demand=%s poll-state=%s\n" + "auth: local-seq-num=%u remote-seq-num=%u\n" + " is-delayed=%s\n" + " curr-key=%U\n" + " next-key=%U", + bs->bs_idx, bfd_hop_type_string (bs->hop_type), + bfd_state_string (bs->local_state), bfd_state_string (bs->remote_state), + bs->local_discr, bs->remote_discr, bfd_diag_code_string (bs->local_diag), + (bs->echo ? "yes" : "no"), bs->config_desired_min_tx_usec, + bs->config_required_min_rx_usec, 1, bs->local_detect_mult, + bs->remote_min_rx_usec, bs->remote_min_echo_rx_usec, + (bs->remote_demand ? "yes" : "no"), bfd_poll_state_string (bs->poll_state), + bs->auth.local_seq_number, bs->auth.remote_seq_number, + (bs->auth.is_delayed ? "yes" : "no"), format_bfd_auth_key, + bs->auth.curr_key, format_bfd_auth_key, bs->auth.next_key); return s; } diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index 1d4617e1d7c..7d9253983ce 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -71,13 +71,13 @@ typedef enum /** * hop types */ -#define foreach_bfd_hop(F) \ - F (SINGLE, "single") \ - F (MULTI, "multi") \ +#define foreach_bfd_hop(F) \ + F (SINGLE) \ + F (MULTI) typedef enum { -#define F(sym, str) BFD_HOP_TYPE_##sym, +#define F(sym) BFD_HOP_TYPE_##sym, foreach_bfd_hop (F) #undef F } bfd_hop_type_e; @@ -318,6 +318,12 @@ typedef struct /** vector of callback notification functions */ bfd_notify_fn_t *listeners; + /** + * true if multihop support is enabled so sw_if_index of ~0 + * represents a multihop session + */ + bool multihop_enabled; + /** log class */ vlib_log_class_t log_class; @@ -449,6 +455,7 @@ vnet_api_error_t bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, u32 bfd_nsec_to_usec (u64 nsec); const char *bfd_poll_state_string (bfd_poll_state_e state); +const char *bfd_hop_type_string (bfd_hop_type_e state); #define USEC_PER_MS (1000LL) #define MSEC_PER_SEC (1000LL) @@ -482,6 +489,8 @@ typedef enum BFD_TX_IP6_REWRITE, BFD_TX_IP4_MIDCHAIN, BFD_TX_IP6_MIDCHAIN, + BFD_TX_IP4_LOOKUP, + BFD_TX_IP6_LOOKUP, BFD_TX_N_NEXT, } bfd_tx_next_t; diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index ec42cda1bc4..6d3202cc55c 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -64,12 +64,18 @@ typedef struct u32 echo_source_sw_if_index; /* log class */ vlib_log_class_t log_class; - /* number of active udp4 sessions */ - u32 udp4_sessions_count; - u32 udp4_sessions_count_stat_seg_entry; - /* number of active udp6 sessions */ - u32 udp6_sessions_count; - u32 udp6_sessions_count_stat_seg_entry; + /* number of active udp4 single-hop sessions */ + u32 udp4_sh_sessions_count; + u32 udp4_sh_sessions_count_stat_seg_entry; + /* number of active udp6 single-hop sessions */ + u32 udp6_sh_sessions_count; + u32 udp6_sh_sessions_count_stat_seg_entry; + /* number of active udp4 multi-hop sessions */ + u32 
udp4_mh_sessions_count; + u32 udp4_mh_sessions_count_stat_seg_entry; + /* number of active udp6 multi-hop sessions */ + u32 udp6_mh_sessions_count; + u32 udp6_mh_sessions_count_stat_seg_entry; } bfd_udp_main_t; static vlib_node_registration_t bfd_udp4_input_node; @@ -258,8 +264,11 @@ bfd_add_udp4_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, vlib_buffer_t *b = vlib_get_buffer (vm, bi); b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; - vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; - vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; + if (bs->hop_type == BFD_HOP_TYPE_SINGLE) + { + vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; + vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; + } vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; typedef struct @@ -290,7 +299,14 @@ bfd_add_udp4_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, { headers->ip4.src_address.as_u32 = key->local_addr.ip4.as_u32; headers->ip4.dst_address.as_u32 = key->peer_addr.ip4.as_u32; - headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4_mh); + } + else + { + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); + } } /* fix ip length, checksum and udp length */ @@ -313,8 +329,11 @@ bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, vlib_buffer_t *b = vlib_get_buffer (vm, bi); b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; - vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; - vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; + if (bs->hop_type == BFD_HOP_TYPE_SINGLE) + { + vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; + vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; + } vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; typedef struct @@ -350,7 +369,14 @@ bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, sizeof (headers->ip6.src_address)); clib_memcpy_fast (&headers->ip6.dst_address, &key->peer_addr.ip6, sizeof (headers->ip6.dst_address)); - headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6_mh); + } + else + { + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); + } } /* fix ip payload length and udp length */ @@ -398,9 +424,25 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node) { vnet_main_t *vnm = vnet_get_main (); const bfd_udp_session_t *bus = &bs->udp; - ip_adjacency_t *adj = adj_get (bus->adj_index); - /* don't try to send the buffer if the interface is not up */ + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + switch (bs->transport) + { + case BFD_TRANSPORT_UDP4: + *next_node = BFD_TX_IP4_LOOKUP; + return 1; + case BFD_TRANSPORT_UDP6: + *next_node = BFD_TX_IP6_LOOKUP; + return 1; + default: + /* drop */ + return 0; + } + } + + ip_adjacency_t *adj = adj_get (bus->adj_index); + /* For single-hop, don't try to send the buffer if the interface is not up */ if (!vnet_sw_interface_is_up (vnm, bus->key.sw_if_index)) return 0; @@ -495,7 +537,7 @@ bfd_udp_key_init (bfd_udp_key_t * key, u32 sw_if_index, const ip46_address_t * peer_addr) { clib_memset (key, 0, sizeof (*key)); - key->sw_if_index = sw_if_index; + key->sw_if_index = sw_if_index & 0xFFFF; key->local_addr.as_u64[0] = local_addr->as_u64[0]; 
key->local_addr.as_u64[1] = local_addr->as_u64[1]; key->peer_addr.as_u64[0] = peer_addr->as_u64[0]; @@ -503,12 +545,13 @@ bfd_udp_key_init (bfd_udp_key_t * key, u32 sw_if_index, } static vnet_api_error_t -bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum, - u32 sw_if_index, u32 desired_min_tx_usec, +bfd_udp_add_session_internal (vlib_main_t *vm, bfd_udp_main_t *bum, + bool multihop, u32 sw_if_index, + u32 desired_min_tx_usec, u32 required_min_rx_usec, u8 detect_mult, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - bfd_session_t ** bs_out) + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, + bfd_session_t **bs_out) { /* get a pool entry and if we end up not needing it, give it back */ bfd_transport_e t = BFD_TRANSPORT_UDP4; @@ -536,8 +579,9 @@ bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum, return VNET_API_ERROR_BFD_EEXIST; } mhash_set (&bum->bfd_session_idx_by_bfd_key, key, bs->bs_idx, NULL); - BFD_DBG ("session created, bs_idx=%u, sw_if_index=%d, local=%U, peer=%U", - bs->bs_idx, key->sw_if_index, format_ip46_address, + BFD_DBG ("session created, bs_idx=%u, multihop=%u, sw_if_index=%d, " + "local=%U, peer=%U", + bs->bs_idx, multihop, key->sw_if_index, format_ip46_address, &key->local_addr, IP46_TYPE_ANY, format_ip46_address, &key->peer_addr, IP46_TYPE_ANY); vlib_log_info (bum->log_class, "create BFD session: %U", @@ -548,41 +592,82 @@ bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum, &key->peer_addr); if (BFD_TRANSPORT_UDP4 == t) { - bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4, - peer, key->sw_if_index); - BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, VNET_LINK_IP4, %U, %d) " - "returns %d", - format_ip46_address, peer, IP46_TYPE_ANY, key->sw_if_index, - bus->adj_index); - ++bum->udp4_sessions_count; - bfd_udp_update_stat_segment_entry ( - bum->udp4_sessions_count_stat_seg_entry, bum->udp4_sessions_count); - if (1 == bum->udp4_sessions_count) + if (multihop) { - udp_register_dst_port (vm, UDP_DST_PORT_bfd4, - bfd_udp4_input_node.index, 1); - udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo4, - bfd_udp_echo4_input_node.index, 1); + ++bum->udp4_mh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp4_mh_sessions_count_stat_seg_entry, + bum->udp4_mh_sessions_count); + if (1 == bum->udp4_mh_sessions_count) + { + udp_register_dst_port (vm, UDP_DST_PORT_bfd4_mh, + bfd_udp4_input_node.index, 1); + } + } + else + { + bus->adj_index = adj_nbr_add_or_lock ( + FIB_PROTOCOL_IP4, VNET_LINK_IP4, peer, key->sw_if_index); + BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, VNET_LINK_IP4, " + " %U, %d) returns %d", + format_ip46_address, peer, IP46_TYPE_ANY, key->sw_if_index, + bus->adj_index); + ++bum->udp4_sh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp4_sh_sessions_count_stat_seg_entry, + bum->udp4_sh_sessions_count); + if (1 == bum->udp4_sh_sessions_count) + { + udp_register_dst_port (vm, UDP_DST_PORT_bfd4, + bfd_udp4_input_node.index, 1); + udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo4, + bfd_udp_echo4_input_node.index, 1); + } } } else { - bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, - peer, key->sw_if_index); - BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, VNET_LINK_IP6, %U, %d) " - "returns %d", - format_ip46_address, peer, IP46_TYPE_ANY, key->sw_if_index, - bus->adj_index); - ++bum->udp6_sessions_count; - bfd_udp_update_stat_segment_entry ( - bum->udp6_sessions_count_stat_seg_entry, 
bum->udp6_sessions_count); - if (1 == bum->udp6_sessions_count) + if (multihop) { - udp_register_dst_port (vm, UDP_DST_PORT_bfd6, - bfd_udp6_input_node.index, 0); - udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo6, - bfd_udp_echo6_input_node.index, 0); + ++bum->udp6_mh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp6_mh_sessions_count_stat_seg_entry, + bum->udp6_mh_sessions_count); + if (1 == bum->udp6_mh_sessions_count) + { + udp_register_dst_port (vm, UDP_DST_PORT_bfd6_mh, + bfd_udp6_input_node.index, 0); + } } + else + { + bus->adj_index = adj_nbr_add_or_lock ( + FIB_PROTOCOL_IP6, VNET_LINK_IP6, peer, key->sw_if_index); + BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, VNET_LINK_IP6, " + "%U, %d) returns %d", + format_ip46_address, peer, IP46_TYPE_ANY, key->sw_if_index, + bus->adj_index); + ++bum->udp6_sh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp6_sh_sessions_count_stat_seg_entry, + bum->udp6_sh_sessions_count); + if (1 == bum->udp6_sh_sessions_count) + { + udp_register_dst_port (vm, UDP_DST_PORT_bfd6, + bfd_udp6_input_node.index, 0); + udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo6, + bfd_udp_echo6_input_node.index, 0); + } + } + } + + if (multihop) + { + bs->hop_type = BFD_HOP_TYPE_MULTI; + } + else + { + bs->hop_type = BFD_HOP_TYPE_SINGLE; } *bs_out = bs; return bfd_session_set_params (bum->bfd_main, bs, desired_min_tx_usec, @@ -590,20 +675,24 @@ bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum, } static vnet_api_error_t -bfd_udp_validate_api_input (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr) +bfd_udp_validate_api_input (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr) { bfd_udp_main_t *bum = &bfd_udp_main; - vnet_sw_interface_t *sw_if = - vnet_get_sw_interface_or_null (bfd_udp_main.vnet_main, sw_if_index); - if (!sw_if) + if (!multihop) { - vlib_log_err (bum->log_class, - "got NULL sw_if when getting interface by index %u", - sw_if_index); - return VNET_API_ERROR_INVALID_SW_IF_INDEX; + vnet_sw_interface_t *sw_if = + vnet_get_sw_interface_or_null (bfd_udp_main.vnet_main, sw_if_index); + if (!sw_if) + { + vlib_log_err (bum->log_class, + "got NULL sw_if when getting interface by index %u", + sw_if_index); + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + } } + if (ip46_address_is_ip4 (local_addr)) { if (!ip46_address_is_ip4 (peer_addr)) @@ -627,13 +716,13 @@ bfd_udp_validate_api_input (u32 sw_if_index, } static vnet_api_error_t -bfd_udp_find_session_by_api_input (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - bfd_session_t ** bs_out) +bfd_udp_find_session_by_api_input (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, + bfd_session_t **bs_out) { vnet_api_error_t rv = - bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); + bfd_udp_validate_api_input (multihop, sw_if_index, local_addr, peer_addr); if (!rv) { bfd_udp_main_t *bum = &bfd_udp_main; @@ -647,8 +736,9 @@ bfd_udp_find_session_by_api_input (u32 sw_if_index, else { vlib_log_err (bum->log_class, - "BFD session not found, sw_if_index=%u, local=%U, peer=%U", - sw_if_index, format_ip46_address, local_addr, + "BFD session not found, multihop=%d, sw_if_index=%u, " + "local=%U, peer=%U", + multihop, sw_if_index, format_ip46_address, local_addr, IP46_TYPE_ANY, format_ip46_address, peer_addr, IP46_TYPE_ANY); return VNET_API_ERROR_BFD_ENOENT; @@ -658,13 +748,13 @@ 
bfd_udp_find_session_by_api_input (u32 sw_if_index, } static vnet_api_error_t -bfd_api_verify_common (u32 sw_if_index, u32 desired_min_tx_usec, +bfd_api_verify_common (bool multihop, u32 sw_if_index, u32 desired_min_tx_usec, u8 detect_mult, const ip46_address_t *local_addr, const ip46_address_t *peer_addr) { bfd_udp_main_t *bum = &bfd_udp_main; vnet_api_error_t rv = - bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); + bfd_udp_validate_api_input (multihop, sw_if_index, local_addr, peer_addr); if (rv) { return rv; @@ -693,31 +783,62 @@ bfd_udp_del_session_internal (vlib_main_t * vm, bfd_session_t * bs) switch (bs->transport) { case BFD_TRANSPORT_UDP4: - --bum->udp4_sessions_count; - bfd_udp_update_stat_segment_entry ( - bum->udp4_sessions_count_stat_seg_entry, bum->udp4_sessions_count); - if (!bum->udp4_sessions_count) + if (bs->hop_type == BFD_HOP_TYPE_MULTI) { - udp_unregister_dst_port (vm, UDP_DST_PORT_bfd4, 1); - udp_unregister_dst_port (vm, UDP_DST_PORT_bfd_echo4, 1); + --bum->udp4_mh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp4_mh_sessions_count_stat_seg_entry, + bum->udp4_mh_sessions_count); + if (!bum->udp4_mh_sessions_count) + { + udp_unregister_dst_port (vm, UDP_DST_PORT_bfd4_mh, 1); + } + } + else + { + --bum->udp4_sh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp4_sh_sessions_count_stat_seg_entry, + bum->udp4_sh_sessions_count); + if (!bum->udp4_sh_sessions_count) + { + udp_unregister_dst_port (vm, UDP_DST_PORT_bfd4, 1); + udp_unregister_dst_port (vm, UDP_DST_PORT_bfd_echo4, 1); + } } break; case BFD_TRANSPORT_UDP6: - --bum->udp6_sessions_count; - bfd_udp_update_stat_segment_entry ( - bum->udp6_sessions_count_stat_seg_entry, bum->udp6_sessions_count); - if (!bum->udp6_sessions_count) + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + --bum->udp6_mh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp6_mh_sessions_count_stat_seg_entry, + bum->udp6_mh_sessions_count); + if (!bum->udp6_mh_sessions_count) + { + udp_unregister_dst_port (vm, UDP_DST_PORT_bfd6_mh, 0); + } + } + else { - udp_unregister_dst_port (vm, UDP_DST_PORT_bfd6, 0); - udp_unregister_dst_port (vm, UDP_DST_PORT_bfd_echo6, 0); + --bum->udp6_sh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp6_sh_sessions_count_stat_seg_entry, + bum->udp6_sh_sessions_count); + if (!bum->udp6_sh_sessions_count) + { + udp_unregister_dst_port (vm, UDP_DST_PORT_bfd6, 0); + udp_unregister_dst_port (vm, UDP_DST_PORT_bfd_echo6, 0); + } } + break; } bfd_put_session (bum->bfd_main, bs); } static vnet_api_error_t -bfd_udp_add_and_start_session (u32 sw_if_index, +bfd_udp_add_and_start_session (bool multihop, u32 sw_if_index, const ip46_address_t *local_addr, const ip46_address_t *peer_addr, u32 desired_min_tx_usec, @@ -728,9 +849,10 @@ bfd_udp_add_and_start_session (u32 sw_if_index, bfd_session_t *bs = NULL; vnet_api_error_t rv; - rv = bfd_udp_add_session_internal ( - vlib_get_main (), &bfd_udp_main, sw_if_index, desired_min_tx_usec, - required_min_rx_usec, detect_mult, local_addr, peer_addr, &bs); + rv = bfd_udp_add_session_internal (vlib_get_main (), &bfd_udp_main, multihop, + sw_if_index, desired_min_tx_usec, + required_min_rx_usec, detect_mult, + local_addr, peer_addr, &bs); if (!rv && is_authenticated) { @@ -750,21 +872,22 @@ bfd_udp_add_and_start_session (u32 sw_if_index, } vnet_api_error_t -bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - u32 desired_min_tx_usec, u32 required_min_rx_usec, - u8 
detect_mult, u8 is_authenticated, u32 conf_key_id, - u8 bfd_key_id) +bfd_udp_add_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u32 desired_min_tx_usec, + u32 required_min_rx_usec, u8 detect_mult, + u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id) { bfd_main_t *bm = &bfd_main; bfd_lock (bm); - vnet_api_error_t rv = bfd_api_verify_common ( - sw_if_index, desired_min_tx_usec, detect_mult, local_addr, peer_addr); + vnet_api_error_t rv = + bfd_api_verify_common (multihop, sw_if_index, desired_min_tx_usec, + detect_mult, local_addr, peer_addr); if (!rv) rv = bfd_udp_add_and_start_session ( - sw_if_index, local_addr, peer_addr, desired_min_tx_usec, + multihop, sw_if_index, local_addr, peer_addr, desired_min_tx_usec, required_min_rx_usec, detect_mult, is_authenticated, conf_key_id, bfd_key_id); @@ -773,7 +896,8 @@ bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, } vnet_api_error_t -bfd_udp_upd_session (u32 sw_if_index, const ip46_address_t *local_addr, +bfd_udp_upd_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, const ip46_address_t *peer_addr, u32 desired_min_tx_usec, u32 required_min_rx_usec, u8 detect_mult, u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id) @@ -781,17 +905,18 @@ bfd_udp_upd_session (u32 sw_if_index, const ip46_address_t *local_addr, bfd_main_t *bm = &bfd_main; bfd_lock (bm); - vnet_api_error_t rv = bfd_api_verify_common ( - sw_if_index, desired_min_tx_usec, detect_mult, local_addr, peer_addr); + vnet_api_error_t rv = + bfd_api_verify_common (multihop, sw_if_index, desired_min_tx_usec, + detect_mult, local_addr, peer_addr); if (!rv) { bfd_session_t *bs = NULL; - rv = bfd_udp_find_session_by_api_input (sw_if_index, local_addr, - peer_addr, &bs); + rv = bfd_udp_find_session_by_api_input (multihop, sw_if_index, + local_addr, peer_addr, &bs); if (VNET_API_ERROR_BFD_ENOENT == rv) rv = bfd_udp_add_and_start_session ( - sw_if_index, local_addr, peer_addr, desired_min_tx_usec, + multihop, sw_if_index, local_addr, peer_addr, desired_min_tx_usec, required_min_rx_usec, detect_mult, is_authenticated, conf_key_id, bfd_key_id); else @@ -805,7 +930,8 @@ bfd_udp_upd_session (u32 sw_if_index, const ip46_address_t *local_addr, } vnet_api_error_t -bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t *local_addr, +bfd_udp_mod_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, const ip46_address_t *peer_addr, u32 desired_min_tx_usec, u32 required_min_rx_usec, u8 detect_mult) { @@ -813,9 +939,8 @@ bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t *local_addr, bfd_main_t *bm = &bfd_main; vnet_api_error_t error; bfd_lock (bm); - vnet_api_error_t rv = - bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, - &bs); + vnet_api_error_t rv = bfd_udp_find_session_by_api_input ( + multihop, sw_if_index, local_addr, peer_addr, &bs); if (rv) { bfd_unlock (bm); @@ -830,16 +955,15 @@ bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t *local_addr, } vnet_api_error_t -bfd_udp_del_session (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr) +bfd_udp_del_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr) { bfd_session_t *bs = NULL; bfd_main_t *bm = &bfd_main; bfd_lock (bm); - vnet_api_error_t rv = - bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, - &bs); + vnet_api_error_t rv = 
bfd_udp_find_session_by_api_input ( + multihop, sw_if_index, local_addr, peer_addr, &bs); if (rv) { bfd_unlock (bm); @@ -851,16 +975,15 @@ bfd_udp_del_session (u32 sw_if_index, } vnet_api_error_t -bfd_udp_session_set_flags (vlib_main_t * vm, u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, u8 admin_up_down) +bfd_udp_session_set_flags (vlib_main_t *vm, bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u8 admin_up_down) { bfd_session_t *bs = NULL; bfd_main_t *bm = &bfd_main; bfd_lock (bm); - vnet_api_error_t rv = - bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, - &bs); + vnet_api_error_t rv = bfd_udp_find_session_by_api_input ( + multihop, sw_if_index, local_addr, peer_addr, &bs); if (rv) { bfd_unlock (bm); @@ -872,19 +995,18 @@ bfd_udp_session_set_flags (vlib_main_t * vm, u32 sw_if_index, } vnet_api_error_t -bfd_udp_auth_activate (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - u32 conf_key_id, u8 key_id, u8 is_delayed) +bfd_udp_auth_activate (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u32 conf_key_id, + u8 key_id, u8 is_delayed) { bfd_main_t *bm = &bfd_main; bfd_lock (bm); vnet_api_error_t error; bfd_session_t *bs = NULL; - vnet_api_error_t rv = - bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, - &bs); + vnet_api_error_t rv = bfd_udp_find_session_by_api_input ( + multihop, sw_if_index, local_addr, peer_addr, &bs); if (rv) { bfd_unlock (bm); @@ -896,17 +1018,16 @@ bfd_udp_auth_activate (u32 sw_if_index, } vnet_api_error_t -bfd_udp_auth_deactivate (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, u8 is_delayed) +bfd_udp_auth_deactivate (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u8 is_delayed) { bfd_main_t *bm = &bfd_main; vnet_api_error_t error; bfd_lock (bm); bfd_session_t *bs = NULL; - vnet_api_error_t rv = - bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, - &bs); + vnet_api_error_t rv = bfd_udp_find_session_by_api_input ( + multihop, sw_if_index, local_addr, peer_addr, &bs); if (rv) { bfd_unlock (bm); @@ -985,13 +1106,19 @@ bfd_udp4_verify_transport (const ip4_header_t *ip4, const udp_header_t *udp, key->local_addr.ip4.as_u8); return BFD_UDP_ERROR_DST_MISMATCH; } - const u8 expected_ttl = 255; - if (ip4->ttl != expected_ttl) + + // For single-hop, TTL must be 255 + if (bs->hop_type == BFD_HOP_TYPE_SINGLE) { - BFD_ERR ("IPv4 unexpected TTL value %u, expected %u", ip4->ttl, - expected_ttl); - return BFD_UDP_ERROR_TTL; + const u8 expected_ttl = 255; + if (ip4->ttl != expected_ttl) + { + BFD_ERR ("IPv4 unexpected TTL value %u, expected %u", ip4->ttl, + expected_ttl); + return BFD_UDP_ERROR_TTL; + } } + if (clib_net_to_host_u16 (udp->src_port) < 49152) { BFD_ERR ("Invalid UDP src port %u, out of range <49152,65535>", @@ -1062,7 +1189,14 @@ bfd_udp4_scan (vlib_main_t *vm, vlib_buffer_t *b, bfd_session_t **bs_out) { bfd_udp_key_t key; clib_memset (&key, 0, sizeof (key)); - key.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + if (udp->dst_port == clib_host_to_net_u16 (UDP_DST_PORT_bfd4_mh)) + { + key.sw_if_index = ~0; + } + else + { + key.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + } key.local_addr.ip4.as_u32 = ip4->dst_address.as_u32; key.peer_addr.ip4.as_u32 = ip4->src_address.as_u32; BFD_DBG ("Looking up BFD 
session using key (sw_if_index=%u, local=%U, " @@ -1145,13 +1279,19 @@ bfd_udp6_verify_transport (const ip6_header_t *ip6, const udp_header_t *udp, &key->local_addr.ip6); return BFD_UDP_ERROR_DST_MISMATCH; } - const u8 expected_hop_limit = 255; - if (ip6->hop_limit != expected_hop_limit) + + // For single-hop, hop-limit must be 255 + if (bs->hop_type == BFD_HOP_TYPE_SINGLE) { - BFD_ERR ("IPv6 unexpected hop-limit value %u, expected %u", - ip6->hop_limit, expected_hop_limit); - return BFD_UDP_ERROR_TTL; + const u8 expected_hop_limit = 255; + if (ip6->hop_limit != expected_hop_limit) + { + BFD_ERR ("IPv6 unexpected hop-limit value %u, expected %u", + ip6->hop_limit, expected_hop_limit); + return BFD_UDP_ERROR_TTL; + } } + if (clib_net_to_host_u16 (udp->src_port) < 49152) { BFD_ERR ("Invalid UDP src port %u, out of range <49152,65535>", @@ -1204,15 +1344,22 @@ bfd_udp6_scan (vlib_main_t *vm, vlib_buffer_t *b, bfd_session_t **bs_out) { bfd_udp_key_t key; clib_memset (&key, 0, sizeof (key)); - key.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + if (udp->dst_port == clib_host_to_net_u16 (UDP_DST_PORT_bfd6_mh)) + { + key.sw_if_index = ~0; + } + else + { + key.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + } key.local_addr.ip6.as_u64[0] = ip6->dst_address.as_u64[0]; key.local_addr.ip6.as_u64[1] = ip6->dst_address.as_u64[1]; key.peer_addr.ip6.as_u64[0] = ip6->src_address.as_u64[0]; key.peer_addr.ip6.as_u64[1] = ip6->src_address.as_u64[1]; - BFD_DBG ("Looking up BFD session using key (sw_if_index=%u, local=%U, " - "peer=%U)", - key.sw_if_index, format_ip6_address, &key.local_addr, - format_ip6_address, &key.peer_addr); + BFD_DBG ("Looking up BFD session using discriminator %u", + pkt->your_disc); + bs = bfd_find_session_by_disc (bfd_udp_main.bfd_main, pkt->your_disc); + bs = bfd_lookup_session (&bfd_udp_main, &key); } if (!bs) @@ -1266,8 +1413,8 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt, { u64 len; t0 = vlib_add_trace (vm, rt, b0, sizeof (*t0)); - len = (b0->current_length < sizeof (t0->data)) ? b0->current_length - : sizeof (t0->data); + len = (b0->current_length < sizeof (t0->data)) ? 
b0->current_length : + sizeof (t0->data); t0->len = len; clib_memcpy_fast (t0->data, vlib_buffer_get_current (b0), len); } @@ -1311,25 +1458,35 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_node_increment_counter (vm, bfd_udp4_input_node.index, error0, 1); } + const bfd_udp_session_t *bus = &bs->udp; - ip_adjacency_t *adj = adj_get (bus->adj_index); - switch (adj->lookup_next_index) + + if (bs->hop_type == BFD_HOP_TYPE_MULTI) { - case IP_LOOKUP_NEXT_ARP: - next0 = BFD_UDP_INPUT_NEXT_REPLY_ARP; - break; - case IP_LOOKUP_NEXT_REWRITE: next0 = BFD_UDP_INPUT_NEXT_REPLY_REWRITE; - break; - case IP_LOOKUP_NEXT_MIDCHAIN: - next0 = BFD_UDP_INPUT_NEXT_REPLY_MIDCHAIN; - break; - default: - /* drop */ - break; + } + else + { + ip_adjacency_t *adj = adj_get (bus->adj_index); + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + next0 = BFD_UDP_INPUT_NEXT_REPLY_ARP; + break; + case IP_LOOKUP_NEXT_REWRITE: + next0 = BFD_UDP_INPUT_NEXT_REPLY_REWRITE; + break; + case IP_LOOKUP_NEXT_MIDCHAIN: + next0 = BFD_UDP_INPUT_NEXT_REPLY_MIDCHAIN; + break; + default: + /* drop */ + break; + } } } } + bfd_unlock (bm); vlib_set_next_frame_buffer (vm, rt, next0, bi0); @@ -1566,6 +1723,10 @@ bfd_udp_sw_if_add_del (CLIB_UNUSED (vnet_main_t *vnm), u32 sw_if_index, { continue; } + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + continue; + } if (bs->udp.key.sw_if_index != sw_if_index) { continue; @@ -1593,24 +1754,47 @@ clib_error_t * bfd_udp_stats_init (bfd_udp_main_t *bum) { const char *name4 = "/bfd/udp4/sessions"; - bum->udp4_sessions_count_stat_seg_entry = vlib_stats_add_gauge ("%s", name4); + bum->udp4_sh_sessions_count_stat_seg_entry = + vlib_stats_add_gauge ("%s", name4); - vlib_stats_set_gauge (bum->udp4_sessions_count_stat_seg_entry, 0); - if (~0 == bum->udp4_sessions_count_stat_seg_entry) + vlib_stats_set_gauge (bum->udp4_sh_sessions_count_stat_seg_entry, 0); + if (~0 == bum->udp4_sh_sessions_count_stat_seg_entry) { return clib_error_return ( 0, "Could not create stat segment entry for %s", name4); } const char *name6 = "/bfd/udp6/sessions"; - bum->udp6_sessions_count_stat_seg_entry = vlib_stats_add_gauge ("%s", name6); + bum->udp6_sh_sessions_count_stat_seg_entry = + vlib_stats_add_gauge ("%s", name6); - vlib_stats_set_gauge (bum->udp6_sessions_count_stat_seg_entry, 0); - if (~0 == bum->udp6_sessions_count_stat_seg_entry) + vlib_stats_set_gauge (bum->udp6_sh_sessions_count_stat_seg_entry, 0); + if (~0 == bum->udp6_sh_sessions_count_stat_seg_entry) { return clib_error_return ( 0, "Could not create stat segment entry for %s", name6); } + const char *name4_mh = "/bfd/udp4/sessions_mh"; + bum->udp4_mh_sessions_count_stat_seg_entry = + vlib_stats_add_gauge ("%s", name4_mh); + + vlib_stats_set_gauge (bum->udp4_mh_sessions_count_stat_seg_entry, 0); + if (~0 == bum->udp4_mh_sessions_count_stat_seg_entry) + { + return clib_error_return ( + 0, "Could not create stat segment entry for %s", name4_mh); + } + const char *name6_mh = "/bfd/udp6/sessions_mh"; + bum->udp6_mh_sessions_count_stat_seg_entry = + vlib_stats_add_gauge ("%s", name6_mh); + + vlib_stats_set_gauge (bum->udp6_mh_sessions_count_stat_seg_entry, 0); + if (~0 == bum->udp6_mh_sessions_count_stat_seg_entry) + { + return clib_error_return ( + 0, "Could not create stat segment entry for %s", name6_mh); + } + return 0; } @@ -1620,8 +1804,10 @@ bfd_udp_stats_init (bfd_udp_main_t *bum) static clib_error_t * bfd_udp_init (vlib_main_t * vm) { - bfd_udp_main.udp4_sessions_count = 0; - bfd_udp_main.udp6_sessions_count = 0; + 
bfd_udp_main.udp4_sh_sessions_count = 0; + bfd_udp_main.udp6_sh_sessions_count = 0; + bfd_udp_main.udp4_mh_sessions_count = 0; + bfd_udp_main.udp6_mh_sessions_count = 0; mhash_init (&bfd_udp_main.bfd_session_idx_by_bfd_key, sizeof (uword), sizeof (bfd_udp_key_t)); bfd_udp_main.bfd_main = &bfd_main; diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h index 8f4bfee2bd7..362e9541dfe 100644 --- a/src/vnet/bfd/bfd_udp.h +++ b/src/vnet/bfd/bfd_udp.h @@ -26,12 +26,10 @@ /** identifier of BFD session based on UDP transport only */ typedef CLIB_PACKED (struct { - union { - /** interface to which the session is tied - single-hop */ - u32 sw_if_index; - /** the FIB index the peer is in - multi-hop*/ - u32 fib_index; - }; + /** interface to which the session is tied - single-hop */ + u16 sw_if_index; + /** the FIB index the peer is in - multi-hop*/ + u16 fib_index; /** local address */ ip46_address_t local_addr; /** peer address */ diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 2f34aa4b5fc..247af56f403 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -219,16 +219,12 @@ typedef struct struct { /* input variables */ - struct - { - u32 next_index; /* index of next node - used by custom apps */ - u32 error_next_index; /* index of next node if error - used by custom apps */ - }; + u32 next_index; /* index of next node - used by custom apps */ + u32 error_next_index; /* index of next node if error - used by + custom apps */ + u8 _save_rewrite_length; /* handoff variables */ - struct - { - u16 owner_thread_index; - }; + u16 owner_thread_index; }; /* output variables */ struct @@ -245,7 +241,8 @@ typedef struct u8 ip_proto; /* protocol in ip header */ u8 icmp_type_or_tcp_flags; u8 is_non_first_fragment : 1; - u8 l4_layer_truncated : 7; + u8 l4_hdr_truncated : 1; + u8 unused : 6; u32 tcp_seq_number; }; /* full reassembly output variables */ @@ -422,25 +419,26 @@ typedef struct STATIC_ASSERT (VNET_REWRITE_TOTAL_BYTES <= VLIB_BUFFER_PRE_DATA_SIZE, "VNET_REWRITE_TOTAL_BYTES too big"); -STATIC_ASSERT (STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) - == STRUCT_SIZE_OF (vnet_buffer_opaque_t, - ip.reass.save_rewrite_length) - && STRUCT_SIZE_OF (vnet_buffer_opaque_t, - ip.reass.save_rewrite_length) == - STRUCT_SIZE_OF (vnet_buffer_opaque_t, mpls.save_rewrite_length) - && STRUCT_SIZE_OF (vnet_buffer_opaque_t, - mpls.save_rewrite_length) == 1 - && VNET_REWRITE_TOTAL_BYTES < UINT8_MAX, - "save_rewrite_length member must be able to hold the max value of rewrite length"); - -STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) - == STRUCT_OFFSET_OF (vnet_buffer_opaque_t, - ip.reass.save_rewrite_length) - && STRUCT_OFFSET_OF (vnet_buffer_opaque_t, - mpls.save_rewrite_length) == - STRUCT_OFFSET_OF (vnet_buffer_opaque_t, - ip.reass.save_rewrite_length), - "save_rewrite_length must be aligned so that reass doesn't overwrite it"); +STATIC_ASSERT ( + STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) == + STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.reass.save_rewrite_length) && + STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) == + STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.reass._save_rewrite_length) && + STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.reass.save_rewrite_length) == + STRUCT_SIZE_OF (vnet_buffer_opaque_t, mpls.save_rewrite_length) && + STRUCT_SIZE_OF (vnet_buffer_opaque_t, mpls.save_rewrite_length) == 1 && + VNET_REWRITE_TOTAL_BYTES < UINT8_MAX, + "save_rewrite_length member must be able to hold the max value of rewrite " + 
"length"); + +STATIC_ASSERT ( + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) == + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.reass.save_rewrite_length) && + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) == + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.reass._save_rewrite_length) && + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, mpls.save_rewrite_length) == + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.reass.save_rewrite_length), + "save_rewrite_length must be aligned so that reass doesn't overwrite it"); /* * The opaque field of the vlib_buffer_t is interpreted as a @@ -495,7 +493,22 @@ typedef struct }; } nat; - u32 unused[8]; + struct + { + /* + * Shallow virtual reassembly output values. + * Only populated if extended reassembly enabled via + * ipX_sv_reass_enable_disable_extended(). + */ + struct + { + u32 thread_index; + u32 pool_index; + u32 id; + } reass; + } ip; + + u32 unused[5]; } vnet_buffer_opaque2_t; #define vnet_buffer2(b) ((vnet_buffer_opaque2_t *) (b)->opaque2) diff --git a/src/vnet/dev/api.c b/src/vnet/dev/api.c index 114b63d6662..d968f66c316 100644 --- a/src/vnet/dev/api.c +++ b/src/vnet/dev/api.c @@ -156,6 +156,7 @@ vnet_dev_api_create_port_if (vlib_main_t *vm, { vnet_dev_t *dev = vnet_dev_by_index (args->dev_index); vnet_dev_port_t *port = 0; + vnet_dev_port_if_create_args_t a = {}; u16 n_threads = vlib_get_n_threads (); int default_is_intr_mode; vnet_dev_rv_t rv; @@ -181,7 +182,7 @@ vnet_dev_api_create_port_if (vlib_main_t *vm, if (!port) return VNET_DEV_ERR_INVALID_DEVICE_ID; - if (port->interface_created) + if (port->interfaces) return VNET_DEV_ERR_ALREADY_EXISTS; if (args->args) @@ -202,45 +203,82 @@ vnet_dev_api_create_port_if (vlib_main_t *vm, { if (args->num_rx_queues > port->attr.max_rx_queues) return VNET_DEV_ERR_INVALID_NUM_RX_QUEUES; - port->intf.num_rx_queues = args->num_rx_queues; + a.num_rx_queues = args->num_rx_queues; } else - port->intf.num_rx_queues = clib_min (port->attr.max_tx_queues, 1); + a.num_rx_queues = clib_min (port->attr.max_tx_queues, 1); if (args->num_tx_queues) { if (args->num_tx_queues > port->attr.max_tx_queues) return VNET_DEV_ERR_INVALID_NUM_TX_QUEUES; - port->intf.num_tx_queues = args->num_tx_queues; + a.num_tx_queues = args->num_tx_queues; } else - port->intf.num_tx_queues = clib_min (port->attr.max_tx_queues, n_threads); + a.num_tx_queues = clib_min (port->attr.max_tx_queues, n_threads); if (args->rx_queue_size) { if (!_vnet_dev_queue_size_validate (args->rx_queue_size, port->rx_queue_config)) return VNET_DEV_ERR_INVALID_RX_QUEUE_SIZE; - port->intf.rxq_sz = args->rx_queue_size; + a.rxq_sz = args->rx_queue_size; } else - port->intf.rxq_sz = port->rx_queue_config.default_size; + a.rxq_sz = port->rx_queue_config.default_size; if (args->tx_queue_size) { if (!_vnet_dev_queue_size_validate (args->tx_queue_size, port->tx_queue_config)) return VNET_DEV_ERR_INVALID_TX_QUEUE_SIZE; - port->intf.txq_sz = args->tx_queue_size; + a.txq_sz = args->tx_queue_size; } else - port->intf.txq_sz = port->tx_queue_config.default_size; + a.txq_sz = port->tx_queue_config.default_size; - clib_memcpy (port->intf.name, args->intf_name, sizeof (port->intf.name)); - port->intf.default_is_intr_mode = default_is_intr_mode; + clib_memcpy (a.name, args->intf_name, sizeof (a.name)); + a.default_is_intr_mode = default_is_intr_mode; + a.consistent_qp = (args->flags.n & VNET_DEV_PORT_F_CONSISTENT_QP) != 0; - rv = vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_create); - args->sw_if_index = (rv == VNET_DEV_OK) ? 
port->intf.sw_if_index : ~0; + rv = vnet_dev_process_call_port_op_with_ptr (vm, port, + vnet_dev_port_if_create, &a); + args->sw_if_index = (rv == VNET_DEV_OK) ? a.sw_if_index : ~0; + + return rv; +} + +vnet_dev_rv_t +vnet_dev_api_port_add_sec_if (vlib_main_t *vm, + vnet_dev_api_port_add_sec_if_args_t *args) +{ + vnet_dev_port_t *port = 0; + vnet_dev_t *dev = 0; + vnet_dev_port_sec_if_create_args_t a = {}; + vnet_dev_rv_t rv = VNET_DEV_OK; + + port = vnet_dev_get_port_from_sw_if_index (args->primary_sw_if_index); + if (port == 0) + return VNET_DEV_ERR_NOT_FOUND; + + log_debug (dev, + "create_port_if: primary_sw_if_index %u intf_name '%s' " + "args '%v'", + args->primary_sw_if_index, args->intf_name, args->args); + + if (port->interfaces == 0) + return VNET_DEV_ERR_PRIMARY_INTERFACE_MISSING; + + clib_memcpy (a.name, args->intf_name, sizeof (a.name)); + a.args = args->args; + + rv = vnet_dev_process_call_port_op_with_ptr (vm, port, + vnet_dev_port_add_sec_if, &a); + + if (rv != VNET_DEV_OK) + args->sw_if_index = ~0; + else + args->sw_if_index = a.sw_if_index; return rv; } @@ -249,9 +287,23 @@ vnet_dev_rv_t vnet_dev_api_remove_port_if (vlib_main_t *vm, vnet_dev_api_remove_port_if_args_t *args) { + vnet_dev_port_t *port; + + port = vnet_dev_get_port_from_sw_if_index (args->sw_if_index); + + if (port == 0) + return VNET_DEV_ERR_UNKNOWN_INTERFACE; + + return vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_remove); +} + +vnet_dev_rv_t +vnet_dev_api_port_del_sec_if (vlib_main_t *vm, + vnet_dev_api_port_del_sec_if_args_t *args) +{ vnet_dev_main_t *dm = &vnet_dev_main; vnet_main_t *vnm = vnet_get_main (); - vnet_sw_interface_t *si; + vnet_sw_interface_t *si, *sup_si; vnet_hw_interface_t *hi; vnet_dev_port_t *port; @@ -259,17 +311,26 @@ vnet_dev_api_remove_port_if (vlib_main_t *vm, if (!si) return VNET_DEV_ERR_UNKNOWN_INTERFACE; - hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index); + if (si->sup_sw_if_index == si->sw_if_index) + return VNET_DEV_ERR_UNKNOWN_INTERFACE; + + sup_si = vnet_get_sw_interface_or_null (vnm, si->sup_sw_if_index); + if (!sup_si) + return VNET_DEV_ERR_UNKNOWN_INTERFACE; + + hi = vnet_get_hw_interface_or_null (vnm, sup_si->hw_if_index); if (!hi) return VNET_DEV_ERR_UNKNOWN_INTERFACE; - if (pool_is_free_index (dm->ports_by_dev_instance, hi->dev_instance)) + if (pool_is_free_index (dm->dev_instances, hi->dev_instance)) return VNET_DEV_ERR_UNKNOWN_INTERFACE; port = vnet_dev_get_port_from_dev_instance (hi->dev_instance); - if (port->intf.hw_if_index != si->hw_if_index) + if (port->interfaces->primary_interface.hw_if_index != si->hw_if_index) return VNET_DEV_ERR_UNKNOWN_INTERFACE; - return vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_remove); + return vnet_dev_process_call_port_op_with_ptr ( + vm, port, vnet_dev_port_del_sec_if, + &(vnet_dev_port_del_sec_if_args_t){ .sw_if_index = args->sw_if_index }); } diff --git a/src/vnet/dev/api.h b/src/vnet/dev/api.h index 1b7bf27d62a..3e552e4326e 100644 --- a/src/vnet/dev/api.h +++ b/src/vnet/dev/api.h @@ -65,4 +65,27 @@ vnet_dev_rv_t vnet_dev_api_remove_port_if (vlib_main_t *, vnet_dev_api_remove_port_if_args_t *); +typedef struct +{ + u32 primary_sw_if_index; + vnet_dev_if_name_t intf_name; + u8 *args; + + /* return */ + u32 sw_if_index; +} vnet_dev_api_port_add_sec_if_args_t; + +vnet_dev_rv_t +vnet_dev_api_port_add_sec_if (vlib_main_t *, + vnet_dev_api_port_add_sec_if_args_t *); + +typedef struct +{ + u32 sw_if_index; +} vnet_dev_api_port_del_sec_if_args_t; + +vnet_dev_rv_t +vnet_dev_api_port_del_sec_if 
(vlib_main_t *, + vnet_dev_api_port_del_sec_if_args_t *); + #endif /* _VNET_DEV_API_H_ */ diff --git a/src/vnet/dev/cli.c b/src/vnet/dev/cli.c index 53be4483183..6002a2f0dee 100644 --- a/src/vnet/dev/cli.c +++ b/src/vnet/dev/cli.c @@ -223,6 +223,94 @@ VLIB_CLI_COMMAND (device_remove_if_cmd, static) = { }; static clib_error_t * +device_create_sec_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_dev_api_port_add_sec_if_args_t a = {}; + vnet_main_t *vnm = vnet_get_main (); + vnet_dev_rv_t rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (!a.intf_name[0] && + unformat (input, "if-name %U", unformat_c_string_array, a.intf_name, + sizeof (a.intf_name))) + ; + else if (unformat (input, "primary-if-name %U", + unformat_vnet_sw_interface, vnm, + &a.primary_sw_if_index)) + ; + else if (unformat (input, "primary-sw-if-index %u", + &a.primary_sw_if_index)) + ; + else if (!a.args && unformat (input, "args %v", &a.args)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + rv = vnet_dev_api_port_add_sec_if (vm, &a); + + vec_free (a.args); + + if (rv != VNET_DEV_OK) + return clib_error_return (0, "unable to create secondary interface: %U", + format_vnet_dev_rv, rv); + + return 0; +} + +VLIB_CLI_COMMAND (device_create_sec_if_cmd, static) = { + .path = "device create-secondary-interface", + .short_help = "device create-secondary-interface if-name <name> " + "[primary-if-name <interface-name> | primary-sw-if-index <n>] " + "[args <sec-if-args>]", + .function = device_create_sec_if_cmd_fn, + .is_mp_safe = 1, +}; + +static clib_error_t * +device_remove_sec_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_dev_api_port_del_sec_if_args_t a = { .sw_if_index = ~0 }; + vnet_main_t *vnm = vnet_get_main (); + vnet_dev_rv_t rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, + &a.sw_if_index)) + ; + else if (unformat (input, "sw-if-index %u", &a.sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (a.sw_if_index == ~0) + return clib_error_return ( + 0, "please specify existing secondary interface name"); + + rv = vnet_dev_api_port_del_sec_if (vm, &a); + + if (rv != VNET_DEV_OK) + return clib_error_return (0, "unable to remove secondary interface: %U", + format_vnet_dev_rv, rv); + + return 0; +} + +VLIB_CLI_COMMAND (device_remove_sec_if_cmd, static) = { + .path = "device remove-secondary-interface", + .short_help = + "device remove-secondary-interface [<interface-name> | sw-if-index <n>]", + .function = device_remove_sec_if_cmd_fn, + .is_mp_safe = 1, +}; + +static clib_error_t * show_devices_cmd_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) { @@ -300,22 +388,23 @@ show_device_counters_cmd_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_output (vm, "device '%s':", dev->device_id); foreach_vnet_dev_port (p, dev) { + vlib_cli_output (vm, " Port %u:", p->port_id); vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa, p->counter_main); foreach_vnet_dev_port_rx_queue (q, p) if (q->counter_main) { - vlib_cli_output (vm, " RX queue %u:", q->queue_id); - vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa, q->counter_main); } foreach_vnet_dev_port_tx_queue (q, p) if (q->counter_main) { - 
vlib_cli_output (vm, " TX queue %u:", q->queue_id); - vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa, + vlib_cli_output (vm, " TX queue %u:", q->queue_id); + vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa, q->counter_main); } } diff --git a/src/vnet/dev/counters.c b/src/vnet/dev/counters.c index d02839d664f..05cfc0ad290 100644 --- a/src/vnet/dev/counters.c +++ b/src/vnet/dev/counters.c @@ -89,6 +89,8 @@ format_vnet_dev_counter_name (u8 *s, va_list *va) char *units[] = { [VNET_DEV_CTR_UNIT_BYTES] = "bytes", [VNET_DEV_CTR_UNIT_PACKETS] = "packets", + [VNET_DEV_CTR_UNIT_DESCRIPTORS] = "descriptors", + [VNET_DEV_CTR_UNIT_BUFFERS] = "buffers", }; if (c->type == VNET_DEV_CTR_TYPE_VENDOR) diff --git a/src/vnet/dev/counters.h b/src/vnet/dev/counters.h index 33d08ffbecd..411ccdfb785 100644 --- a/src/vnet/dev/counters.h +++ b/src/vnet/dev/counters.h @@ -30,6 +30,8 @@ typedef enum VNET_DEV_CTR_UNIT_NA, VNET_DEV_CTR_UNIT_BYTES, VNET_DEV_CTR_UNIT_PACKETS, + VNET_DEV_CTR_UNIT_DESCRIPTORS, + VNET_DEV_CTR_UNIT_BUFFERS, } __clib_packed vnet_dev_counter_unit_t; typedef struct vnet_dev_counter diff --git a/src/vnet/dev/dev.api b/src/vnet/dev/dev.api index 552b778949b..423d0ee8505 100644 --- a/src/vnet/dev/dev.api +++ b/src/vnet/dev/dev.api @@ -12,6 +12,7 @@ enumflag dev_flags : u32 enumflag dev_port_flags : u32 { VL_API_DEV_PORT_FLAG_INTERRUPT_MODE = 0x1, + VL_API_DEV_PORT_FLAG_CONSISTENT_QP = 0x2, }; autoendian define dev_attach diff --git a/src/vnet/dev/dev.c b/src/vnet/dev/dev.c index e04fa161ce2..7954707dd32 100644 --- a/src/vnet/dev/dev.c +++ b/src/vnet/dev/dev.c @@ -130,7 +130,7 @@ vnet_dev_deinit (vlib_main_t *vm, vnet_dev_t *dev) vnet_dev_validate (vm, dev); foreach_vnet_dev_port (p, dev) - ASSERT (p->interface_created == 0); + ASSERT (p->interfaces == 0); if (dev->ops.deinit) dev->ops.deinit (vm, dev); @@ -188,7 +188,7 @@ void vnet_dev_detach (vlib_main_t *vm, vnet_dev_t *dev) { foreach_vnet_dev_port (p, dev) - if (p->interface_created) + if (p->interfaces) vnet_dev_port_if_remove (vm, p); vnet_dev_deinit (vm, dev); vnet_dev_free (vm, dev); @@ -260,6 +260,8 @@ vnet_dev_feature_update_cb (u32 sw_if_index, u8 arc_index, u8 is_enable, vnet_feature_config_main_t *cm; vnet_dev_main_t *vdm = &vnet_dev_main; vnet_dev_port_t *port; + vnet_dev_port_interface_t *intf; + vnet_dev_instance_t *di; vnet_hw_interface_t *hw; u32 current_config_index = ~0; u32 next_index = ~0; @@ -269,9 +271,18 @@ vnet_dev_feature_update_cb (u32 sw_if_index, u8 arc_index, u8 is_enable, return; hw = vnet_get_sup_hw_interface (vnm, sw_if_index); - port = vnet_dev_get_port_from_dev_instance (hw->dev_instance); + di = vnet_dev_get_dev_instance (hw->dev_instance); - if (port == 0 || port->intf.sw_if_index != sw_if_index) + if (!di) + return; + + intf = di->is_primary_if ? 
+ vnet_dev_port_get_primary_if (di->port) : + vnet_dev_port_get_sec_if_by_index (di->port, di->sec_if_index); + + port = di->port; + + if (port == 0 || intf->sw_if_index != sw_if_index) return; if (vnet_have_features (arc_index, sw_if_index)) @@ -281,28 +292,27 @@ vnet_dev_feature_update_cb (u32 sw_if_index, u8 arc_index, u8 is_enable, vec_elt (cm->config_index_by_sw_if_index, sw_if_index); vnet_get_config_data (&cm->config_main, ¤t_config_index, &next_index, 0); - if (port->intf.feature_arc == 0 || - port->intf.rx_next_index != next_index || - port->intf.current_config_index != current_config_index) + if (intf->feature_arc == 0 || intf->rx_next_index != next_index || + intf->current_config_index != current_config_index) { - port->intf.current_config_index = current_config_index; - port->intf.rx_next_index = next_index; - port->intf.feature_arc_index = arc_index; - port->intf.feature_arc = 1; + intf->current_config_index = current_config_index; + intf->rx_next_index = next_index; + intf->feature_arc_index = arc_index; + intf->feature_arc = 1; update_runtime = 1; } } else { - if (port->intf.feature_arc) + if (intf->feature_arc) { - port->intf.current_config_index = 0; - port->intf.rx_next_index = - port->intf.redirect_to_node ? - port->intf.redirect_to_node_next_index : - vnet_dev_default_next_index_by_port_type[port->attr.type]; - port->intf.feature_arc_index = 0; - port->intf.feature_arc = 0; + intf->current_config_index = 0; + intf->rx_next_index = + intf->redirect_to_node ? + intf->redirect_to_node_next_index : + vnet_dev_default_next_index_by_port_type[port->attr.type]; + intf->feature_arc_index = 0; + intf->feature_arc = 0; update_runtime = 1; } } diff --git a/src/vnet/dev/dev.h b/src/vnet/dev/dev.h index eb06eeba34e..f3f7563317e 100644 --- a/src/vnet/dev/dev.h +++ b/src/vnet/dev/dev.h @@ -29,7 +29,8 @@ typedef enum _ (interrupt_mode) \ _ (rss) \ _ (change_max_rx_frame_size) \ - _ (mac_filter) + _ (mac_filter) \ + _ (secondary_interfaces) #define foreach_vnet_dev_port_rx_offloads _ (ip4_cksum) @@ -104,6 +105,11 @@ typedef void (vnet_dev_rx_queue_op_no_rv_t) (vlib_main_t *, vnet_dev_rx_queue_t *); typedef void (vnet_dev_tx_queue_op_no_rv_t) (vlib_main_t *, vnet_dev_tx_queue_t *); +typedef vnet_dev_rv_t (vnet_dev_op_with_ptr_t) (vlib_main_t *, vnet_dev_t *, + void *); +typedef vnet_dev_rv_t (vnet_dev_port_op_with_ptr_t) (vlib_main_t *, + vnet_dev_port_t *, + void *); typedef u16 vnet_dev_queue_id_t; typedef u16 vnet_dev_bus_index_t; @@ -248,6 +254,8 @@ typedef struct vnet_dev_port_op_no_rv_t *deinit; vnet_dev_port_op_no_rv_t *free; vnet_dev_port_op_no_rv_t *clear_counters; + vnet_dev_port_op_with_ptr_t *add_sec_if; + vnet_dev_port_op_with_ptr_t *del_sec_if; format_function_t *format_status; format_function_t *format_flow; } vnet_dev_port_ops_t; @@ -264,30 +272,41 @@ typedef union u8 as_number; } vnet_dev_rx_queue_rt_req_t; +typedef struct +{ + vlib_buffer_template_t buffer_template; + u32 sw_if_index; + u16 next_index; + u16 sec_if_index; +} vnet_dev_rx_queue_if_rt_data_t; + typedef struct vnet_dev_rx_queue { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); vnet_dev_port_t *port; u16 rx_thread_index; u16 index; - vnet_dev_counter_main_t *counter_main; - CLIB_CACHE_LINE_ALIGN_MARK (runtime0); - vnet_dev_rx_queue_t *next_on_thread; + u16 size; u8 interrupt_mode : 1; u8 enabled : 1; u8 started : 1; u8 suspended : 1; - vnet_dev_queue_id_t queue_id; - u16 size; - u16 next_index; vnet_dev_rx_queue_rt_req_t runtime_request; + vnet_dev_counter_main_t *counter_main; + vnet_dev_rx_queue_t 
*next_on_thread; + vnet_dev_queue_id_t queue_id; + vnet_dev_rx_queue_if_rt_data_t **sec_if_rt_data; CLIB_CACHE_LINE_ALIGN_MARK (runtime1); - vlib_buffer_template_t buffer_template; + vnet_dev_rx_queue_if_rt_data_t if_rt_data; CLIB_CACHE_LINE_ALIGN_MARK (driver_data); u8 data[]; } vnet_dev_rx_queue_t; +#if CLIB_CACHE_LINE_BYTES > 64 +STATIC_ASSERT_SIZEOF (vnet_dev_rx_queue_t, 2 * CLIB_CACHE_LINE_BYTES); +#else STATIC_ASSERT_SIZEOF (vnet_dev_rx_queue_t, 3 * CLIB_CACHE_LINE_BYTES); +#endif typedef struct vnet_dev_tx_queue { @@ -309,6 +328,38 @@ typedef struct vnet_dev_tx_queue STATIC_ASSERT_SIZEOF (vnet_dev_tx_queue_t, 2 * CLIB_CACHE_LINE_BYTES); +typedef struct +{ + vnet_dev_if_name_t name; + u8 interface_created : 1; + u8 feature_arc : 1; + u8 redirect_to_node : 1; + u8 feature_arc_index; + u16 rx_next_index; + u32 index; + u32 sw_if_index; + u32 hw_if_index; + u32 dev_instance; + u32 tx_node_index; + u32 next_index; + u32 current_config_index; + u16 redirect_to_node_next_index; + u32 user_data; + vnet_dev_arg_t *args; +} vnet_dev_port_interface_t; + +typedef struct +{ + u32 rx_node_index; + u8 default_is_intr_mode : 1; + u16 num_rx_queues; + u16 num_tx_queues; + u16 txq_sz; + u16 rxq_sz; + vnet_dev_port_interface_t primary_interface; + vnet_dev_port_interface_t **secondary_interfaces; +} vnet_dev_port_interfaces_t; + typedef struct vnet_dev_port { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -319,7 +370,6 @@ typedef struct vnet_dev_port u8 started : 1; u8 link_up : 1; u8 promisc : 1; - u8 interface_created : 1; u8 rx_node_assigned : 1; vnet_dev_counter_main_t *counter_main; vnet_dev_queue_config_t rx_queue_config; @@ -334,31 +384,12 @@ typedef struct vnet_dev_port vnet_dev_tx_queue_t **tx_queues; vnet_dev_port_ops_t port_ops; vnet_dev_arg_t *args; + vnet_dev_arg_t *sec_if_args; vnet_dev_rx_queue_ops_t rx_queue_ops; vnet_dev_tx_queue_ops_t tx_queue_ops; vnet_dev_node_t rx_node; vnet_dev_node_t tx_node; - - struct - { - vnet_dev_if_name_t name; - u32 dev_instance; - u32 rx_node_index; - u32 current_config_index; - u16 rx_next_index; - u16 redirect_to_node_next_index; - u8 feature_arc_index; - u8 feature_arc : 1; - u8 redirect_to_node : 1; - u8 default_is_intr_mode : 1; - u32 tx_node_index; - u32 hw_if_index; - u32 sw_if_index; - u16 num_rx_queues; - u16 num_tx_queues; - u16 txq_sz; - u16 rxq_sz; - } intf; + vnet_dev_port_interfaces_t *interfaces; CLIB_CACHE_LINE_ALIGN_MARK (data0); u8 data[]; @@ -456,10 +487,17 @@ typedef struct typedef struct { + vnet_dev_port_t *port; + u32 sec_if_index; + u8 is_primary_if : 1; +} vnet_dev_instance_t; + +typedef struct +{ vnet_dev_bus_t *buses; vnet_dev_driver_t *drivers; vnet_dev_t **devices; - vnet_dev_port_t **ports_by_dev_instance; + vnet_dev_instance_t *dev_instances; vnet_dev_bus_registration_t *bus_registrations; vnet_dev_driver_registration_t *driver_registrations; void *runtime_temp_spaces; @@ -482,6 +520,7 @@ typedef struct vnet_dev_port_attr_t attr; vnet_dev_port_ops_t ops; vnet_dev_arg_t *args; + vnet_dev_arg_t *sec_if_args; u16 data_size; void *initial_data; } port; @@ -531,11 +570,11 @@ format_function_t format_vnet_dev_args; /* dev.c */ vnet_dev_t *vnet_dev_alloc (vlib_main_t *, vnet_dev_device_id_t, vnet_dev_driver_t *); -void vnet_dev_free (vlib_main_t *, vnet_dev_t *); -vnet_dev_rv_t vnet_dev_init (vlib_main_t *, vnet_dev_t *); -void vnet_dev_deinit (vlib_main_t *, vnet_dev_t *); -vnet_dev_rv_t vnet_dev_reset (vlib_main_t *, vnet_dev_t *); -void vnet_dev_detach (vlib_main_t *, vnet_dev_t *); +vnet_dev_op_no_rv_t vnet_dev_free; 
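+/* The op typedefs used below are declared earlier in this header; vnet_dev_op_t is vnet_dev_rv_t (vlib_main_t *, vnet_dev_t *) and vnet_dev_op_no_rv_t is void (vlib_main_t *, vnet_dev_t *), so each typedef-based declaration keeps the same signature as the prototype it replaces. */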
+vnet_dev_op_t vnet_dev_init; +vnet_dev_op_no_rv_t vnet_dev_deinit; +vnet_dev_op_t vnet_dev_reset; +vnet_dev_op_no_rv_t vnet_dev_detach; vnet_dev_rv_t vnet_dev_port_add (vlib_main_t *, vnet_dev_t *, vnet_dev_port_id_t, vnet_dev_port_add_args_t *); @@ -567,51 +606,84 @@ void vnet_dev_clear_hw_interface_counters (u32); void vnet_dev_set_interface_next_node (vnet_main_t *, u32, u32); /* port.c */ -vnet_dev_rv_t vnet_dev_port_start (vlib_main_t *, vnet_dev_port_t *); -vnet_dev_rv_t vnet_dev_port_start_all_rx_queues (vlib_main_t *, - vnet_dev_port_t *); -vnet_dev_rv_t vnet_dev_port_start_all_tx_queues (vlib_main_t *, - vnet_dev_port_t *); -void vnet_dev_port_stop (vlib_main_t *, vnet_dev_port_t *); -void vnet_dev_port_deinit (vlib_main_t *, vnet_dev_port_t *); -void vnet_dev_port_free (vlib_main_t *, vnet_dev_port_t *); + +typedef struct +{ + vnet_dev_if_name_t name; + u16 num_rx_queues; + u16 num_tx_queues; + u16 rxq_sz; + u16 txq_sz; + u8 default_is_intr_mode : 1; + u8 consistent_qp : 1; + + /* return */ + u32 sw_if_index; +} vnet_dev_port_if_create_args_t; + +typedef struct +{ + vnet_dev_if_name_t name; + u8 *args; + + /* return */ + u32 sw_if_index; +} vnet_dev_port_sec_if_create_args_t; + +typedef struct +{ + u32 sw_if_index; +} vnet_dev_port_del_sec_if_args_t; + +vnet_dev_port_op_t vnet_dev_port_start; +vnet_dev_port_op_t vnet_dev_port_start_all_rx_queues; +vnet_dev_port_op_t vnet_dev_port_start_all_tx_queues; +vnet_dev_port_op_no_rv_t vnet_dev_port_stop; +vnet_dev_port_op_no_rv_t vnet_dev_port_deinit; +vnet_dev_port_op_no_rv_t vnet_dev_port_free; +vnet_dev_port_op_with_ptr_t vnet_dev_port_add_sec_if; +vnet_dev_port_op_with_ptr_t vnet_dev_port_del_sec_if; + void vnet_dev_port_add_counters (vlib_main_t *, vnet_dev_port_t *, vnet_dev_counter_t *, u16); -void vnet_dev_port_free_counters (vlib_main_t *, vnet_dev_port_t *); -void vnet_dev_port_update_tx_node_runtime (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_port_op_no_rv_t vnet_dev_port_free_counters; +vnet_dev_port_op_no_rv_t vnet_dev_port_update_tx_node_runtime; void vnet_dev_port_state_change (vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_state_changes_t); -void vnet_dev_port_clear_counters (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_port_op_no_rv_t vnet_dev_port_clear_counters; vnet_dev_rv_t vnet_dev_port_cfg_change_req_validate (vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *); vnet_dev_rv_t vnet_dev_port_cfg_change (vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *); -vnet_dev_rv_t vnet_dev_port_if_create (vlib_main_t *, vnet_dev_port_t *); -vnet_dev_rv_t vnet_dev_port_if_remove (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_port_op_with_ptr_t vnet_dev_port_if_create; +vnet_dev_port_op_t vnet_dev_port_if_remove; /* queue.c */ vnet_dev_rv_t vnet_dev_rx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16); vnet_dev_rv_t vnet_dev_tx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16); -void vnet_dev_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *); -void vnet_dev_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *); +vnet_dev_rx_queue_op_no_rv_t vnet_dev_rx_queue_free; +vnet_dev_tx_queue_op_no_rv_t vnet_dev_tx_queue_free; void vnet_dev_rx_queue_add_counters (vlib_main_t *, vnet_dev_rx_queue_t *, vnet_dev_counter_t *, u16); -void vnet_dev_rx_queue_free_counters (vlib_main_t *, vnet_dev_rx_queue_t *); +vnet_dev_rx_queue_op_no_rv_t vnet_dev_rx_queue_free_counters; void vnet_dev_tx_queue_add_counters (vlib_main_t *, vnet_dev_tx_queue_t *, vnet_dev_counter_t *, u16); -void 
vnet_dev_tx_queue_free_counters (vlib_main_t *, vnet_dev_tx_queue_t *); -vnet_dev_rv_t vnet_dev_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *); -vnet_dev_rv_t vnet_dev_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *); -void vnet_dev_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *); -void vnet_dev_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *); +vnet_dev_tx_queue_op_no_rv_t vnet_dev_tx_queue_free_counters; +vnet_dev_rx_queue_op_t vnet_dev_rx_queue_start; +vnet_dev_tx_queue_op_t vnet_dev_tx_queue_start; +vnet_dev_rx_queue_op_no_rv_t vnet_dev_rx_queue_stop; +vnet_dev_tx_queue_op_no_rv_t vnet_dev_tx_queue_stop; /* process.c */ -vnet_dev_rv_t vnet_dev_process_create (vlib_main_t *, vnet_dev_t *); +vnet_dev_op_t vnet_dev_process_create; vnet_dev_rv_t vnet_dev_process_call_op (vlib_main_t *, vnet_dev_t *, vnet_dev_op_t *); vnet_dev_rv_t vnet_dev_process_call_op_no_rv (vlib_main_t *, vnet_dev_t *, vnet_dev_op_no_rv_t *); +vnet_dev_rv_t vnet_dev_process_call_op_with_ptr (vlib_main_t *, vnet_dev_t *, + vnet_dev_op_with_ptr_t *, + void *); void vnet_dev_process_call_op_no_wait (vlib_main_t *, vnet_dev_t *, vnet_dev_op_no_rv_t *); vnet_dev_rv_t vnet_dev_process_call_port_op (vlib_main_t *, vnet_dev_port_t *, @@ -619,12 +691,15 @@ vnet_dev_rv_t vnet_dev_process_call_port_op (vlib_main_t *, vnet_dev_port_t *, vnet_dev_rv_t vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm, vnet_dev_port_t *, vnet_dev_port_op_no_rv_t *); +vnet_dev_rv_t +vnet_dev_process_call_port_op_with_ptr (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_op_with_ptr_t *, void *); void vnet_dev_process_call_port_op_no_wait (vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_op_no_rv_t *); vnet_dev_rv_t vnet_dev_process_port_cfg_change_req (vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *); -void vnet_dev_process_quit (vlib_main_t *, vnet_dev_t *); +vnet_dev_op_no_rv_t vnet_dev_process_quit; void vnet_dev_poll_dev_add (vlib_main_t *, vnet_dev_t *, f64, vnet_dev_op_no_rv_t *); void vnet_dev_poll_dev_remove (vlib_main_t *, vnet_dev_t *, diff --git a/src/vnet/dev/dev_funcs.h b/src/vnet/dev/dev_funcs.h index 521157abbec..f47344b0cea 100644 --- a/src/vnet/dev/dev_funcs.h +++ b/src/vnet/dev/dev_funcs.h @@ -51,13 +51,33 @@ vnet_dev_get_port_by_index (vnet_dev_t *dev, u32 index) return pool_elt_at_index (dev->ports, index)[0]; } -static_always_inline vnet_dev_port_t * -vnet_dev_get_port_from_dev_instance (u32 dev_instance) +static_always_inline vnet_dev_instance_t * +vnet_dev_get_dev_instance (u32 dev_instance) { vnet_dev_main_t *dm = &vnet_dev_main; - if (pool_is_free_index (dm->ports_by_dev_instance, dev_instance)) + if (pool_is_free_index (dm->dev_instances, dev_instance)) return 0; - return pool_elt_at_index (dm->ports_by_dev_instance, dev_instance)[0]; + return pool_elt_at_index (dm->dev_instances, dev_instance); +} + +static_always_inline vnet_dev_port_interface_t * +vnet_dev_port_get_primary_if (vnet_dev_port_t *p) +{ + return &p->interfaces->primary_interface; +} + +static_always_inline vnet_dev_port_interface_t * +vnet_dev_port_get_sec_if_by_index (vnet_dev_port_t *p, u32 index) +{ + return *pool_elt_at_index (p->interfaces->secondary_interfaces, index); +} + +static_always_inline vnet_dev_port_t * +vnet_dev_get_port_from_dev_instance (u32 dev_instance) +{ + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (dev_instance); + + return di ? 
di->port : 0; } static_always_inline vnet_dev_port_t * @@ -68,12 +88,44 @@ vnet_dev_get_port_from_hw_if_index (u32 hw_if_index) hw = vnet_get_hw_interface (vnet_get_main (), hw_if_index); port = vnet_dev_get_port_from_dev_instance (hw->dev_instance); - if (!port || port->intf.hw_if_index != hw_if_index) + if (!port || !port->interfaces || + port->interfaces->primary_interface.hw_if_index != hw_if_index) return 0; return port; } +static_always_inline u32 +vnet_dev_get_rx_queue_if_sw_if_index (vnet_dev_rx_queue_t *rxq) +{ + return rxq->port->interfaces->primary_interface.sw_if_index; +} + +static_always_inline u32 +vnet_dev_get_rx_queue_if_hw_if_index (vnet_dev_rx_queue_t *rxq) +{ + return rxq->port->interfaces->primary_interface.hw_if_index; +} + +static_always_inline u32 +vnet_dev_get_port_rx_node_index (vnet_dev_port_t *port) +{ + return port->interfaces->rx_node_index; +} + +static_always_inline vnet_dev_port_t * +vnet_dev_get_port_from_sw_if_index (u32 sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *si; + + si = vnet_get_sw_interface_or_null (vnm, sw_if_index); + if (!si) + return 0; + + return vnet_dev_get_port_from_hw_if_index (si->hw_if_index); +} + static_always_inline vnet_dev_t * vnet_dev_by_index (u32 index) { @@ -128,12 +180,6 @@ vnet_dev_port_validate (vlib_main_t *vm, vnet_dev_port_t *port) ASSERT (vm->thread_index == 0); } -static_always_inline u32 -vnet_dev_port_get_sw_if_index (vnet_dev_port_t *port) -{ - return port->intf.sw_if_index; -} - static_always_inline vnet_dev_port_t * vnet_dev_get_port_by_id (vnet_dev_t *dev, vnet_dev_port_id_t port_id) { @@ -144,7 +190,7 @@ vnet_dev_get_port_by_id (vnet_dev_t *dev, vnet_dev_port_id_t port_id) } static_always_inline vnet_dev_rx_queue_t * -vnet_dev_port_get_rx_queue_by_id (vnet_dev_port_t *port, +vnet_dev_get_port_rx_queue_by_id (vnet_dev_port_t *port, vnet_dev_queue_id_t queue_id) { foreach_vnet_dev_port_rx_queue (q, port) @@ -154,7 +200,7 @@ vnet_dev_port_get_rx_queue_by_id (vnet_dev_port_t *port, } static_always_inline vnet_dev_tx_queue_t * -vnet_dev_port_get_tx_queue_by_id (vnet_dev_port_t *port, +vnet_dev_get_port_tx_queue_by_id (vnet_dev_port_t *port, vnet_dev_queue_id_t queue_id) { foreach_vnet_dev_port_tx_queue (q, port) @@ -199,10 +245,49 @@ vnet_dev_tx_queue_unlock_if_needed (vnet_dev_tx_queue_t *txq) __atomic_store_n (&txq->lock, 0, __ATOMIC_RELEASE); } +static_always_inline vnet_dev_rx_queue_if_rt_data_t * +vnet_dev_get_rx_queue_if_rt_data (vnet_dev_rx_queue_t *rxq) +{ + return &rxq->if_rt_data; +} + +static_always_inline vnet_dev_rx_queue_if_rt_data_t * +vnet_dev_get_rx_queue_sec_if_rt_data (vnet_dev_rx_queue_t *rxq, + u32 sec_if_index) +{ + return rxq->sec_if_rt_data[sec_if_index]; +} + +static_always_inline vlib_buffer_template_t +vnet_dev_get_rx_queue_if_buffer_template (vnet_dev_rx_queue_t *rxq) +{ + return rxq->if_rt_data.buffer_template; +} + +static_always_inline vlib_buffer_template_t +vnet_dev_get_rx_queue_sec_if_buffer_template (vnet_dev_rx_queue_t *rxq, + u32 sec_if_index) +{ + return rxq->sec_if_rt_data[sec_if_index]->buffer_template; +} + +static_always_inline u16 +vnet_dev_get_rx_queue_if_next_index (vnet_dev_rx_queue_t *rxq) +{ + return rxq->if_rt_data.next_index; +} + +static_always_inline u16 +vnet_dev_get_rx_queue_sec_if_next_index (vnet_dev_rx_queue_t *rxq, + u32 sec_if_index) +{ + return rxq->sec_if_rt_data[sec_if_index]->next_index; +} + static_always_inline u8 vnet_dev_get_rx_queue_buffer_pool_index (vnet_dev_rx_queue_t *rxq) { - return 
rxq->buffer_template.buffer_pool_index; + return rxq->if_rt_data.buffer_template.buffer_pool_index; } static_always_inline u32 @@ -237,8 +322,8 @@ static_always_inline vnet_dev_rx_queue_t * foreach_vnet_dev_rx_queue_runtime_helper (vlib_node_runtime_t *node, vnet_dev_rx_queue_t *rxq) { - vnet_dev_port_t *port; vnet_dev_rx_queue_rt_req_t req; + vnet_dev_port_interfaces_t *ifs; if (rxq == 0) rxq = vnet_dev_get_rx_node_runtime (node)->first_rx_queue; @@ -255,15 +340,34 @@ foreach_vnet_dev_rx_queue_runtime_helper (vlib_node_runtime_t *node, req.as_number = __atomic_exchange_n (&rxq->runtime_request.as_number, 0, __ATOMIC_ACQUIRE); - port = rxq->port; + ifs = rxq->port->interfaces; if (req.update_next_index) - rxq->next_index = port->intf.rx_next_index; + { + vnet_dev_port_interface_t **si = + rxq->port->interfaces->secondary_interfaces; + rxq->if_rt_data.next_index = ifs->primary_interface.rx_next_index; + vec_foreach_pointer (rtd, rxq->sec_if_rt_data) + if (rtd) + rtd->next_index = si[rtd->sec_if_index]->next_index; + } if (req.update_feature_arc) { - vlib_buffer_template_t *bt = &rxq->buffer_template; - bt->current_config_index = port->intf.current_config_index; - vnet_buffer (bt)->feature_arc_index = port->intf.feature_arc_index; + vnet_dev_port_interface_t **si = + rxq->port->interfaces->secondary_interfaces; + vlib_buffer_template_t *bt = &rxq->if_rt_data.buffer_template; + bt->current_config_index = ifs->primary_interface.current_config_index; + vnet_buffer (bt)->feature_arc_index = + ifs->primary_interface.feature_arc_index; + vec_foreach_pointer (rtd, rxq->sec_if_rt_data) + if (rtd) + { + vlib_buffer_template_t *bt = &rtd->buffer_template; + bt->current_config_index = + si[rtd->sec_if_index]->current_config_index; + vnet_buffer (bt)->feature_arc_index = + si[rtd->sec_if_index]->feature_arc_index; + } } if (req.suspend_on) diff --git a/src/vnet/dev/errors.h b/src/vnet/dev/errors.h index 6ececad12ec..243b10e698e 100644 --- a/src/vnet/dev/errors.h +++ b/src/vnet/dev/errors.h @@ -37,9 +37,12 @@ _ (TIMEOUT, "timeout") \ _ (UNKNOWN_DEVICE, "unknown device") \ _ (UNKNOWN_INTERFACE, "unknown interface") \ + _ (NOT_PRIMARY_INTERFACE, "not primary interface") \ + _ (PRIMARY_INTERFACE_MISSING, "primary interface missing") \ _ (UNSUPPORTED_CONFIG, "unsupported config") \ _ (UNSUPPORTED_DEVICE, "unsupported device") \ _ (UNSUPPORTED_DEVICE_VER, "unsupported device version") \ + _ (UNSUPPORTED_INTERFACE, "unsupported interface") \ _ (ALREADY_DONE, "already done") \ _ (NO_SUCH_INTERFACE, "no such interface") \ _ (INIT_FAILED, "init failed") diff --git a/src/vnet/dev/format.c b/src/vnet/dev/format.c index f599c0f8b85..ffc4a3a70b4 100644 --- a/src/vnet/dev/format.c +++ b/src/vnet/dev/format.c @@ -44,9 +44,15 @@ u8 * format_vnet_dev_interface_name (u8 *s, va_list *args) { u32 i = va_arg (*args, u32); - vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (i); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (i); + vnet_dev_port_interface_t *si; + vnet_dev_port_t *p = di->port; + + if (di->is_primary_if) + return format (s, "%s", p->interfaces->primary_interface.name); - return format (s, "%s", port->intf.name); + si = vnet_dev_port_get_sec_if_by_index (p, di->sec_if_index); + return format (s, "%s", si->name); } u8 * @@ -138,11 +144,22 @@ format_vnet_dev_port_info (u8 *s, va_list *args) format_vnet_dev_args, port->args); s = format (s, "\n%UInterface ", format_white_space, indent); - if (port->interface_created) + if (port->interfaces) { - s = format (s, "assigned, interface name is '%U', RX 
node is '%U'", - format_vnet_sw_if_index_name, vnm, port->intf.sw_if_index, - format_vlib_node_name, vm, port->intf.rx_node_index); + s = format ( + s, "assigned, primary interface name is '%U', RX node is '%U'", + format_vnet_sw_if_index_name, vnm, + port->interfaces->primary_interface.sw_if_index, format_vlib_node_name, + vm, vnet_dev_get_port_rx_node_index (port)); + pool_foreach_pointer (sif, port->interfaces->secondary_interfaces) + { + s = format (s, "\n%USecondary interface '%U'", format_white_space, + indent, format_vnet_sw_if_index_name, vnm, + sif->sw_if_index); + if (sif->args) + s = format (s, "\n%U args '%U", format_white_space, indent, + format_vnet_dev_args, sif->args); + } } else s = format (s, "not assigned"); @@ -318,7 +335,7 @@ unformat_vnet_dev_port_flags (unformat_input_t *input, va_list *args) #undef _ }; u64 flag_values[] = { -#define _(b, n, d) 1ull << (b) +#define _(b, n, d) 1ull << (b), foreach_vnet_dev_port_flag #undef _ }; @@ -394,7 +411,7 @@ format_vnet_dev_port_flags (u8 *s, va_list *args) #undef _ }; u64 flag_values[] = { -#define _(b, n, d) 1ull << (b) +#define _(b, n, d) 1ull << (b), foreach_vnet_dev_port_flag #undef _ }; diff --git a/src/vnet/dev/handlers.c b/src/vnet/dev/handlers.c index 2a55affe3e3..bfacbe27c99 100644 --- a/src/vnet/dev/handlers.c +++ b/src/vnet/dev/handlers.c @@ -19,7 +19,8 @@ vnet_dev_port_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw, u32 frame_size) { vlib_main_t *vm = vlib_get_main (); - vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (hw->dev_instance); + vnet_dev_port_t *p; vnet_dev_rv_t rv; vnet_dev_port_cfg_change_req_t req = { @@ -27,6 +28,11 @@ vnet_dev_port_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw, .max_rx_frame_size = frame_size, }; + p = di->port; + + if (!di->is_primary_if) + return vnet_dev_port_err (vm, p, VNET_DEV_ERR_NOT_PRIMARY_INTERFACE, ""); + log_debug (p->dev, "size %u", frame_size); rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req); @@ -49,13 +55,17 @@ vnet_dev_port_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hw, u32 flags) { vlib_main_t *vm = vlib_get_main (); - vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (hw->dev_instance); + vnet_dev_port_t *p = di->port; vnet_dev_rv_t rv; vnet_dev_port_cfg_change_req_t req = { .type = VNET_DEV_PORT_CFG_PROMISC_MODE, }; + if (!di->is_primary_if) + return ~0; + switch (flags) { case ETHERNET_INTERFACE_FLAG_DEFAULT_L3: @@ -87,13 +97,17 @@ vnet_dev_port_mac_change (vnet_hw_interface_t *hi, const u8 *old, const u8 *new) { vlib_main_t *vm = vlib_get_main (); - vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (hi->dev_instance); + vnet_dev_port_t *p = di->port; vnet_dev_rv_t rv; vnet_dev_port_cfg_change_req_t req = { .type = VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR, }; + if (!di->is_primary_if) + return vnet_dev_port_err (vm, p, VNET_DEV_ERR_NOT_PRIMARY_INTERFACE, ""); + vnet_dev_set_hw_addr_eth_mac (&req.addr, new); log_debug (p->dev, "new mac %U", format_vnet_dev_hw_addr, &req.addr); @@ -116,7 +130,8 @@ vnet_dev_add_del_mac_address (vnet_hw_interface_t *hi, const u8 *address, u8 is_add) { vlib_main_t *vm = vlib_get_main (); - vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (hi->dev_instance); + 
vnet_dev_port_t *p = di->port; vnet_dev_rv_t rv; vnet_dev_port_cfg_change_req_t req = { @@ -124,6 +139,9 @@ vnet_dev_add_del_mac_address (vnet_hw_interface_t *hi, const u8 *address, VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR, }; + if (!di->is_primary_if) + return vnet_dev_port_err (vm, p, VNET_DEV_ERR_NOT_PRIMARY_INTERFACE, ""); + vnet_dev_set_hw_addr_eth_mac (&req.addr, address); log_debug (p->dev, "received (addr %U is_add %u", format_vnet_dev_hw_addr, @@ -147,10 +165,19 @@ vnet_dev_flow_ops_fn (vnet_main_t *vnm, vnet_flow_dev_op_t op, u32 dev_instance, u32 flow_index, uword *private_data) { vlib_main_t *vm = vlib_get_main (); - vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (dev_instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (dev_instance); + vnet_dev_port_t *p; vnet_dev_port_cfg_change_req_t req; vnet_dev_rv_t rv; + if (!di) + return VNET_FLOW_ERROR_NO_SUCH_INTERFACE; + + if (!di->is_primary_if) + return VNET_FLOW_ERROR_NOT_SUPPORTED; + + p = di->port; + switch (op) { case VNET_FLOW_DEV_OP_ADD_FLOW: @@ -201,10 +228,12 @@ vnet_dev_interface_set_rss_queues (vnet_main_t *vnm, vnet_hw_interface_t *hi, void vnet_dev_clear_hw_interface_counters (u32 instance) { - vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (instance); vlib_main_t *vm = vlib_get_main (); - vnet_dev_process_call_port_op_no_rv (vm, port, vnet_dev_port_clear_counters); + if (di->is_primary_if) + vnet_dev_process_call_port_op_no_rv (vm, di->port, + vnet_dev_port_clear_counters); } void @@ -213,44 +242,49 @@ vnet_dev_set_interface_next_node (vnet_main_t *vnm, u32 hw_if_index, { vlib_main_t *vm = vlib_get_main (); vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - vnet_dev_port_t *port = - vnet_dev_get_port_from_dev_instance (hw->dev_instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (hw->dev_instance); + vnet_dev_port_interface_t *intf; int runtime_update = 0; + if (di->is_primary_if) + intf = vnet_dev_port_get_primary_if (di->port); + else + intf = vnet_dev_port_get_sec_if_by_index (di->port, di->sec_if_index); + if (node_index == ~0) { - port->intf.redirect_to_node_next_index = 0; - if (port->intf.feature_arc == 0) + intf->redirect_to_node_next_index = 0; + if (intf->feature_arc == 0) { - port->intf.rx_next_index = - vnet_dev_default_next_index_by_port_type[port->attr.type]; + intf->rx_next_index = + vnet_dev_default_next_index_by_port_type[di->port->attr.type]; runtime_update = 1; } - port->intf.redirect_to_node = 0; + intf->redirect_to_node = 0; } else { u16 next_index = vlib_node_add_next (vlib_get_main (), port_rx_eth_node.index, node_index); - port->intf.redirect_to_node_next_index = next_index; - if (port->intf.feature_arc == 0) + intf->redirect_to_node_next_index = next_index; + if (intf->feature_arc == 0) { - port->intf.rx_next_index = next_index; + intf->rx_next_index = next_index; runtime_update = 1; } - port->intf.redirect_to_node = 1; + intf->redirect_to_node = 1; } - port->intf.rx_next_index = + intf->rx_next_index = node_index == ~0 ?
vnet_dev_default_next_index_by_port_type[port->attr.type] : - node_index; + vnet_dev_default_next_index_by_port_type[di->port->attr.type] : + node_index; if (runtime_update) { - foreach_vnet_dev_port_rx_queue (rxq, port) + foreach_vnet_dev_port_rx_queue (rxq, di->port) vnet_dev_rx_queue_rt_request ( vm, rxq, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 }); - log_debug (port->dev, "runtime update requested due to chgange in " - "reditect-to-next configuration"); + log_debug (di->port->dev, "runtime update requested due to change in " + "redirect-to-next configuration"); } } diff --git a/src/vnet/dev/port.c b/src/vnet/dev/port.c index df7805c1ff2..fccedebdcf4 100644 --- a/src/vnet/dev/port.c +++ b/src/vnet/dev/port.c @@ -94,6 +94,7 @@ vnet_dev_port_free (vlib_main_t *vm, vnet_dev_port_t *port) pool_free (port->rx_queues); pool_free (port->tx_queues); vnet_dev_arg_free (&port->args); + vnet_dev_arg_free (&port->sec_if_args); pool_put_index (dev->ports, port->index); clib_mem_free (port); } @@ -109,11 +110,23 @@ vnet_dev_port_update_tx_node_runtime (vlib_main_t *vm, vnet_dev_port_t *port) clib_bitmap_foreach (ti, q->assigned_threads) { vlib_main_t *tvm = vlib_get_main_by_index (ti); - vlib_node_runtime_t *nr = - vlib_node_get_runtime (tvm, port->intf.tx_node_index); - vnet_dev_tx_node_runtime_t *tnr = vnet_dev_get_tx_node_runtime (nr); - tnr->hw_if_index = port->intf.hw_if_index; + vlib_node_runtime_t *nr; + vnet_dev_tx_node_runtime_t *tnr; + vnet_dev_port_interfaces_t *ifs = port->interfaces; + + nr = + vlib_node_get_runtime (tvm, ifs->primary_interface.tx_node_index); + tnr = vnet_dev_get_tx_node_runtime (nr); + tnr->hw_if_index = ifs->primary_interface.hw_if_index; tnr->tx_queue = q; + + pool_foreach_pointer (sif, port->interfaces->secondary_interfaces) + { + nr = vlib_node_get_runtime (tvm, sif->tx_node_index); + tnr = vnet_dev_get_tx_node_runtime (nr); + tnr->hw_if_index = sif->hw_if_index; + tnr->tx_queue = q; + } } } } @@ -271,6 +284,11 @@ vnet_dev_port_add (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_port_id_t id, for (vnet_dev_arg_t *a = args->port.args; a->type != VNET_DEV_ARG_END; a++) vec_add1 (port->args, *a); + if (args->port.sec_if_args) + for (vnet_dev_arg_t *a = args->port.sec_if_args; + a->type != VNET_DEV_ARG_END; a++) + vec_add1 (port->sec_if_args, *a); + /* defaults out of port attributes */ port->max_rx_frame_size = args->port.attr.max_supported_rx_frame_size; port->primary_hw_addr = args->port.attr.hw_addr; @@ -371,7 +389,7 @@ vnet_dev_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, { if (req->all_queues == 0) { - rxq = vnet_dev_port_get_rx_queue_by_id (port, req->queue_id); + rxq = vnet_dev_get_port_rx_queue_by_id (port, req->queue_id); if (rxq == 0) return VNET_DEV_ERR_BUG; } @@ -466,25 +484,34 @@ vnet_dev_port_state_change (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_port_state_changes_t changes) { vnet_main_t *vnm = vnet_get_main (); + vnet_dev_port_interfaces_t *ifs = port->interfaces; vnet_dev_port_validate (vm, port); if (changes.change.link_speed) { port->speed = changes.link_speed; - if (port->interface_created) - vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index, - changes.link_speed); + if (port->interfaces) + vnet_hw_interface_set_link_speed ( + vnm, ifs->primary_interface.hw_if_index, changes.link_speed); log_debug (port->dev, "port speed changed to %u", changes.link_speed); } if (changes.change.link_state) { port->link_up = changes.link_state; - if (port->interface_created) - vnet_hw_interface_set_flags ( - vnm,
port->intf.hw_if_index, - changes.link_state ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + if (ifs) + { + vnet_hw_interface_set_flags ( + vnm, ifs->primary_interface.hw_if_index, + changes.link_state ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + pool_foreach_pointer (sif, ifs->secondary_interfaces) + { + vnet_hw_interface_set_flags ( + vnm, sif->hw_if_index, + changes.link_state ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + } + } log_debug (port->dev, "port link state changed to %s", changes.link_state ? "up" : "down"); } @@ -510,18 +537,51 @@ vnet_dev_port_free_counters (vlib_main_t *vm, vnet_dev_port_t *port) vnet_dev_counters_free (vm, port->counter_main); } +static void +vnet_dev_port_init_if_rt_data (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_rx_queue_if_rt_data_t *rtd, + u32 sw_if_index) +{ + vnet_dev_t *dev = port->dev; + u8 buffer_pool_index = + vlib_buffer_pool_get_default_for_numa (vm, dev->numa_node); + vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index); + + rtd->buffer_template = bp->buffer_template; + vnet_buffer (&rtd->buffer_template)->sw_if_index[VLIB_RX] = sw_if_index; + vnet_buffer (&rtd->buffer_template)->sw_if_index[VLIB_TX] = ~0; + rtd->next_index = ~0; + rtd->sw_if_index = sw_if_index; +} + vnet_dev_rv_t -vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) +vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port, void *ptr) { vnet_main_t *vnm = vnet_get_main (); u16 n_threads = vlib_get_n_threads (); vnet_dev_main_t *dm = &vnet_dev_main; vnet_dev_t *dev = port->dev; - vnet_dev_port_t **pp; + vnet_dev_port_if_create_args_t *a = ptr; + vnet_dev_port_interfaces_t *ifs = port->interfaces; + vnet_dev_instance_t *di; vnet_dev_rv_t rv; u16 ti = 0; - if (port->intf.name[0] == 0) + if (ifs) + return VNET_DEV_ERR_ALREADY_EXISTS; + + port->interfaces = ifs = + clib_mem_alloc (sizeof (vnet_dev_port_interfaces_t)); + + *(ifs) = (vnet_dev_port_interfaces_t){ + .num_rx_queues = a->num_rx_queues, + .num_tx_queues = a->num_tx_queues, + .rxq_sz = a->rxq_sz, + .txq_sz = a->txq_sz, + .default_is_intr_mode = a->default_is_intr_mode, + }; + + if (a->name[0] == 0) { u8 *s; s = format (0, "%s%u/%u", @@ -529,44 +589,47 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) port->dev->index, port->index); u32 n = vec_len (s); - if (n >= sizeof (port->intf.name)) + if (n >= sizeof (a->name)) { vec_free (s); return VNET_DEV_ERR_BUG; } - clib_memcpy (port->intf.name, s, n); - port->intf.name[n] = 0; + clib_memcpy (ifs->primary_interface.name, s, n); + ifs->primary_interface.name[n] = 0; vec_free (s); } + else + clib_memcpy (ifs->primary_interface.name, a->name, + sizeof (ifs->primary_interface.name)); log_debug ( dev, "allocating %u rx queues with size %u and %u tx queues with size %u", - port->intf.num_rx_queues, port->intf.rxq_sz, port->intf.num_tx_queues, - port->intf.txq_sz); + a->num_rx_queues, a->rxq_sz, a->num_tx_queues, a->txq_sz); - for (int i = 0; i < port->intf.num_rx_queues; i++) - if ((rv = vnet_dev_rx_queue_alloc (vm, port, port->intf.rxq_sz)) != - VNET_DEV_OK) + for (int i = 0; i < ifs->num_rx_queues; i++) + if ((rv = vnet_dev_rx_queue_alloc (vm, port, ifs->rxq_sz)) != VNET_DEV_OK) goto error; - for (u32 i = 0; i < port->intf.num_tx_queues; i++) - if ((rv = vnet_dev_tx_queue_alloc (vm, port, port->intf.txq_sz)) != - VNET_DEV_OK) + for (u32 i = 0; i < ifs->num_tx_queues; i++) + if ((rv = vnet_dev_tx_queue_alloc (vm, port, ifs->txq_sz)) != VNET_DEV_OK) goto error; foreach_vnet_dev_port_tx_queue (q, port) { - q->assigned_threads = 
clib_bitmap_set (q->assigned_threads, ti, 1); + /* if consistent_qp is enabled, we start by assigning queues to workers + * and we end with main */ + u16 real_ti = (ti + a->consistent_qp) % n_threads; + q->assigned_threads = clib_bitmap_set (q->assigned_threads, real_ti, 1); log_debug (dev, "port %u tx queue %u assigned to thread %u", - port->port_id, q->queue_id, ti); + port->port_id, q->queue_id, real_ti); if (++ti >= n_threads) break; } - /* pool of port pointers helps us to assign unique dev_instance */ - pool_get (dm->ports_by_dev_instance, pp); - port->intf.dev_instance = pp - dm->ports_by_dev_instance; - pp[0] = port; + pool_get (dm->dev_instances, di); + ifs->primary_interface.dev_instance = di - dm->dev_instances; + di->port = port; + di->is_primary_if = 1; if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET) { @@ -575,7 +638,7 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) vnet_sw_interface_t *sw; vnet_hw_interface_t *hw; vnet_hw_if_caps_t caps = 0; - u32 rx_node_index; + u32 rx_node_index, hw_if_index, sw_if_index; driver = pool_elt_at_index (dm->drivers, dev->driver_index); @@ -587,27 +650,28 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) dev_class->tx_function_n_errors = port->tx_node.n_error_counters; /* create new interface including tx and output nodes */ - port->intf.hw_if_index = vnet_eth_register_interface ( + hw_if_index = vnet_eth_register_interface ( vnm, &(vnet_eth_interface_registration_t){ .address = port->primary_hw_addr.eth_mac, .max_frame_size = port->max_rx_frame_size, .dev_class_index = driver->dev_class_index, - .dev_instance = port->intf.dev_instance, + .dev_instance = ifs->primary_interface.dev_instance, .cb.set_max_frame_size = vnet_dev_port_set_max_frame_size, .cb.flag_change = vnet_dev_port_eth_flag_change, }); + ifs->primary_interface.hw_if_index = hw_if_index; - sw = vnet_get_hw_sw_interface (vnm, port->intf.hw_if_index); - hw = vnet_get_hw_interface (vnm, port->intf.hw_if_index); - port->intf.sw_if_index = sw->sw_if_index; + sw = vnet_get_hw_sw_interface (vnm, hw_if_index); + hw = vnet_get_hw_interface (vnm, hw_if_index); + sw_if_index = ifs->primary_interface.sw_if_index = sw->sw_if_index; vnet_hw_interface_set_flags ( - vnm, port->intf.hw_if_index, + vnm, ifs->primary_interface.hw_if_index, port->link_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); if (port->speed) - vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index, - port->speed); + vnet_hw_interface_set_link_speed ( + vnm, ifs->primary_interface.hw_if_index, port->speed); - port->intf.tx_node_index = hw->tx_node_index; + ifs->primary_interface.tx_node_index = hw->tx_node_index; caps |= port->attr.caps.interrupt_mode ? VNET_HW_IF_CAP_INT_MODE : 0; caps |= port->attr.caps.mac_filter ? VNET_HW_IF_CAP_MAC_FILTER : 0; @@ -615,14 +679,15 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) caps |= port->attr.tx_offloads.ip4_cksum ? 
VNET_HW_IF_CAP_TX_CKSUM : 0; if (caps) - vnet_hw_if_set_caps (vnm, port->intf.hw_if_index, caps); + vnet_hw_if_set_caps (vnm, hw_if_index, caps); /* create / reuse rx node */ if (vec_len (dm->free_rx_node_indices)) { vlib_node_t *n; rx_node_index = vec_pop (dm->free_rx_node_indices); - vlib_node_rename (vm, rx_node_index, "%s-rx", port->intf.name); + vlib_node_rename (vm, rx_node_index, "%s-rx", + port->interfaces->primary_interface.name); n = vlib_get_node (vm, rx_node_index); n->function = vlib_node_get_preferred_node_fn_variant ( vm, port->rx_node.registrations); @@ -646,30 +711,28 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) .error_counters = port->rx_node.error_counters, .n_errors = port->rx_node.n_error_counters, }; - rx_node_index = - vlib_register_node (vm, &rx_node_reg, "%s-rx", port->intf.name); + rx_node_index = vlib_register_node (vm, &rx_node_reg, "%s-rx", + ifs->primary_interface.name); } port->rx_node_assigned = 1; - port->intf.rx_node_index = rx_node_index; - port->intf.rx_next_index = + ifs->rx_node_index = rx_node_index; + ifs->primary_interface.rx_next_index = vnet_dev_default_next_index_by_port_type[port->attr.type]; vlib_worker_thread_node_runtime_update (); log_debug (dev, "ethernet interface created, hw_if_index %u sw_if_index %u " "rx_node_index %u tx_node_index %u", - port->intf.hw_if_index, port->intf.sw_if_index, - port->intf.rx_node_index, port->intf.tx_node_index); + hw_if_index, sw_if_index, rx_node_index, + ifs->primary_interface.tx_node_index); } - port->interface_created = 1; foreach_vnet_dev_port_rx_queue (q, port) { - vnet_buffer (&q->buffer_template)->sw_if_index[VLIB_RX] = - port->intf.sw_if_index; + vnet_dev_port_init_if_rt_data (vm, port, &q->if_rt_data, + ifs->primary_interface.sw_if_index); /* poison to catch node not calling runtime update function */ - q->next_index = ~0; - q->interrupt_mode = port->intf.default_is_intr_mode; + q->interrupt_mode = ifs->default_is_intr_mode; vnet_dev_rx_queue_rt_request ( vm, q, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 }); } @@ -682,6 +745,8 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) error: if (rv != VNET_DEV_OK) vnet_dev_port_if_remove (vm, port); + else + a->sw_if_index = ifs->primary_interface.sw_if_index; return rv; } @@ -690,6 +755,7 @@ vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port) { vnet_dev_main_t *dm = &vnet_dev_main; vnet_main_t *vnm = vnet_get_main (); + vnet_dev_port_interfaces_t *ifs = port->interfaces; vnet_dev_port_validate (vm, port); @@ -698,23 +764,22 @@ vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port) if (port->rx_node_assigned) { - vlib_node_rename (vm, port->intf.rx_node_index, "deleted-%u", - port->intf.rx_node_index); - vec_add1 (dm->free_rx_node_indices, port->intf.rx_node_index); + vlib_node_rename (vm, ifs->rx_node_index, "deleted-%u", + ifs->rx_node_index); + vec_add1 (dm->free_rx_node_indices, ifs->rx_node_index); port->rx_node_assigned = 0; } - if (port->interface_created) + if (ifs) { vlib_worker_thread_barrier_sync (vm); - vnet_delete_hw_interface (vnm, port->intf.hw_if_index); + vnet_delete_hw_interface (vnm, ifs->primary_interface.hw_if_index); vlib_worker_thread_barrier_release (vm); - pool_put_index (dm->ports_by_dev_instance, port->intf.dev_instance); - port->interface_created = 0; + pool_put_index (dm->dev_instances, ifs->primary_interface.dev_instance); + clib_mem_free (port->interfaces); + port->interfaces = 0; } - port->intf = (typeof (port->intf)){}; - if (port->port_ops.deinit) 
port->port_ops.deinit (vm, port); @@ -731,6 +796,171 @@ vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port) return VNET_DEV_OK; } + +vnet_dev_rv_t +vnet_dev_port_del_sec_if_internal (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_interface_t *sif) +{ + vnet_dev_rv_t rv = VNET_DEV_OK; + + if (sif && port->port_ops.del_sec_if) + rv = port->port_ops.del_sec_if (vm, port, sif); + + if (rv != VNET_DEV_OK) + return rv; + + foreach_vnet_dev_port_rx_queue (q, port) + { + vec_foreach_pointer (p, q->sec_if_rt_data) + if (p) + clib_mem_free (p); + vec_free (q->sec_if_rt_data); + } + + if (sif->interface_created) + ethernet_delete_interface (vnet_get_main (), sif->hw_if_index); + + pool_put_index (port->interfaces->secondary_interfaces, sif->index); + vnet_dev_arg_free (&sif->args); + clib_mem_free (sif); + return rv; +} + +vnet_dev_rv_t +vnet_dev_port_add_sec_if (vlib_main_t *vm, vnet_dev_port_t *port, void *ptr) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_port_sec_if_create_args_t *a = ptr; + vnet_main_t *vnm = vnet_get_main (); + vnet_dev_t *dev = port->dev; + vnet_dev_port_interface_t *sif = 0; + vnet_dev_port_interface_t **sip; + vnet_dev_rv_t rv = VNET_DEV_OK; + + sif = clib_mem_alloc (sizeof (vnet_dev_port_interface_t)); + pool_get (port->interfaces->secondary_interfaces, sip); + *sip = sif; + + *sif = (vnet_dev_port_interface_t){ + .index = sip - port->interfaces->secondary_interfaces, + .args = vec_dup (port->sec_if_args), + }; + + clib_memcpy (sif->name, a->name, sizeof (sif->name)); + + if (sif->args) + { + rv = vnet_dev_arg_parse (vm, dev, sif->args, a->args); + if (rv != VNET_DEV_OK) + return rv; + } + + if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET) + { + vnet_device_class_t *dev_class; + vnet_dev_driver_t *driver; + vnet_sw_interface_t *sw; + vnet_hw_interface_t *hw; + vnet_dev_instance_t *di; + vnet_hw_if_caps_t caps = 0; + + pool_get (dm->dev_instances, di); + sif->dev_instance = di - dm->dev_instances; + di->port = port; + di->sec_if_index = sip - port->interfaces->secondary_interfaces; + + driver = pool_elt_at_index (dm->drivers, dev->driver_index); + + /* hack to provide per-port tx node function */ + dev_class = vnet_get_device_class (vnm, driver->dev_class_index); + dev_class->tx_fn_registrations = port->tx_node.registrations; + dev_class->format_tx_trace = port->tx_node.format_trace; + dev_class->tx_function_error_counters = port->tx_node.error_counters; + dev_class->tx_function_n_errors = port->tx_node.n_error_counters; + + /* create new interface including tx and output nodes */ + sif->hw_if_index = vnet_eth_register_interface ( + vnm, &(vnet_eth_interface_registration_t){ + .address = port->primary_hw_addr.eth_mac, + .max_frame_size = port->max_rx_frame_size, + .dev_class_index = driver->dev_class_index, + .dev_instance = sif->dev_instance, + .cb.set_max_frame_size = vnet_dev_port_set_max_frame_size, + .cb.flag_change = vnet_dev_port_eth_flag_change, + }); + + sw = vnet_get_hw_sw_interface (vnm, sif->hw_if_index); + hw = vnet_get_hw_interface (vnm, sif->hw_if_index); + sif->sw_if_index = sw->sw_if_index; + sif->next_index = + vnet_dev_default_next_index_by_port_type[port->attr.type]; + sif->interface_created = 1; + vnet_dev_port_update_tx_node_runtime (vm, port); + vnet_hw_interface_set_flags ( + vnm, sif->hw_if_index, + port->link_up ?
VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + if (port->speed) + vnet_hw_interface_set_link_speed (vnm, sif->hw_if_index, port->speed); + + sif->tx_node_index = hw->tx_node_index; + + caps |= port->attr.caps.interrupt_mode ? VNET_HW_IF_CAP_INT_MODE : 0; + caps |= port->attr.caps.mac_filter ? VNET_HW_IF_CAP_MAC_FILTER : 0; + caps |= port->attr.tx_offloads.tcp_gso ? VNET_HW_IF_CAP_TCP_GSO : 0; + caps |= port->attr.tx_offloads.ip4_cksum ? VNET_HW_IF_CAP_TX_CKSUM : 0; + + if (caps) + vnet_hw_if_set_caps (vnm, sif->hw_if_index, caps); + } + else + return VNET_DEV_ERR_NOT_SUPPORTED; + + foreach_vnet_dev_port_rx_queue (q, port) + { + vnet_dev_rx_queue_if_rt_data_t *rtd; + vec_validate (q->sec_if_rt_data, sif->index); + + rtd = clib_mem_alloc_aligned (sizeof (vnet_dev_rx_queue_if_rt_data_t), + CLIB_CACHE_LINE_BYTES); + + q->sec_if_rt_data[sif->index] = rtd; + + vnet_dev_port_init_if_rt_data (vm, port, rtd, sif->sw_if_index); + vnet_dev_rx_queue_rt_request ( + vm, q, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 }); + } + + if (sif && port->port_ops.add_sec_if) + rv = port->port_ops.add_sec_if (vm, port, sif); + + if (rv != VNET_DEV_OK) + vnet_dev_port_del_sec_if_internal (vm, port, sif); + + return rv; +} + +vnet_dev_rv_t +vnet_dev_port_del_sec_if (vlib_main_t *vm, vnet_dev_port_t *port, void *ptr) +{ + vnet_dev_port_del_sec_if_args_t *a = ptr; + vnet_sw_interface_t *si; + vnet_hw_interface_t *hi; + vnet_dev_instance_t *di; + vnet_main_t *vnm = vnet_get_main (); + + log_debug (port->dev, "%u", a->sw_if_index); + + si = vnet_get_sw_interface_or_null (vnm, a->sw_if_index); + if (!si) + return VNET_DEV_ERR_UNKNOWN_INTERFACE; + + hi = vnet_get_hw_interface (vnm, si->hw_if_index); + di = vnet_dev_get_dev_instance (hi->dev_instance); + + return vnet_dev_port_del_sec_if_internal ( + vm, port, vnet_dev_port_get_sec_if_by_index (port, di->sec_if_index)); + } + void vnet_dev_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port) { diff --git a/src/vnet/dev/process.c b/src/vnet/dev/process.c index 3d3b49c01b6..4df56296444 100644 --- a/src/vnet/dev/process.c +++ b/src/vnet/dev/process.c @@ -20,9 +20,11 @@ typedef enum VNET_DEV_EVENT_PROCESS_QUIT, VNET_DEV_EVENT_CALL_OP, VNET_DEV_EVENT_CALL_OP_NO_RV, + VNET_DEV_EVENT_CALL_OP_WITH_PTR, VNET_DEV_EVENT_CALL_OP_NO_WAIT, VNET_DEV_EVENT_CALL_PORT_OP, VNET_DEV_EVENT_CALL_PORT_OP_NO_RV, + VNET_DEV_EVENT_CALL_PORT_OP_WITH_PTR, VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT, VNET_DEV_EVENT_CLOCK = ~0 } __clib_packed vnet_dev_event_t; @@ -51,6 +53,11 @@ typedef struct } call_op_no_rv; struct { + vnet_dev_op_with_ptr_t *op; + void *ptr; + } call_op_with_ptr; struct + { vnet_dev_op_no_rv_t *op; } call_op_no_wait; struct @@ -65,6 +72,12 @@ typedef struct } call_port_op_no_rv; struct { + vnet_dev_port_op_with_ptr_t *op; + vnet_dev_port_t *port; + void *ptr; + } call_port_op_with_ptr; + struct + { vnet_dev_port_op_no_rv_t *op; vnet_dev_port_t *port; } call_port_op_no_wait; @@ -132,6 +145,10 @@ vnet_dev_process_one_event (vlib_main_t *vm, vnet_dev_t *dev, ev_log_debug (vm, dev, ed, "call op no rv"); ed->call_op_no_rv.op (vm, dev); break; + case VNET_DEV_EVENT_CALL_OP_WITH_PTR: + ev_log_debug (vm, dev, ed, "call op with ptr"); + rv = ed->call_op_with_ptr.op (vm, dev, ed->call_op_with_ptr.ptr); + break; case VNET_DEV_EVENT_CALL_OP_NO_WAIT: ev_log_debug (vm, dev, ed, "call op no wait"); ed->call_op_no_wait.op (vm, dev); break; @@ -144,6 +161,11 @@ ev_log_debug (vm, dev, ed, "call port op no rv"); ed->call_port_op_no_rv.op (vm,
ed->call_port_op_no_rv.port); break; + case VNET_DEV_EVENT_CALL_PORT_OP_WITH_PTR: + ev_log_debug (vm, dev, ed, "call port op woth ptr"); + rv = ed->call_port_op_with_ptr.op (vm, ed->call_port_op_with_ptr.port, + ed->call_port_op_with_ptr.ptr); + break; case VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT: ev_log_debug (vm, dev, ed, "call port op no wait"); ed->call_port_op_no_wait.op (vm, ed->call_port_op_no_wait.port); @@ -477,6 +499,19 @@ vnet_dev_process_call_op_no_rv (vlib_main_t *vm, vnet_dev_t *dev, return vnet_dev_process_event_send_and_wait (vm, dev, ed); } +vnet_dev_rv_t +vnet_dev_process_call_op_with_ptr (vlib_main_t *vm, vnet_dev_t *dev, + vnet_dev_op_with_ptr_t *op, void *p) +{ + vnet_dev_event_data_t *ed = vnet_dev_event_data_alloc (vm, dev); + *ed = (vnet_dev_event_data_t){ + .event = VNET_DEV_EVENT_CALL_OP_WITH_PTR, + .call_op_with_ptr = { .op = op, .ptr = p }, + }; + + return vnet_dev_process_event_send_and_wait (vm, dev, ed); +} + void vnet_dev_process_call_op_no_wait (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_op_no_rv_t *op) @@ -516,6 +551,20 @@ vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm, vnet_dev_port_t *port, return vnet_dev_process_event_send_and_wait (vm, port->dev, ed); } +vnet_dev_rv_t +vnet_dev_process_call_port_op_with_ptr (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_op_with_ptr_t *op, + void *p) +{ + vnet_dev_event_data_t *ed = vnet_dev_event_data_alloc (vm, port->dev); + *ed = (vnet_dev_event_data_t){ + .event = VNET_DEV_EVENT_CALL_PORT_OP_WITH_PTR, + .call_port_op_with_ptr = { .op = op, .port = port, .ptr = p }, + }; + + return vnet_dev_process_event_send_and_wait (vm, port->dev, ed); +} + void vnet_dev_process_call_port_op_no_wait (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_port_op_no_rv_t *op) diff --git a/src/vnet/dev/queue.c b/src/vnet/dev/queue.c index 9a016a626fb..57ed3dcae3b 100644 --- a/src/vnet/dev/queue.c +++ b/src/vnet/dev/queue.c @@ -36,7 +36,6 @@ vnet_dev_rx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_t *dev = port->dev; vnet_dev_rv_t rv = VNET_DEV_OK; u16 n_threads = vlib_get_n_threads (); - u8 buffer_pool_index; vnet_dev_port_validate (vm, port); @@ -65,15 +64,6 @@ vnet_dev_rx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port, dm->next_rx_queue_thread = 1; } - buffer_pool_index = - vlib_buffer_pool_get_default_for_numa (vm, dev->numa_node); - vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index); - - rxq->buffer_template = bp->buffer_template; - vnet_buffer (&rxq->buffer_template)->sw_if_index[VLIB_TX] = ~0; - - rxq->next_index = vnet_dev_default_next_index_by_port_type[port->attr.type]; - if (port->rx_queue_ops.alloc) rv = port->rx_queue_ops.alloc (vm, rxq); @@ -107,7 +97,7 @@ vnet_dev_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) { if (rxq->port->rx_queue_ops.stop) rxq->port->rx_queue_ops.stop (vm, rxq); - vlib_node_set_state (vm, rxq->port->intf.rx_node_index, + vlib_node_set_state (vm, vnet_dev_get_port_rx_node_index (rxq->port), VLIB_NODE_STATE_DISABLED); rxq->started = 0; } diff --git a/src/vnet/dev/runtime.c b/src/vnet/dev/runtime.c index 79c55cfbd53..944c3ef32fa 100644 --- a/src/vnet/dev/runtime.c +++ b/src/vnet/dev/runtime.c @@ -23,7 +23,7 @@ _vnet_dev_rt_exec_op (vlib_main_t *vm, vnet_dev_rt_op_t *op) vnet_dev_rx_queue_t *previous = 0, *first = 0; vnet_dev_rx_node_runtime_t *rtd; vlib_node_state_t state = VLIB_NODE_STATE_DISABLED; - u32 node_index = port->intf.rx_node_index; + u32 node_index = vnet_dev_get_port_rx_node_index (port); rtd = vlib_node_get_runtime_data (vm, 
node_index); @@ -47,7 +47,7 @@ _vnet_dev_rt_exec_op (vlib_main_t *vm, vnet_dev_rt_op_t *op) } rtd->first_rx_queue = first; - vlib_node_set_state (vm, port->intf.rx_node_index, state); + vlib_node_set_state (vm, node_index, state); __atomic_store_n (&op->completed, 1, __ATOMIC_RELEASE); } diff --git a/src/vnet/dev/types.h b/src/vnet/dev/types.h index 006d18e5bc5..24799ac8138 100644 --- a/src/vnet/dev/types.h +++ b/src/vnet/dev/types.h @@ -50,7 +50,8 @@ typedef union /* do not change bit assignments - API dependency */ #define foreach_vnet_dev_port_flag \ - _ (0, INTERRUPT_MODE, "enable interrupt mode") + _ (0, INTERRUPT_MODE, "enable interrupt mode") \ + _ (1, CONSISTENT_QP, "consistent queue pairs") typedef union { diff --git a/src/vnet/devices/tap/cli.c b/src/vnet/devices/tap/cli.c index 5c676d32d60..5738ef237b6 100644 --- a/src/vnet/devices/tap/cli.c +++ b/src/vnet/devices/tap/cli.c @@ -105,6 +105,8 @@ tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, args.tap_flags |= TAP_FLAG_PACKED; else if (unformat (line_input, "in-order")) args.tap_flags |= TAP_FLAG_IN_ORDER; + else if (unformat (line_input, "consistent-qp")) + args.tap_flags |= TAP_FLAG_CONSISTENT_QP; else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address, args.mac_addr.bytes)) args.mac_addr_set = 1; diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c index b0b0a3af13f..bb91200a525 100644 --- a/src/vnet/devices/tap/tap.c +++ b/src/vnet/devices/tap/tap.c @@ -212,6 +212,9 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) } } + if (args->tap_flags & TAP_FLAG_CONSISTENT_QP) + vif->consistent_qp = 1; + /* if namespace is specified, all further netlink messages should be executed * after we change our net namespace */ if (args->host_namespace) diff --git a/src/vnet/devices/tap/tap.h b/src/vnet/devices/tap/tap.h index 1df2fb7e1ad..66f5576c5be 100644 --- a/src/vnet/devices/tap/tap.h +++ b/src/vnet/devices/tap/tap.h @@ -22,15 +22,16 @@ #define MIN(x,y) (((x)<(y))?(x):(y)) #endif -#define foreach_tapv2_flags \ - _ (GSO, 0) \ - _ (CSUM_OFFLOAD, 1) \ - _ (PERSIST, 2) \ - _ (ATTACH, 3) \ - _ (TUN, 4) \ - _ (GRO_COALESCE, 5) \ - _ (PACKED, 6) \ - _ (IN_ORDER, 7) +#define foreach_tapv2_flags \ + _ (GSO, 0) \ + _ (CSUM_OFFLOAD, 1) \ + _ (PERSIST, 2) \ + _ (ATTACH, 3) \ + _ (TUN, 4) \ + _ (GRO_COALESCE, 5) \ + _ (PACKED, 6) \ + _ (IN_ORDER, 7) \ + _ (CONSISTENT_QP, 8) typedef enum { diff --git a/src/vnet/devices/virtio/cli.c b/src/vnet/devices/virtio/cli.c index c4364600722..34c74ac91ac 100644 --- a/src/vnet/devices/virtio/cli.c +++ b/src/vnet/devices/virtio/cli.c @@ -64,6 +64,8 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input, args.bind = VIRTIO_BIND_DEFAULT; else if (unformat (line_input, "rss-enabled")) args.rss_enabled = 1; + else if (unformat (line_input, "consistent-qp")) + args.virtio_flags |= VIRTIO_FLAG_CONSISTENT_QP; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c index 140cdb94153..9562bdc6369 100644 --- a/src/vnet/devices/virtio/pci.c +++ b/src/vnet/devices/virtio/pci.c @@ -1418,9 +1418,10 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) if (args->virtio_flags & VIRTIO_FLAG_PACKED) vif->is_packed = 1; - if ((error = - vlib_pci_device_open (vm, (vlib_pci_addr_t *) & vif->pci_addr, - virtio_pci_device_ids, &h))) + if (args->virtio_flags & VIRTIO_FLAG_CONSISTENT_QP) + vif->consistent_qp = 1; + if ((error = 
vlib_pci_device_open (vm, (vlib_pci_addr_t *) &vif->pci_addr, + virtio_pci_device_ids, &h))) { args->rv = VNET_API_ERROR_INVALID_INTERFACE; args->error = diff --git a/src/vnet/devices/virtio/pci.h b/src/vnet/devices/virtio/pci.h index 745ad6fce87..59778533316 100644 --- a/src/vnet/devices/virtio/pci.h +++ b/src/vnet/devices/virtio/pci.h @@ -283,7 +283,8 @@ typedef struct _virtio_pci_func _ (PACKED, 3) \ _ (IN_ORDER, 4) \ _ (BUFFERING, 5) \ - _ (RSS, 6) + _ (RSS, 6) \ + _ (CONSISTENT_QP, 7) typedef enum { diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c index 840936a43ff..682ec32ceff 100644 --- a/src/vnet/devices/virtio/virtio.c +++ b/src/vnet/devices/virtio/virtio.c @@ -294,6 +294,8 @@ virtio_vring_set_tx_queues (vlib_main_t *vm, virtio_if_t *vif) { vnet_main_t *vnm = vnet_get_main (); vnet_virtio_vring_t *vring; + uword n_threads = vlib_get_n_threads (); + u8 consistent = vif->consistent_qp; vec_foreach (vring, vif->txq_vrings) { @@ -308,10 +310,11 @@ virtio_vring_set_tx_queues (vlib_main_t *vm, virtio_if_t *vif) return; } - for (u32 j = 0; j < vlib_get_n_threads (); j++) + for (u32 j = 0; j < n_threads; j++) { u32 qi = vif->txq_vrings[j % vif->num_txqs].queue_index; - vnet_hw_if_tx_queue_assign_thread (vnm, qi, j); + vnet_hw_if_tx_queue_assign_thread (vnm, qi, + (j + consistent) % n_threads); } vnet_hw_if_update_runtime_data (vnm, vif->hw_if_index); diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h index a8e258884a4..fb72051ce45 100644 --- a/src/vnet/devices/virtio/virtio.h +++ b/src/vnet/devices/virtio/virtio.h @@ -213,6 +213,7 @@ typedef struct }; const virtio_pci_func_t *virtio_pci_func; int is_packed; + u8 consistent_qp : 1; } virtio_if_t; typedef struct diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h index 57c2b6ff78b..d356fd5411c 100644 --- a/src/vnet/ip/ip4_to_ip6.h +++ b/src/vnet/ip/ip4_to_ip6.h @@ -46,10 +46,9 @@ static u8 icmp_to_icmp6_updater_pointer_table[] = * @returns Port number on success, 0 otherwise. */ always_inline u16 -ip4_get_port (ip4_header_t * ip, u8 sender) +ip4_get_port (ip4_header_t *ip, u8 sender) { - if (ip->ip_version_and_header_length != 0x45 || - ip4_get_fragment_offset (ip)) + if (ip->ip_version_and_header_length != 0x45 || ip4_get_fragment_offset (ip)) return 0; if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || @@ -65,7 +64,15 @@ ip4_get_port (ip4_header_t * ip, u8 sender) { return *((u16 *) (icmp + 1)); } - else if (clib_net_to_host_u16 (ip->length) >= 64) + /* + * Minimum length here consists of: + * - outer IP header length + * - outer ICMP header length (2*sizeof (icmp46_header_t)) + * - inner IP header length + * - first 8 bytes of payload of original packet in case of ICMP error + */ + else if (clib_net_to_host_u16 (ip->length) >= + 2 * sizeof (ip4_header_t) + 2 * sizeof (icmp46_header_t) + 8) { ip = (ip4_header_t *) (icmp + 2); if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h index 29d5718d4da..ebabcd0b797 100644 --- a/src/vnet/ip/ip6_to_ip4.h +++ b/src/vnet/ip/ip6_to_ip4.h @@ -96,10 +96,10 @@ ip6_parse (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip6, u32 buff_len, * @returns 1 on success, 0 otherwise. 
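 * Note: the new l4_hdr out-parameter, when non-NULL, is set to point at the
 * start of the L4 header that was located (see the `*l4_hdr = l4' assignment
 * added below).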
*/ always_inline u16 -ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6, - u16 buffer_len, u8 * ip_protocol, u16 * src_port, - u16 * dst_port, u8 * icmp_type_or_tcp_flags, - u32 * tcp_ack_number, u32 * tcp_seq_number) +ip6_get_port (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip6, + u16 buffer_len, u8 *ip_protocol, u16 *src_port, u16 *dst_port, + u8 *icmp_type_or_tcp_flags, u32 *tcp_ack_number, + u32 *tcp_seq_number, void **l4_hdr) { u8 l4_protocol; u16 l4_offset; @@ -120,8 +120,19 @@ ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6, *ip_protocol = l4_protocol; } l4 = u8_ptr_add (ip6, l4_offset); + if (l4_hdr) + *l4_hdr = l4; if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP) { + if ((IP_PROTOCOL_UDP == l4_protocol && + u8_ptr_add (l4, sizeof (udp_header_t)) > + u8_ptr_add (vlib_buffer_get_current (b), b->current_length)) || + (IP_PROTOCOL_TCP == l4_protocol && + u8_ptr_add (l4, sizeof (tcp_header_t)) > + u8_ptr_add (vlib_buffer_get_current (b), b->current_length))) + { + return 0; + } if (src_port) *src_port = ((udp_header_t *) (l4))->src_port; if (dst_port) @@ -135,6 +146,11 @@ ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6, } else if (l4_protocol == IP_PROTOCOL_ICMP6) { + if (u8_ptr_add (l4, sizeof (icmp46_header_t)) > + u8_ptr_add (vlib_buffer_get_current (b), b->current_length)) + { + return 0; + } icmp46_header_t *icmp = (icmp46_header_t *) (l4); if (icmp_type_or_tcp_flags) *icmp_type_or_tcp_flags = ((icmp46_header_t *) (l4))->type; diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index 5ced88fec2e..1f025fa1113 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -1924,7 +1924,7 @@ vl_api_ip_local_reass_get_t_handler (vl_api_ip_local_reass_get_t *mp) { vl_api_ip_local_reass_get_reply_t *rmp; int rv = 0; - REPLY_MACRO2 (VL_API_IP_LOCAL_REASS_GET, { + REPLY_MACRO2 (VL_API_IP_LOCAL_REASS_GET_REPLY, { rmp->ip4_is_enabled = ip4_local_full_reass_enabled (); rmp->ip6_is_enabled = ip6_local_full_reass_enabled (); }); diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c index 7c3c2fff217..50b4b22eb60 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.c +++ b/src/vnet/ip/reass/ip4_sv_reass.c @@ -28,12 +28,13 @@ #include <vppinfra/bihash_16_8.h> #include <vnet/ip/reass/ip4_sv_reass.h> -#define MSEC_PER_SEC 1000 +#define MSEC_PER_SEC 1000 #define IP4_SV_REASS_TIMEOUT_DEFAULT_MS 100 -#define IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default -#define IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024 +#define IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS \ + 10000 // 10 seconds default +#define IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024 #define IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3 -#define IP4_SV_REASS_HT_LOAD_FACTOR (0.75) +#define IP4_SV_REASS_HT_LOAD_FACTOR (0.75) typedef enum { @@ -94,17 +95,23 @@ typedef struct // buffer indexes of buffers in this reassembly in chronological order - // including overlaps and duplicate fragments u32 *cached_buffers; - // set to true when this reassembly is completed - bool is_complete; - // ip protocol + + bool first_fragment_seen; + bool last_fragment_seen; + + // vnet_buffer data u8 ip_proto; u8 icmp_type_or_tcp_flags; u32 tcp_ack_number; u32 tcp_seq_number; - // l4 src port u16 l4_src_port; - // l4 dst port u16 l4_dst_port; + + // vnet_buffer2 data + u32 total_ip_payload_length; + u32 first_fragment_total_ip_header_length; + u32 first_fragment_clone_bi; + u32 next_index; // lru indexes u32 lru_prev; @@ -114,13 
+121,11 @@ typedef struct typedef struct { ip4_sv_reass_t *pool; - u32 reass_n; u32 id_counter; clib_spinlock_t lock; // lru indexes u32 lru_first; u32 lru_last; - } ip4_sv_reass_per_thread_t; typedef struct @@ -143,13 +148,12 @@ typedef struct vlib_main_t *vlib_main; vnet_main_t *vnet_main; - // node index of ip4-drop node - u32 ip4_drop_idx; u32 ip4_sv_reass_expire_node_idx; /** Worker handoff */ u32 fq_index; u32 fq_feature_index; + u32 fq_output_feature_index; u32 fq_custom_context_index; // reference count for enabling/disabling feature - per interface @@ -158,6 +162,8 @@ typedef struct // reference count for enabling/disabling feature - per interface u32 *output_feature_use_refcount_per_intf; + // extended reassembly refcount - see ip4_sv_reass_enable_disable_extended() + u32 extended_refcount; } ip4_sv_reass_main_t; extern ip4_sv_reass_main_t ip4_sv_reass_main; @@ -177,9 +183,15 @@ typedef enum typedef enum { REASS_FRAGMENT_CACHE, - REASS_FINISH, + REASS_FIRST_FRAG, + REASS_LAST_FRAG, REASS_FRAGMENT_FORWARD, REASS_PASSTHROUGH, + REASS_HANDOFF, + REASS_KEY, + REASS_FREE_TIMEOUT, + REASS_FREE_LRU, + REASS_FREE_ERROR, } ip4_sv_reass_trace_operation_e; typedef struct @@ -190,19 +202,23 @@ typedef struct u8 ip_proto; u16 l4_src_port; u16 l4_dst_port; - int l4_layer_truncated; + int l4_hdr_truncated; + u32 handoff_thread_index; + clib_bihash_kv_16_8_t kv; } ip4_sv_reass_trace_t; extern vlib_node_registration_t ip4_sv_reass_node; extern vlib_node_registration_t ip4_sv_reass_node_feature; static u8 * -format_ip4_sv_reass_trace (u8 * s, va_list * args) +format_ip4_sv_reass_trace (u8 *s, va_list *args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_sv_reass_trace_t *t = va_arg (*args, ip4_sv_reass_trace_t *); - if (REASS_PASSTHROUGH != t->action) + if (REASS_PASSTHROUGH != t->action && REASS_HANDOFF != t->action && + REASS_KEY != t->action && REASS_FREE_TIMEOUT != t->action && + REASS_FREE_LRU != t->action && REASS_FREE_ERROR != t->action) { s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id); } @@ -211,25 +227,42 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args) case REASS_FRAGMENT_CACHE: s = format (s, "[cached]"); break; - case REASS_FINISH: + case REASS_FIRST_FRAG: s = - format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]", + format (s, "[first-frag-seen, ip proto=%u, src_port=%u, dst_port=%u]", t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), clib_net_to_host_u16 (t->l4_dst_port)); break; + case REASS_LAST_FRAG: + s = format (s, "[last-frag-seen]"); + break; + case REASS_HANDOFF: + s = format (s, "[handoff, thread index: %u]", t->handoff_thread_index); + break; + case REASS_KEY: + s = format (s, "[lookup, key: %U]", format_bihash_kvp_16_8, &t->kv); + break; + case REASS_FREE_LRU: + s = format (s, "[free, LRU pressure]"); + break; + case REASS_FREE_TIMEOUT: + s = format (s, "[free, timed out]"); + break; + case REASS_FREE_ERROR: + s = format (s, "[free, error occurred]"); + break; case REASS_FRAGMENT_FORWARD: - s = - format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]", - t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), - clib_net_to_host_u16 (t->l4_dst_port)); + s = format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]", + t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), + clib_net_to_host_u16 (t->l4_dst_port)); break; case REASS_PASSTHROUGH: s = format (s, "[not-fragmented]"); break; } - if (t->l4_layer_truncated) + if (t->l4_hdr_truncated) { - s = 
format (s, " [l4-layer-truncated]"); + s = format (s, " [l4-hdr-truncated]"); } return s; } @@ -238,12 +271,12 @@ static void ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, ip4_sv_reass_t *reass, u32 bi, ip4_sv_reass_trace_operation_e action, u32 ip_proto, - u16 l4_src_port, u16 l4_dst_port, - int l4_layer_truncated) + u16 l4_src_port, u16 l4_dst_port, int l4_hdr_truncated, + u32 handoff_thread_index) { vlib_buffer_t *b = vlib_get_buffer (vm, bi); - if (pool_is_free_index - (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b))) + if (pool_is_free_index (vm->trace_main.trace_buffer_pool, + vlib_buffer_get_trace_index (b))) { // this buffer's trace is gone b->flags &= ~VLIB_BUFFER_IS_TRACED; @@ -260,7 +293,8 @@ ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, t->ip_proto = ip_proto; t->l4_src_port = l4_src_port; t->l4_dst_port = l4_dst_port; - t->l4_layer_truncated = l4_layer_truncated; + t->l4_hdr_truncated = l4_hdr_truncated; + t->handoff_thread_index = handoff_thread_index; #if 0 static u8 *s = NULL; s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t); @@ -270,29 +304,56 @@ ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, #endif } +static void +ip4_sv_reass_trace_timeout (vlib_main_t *vm, vlib_node_runtime_t *node, + ip4_sv_reass_t *reass, u32 bi) +{ + return ip4_sv_reass_add_trace (vm, node, reass, bi, REASS_FREE_TIMEOUT, ~0, + ~0, ~0, 0, ~0); +} + +static void +ip4_sv_reass_trace_lru_free (vlib_main_t *vm, vlib_node_runtime_t *node, + ip4_sv_reass_t *reass, u32 bi) +{ + return ip4_sv_reass_add_trace (vm, node, reass, bi, REASS_FREE_LRU, ~0, ~0, + ~0, 0, ~0); +} + +static void +ip4_sv_reass_trace_error_free (vlib_main_t *vm, vlib_node_runtime_t *node, + ip4_sv_reass_t *reass, u32 bi) +{ + return ip4_sv_reass_add_trace (vm, node, reass, bi, REASS_FREE_ERROR, ~0, ~0, + ~0, 0, ~0); +} always_inline void -ip4_sv_reass_free (vlib_main_t * vm, ip4_sv_reass_main_t * rm, - ip4_sv_reass_per_thread_t * rt, ip4_sv_reass_t * reass) +ip4_sv_reass_free (vlib_main_t *vm, ip4_sv_reass_main_t *rm, + ip4_sv_reass_per_thread_t *rt, ip4_sv_reass_t *reass, + bool del_bihash) { - clib_bihash_kv_16_8_t kv; - kv.key[0] = reass->key.as_u64[0]; - kv.key[1] = reass->key.as_u64[1]; - clib_bihash_add_del_16_8 (&rm->hash, &kv, 0); + if (del_bihash) + { + clib_bihash_kv_16_8_t kv; + kv.key[0] = reass->key.as_u64[0]; + kv.key[1] = reass->key.as_u64[1]; + clib_bihash_add_del_16_8 (&rm->hash, &kv, 0); + } vlib_buffer_free (vm, reass->cached_buffers, vec_len (reass->cached_buffers)); vec_free (reass->cached_buffers); reass->cached_buffers = NULL; + if (~0 != reass->first_fragment_clone_bi) + vlib_buffer_free_one (vm, reass->first_fragment_clone_bi); if (~0 != reass->lru_prev) { - ip4_sv_reass_t *lru_prev = - pool_elt_at_index (rt->pool, reass->lru_prev); + ip4_sv_reass_t *lru_prev = pool_elt_at_index (rt->pool, reass->lru_prev); lru_prev->lru_next = reass->lru_next; } if (~0 != reass->lru_next) { - ip4_sv_reass_t *lru_next = - pool_elt_at_index (rt->pool, reass->lru_next); + ip4_sv_reass_t *lru_next = pool_elt_at_index (rt->pool, reass->lru_next); lru_next->lru_prev = reass->lru_prev; } if (rt->lru_first == reass - rt->pool) @@ -304,20 +365,13 @@ ip4_sv_reass_free (vlib_main_t * vm, ip4_sv_reass_main_t * rm, rt->lru_last = reass->lru_prev; } pool_put (rt->pool, reass); - --rt->reass_n; -} - -always_inline void -ip4_sv_reass_init (ip4_sv_reass_t * reass) -{ - reass->cached_buffers = NULL; - reass->is_complete = false; } always_inline 
ip4_sv_reass_t * -ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm, - ip4_sv_reass_per_thread_t * rt, - ip4_sv_reass_kv_t * kv, u8 * do_handoff) +ip4_sv_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi, ip4_sv_reass_main_t *rm, + ip4_sv_reass_per_thread_t *rt, + ip4_sv_reass_kv_t *kv, u8 *do_handoff) { ip4_sv_reass_t *reass = NULL; f64 now = vlib_time_now (vm); @@ -335,7 +389,8 @@ again: if (now > reass->last_heard + rm->timeout) { - ip4_sv_reass_free (vm, rm, rt, reass); + ip4_sv_reass_trace_timeout (vm, node, reass, bi); + ip4_sv_reass_free (vm, rm, rt, reass, true); reass = NULL; } } @@ -346,18 +401,17 @@ again: return reass; } - if (rt->reass_n >= rm->max_reass_n && rm->max_reass_n) + if (pool_elts (rt->pool) >= rm->max_reass_n && rm->max_reass_n) { reass = pool_elt_at_index (rt->pool, rt->lru_first); - ip4_sv_reass_free (vm, rm, rt, reass); + ip4_sv_reass_trace_lru_free (vm, node, reass, bi); + ip4_sv_reass_free (vm, rm, rt, reass, true); } - pool_get (rt->pool, reass); - clib_memset (reass, 0, sizeof (*reass)); + pool_get_zero (rt->pool, reass); + reass->first_fragment_clone_bi = ~0; reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter; ++rt->id_counter; - ip4_sv_reass_init (reass); - ++rt->reass_n; reass->lru_prev = reass->lru_next = ~0; if (~0 != rt->lru_last) @@ -381,7 +435,7 @@ again: int rv = clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 2); if (rv) { - ip4_sv_reass_free (vm, rm, rt, reass); + ip4_sv_reass_free (vm, rm, rt, reass, false); reass = NULL; // if other worker created a context already work with the other copy if (-2 == rv) @@ -391,10 +445,23 @@ again: return reass; } +always_inline bool +ip4_sv_reass_is_complete (ip4_sv_reass_t *reass, bool extended) +{ + /* + * Both first and last fragments have to be seen for extended reassembly to + * be complete. Otherwise first fragment is enough. 
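 * The last fragment is the one with the more-fragments bit clear; it is
 * needed to compute total_ip_payload_length (offset of the last fragment
 * plus its payload length), which is recorded on the reassembly context for
 * extended consumers.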
+ */ + if (extended) + return reass->first_fragment_seen && reass->last_fragment_seen; + + return reass->first_fragment_seen; +} + always_inline ip4_sv_reass_rc_t ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, ip4_sv_reass_main_t *rm, ip4_header_t *ip0, - ip4_sv_reass_t *reass, u32 bi0) + ip4_sv_reass_t *reass, u32 bi0, bool extended) { vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK; @@ -408,33 +475,59 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, return IP4_SV_REASS_RC_UNSUPP_IP_PROTO; if (IP_PROTOCOL_TCP == reass->ip_proto) { - reass->icmp_type_or_tcp_flags = ((tcp_header_t *) (ip0 + 1))->flags; - reass->tcp_ack_number = ((tcp_header_t *) (ip0 + 1))->ack_number; - reass->tcp_seq_number = ((tcp_header_t *) (ip0 + 1))->seq_number; + tcp_header_t *th = ip4_next_header (ip0); + reass->icmp_type_or_tcp_flags = th->flags; + reass->tcp_ack_number = th->ack_number; + reass->tcp_seq_number = th->seq_number; } else if (IP_PROTOCOL_ICMP == reass->ip_proto) { reass->icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip0 + 1))->type; + ((icmp46_header_t *) (ip4_next_header (ip0)))->type; + } + reass->first_fragment_seen = true; + if (extended) + { + reass->first_fragment_total_ip_header_length = + ip4_header_bytes (ip0); + vlib_buffer_t *clone = vlib_buffer_copy_no_chain ( + vm, b0, &reass->first_fragment_clone_bi); + if (!clone) + reass->first_fragment_clone_bi = ~0; } - reass->is_complete = true; vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip4_sv_reass_add_trace ( - vm, node, reass, bi0, REASS_FINISH, reass->ip_proto, + vm, node, reass, bi0, REASS_FIRST_FRAG, reass->ip_proto, reass->l4_src_port, reass->l4_dst_port, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); } } + if (!ip4_get_fragment_more (ip0)) + { + const u32 fragment_length = + clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0); + reass->last_fragment_seen = true; + reass->total_ip_payload_length = fragment_first + fragment_length; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip4_sv_reass_add_trace ( + vm, node, reass, bi0, REASS_LAST_FRAG, ~0, ~0, ~0, + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); + } + } + vec_add1 (reass->cached_buffers, bi0); - if (!reass->is_complete) + + if (!ip4_sv_reass_is_complete (reass, extended)) { if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip4_sv_reass_add_trace ( vm, node, reass, bi0, REASS_FRAGMENT_CACHE, ~0, ~0, ~0, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); } if (vec_len (reass->cached_buffers) > rm->max_reass_len) { @@ -445,30 +538,63 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, } always_inline int -l4_layer_truncated (ip4_header_t *ip) +l4_hdr_truncated (ip4_header_t *ip) { - static const int l4_layer_length[256] = { - [IP_PROTOCOL_TCP] = sizeof (tcp_header_t), - [IP_PROTOCOL_UDP] = sizeof (udp_header_t), - [IP_PROTOCOL_ICMP] = sizeof (icmp46_header_t), - }; + if (IP_PROTOCOL_UDP == ip->protocol) + return ((u8 *) ip + ip4_header_bytes (ip) + sizeof (udp_header_t) > + (u8 *) ip + clib_net_to_host_u16 (ip->length)); + if (IP_PROTOCOL_ICMP == ip->protocol) + return ((u8 *) ip + ip4_header_bytes (ip) + sizeof (icmp46_header_t) > + (u8 *) ip + clib_net_to_host_u16 (ip->length)); + + if (IP_PROTOCOL_TCP != ip->protocol) + return false; + + 
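  /*
   * For TCP the data offset is honoured, so options count towards the
   * required length. For instance, a TCP header with data offset 8 needs 32
   * bytes; with a 20-byte IP header any total IP length below 52 bytes is
   * flagged as truncated, so a 44-byte first fragment fails the check.
   */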
tcp_header_t *th = ip4_next_header (ip); + const u32 tcp_opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t); - return ((u8 *) ip + ip4_header_bytes (ip) + l4_layer_length[ip->protocol] > + return ((u8 *) ip + ip4_header_bytes (ip) + sizeof (tcp_header_t) + + tcp_opts_len > (u8 *) ip + clib_net_to_host_u16 (ip->length)); } +always_inline void +ip4_sv_reass_reset_vnet_buffer2 (vlib_buffer_t *b) +{ + vnet_buffer2 (b)->ip.reass.pool_index = ~0; + vnet_buffer2 (b)->ip.reass.thread_index = ~0; + vnet_buffer2 (b)->ip.reass.id = ~0; +} + +always_inline void +ip4_sv_reass_set_vnet_buffer2_from_reass (vlib_main_t *vm, vlib_buffer_t *b, + ip4_sv_reass_t *reass) +{ + vnet_buffer2 (b)->ip.reass.thread_index = vm->thread_index; + vnet_buffer2 (b)->ip.reass.id = reass->id; + vnet_buffer2 (b)->ip.reass.pool_index = + reass - ip4_sv_reass_main.per_thread_data[vm->thread_index].pool; +} + +struct ip4_sv_reass_args +{ + bool is_feature; + bool is_output_feature; + bool is_custom; + bool with_custom_context; + bool extended; +}; + always_inline uword ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *frame, bool is_feature, - bool is_output_feature, bool is_custom, - bool with_custom_context) + vlib_frame_t *frame, struct ip4_sv_reass_args a) { u32 *from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index; ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index]; u32 *context; - if (with_custom_context) + if (a.with_custom_context) context = vlib_frame_aux_args (frame); clib_spinlock_lock (&rt->lock); @@ -506,20 +632,18 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, clib_prefetch_load (p3->data); } - ip4_header_t *ip0 = - (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0), - (is_output_feature ? 1 : 0) * - vnet_buffer (b0)-> - ip.save_rewrite_length); - ip4_header_t *ip1 = - (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b1), - (is_output_feature ? 1 : 0) * - vnet_buffer (b1)-> - ip.save_rewrite_length); - - if (PREDICT_FALSE - (ip4_get_fragment_more (ip0) || ip4_get_fragment_offset (ip0)) - || (ip4_get_fragment_more (ip1) || ip4_get_fragment_offset (ip1))) + ip4_header_t *ip0 = (ip4_header_t *) u8_ptr_add ( + vlib_buffer_get_current (b0), + (ptrdiff_t) (a.is_output_feature ? 1 : 0) * + vnet_buffer (b0)->ip.save_rewrite_length); + ip4_header_t *ip1 = (ip4_header_t *) u8_ptr_add ( + vlib_buffer_get_current (b1), + (ptrdiff_t) (a.is_output_feature ? 1 : 0) * + vnet_buffer (b1)->ip.save_rewrite_length); + + if (PREDICT_FALSE (ip4_get_fragment_more (ip0) || + ip4_get_fragment_offset (ip0)) || + (ip4_get_fragment_more (ip1) || ip4_get_fragment_offset (ip1))) { // fragment found, go slow path b -= 2; @@ -530,39 +654,41 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } goto slow_path; } - if (is_feature) + if (a.is_feature) { vnet_feature_next (&next0, b0); } else { - next0 = is_custom ? vnet_buffer (b0)->ip.reass.next_index : - IP4_SV_REASSEMBLY_NEXT_INPUT; + next0 = a.is_custom ? 
vnet_buffer (b0)->ip.reass.next_index : + IP4_SV_REASSEMBLY_NEXT_INPUT; } vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; - if (l4_layer_truncated (ip0)) + + if (a.extended) + ip4_sv_reass_reset_vnet_buffer2 (b0); + + if (l4_hdr_truncated (ip0)) { - vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1; - vnet_buffer (b0)->ip.reass.l4_src_port = 0; - vnet_buffer (b0)->ip.reass.l4_dst_port = 0; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 1; } else { - vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0; if (IP_PROTOCOL_TCP == ip0->protocol) { vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((tcp_header_t *) (ip0 + 1))->flags; + ((tcp_header_t *) (ip4_next_header (ip0)))->flags; vnet_buffer (b0)->ip.reass.tcp_ack_number = - ((tcp_header_t *) (ip0 + 1))->ack_number; + ((tcp_header_t *) (ip4_next_header (ip0)))->ack_number; vnet_buffer (b0)->ip.reass.tcp_seq_number = - ((tcp_header_t *) (ip0 + 1))->seq_number; + ((tcp_header_t *) (ip4_next_header (ip0)))->seq_number; } else if (IP_PROTOCOL_ICMP == ip0->protocol) { vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip0 + 1))->type; + ((icmp46_header_t *) (ip4_next_header (ip0)))->type; } vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0); @@ -574,41 +700,43 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_buffer (b0)->ip.reass.ip_proto, vnet_buffer (b0)->ip.reass.l4_src_port, vnet_buffer (b0)->ip.reass.l4_dst_port, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); } - if (is_feature) + if (a.is_feature) { vnet_feature_next (&next1, b1); } else { - next1 = is_custom ? vnet_buffer (b1)->ip.reass.next_index : - IP4_SV_REASSEMBLY_NEXT_INPUT; + next1 = a.is_custom ? 
vnet_buffer (b1)->ip.reass.next_index : + IP4_SV_REASSEMBLY_NEXT_INPUT; } vnet_buffer (b1)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b1)->ip.reass.ip_proto = ip1->protocol; - if (l4_layer_truncated (ip1)) + + if (a.extended) + ip4_sv_reass_reset_vnet_buffer2 (b1); + + if (l4_hdr_truncated (ip1)) { - vnet_buffer (b1)->ip.reass.l4_layer_truncated = 1; - vnet_buffer (b1)->ip.reass.l4_src_port = 0; - vnet_buffer (b1)->ip.reass.l4_dst_port = 0; + vnet_buffer (b1)->ip.reass.l4_hdr_truncated = 1; } else { - vnet_buffer (b1)->ip.reass.l4_layer_truncated = 0; + vnet_buffer (b1)->ip.reass.l4_hdr_truncated = 0; if (IP_PROTOCOL_TCP == ip1->protocol) { vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags = - ((tcp_header_t *) (ip1 + 1))->flags; + ((tcp_header_t *) (ip4_next_header (ip1)))->flags; vnet_buffer (b1)->ip.reass.tcp_ack_number = - ((tcp_header_t *) (ip1 + 1))->ack_number; + ((tcp_header_t *) (ip4_next_header (ip1)))->ack_number; vnet_buffer (b1)->ip.reass.tcp_seq_number = - ((tcp_header_t *) (ip1 + 1))->seq_number; + ((tcp_header_t *) (ip4_next_header (ip1)))->seq_number; } else if (IP_PROTOCOL_ICMP == ip1->protocol) { vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip1 + 1))->type; + ((icmp46_header_t *) (ip4_next_header (ip1)))->type; } vnet_buffer (b1)->ip.reass.l4_src_port = ip4_get_port (ip1, 1); vnet_buffer (b1)->ip.reass.l4_dst_port = ip4_get_port (ip1, 0); @@ -620,14 +748,14 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_buffer (b1)->ip.reass.ip_proto, vnet_buffer (b1)->ip.reass.l4_src_port, vnet_buffer (b1)->ip.reass.l4_dst_port, - vnet_buffer (b1)->ip.reass.l4_layer_truncated); + vnet_buffer (b1)->ip.reass.l4_hdr_truncated, ~0); } n_left_from -= 2; next[0] = next0; next[1] = next1; next += 2; - if (with_custom_context) + if (a.with_custom_context) context += 2; } @@ -638,13 +766,12 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, b0 = *b; b++; - ip4_header_t *ip0 = - (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0), - (is_output_feature ? 1 : 0) * - vnet_buffer (b0)-> - ip.save_rewrite_length); - if (PREDICT_FALSE - (ip4_get_fragment_more (ip0) || ip4_get_fragment_offset (ip0))) + ip4_header_t *ip0 = (ip4_header_t *) u8_ptr_add ( + vlib_buffer_get_current (b0), + (ptrdiff_t) (a.is_output_feature ? 1 : 0) * + vnet_buffer (b0)->ip.save_rewrite_length); + if (PREDICT_FALSE (ip4_get_fragment_more (ip0) || + ip4_get_fragment_offset (ip0))) { // fragment found, go slow path b -= 1; @@ -655,38 +782,41 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } goto slow_path; } - if (is_feature) + if (a.is_feature) { vnet_feature_next (&next0, b0); } else { - next0 = - is_custom ? vnet_buffer (b0)->ip. - reass.next_index : IP4_SV_REASSEMBLY_NEXT_INPUT; + next0 = a.is_custom ? 
vnet_buffer (b0)->ip.reass.next_index : + IP4_SV_REASSEMBLY_NEXT_INPUT; } vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; - if (l4_layer_truncated (ip0)) + + if (a.extended) + ip4_sv_reass_reset_vnet_buffer2 (b0); + + if (l4_hdr_truncated (ip0)) { - vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 1; } else { - vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0; if (IP_PROTOCOL_TCP == ip0->protocol) { vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((tcp_header_t *) (ip0 + 1))->flags; + ((tcp_header_t *) (ip4_next_header (ip0)))->flags; vnet_buffer (b0)->ip.reass.tcp_ack_number = - ((tcp_header_t *) (ip0 + 1))->ack_number; + ((tcp_header_t *) (ip4_next_header (ip0)))->ack_number; vnet_buffer (b0)->ip.reass.tcp_seq_number = - ((tcp_header_t *) (ip0 + 1))->seq_number; + ((tcp_header_t *) (ip4_next_header (ip0)))->seq_number; } else if (IP_PROTOCOL_ICMP == ip0->protocol) { vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip0 + 1))->type; + ((icmp46_header_t *) (ip4_next_header (ip0)))->type; } vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0); @@ -698,13 +828,13 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_buffer (b0)->ip.reass.ip_proto, vnet_buffer (b0)->ip.reass.l4_src_port, vnet_buffer (b0)->ip.reass.l4_dst_port, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); } n_left_from -= 1; next[0] = next0; next += 1; - if (with_custom_context) + if (a.with_custom_context) context += 1; } @@ -719,7 +849,7 @@ slow_path: while (n_left_from > 0) { - if (with_custom_context) + if (a.with_custom_context) vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next, to_next_aux, n_left_to_next); else @@ -736,15 +866,14 @@ slow_path: bi0 = from[0]; b0 = vlib_get_buffer (vm, bi0); - ip4_header_t *ip0 = - (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0), - (is_output_feature ? 1 : 0) * - vnet_buffer (b0)-> - ip.save_rewrite_length); + ip4_header_t *ip0 = (ip4_header_t *) u8_ptr_add ( + vlib_buffer_get_current (b0), + (ptrdiff_t) (a.is_output_feature ? 
1 : 0) * + vnet_buffer (b0)->ip.save_rewrite_length); if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0)) { // this is a regular packet - no fragmentation - if (is_custom) + if (a.is_custom) { next0 = vnet_buffer (b0)->ip.reass.next_index; } @@ -754,28 +883,28 @@ slow_path: } vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; - if (l4_layer_truncated (ip0)) + if (l4_hdr_truncated (ip0)) { - vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 1; vnet_buffer (b0)->ip.reass.l4_src_port = 0; vnet_buffer (b0)->ip.reass.l4_dst_port = 0; } else { - vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0; if (IP_PROTOCOL_TCP == ip0->protocol) { vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((tcp_header_t *) (ip0 + 1))->flags; + ((tcp_header_t *) (ip4_next_header (ip0)))->flags; vnet_buffer (b0)->ip.reass.tcp_ack_number = - ((tcp_header_t *) (ip0 + 1))->ack_number; + ((tcp_header_t *) (ip4_next_header (ip0)))->ack_number; vnet_buffer (b0)->ip.reass.tcp_seq_number = - ((tcp_header_t *) (ip0 + 1))->seq_number; + ((tcp_header_t *) (ip4_next_header (ip0)))->seq_number; } else if (IP_PROTOCOL_ICMP == ip0->protocol) { vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip0 + 1))->type; + ((icmp46_header_t *) (ip4_next_header (ip0)))->type; } vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); @@ -789,7 +918,7 @@ slow_path: vnet_buffer (b0)->ip.reass.ip_proto, vnet_buffer (b0)->ip.reass.l4_src_port, vnet_buffer (b0)->ip.reass.l4_dst_port, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); } goto packet_enqueue; } @@ -797,7 +926,11 @@ slow_path: const u32 fragment_length = clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0); const u32 fragment_last = fragment_first + fragment_length - 1; - if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0))) // 8 is minimum frag length per RFC 791 + if (fragment_first > fragment_last || + fragment_first + fragment_length > UINT16_MAX - 20 || + (fragment_length < 8 && + ip4_get_fragment_more ( + ip0))) // 8 is minimum frag length per RFC 791 { next0 = IP4_SV_REASSEMBLY_NEXT_DROP; error0 = IP4_ERROR_REASS_MALFORMED_PACKET; @@ -807,7 +940,7 @@ slow_path: ip4_sv_reass_kv_t kv; u8 do_handoff = 0; - if (with_custom_context) + if (a.with_custom_context) kv.k.as_u64[0] = (u64) *context | (u64) ip0->src_address.as_u32 << 32; else @@ -819,15 +952,29 @@ slow_path: (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48; - ip4_sv_reass_t *reass = - ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff); + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip4_sv_reass_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (t[0])); + t->action = REASS_KEY; + STATIC_ASSERT_SIZEOF (t->kv, sizeof (kv)); + clib_memcpy (&t->kv, &kv, sizeof (kv)); + } + + ip4_sv_reass_t *reass = ip4_sv_reass_find_or_create ( + vm, node, bi0, rm, rt, &kv, &do_handoff); if (PREDICT_FALSE (do_handoff)) { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip4_sv_reass_add_trace (vm, node, reass, bi0, REASS_HANDOFF, + ~0, ~0, ~0, 0, kv.v.thread_index); + } next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF; vnet_buffer (b0)->ip.reass.owner_thread_index = kv.v.thread_index; - if (with_custom_context) + if (a.with_custom_context) forward_context = 1; 
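	      /*
	       * Handoff path: the bihash lookup on the key built above
	       * (source address, fragment id and protocol, plus the custom
	       * context when one is carried) returns the thread that owns
	       * this reassembly. If that is not the current worker,
	       * do_handoff is set and the buffer is redirected to the
	       * handoff node, which re-enqueues it to the thread stored in
	       * owner_thread_index above.
	       */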
goto packet_enqueue; } @@ -840,9 +987,9 @@ slow_path: goto packet_enqueue; } - if (reass->is_complete) + if (ip4_sv_reass_is_complete (reass, a.extended)) { - if (is_custom) + if (a.is_custom) { next0 = vnet_buffer (b0)->ip.reass.next_index; } @@ -851,7 +998,7 @@ slow_path: next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; } vnet_buffer (b0)->ip.reass.is_non_first_fragment = - ! !fragment_first; + !!fragment_first; vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = reass->icmp_type_or_tcp_flags; @@ -861,18 +1008,20 @@ slow_path: reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; + if (a.extended) + ip4_sv_reass_set_vnet_buffer2_from_reass (vm, b0, reass); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip4_sv_reass_add_trace ( vm, node, reass, bi0, REASS_FRAGMENT_FORWARD, reass->ip_proto, reass->l4_src_port, reass->l4_dst_port, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); } goto packet_enqueue; } ip4_sv_reass_rc_t rc = - ip4_sv_reass_update (vm, node, rm, ip0, reass, bi0); + ip4_sv_reass_update (vm, node, rm, ip0, reass, bi0, a.extended); u32 counter = ~0; switch (rc) { @@ -889,62 +1038,64 @@ slow_path: if (~0 != counter) { vlib_node_increment_counter (vm, node->node_index, counter, 1); - ip4_sv_reass_free (vm, rm, rt, reass); + ip4_sv_reass_trace_error_free (vm, node, reass, bi0); + ip4_sv_reass_free (vm, rm, rt, reass, true); goto next_packet; } - if (reass->is_complete) + if (ip4_sv_reass_is_complete (reass, a.extended)) { u32 idx; vec_foreach_index (idx, reass->cached_buffers) - { - u32 bi0 = vec_elt (reass->cached_buffers, idx); - vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - ip0 = - (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0), - (is_output_feature ? 1 : 0) * - vnet_buffer (b0)-> - ip.save_rewrite_length); - u32 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; - if (is_feature) - { - vnet_feature_next (&next0, b0); - } - if (is_custom) - { - next0 = vnet_buffer (b0)->ip.reass.next_index; - } - if (0 == n_left_to_next) - { - vlib_put_next_frame (vm, node, next_index, - n_left_to_next); - vlib_get_next_frame (vm, node, next_index, to_next, - n_left_to_next); - } - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - vnet_buffer (b0)->ip.reass.is_non_first_fragment = - ! !ip4_get_fragment_offset (ip0); - vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; - vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - reass->icmp_type_or_tcp_flags; - vnet_buffer (b0)->ip.reass.tcp_ack_number = - reass->tcp_ack_number; - vnet_buffer (b0)->ip.reass.tcp_seq_number = - reass->tcp_seq_number; - vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; - vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - ip4_sv_reass_add_trace ( - vm, node, reass, bi0, REASS_FRAGMENT_FORWARD, - reass->ip_proto, reass->l4_src_port, reass->l4_dst_port, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); - } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, bi0, - next0); - } + { + u32 bi0 = vec_elt (reass->cached_buffers, idx); + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + ip0 = (ip4_header_t *) u8_ptr_add ( + vlib_buffer_get_current (b0), + (ptrdiff_t) (a.is_output_feature ? 
1 : 0) * + vnet_buffer (b0)->ip.save_rewrite_length); + u32 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; + if (a.is_feature) + { + vnet_feature_next (&next0, b0); + } + if (a.is_custom) + { + next0 = vnet_buffer (b0)->ip.reass.next_index; + } + if (0 == n_left_to_next) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = + !!ip4_get_fragment_offset (ip0); + vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + reass->icmp_type_or_tcp_flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + reass->tcp_ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + reass->tcp_seq_number; + vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; + vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; + if (a.extended) + ip4_sv_reass_set_vnet_buffer2_from_reass (vm, b0, reass); + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip4_sv_reass_add_trace ( + vm, node, reass, bi0, REASS_FRAGMENT_FORWARD, + reass->ip_proto, reass->l4_src_port, + reass->l4_dst_port, + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); + } + vlib_validate_buffer_enqueue_x1 ( + vm, node, next_index, to_next, n_left_to_next, bi0, next0); + } vec_set_len (reass->cached_buffers, 0); // buffers are owned by frame now } @@ -954,12 +1105,13 @@ slow_path: to_next[0] = bi0; to_next += 1; n_left_to_next -= 1; - if (is_feature && IP4_ERROR_NONE == error0) + if (a.is_feature && IP4_ERROR_NONE == error0 && + IP4_SV_REASSEMBLY_NEXT_HANDOFF != next0) { b0 = vlib_get_buffer (vm, bi0); vnet_feature_next (&next0, b0); } - if (with_custom_context && forward_context) + if (a.with_custom_context && forward_context) { if (to_next_aux) { @@ -977,7 +1129,7 @@ slow_path: next_packet: from += 1; n_left_from -= 1; - if (with_custom_context) + if (a.with_custom_context) context += 1; } @@ -989,13 +1141,20 @@ done: return frame->n_vectors; } -VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_sv_reass_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip4_sv_reass_inline ( - vm, node, frame, false /* is_feature */, false /* is_output_feature */, - false /* is_custom */, false /* with_custom_context */); + /* + * Extended reassembly is not supported for non-feature nodes. 
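   * Only the feature and output-feature node functions check
   * ip4_sv_reass_main.extended_refcount; this node always runs with
   * .extended = false, so it never touches the vnet_buffer2 metadata.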
+ */ + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = false, + .is_output_feature = false, + .is_custom = false, + .with_custom_context = false, + .extended = false, + }); } VLIB_REGISTER_NODE (ip4_sv_reass_node) = { @@ -1014,13 +1173,27 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node) = { }, }; -VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_sv_reass_node_feature) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip4_sv_reass_inline ( - vm, node, frame, true /* is_feature */, false /* is_output_feature */, - false /* is_custom */, false /* with_custom_context */); + if (ip4_sv_reass_main.extended_refcount > 0) + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = true, + .is_output_feature = false, + .is_custom = false, + .with_custom_context = false, + .extended = true, + }); + + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = true, + .is_output_feature = false, + .is_custom = false, + .with_custom_context = false, + .extended = false, + }); } VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = { @@ -1039,22 +1212,35 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = { }; VNET_FEATURE_INIT (ip4_sv_reass_feature) = { - .arc_name = "ip4-unicast", - .node_name = "ip4-sv-reassembly-feature", - .runs_before = VNET_FEATURES ("ip4-lookup"), - .runs_after = 0, + .arc_name = "ip4-unicast", + .node_name = "ip4-sv-reassembly-feature", + .runs_before = VNET_FEATURES ("ip4-lookup"), + .runs_after = 0, }; -VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_sv_reass_node_output_feature) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip4_sv_reass_inline ( - vm, node, frame, true /* is_feature */, true /* is_output_feature */, - false /* is_custom */, false /* with_custom_context */); + if (ip4_sv_reass_main.extended_refcount > 0) + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = true, + .is_output_feature = true, + .is_custom = false, + .with_custom_context = false, + .extended = true, + }); + + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = true, + .is_output_feature = true, + .is_custom = false, + .with_custom_context = false, + .extended = false, + }); } - VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = { .name = "ip4-sv-reassembly-output-feature", .vector_size = sizeof (u32), @@ -1066,15 +1252,15 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = { { [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input", [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop", - [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff", + [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-output-feature-hoff", }, }; VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = { - .arc_name = "ip4-output", - .node_name = "ip4-sv-reassembly-output-feature", - .runs_before = 0, - .runs_after = 0, + .arc_name = "ip4-output", + .node_name = "ip4-sv-reassembly-output-feature", + .runs_before = 0, + .runs_after = 0, }; VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = { @@ -1093,13 +1279,20 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = { }, }; -VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_sv_reass_custom_node) 
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip4_sv_reass_inline ( - vm, node, frame, false /* is_feature */, false /* is_output_feature */, - true /* is_custom */, false /* with_custom_context */); + /* + * Extended reassembly is not supported for non-feature nodes. + */ + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = false, + .is_output_feature = false, + .is_custom = true, + .with_custom_context = false, + .extended = false, + }); } VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = { @@ -1122,9 +1315,17 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = { VLIB_NODE_FN (ip4_sv_reass_custom_context_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip4_sv_reass_inline ( - vm, node, frame, false /* is_feature */, false /* is_output_feature */, - true /* is_custom */, true /* with_custom_context */); + /* + * Extended reassembly is not supported for non-feature nodes. + */ + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = false, + .is_output_feature = false, + .is_custom = true, + .with_custom_context = true, + .extended = false, + }); } #ifndef CLIB_MARCH_VARIANT @@ -1159,7 +1360,7 @@ typedef struct #ifndef CLIB_MARCH_VARIANT static int -ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx) +ip4_rehash_cb (clib_bihash_kv_16_8_t *kv, void *_ctx) { ip4_rehash_cb_ctx *ctx = _ctx; if (clib_bihash_add_del_16_8 (ctx->new_hash, kv, 1)) @@ -1186,8 +1387,8 @@ ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies, u32 max_reassembly_length, u32 expire_walk_interval_ms) { u32 old_nbuckets = ip4_sv_reass_get_nbuckets (); - ip4_sv_reass_set_params (timeout_ms, max_reassemblies, - max_reassembly_length, expire_walk_interval_ms); + ip4_sv_reass_set_params (timeout_ms, max_reassemblies, max_reassembly_length, + expire_walk_interval_ms); vlib_process_signal_event (ip4_sv_reass_main.vlib_main, ip4_sv_reass_main.ip4_sv_reass_expire_node_idx, IP4_EVENT_CONFIG_CHANGED, 0); @@ -1200,7 +1401,7 @@ ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies, ctx.failure = 0; ctx.new_hash = &new_hash; clib_bihash_init_16_8 (&new_hash, "ip4-dr", new_nbuckets, - new_nbuckets * 1024); + (uword) new_nbuckets * 1024); clib_bihash_foreach_key_value_pair_16_8 (&ip4_sv_reass_main.hash, ip4_rehash_cb, &ctx); if (ctx.failure) @@ -1220,8 +1421,8 @@ ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies, } vnet_api_error_t -ip4_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies, - u32 * max_reassembly_length, u32 * expire_walk_interval_ms) +ip4_sv_reass_get (u32 *timeout_ms, u32 *max_reassemblies, + u32 *max_reassembly_length, u32 *expire_walk_interval_ms) { *timeout_ms = ip4_sv_reass_main.timeout_ms; *max_reassemblies = ip4_sv_reass_main.max_reass_n; @@ -1231,7 +1432,7 @@ ip4_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies, } static clib_error_t * -ip4_sv_reass_init_function (vlib_main_t * vm) +ip4_sv_reass_init_function (vlib_main_t *vm) { ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; clib_error_t *error = 0; @@ -1244,11 +1445,11 @@ ip4_sv_reass_init_function (vlib_main_t * vm) vec_validate (rm->per_thread_data, vlib_num_workers ()); ip4_sv_reass_per_thread_t *rt; vec_foreach (rt, rm->per_thread_data) - { - clib_spinlock_init (&rt->lock); - pool_alloc (rt->pool, rm->max_reass_n); - rt->lru_first = rt->lru_last = ~0; - } + { + clib_spinlock_init (&rt->lock); + pool_alloc (rt->pool, rm->max_reass_n); + rt->lru_first = rt->lru_last = ~0; + } node = 
vlib_get_node_by_name (vm, (u8 *) "ip4-sv-reassembly-expire-walk"); ASSERT (node); @@ -1260,15 +1461,14 @@ ip4_sv_reass_init_function (vlib_main_t * vm) IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS); nbuckets = ip4_sv_reass_get_nbuckets (); - clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024); - - node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop"); - ASSERT (node); - rm->ip4_drop_idx = node->index; + clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, + (uword) nbuckets * 1024); rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0); rm->fq_feature_index = vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0); + rm->fq_output_feature_index = + vlib_frame_queue_main_init (ip4_sv_reass_node_output_feature.index, 0); rm->fq_custom_context_index = vlib_frame_queue_main_init (ip4_sv_reass_custom_context_node.index, 0); @@ -1291,10 +1491,8 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm, while (true) { - vlib_process_wait_for_event_or_clock (vm, - (f64) - rm->expire_walk_interval_ms / - (f64) MSEC_PER_SEC); + vlib_process_wait_for_event_or_clock ( + vm, (f64) rm->expire_walk_interval_ms / (f64) MSEC_PER_SEC); event_type = vlib_process_get_events (vm, &event_data); switch (event_type) @@ -1323,19 +1521,20 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm, clib_spinlock_lock (&rt->lock); vec_reset_length (pool_indexes_to_free); - pool_foreach_index (index, rt->pool) { - reass = pool_elt_at_index (rt->pool, index); - if (now > reass->last_heard + rm->timeout) - { - vec_add1 (pool_indexes_to_free, index); - } - } + pool_foreach_index (index, rt->pool) + { + reass = pool_elt_at_index (rt->pool, index); + if (now > reass->last_heard + rm->timeout) + { + vec_add1 (pool_indexes_to_free, index); + } + } int *i; - vec_foreach (i, pool_indexes_to_free) - { - ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); - ip4_sv_reass_free (vm, rm, rt, reass); - } + vec_foreach (i, pool_indexes_to_free) + { + ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); + ip4_sv_reass_free (vm, rm, rt, reass, true); + } clib_spinlock_unlock (&rt->lock); } @@ -1360,7 +1559,7 @@ VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = { }; static u8 * -format_ip4_sv_reass_key (u8 * s, va_list * args) +format_ip4_sv_reass_key (u8 *s, va_list *args) { ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *); s = @@ -1371,37 +1570,35 @@ format_ip4_sv_reass_key (u8 * s, va_list * args) } static u8 * -format_ip4_sv_reass (u8 * s, va_list * args) +format_ip4_sv_reass (u8 *s, va_list *args) { vlib_main_t *vm = va_arg (*args, vlib_main_t *); ip4_sv_reass_t *reass = va_arg (*args, ip4_sv_reass_t *); - s = format (s, "ID: %lu, key: %U trace_op_counter: %u\n", - reass->id, format_ip4_sv_reass_key, &reass->key, - reass->trace_op_counter); + s = format (s, "ID: %lu, key: %U trace_op_counter: %u\n", reass->id, + format_ip4_sv_reass_key, &reass->key, reass->trace_op_counter); vlib_buffer_t *b; u32 *bip; u32 counter = 0; vec_foreach (bip, reass->cached_buffers) - { - u32 bi = *bip; - do - { - b = vlib_get_buffer (vm, bi); - s = format (s, " #%03u: bi: %u, ", counter, bi); - ++counter; - bi = b->next_buffer; - } - while (b->flags & VLIB_BUFFER_NEXT_PRESENT); - } + { + u32 bi = *bip; + do + { + b = vlib_get_buffer (vm, bi); + s = format (s, " #%03u: bi: %u, ", counter, bi); + ++counter; + bi = b->next_buffer; + } + while (b->flags & VLIB_BUFFER_NEXT_PRESENT); + } return s; } static clib_error_t * -show_ip4_reass (vlib_main_t * vm, - unformat_input_t * input, - CLIB_UNUSED 
(vlib_cli_command_t * lmd)) +show_ip4_reass (vlib_main_t *vm, unformat_input_t *input, + CLIB_UNUSED (vlib_cli_command_t *lmd)) { ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; @@ -1424,100 +1621,105 @@ show_ip4_reass (vlib_main_t * vm, clib_spinlock_lock (&rt->lock); if (details) { - pool_foreach (reass, rt->pool) { - vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass); - } + pool_foreach (reass, rt->pool) + { + vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass); + } } - sum_reass_n += rt->reass_n; + sum_reass_n += pool_elts (rt->pool); clib_spinlock_unlock (&rt->lock); } vlib_cli_output (vm, "---------------------"); vlib_cli_output (vm, "Current IP4 reassemblies count: %lu\n", (long unsigned) sum_reass_n); vlib_cli_output (vm, - "Maximum configured concurrent shallow virtual IP4 reassemblies per worker-thread: %lu\n", + "Maximum configured concurrent shallow virtual IP4 " + "reassemblies per worker-thread: %lu\n", (long unsigned) rm->max_reass_n); vlib_cli_output (vm, "Maximum configured amount of fragments per shallow " "virtual IP4 reassembly: %lu\n", (long unsigned) rm->max_reass_len); + vlib_cli_output ( + vm, "Maximum configured shallow virtual IP4 reassembly timeout: %lums\n", + (long unsigned) rm->timeout_ms); vlib_cli_output (vm, - "Maximum configured shallow virtual IP4 reassembly timeout: %lums\n", - (long unsigned) rm->timeout_ms); - vlib_cli_output (vm, - "Maximum configured shallow virtual IP4 reassembly expire walk interval: %lums\n", + "Maximum configured shallow virtual IP4 reassembly expire " + "walk interval: %lums\n", (long unsigned) rm->expire_walk_interval_ms); + return 0; } VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = { - .path = "show ip4-sv-reassembly", - .short_help = "show ip4-sv-reassembly [details]", - .function = show_ip4_reass, + .path = "show ip4-sv-reassembly", + .short_help = "show ip4-sv-reassembly [details]", + .function = show_ip4_reass, }; #ifndef CLIB_MARCH_VARIANT vnet_api_error_t ip4_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable) { - return ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, - enable_disable); + return ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, enable_disable); } #endif /* CLIB_MARCH_VARIANT */ - -#define foreach_ip4_sv_reass_handoff_error \ -_(CONGESTION_DROP, "congestion drop") - +#define foreach_ip4_sv_reass_handoff_error \ + _ (CONGESTION_DROP, "congestion drop") typedef enum { -#define _(sym,str) IP4_SV_REASSEMBLY_HANDOFF_ERROR_##sym, +#define _(sym, str) IP4_SV_REASSEMBLY_HANDOFF_ERROR_##sym, foreach_ip4_sv_reass_handoff_error #undef _ IP4_SV_REASSEMBLY_HANDOFF_N_ERROR, } ip4_sv_reass_handoff_error_t; static char *ip4_sv_reass_handoff_error_strings[] = { -#define _(sym,string) string, +#define _(sym, string) string, foreach_ip4_sv_reass_handoff_error #undef _ }; typedef struct { - u32 next_worker_index; + u32 thread_index; } ip4_sv_reass_handoff_trace_t; static u8 * -format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args) +format_ip4_sv_reass_handoff_trace (u8 *s, va_list *args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_sv_reass_handoff_trace_t *t = va_arg (*args, ip4_sv_reass_handoff_trace_t *); - s = - format (s, "ip4-sv-reassembly-handoff: next-worker %d", - t->next_worker_index); + s = format (s, "to thread-index: %u", t->thread_index); return s; } +struct ip4_sv_reass_hoff_args +{ + bool is_feature; + bool is_output_feature; + bool is_custom_context; +}; + always_inline uword 
ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *frame, bool is_feature, - bool is_custom_context) + vlib_frame_t *frame, + struct ip4_sv_reass_hoff_args a) { ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u32 n_enq, n_left_from, *from, *context; u16 thread_indices[VLIB_FRAME_SIZE], *ti; - u32 fq_index; from = vlib_frame_vector_args (frame); - if (is_custom_context) + if (a.is_custom_context) context = vlib_frame_aux_args (frame); n_left_from = frame->n_vectors; @@ -1526,28 +1728,28 @@ ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, b = bufs; ti = thread_indices; - fq_index = (is_feature) ? rm->fq_feature_index : - (is_custom_context ? rm->fq_custom_context_index : - rm->fq_index); + const u32 fq_index = a.is_output_feature ? rm->fq_output_feature_index : + a.is_feature ? rm->fq_feature_index : + a.is_custom_context ? rm->fq_custom_context_index : + rm->fq_index; while (n_left_from > 0) { ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index; - if (PREDICT_FALSE - ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b[0]->flags & VLIB_BUFFER_IS_TRACED))) + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b[0]->flags & VLIB_BUFFER_IS_TRACED))) { ip4_sv_reass_handoff_trace_t *t = vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->next_worker_index = ti[0]; + t->thread_index = ti[0]; } n_left_from -= 1; ti += 1; b += 1; } - if (is_custom_context) + if (a.is_custom_context) n_enq = vlib_buffer_enqueue_to_thread_with_aux ( vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1); else @@ -1555,21 +1757,22 @@ ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vm, node, fq_index, from, thread_indices, frame->n_vectors, 1); if (n_enq < frame->n_vectors) - vlib_node_increment_counter (vm, node->node_index, - IP4_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP, - frame->n_vectors - n_enq); + vlib_node_increment_counter ( + vm, node->node_index, IP4_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP, + frame->n_vectors - n_enq); return frame->n_vectors; } -VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_sv_reass_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { return ip4_sv_reass_handoff_node_inline ( - vm, node, frame, false /* is_feature */, false /* is_custom_context */); + vm, node, frame, + (struct ip4_sv_reass_hoff_args){ .is_feature = false, + .is_output_feature = false, + .is_custom_context = false }); } - VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = { .name = "ip4-sv-reassembly-handoff", .vector_size = sizeof (u32), @@ -1588,7 +1791,10 @@ VLIB_NODE_FN (ip4_sv_reass_custom_context_handoff_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { return ip4_sv_reass_handoff_node_inline ( - vm, node, frame, false /* is_feature */, true /* is_custom_context */); + vm, node, frame, + (struct ip4_sv_reass_hoff_args){ .is_feature = false, + .is_output_feature = false, + .is_custom_context = true }); } VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = { @@ -1606,16 +1812,16 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = { }, }; -VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm, - vlib_node_runtime_t * - node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { return 
ip4_sv_reass_handoff_node_inline ( - vm, node, frame, true /* is_feature */, false /* is_custom_context */); + vm, node, frame, + (struct ip4_sv_reass_hoff_args){ .is_feature = true, + .is_output_feature = false, + .is_custom_context = false }); } - VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = { .name = "ip4-sv-reass-feature-hoff", .vector_size = sizeof (u32), @@ -1630,6 +1836,30 @@ VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = { }, }; +VLIB_NODE_FN (ip4_sv_reass_output_feature_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip4_sv_reass_handoff_node_inline ( + vm, node, frame, + (struct ip4_sv_reass_hoff_args){ .is_feature = false, + .is_output_feature = true, + .is_custom_context = false }); +} + +VLIB_REGISTER_NODE (ip4_sv_reass_output_feature_handoff_node) = { + .name = "ip4-sv-reass-output-feature-hoff", + .vector_size = sizeof (u32), + .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings), + .error_strings = ip4_sv_reass_handoff_error_strings, + .format_trace = format_ip4_sv_reass_handoff_trace, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + #ifndef CLIB_MARCH_VARIANT int ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) @@ -1640,10 +1870,10 @@ ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) { if (!rm->feature_use_refcount_per_intf[sw_if_index]) { - ++rm->feature_use_refcount_per_intf[sw_if_index]; - return vnet_feature_enable_disable ("ip4-unicast", - "ip4-sv-reassembly-feature", - sw_if_index, 1, 0, 0); + int rv = vnet_feature_enable_disable ( + "ip4-unicast", "ip4-sv-reassembly-feature", sw_if_index, 1, 0, 0); + if (0 != rv) + return rv; } ++rm->feature_use_refcount_per_intf[sw_if_index]; } @@ -1652,9 +1882,10 @@ ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) if (rm->feature_use_refcount_per_intf[sw_if_index]) --rm->feature_use_refcount_per_intf[sw_if_index]; if (!rm->feature_use_refcount_per_intf[sw_if_index]) - return vnet_feature_enable_disable ("ip4-unicast", - "ip4-sv-reassembly-feature", - sw_if_index, 0, 0, 0); + { + return vnet_feature_enable_disable ( + "ip4-unicast", "ip4-sv-reassembly-feature", sw_if_index, 0, 0, 0); + } } return 0; } @@ -1674,8 +1905,7 @@ ip4_sv_reass_custom_context_register_next_node (uword node_index) } int -ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, - int is_enable) +ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) { ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; vec_validate (rm->output_feature_use_refcount_per_intf, sw_if_index); @@ -1683,10 +1913,11 @@ ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, { if (!rm->output_feature_use_refcount_per_intf[sw_if_index]) { - ++rm->output_feature_use_refcount_per_intf[sw_if_index]; - return vnet_feature_enable_disable ("ip4-output", - "ip4-sv-reassembly-output-feature", - sw_if_index, 1, 0, 0); + int rv = vnet_feature_enable_disable ( + "ip4-output", "ip4-sv-reassembly-output-feature", sw_if_index, 1, + 0, 0); + if (0 != rv) + return rv; } ++rm->output_feature_use_refcount_per_intf[sw_if_index]; } @@ -1695,12 +1926,66 @@ ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, if (rm->output_feature_use_refcount_per_intf[sw_if_index]) --rm->output_feature_use_refcount_per_intf[sw_if_index]; if (!rm->output_feature_use_refcount_per_intf[sw_if_index]) - return vnet_feature_enable_disable ("ip4-output", - "ip4-sv-reassembly-output-feature", - sw_if_index, 0, 0, 
0); + { + return vnet_feature_enable_disable ( + "ip4-output", "ip4-sv-reassembly-output-feature", sw_if_index, 0, + 0, 0); + } } return 0; } + +void +ip4_sv_reass_enable_disable_extended (bool is_enable) +{ + if (is_enable) + ++ip4_sv_reass_main.extended_refcount; + else + --ip4_sv_reass_main.extended_refcount; +} + +int +ip4_sv_reass_extended_lock (vlib_buffer_t *b, + struct ip4_sv_lock_unlock_args *a) +{ + ip4_sv_reass_per_thread_t *per_thread = + &ip4_sv_reass_main + .per_thread_data[vnet_buffer2 (b)->ip.reass.thread_index]; + + if (!vec_is_member (ip4_sv_reass_main.per_thread_data, per_thread)) + return -1; + + clib_spinlock_lock (&per_thread->lock); + if (pool_is_free_index (per_thread->pool, + vnet_buffer2 (b)->ip.reass.pool_index)) + goto fail; + + ip4_sv_reass_t *reass = pool_elt_at_index ( + per_thread->pool, vnet_buffer2 (b)->ip.reass.pool_index); + if (vnet_buffer2 (b)->ip.reass.id == reass->id) + { + *a->total_ip_payload_length = reass->total_ip_payload_length; + + *a->first_fragment_buffer_index = reass->first_fragment_clone_bi; + *a->first_fragment_total_ip_header_length = + reass->first_fragment_total_ip_header_length; + return 0; + } + +fail: + clib_spinlock_unlock (&per_thread->lock); + return -1; +} + +void +ip4_sv_reass_extended_unlock (vlib_buffer_t *b) +{ + ip4_sv_reass_per_thread_t *per_thread = + &ip4_sv_reass_main + .per_thread_data[vnet_buffer2 (b)->ip.reass.thread_index]; + clib_spinlock_unlock (&per_thread->lock); +} + #endif /* diff --git a/src/vnet/ip/reass/ip4_sv_reass.h b/src/vnet/ip/reass/ip4_sv_reass.h index 3a684eb9809..a1e5659a9f1 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.h +++ b/src/vnet/ip/reass/ip4_sv_reass.h @@ -23,6 +23,7 @@ #ifndef __included_ip4_sv_reass_h__ #define __included_ip4_sv_reass_h__ +#include <stdbool.h> #include <vnet/api_errno.h> #include <vnet/vnet.h> @@ -48,6 +49,33 @@ int ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); +/* + * Enable or disable extended reassembly. + * + * Extended reassembly means that fragments are cached until both first and + * last fragments are seen. Furthermore, first fragment buffer will be cloned + * and stored in reassembly context for later retrieval. + */ +void ip4_sv_reass_enable_disable_extended (bool is_enable); + +struct ip4_sv_lock_unlock_args +{ + u32 *total_ip_payload_length; + u32 *first_fragment_buffer_index; + u32 *first_fragment_total_ip_header_length; +}; + +/* + * Lock thread-level lock and fetch information from reassembly context. + * Uses vnet_buffer2 data filled by extended reassembly. + * + * Returns 0 on success, -1 otherwise. 
+ */ +int ip4_sv_reass_extended_lock (vlib_buffer_t *b, + struct ip4_sv_lock_unlock_args *a); + +void ip4_sv_reass_extended_unlock (vlib_buffer_t *b); + uword ip4_sv_reass_custom_register_next_node (uword node_index); uword ip4_sv_reass_custom_context_register_next_node (uword node_index); diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c index fe2ed05555c..69b27c5aa8e 100644 --- a/src/vnet/ip/reass/ip6_sv_reass.c +++ b/src/vnet/ip/reass/ip6_sv_reass.c @@ -28,12 +28,13 @@ #include <vnet/ip/reass/ip6_sv_reass.h> #include <vnet/ip/ip6_inlines.h> -#define MSEC_PER_SEC 1000 +#define MSEC_PER_SEC 1000 #define IP6_SV_REASS_TIMEOUT_DEFAULT_MS 100 -#define IP6_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default -#define IP6_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024 +#define IP6_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS \ + 10000 // 10 seconds default +#define IP6_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024 #define IP6_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3 -#define IP6_SV_REASS_HT_LOAD_FACTOR (0.75) +#define IP6_SV_REASS_HT_LOAD_FACTOR (0.75) typedef enum { @@ -94,17 +95,23 @@ typedef struct // buffer indexes of buffers in this reassembly in chronological order - // including overlaps and duplicate fragments u32 *cached_buffers; - // set to true when this reassembly is completed - bool is_complete; - // ip protocol + + bool first_fragment_seen; + bool last_fragment_seen; + + // vnet_buffer data u8 ip_proto; u8 icmp_type_or_tcp_flags; u32 tcp_ack_number; u32 tcp_seq_number; - // l4 src port u16 l4_src_port; - // l4 dst port u16 l4_dst_port; + + // vnet_buffer2 data + u32 total_ip_payload_length; + u32 first_fragment_total_ip_header_length; + u32 first_fragment_clone_bi; + // lru indexes u32 lru_prev; u32 lru_next; @@ -142,18 +149,21 @@ typedef struct vlib_main_t *vlib_main; vnet_main_t *vnet_main; - // node index of ip6-drop node - u32 ip6_drop_idx; - u32 ip6_icmp_error_idx; u32 ip6_sv_reass_expire_node_idx; /** Worker handoff */ u32 fq_index; u32 fq_feature_index; + u32 fq_output_feature_index; u32 fq_custom_context_index; // reference count for enabling/disabling feature - per interface u32 *feature_use_refcount_per_intf; + // reference count for enabling/disabling output feature - per interface + u32 *output_feature_use_refcount_per_intf; + + // extended reassembly refcount - see ip6_sv_reass_enable_disable_extended() + u32 extended_refcount; } ip6_sv_reass_main_t; extern ip6_sv_reass_main_t ip6_sv_reass_main; @@ -174,7 +184,8 @@ typedef enum typedef enum { REASS_FRAGMENT_CACHE, - REASS_FINISH, + REASS_FIRST_FRAG, + REASS_LAST_FRAG, REASS_FRAGMENT_FORWARD, REASS_PASSTHROUGH, } ip6_sv_reass_trace_operation_e; @@ -190,7 +201,7 @@ typedef struct } ip6_sv_reass_trace_t; static u8 * -format_ip6_sv_reass_trace (u8 * s, va_list * args) +format_ip6_sv_reass_trace (u8 *s, va_list *args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); @@ -204,17 +215,19 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args) case REASS_FRAGMENT_CACHE: s = format (s, "[cached]"); break; - case REASS_FINISH: + case REASS_FIRST_FRAG: s = - format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]", + format (s, "[first-frag-seen, ip proto=%u, src_port=%u, dst_port=%u]", t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), clib_net_to_host_u16 (t->l4_dst_port)); break; + case REASS_LAST_FRAG: + s = format (s, "[last-frag-seen]"); + break; case REASS_FRAGMENT_FORWARD: - s = - format (s, "[forward, ip 
proto=%u, src_port=%u, dst_port=%u]", - t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), - clib_net_to_host_u16 (t->l4_dst_port)); + s = format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]", + t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), + clib_net_to_host_u16 (t->l4_dst_port)); break; case REASS_PASSTHROUGH: s = format (s, "[not fragmented or atomic fragment]"); @@ -224,14 +237,14 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args) } static void -ip6_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node, - ip6_sv_reass_t * reass, u32 bi, - ip6_sv_reass_trace_operation_e action, - u32 ip_proto, u16 l4_src_port, u16 l4_dst_port) +ip6_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, + ip6_sv_reass_t *reass, u32 bi, + ip6_sv_reass_trace_operation_e action, u32 ip_proto, + u16 l4_src_port, u16 l4_dst_port) { vlib_buffer_t *b = vlib_get_buffer (vm, bi); - if (pool_is_free_index - (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b))) + if (pool_is_free_index (vm->trace_main.trace_buffer_pool, + vlib_buffer_get_trace_index (b))) { // this buffer's trace is gone b->flags &= ~VLIB_BUFFER_IS_TRACED; @@ -258,31 +271,35 @@ ip6_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node, } always_inline void -ip6_sv_reass_free (vlib_main_t * vm, ip6_sv_reass_main_t * rm, - ip6_sv_reass_per_thread_t * rt, ip6_sv_reass_t * reass) +ip6_sv_reass_free (vlib_main_t *vm, ip6_sv_reass_main_t *rm, + ip6_sv_reass_per_thread_t *rt, ip6_sv_reass_t *reass, + bool del_bihash) { - clib_bihash_kv_48_8_t kv; - kv.key[0] = reass->key.as_u64[0]; - kv.key[1] = reass->key.as_u64[1]; - kv.key[2] = reass->key.as_u64[2]; - kv.key[3] = reass->key.as_u64[3]; - kv.key[4] = reass->key.as_u64[4]; - kv.key[5] = reass->key.as_u64[5]; - clib_bihash_add_del_48_8 (&rm->hash, &kv, 0); + if (del_bihash) + { + clib_bihash_kv_48_8_t kv; + kv.key[0] = reass->key.as_u64[0]; + kv.key[1] = reass->key.as_u64[1]; + kv.key[2] = reass->key.as_u64[2]; + kv.key[3] = reass->key.as_u64[3]; + kv.key[4] = reass->key.as_u64[4]; + kv.key[5] = reass->key.as_u64[5]; + clib_bihash_add_del_48_8 (&rm->hash, &kv, 0); + } vlib_buffer_free (vm, reass->cached_buffers, vec_len (reass->cached_buffers)); vec_free (reass->cached_buffers); reass->cached_buffers = NULL; + if (~0 != reass->first_fragment_clone_bi) + vlib_buffer_free_one (vm, reass->first_fragment_clone_bi); if (~0 != reass->lru_prev) { - ip6_sv_reass_t *lru_prev = - pool_elt_at_index (rt->pool, reass->lru_prev); + ip6_sv_reass_t *lru_prev = pool_elt_at_index (rt->pool, reass->lru_prev); lru_prev->lru_next = reass->lru_next; } if (~0 != reass->lru_next) { - ip6_sv_reass_t *lru_next = - pool_elt_at_index (rt->pool, reass->lru_next); + ip6_sv_reass_t *lru_next = pool_elt_at_index (rt->pool, reass->lru_next); lru_next->lru_prev = reass->lru_prev; } if (rt->lru_first == reass - rt->pool) @@ -297,13 +314,6 @@ ip6_sv_reass_free (vlib_main_t * vm, ip6_sv_reass_main_t * rm, --rt->reass_n; } -always_inline void -ip6_sv_reass_init (ip6_sv_reass_t * reass) -{ - reass->cached_buffers = NULL; - reass->is_complete = false; -} - always_inline ip6_sv_reass_t * ip6_sv_reass_find_or_create (vlib_main_t *vm, ip6_sv_reass_main_t *rm, ip6_sv_reass_per_thread_t *rt, @@ -325,7 +335,7 @@ again: if (now > reass->last_heard + rm->timeout) { - ip6_sv_reass_free (vm, rm, rt, reass); + ip6_sv_reass_free (vm, rm, rt, reass, true); reass = NULL; } } @@ -336,19 +346,17 @@ again: return reass; } - if (rt->reass_n >= rm->max_reass_n) + if (rt->reass_n >= rm->max_reass_n && 
rm->max_reass_n) { reass = pool_elt_at_index (rt->pool, rt->lru_first); - ip6_sv_reass_free (vm, rm, rt, reass); + ip6_sv_reass_free (vm, rm, rt, reass, true); } - pool_get (rt->pool, reass); - clib_memset (reass, 0, sizeof (*reass)); + pool_get_zero (rt->pool, reass); + reass->first_fragment_clone_bi = ~0; reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter; ++rt->id_counter; - ip6_sv_reass_init (reass); ++rt->reass_n; - reass->lru_prev = reass->lru_next = ~0; if (~0 != rt->lru_last) @@ -376,7 +384,7 @@ again: int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2); if (rv) { - ip6_sv_reass_free (vm, rm, rt, reass); + ip6_sv_reass_free (vm, rm, rt, reass, false); reass = NULL; // if other worker created a context already work with the other copy if (-2 == rv) @@ -386,10 +394,23 @@ again: return reass; } +always_inline bool +ip6_sv_reass_is_complete (ip6_sv_reass_t *reass, bool extended) +{ + /* + * Both first and last fragments have to be seen for extended reassembly to + * be complete. Otherwise first fragment is enough. + */ + if (extended) + return reass->first_fragment_seen && reass->last_fragment_seen; + + return reass->first_fragment_seen; +} + always_inline ip6_sv_reass_rc_t ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, ip6_sv_reass_main_t *rm, ip6_sv_reass_t *reass, u32 bi0, - ip6_frag_hdr_t *frag_hdr) + ip6_frag_hdr_t *frag_hdr, bool extended) { vlib_buffer_t *fb = vlib_get_buffer (vm, bi0); vnet_buffer_opaque_t *fvnb = vnet_buffer (fb); @@ -417,26 +438,51 @@ ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, fvnb->ip.reass.range_first = fragment_first; fvnb->ip.reass.range_last = fragment_last; fvnb->ip.reass.next_range_bi = ~0; + void *l4_hdr = NULL; if (0 == fragment_first) { - if (!ip6_get_port - (vm, fb, fip, fb->current_length, &reass->ip_proto, - &reass->l4_src_port, &reass->l4_dst_port, - &reass->icmp_type_or_tcp_flags, &reass->tcp_ack_number, - &reass->tcp_seq_number)) + if (!ip6_get_port (vm, fb, fip, fb->current_length, &reass->ip_proto, + &reass->l4_src_port, &reass->l4_dst_port, + &reass->icmp_type_or_tcp_flags, + &reass->tcp_ack_number, &reass->tcp_seq_number, + &l4_hdr)) return IP6_SV_REASS_RC_UNSUPP_IP_PROTO; - reass->is_complete = true; + reass->first_fragment_seen = true; + if (extended) + { + reass->first_fragment_total_ip_header_length = + (u8 *) l4_hdr - (u8 *) fip; + vlib_buffer_t *clone = vlib_buffer_copy_no_chain ( + vm, fb, &reass->first_fragment_clone_bi); + if (!clone) + reass->first_fragment_clone_bi = ~0; + } + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { - ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_FINISH, + ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_FIRST_FRAG, reass->ip_proto, reass->l4_src_port, reass->l4_dst_port); } } + + if (!ip6_frag_hdr_more (frag_hdr)) + { + reass->last_fragment_seen = true; + reass->total_ip_payload_length = fragment_last - 1; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_LAST_FRAG, ~0, + ~0, ~0); + } + } + vec_add1 (reass->cached_buffers, bi0); - if (!reass->is_complete) + + if (!ip6_sv_reass_is_complete (reass, extended)) { if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED)) { @@ -453,8 +499,7 @@ ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, } always_inline bool -ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t *node, - vlib_buffer_t *b, 
+ip6_sv_reass_verify_upper_layer_present (vlib_buffer_t *b, ip6_ext_hdr_chain_t *hc) { int nh = hc->eh[hc->length - 1].protocol; @@ -464,16 +509,14 @@ ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t *node, icmp6_error_set_vnet_buffer ( b, ICMP6_parameter_problem, ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0); - b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER]; return false; } return true; } always_inline bool -ip6_sv_reass_verify_fragment_multiple_8 (vlib_main_t * vm, - vlib_buffer_t * b, - ip6_frag_hdr_t * frag_hdr) +ip6_sv_reass_verify_fragment_multiple_8 (vlib_main_t *vm, vlib_buffer_t *b, + ip6_frag_hdr_t *frag_hdr) { vnet_buffer_opaque_t *vnb = vnet_buffer (b); ip6_header_t *ip = vlib_buffer_get_current (b); @@ -483,18 +526,18 @@ ip6_sv_reass_verify_fragment_multiple_8 (vlib_main_t * vm, (vnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr)); if (more_fragments && 0 != fragment_length % 8) { - icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem, - ICMP6_parameter_problem_erroneous_header_field, - (u8 *) & ip->payload_length - (u8 *) ip); + icmp6_error_set_vnet_buffer ( + b, ICMP6_parameter_problem, + ICMP6_parameter_problem_erroneous_header_field, + (u8 *) &ip->payload_length - (u8 *) ip); return false; } return true; } always_inline bool -ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm, - vlib_buffer_t * b, - ip6_frag_hdr_t * frag_hdr) +ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t *vm, vlib_buffer_t *b, + ip6_frag_hdr_t *frag_hdr) { vnet_buffer_opaque_t *vnb = vnet_buffer (b); u32 fragment_first = ip6_frag_hdr_offset_bytes (frag_hdr); @@ -504,26 +547,52 @@ ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm, if (fragment_first + fragment_length > 65535) { ip6_header_t *ip0 = vlib_buffer_get_current (b); - icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem, - ICMP6_parameter_problem_erroneous_header_field, - (u8 *) & frag_hdr->fragment_offset_and_more - - (u8 *) ip0); + icmp6_error_set_vnet_buffer ( + b, ICMP6_parameter_problem, + ICMP6_parameter_problem_erroneous_header_field, + (u8 *) &frag_hdr->fragment_offset_and_more - (u8 *) ip0); return false; } return true; } +always_inline void +ip6_sv_reass_reset_vnet_buffer2 (vlib_buffer_t *b) +{ + vnet_buffer2 (b)->ip.reass.pool_index = ~0; + vnet_buffer2 (b)->ip.reass.thread_index = ~0; + vnet_buffer2 (b)->ip.reass.id = ~0; +} + +always_inline void +ip6_sv_reass_set_vnet_buffer2_from_reass (vlib_main_t *vm, vlib_buffer_t *b, + ip6_sv_reass_t *reass) +{ + vnet_buffer2 (b)->ip.reass.thread_index = vm->thread_index; + vnet_buffer2 (b)->ip.reass.id = reass->id; + vnet_buffer2 (b)->ip.reass.pool_index = + reass - ip6_sv_reass_main.per_thread_data[vm->thread_index].pool; +} + +struct ip6_sv_reass_args +{ + bool is_feature; + bool is_output_feature; + bool custom_next; + bool custom_context; + bool extended; +}; + always_inline uword ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *frame, bool is_feature, - bool custom_next, bool custom_context) + vlib_frame_t *frame, struct ip6_sv_reass_args a) { u32 *from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index; ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; ip6_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index]; u32 *context; - if (custom_context) + if (a.custom_context) context = vlib_frame_aux_args (frame); clib_spinlock_lock (&rt->lock); @@ -533,7 +602,7 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t 
*node, while (n_left_from > 0) { - if (custom_context) + if (a.custom_context) vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next, to_next_aux, n_left_to_next); else @@ -549,7 +618,11 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, bi0 = from[0]; b0 = vlib_get_buffer (vm, bi0); - ip6_header_t *ip0 = vlib_buffer_get_current (b0); + ip6_header_t *ip0 = (ip6_header_t *) u8_ptr_add ( + vlib_buffer_get_current (b0), + (ptrdiff_t) (a.is_output_feature ? 1 : 0) * + vnet_buffer (b0)->ip.save_rewrite_length); + ip6_frag_hdr_t *frag_hdr; ip6_ext_hdr_chain_t hdr_chain; bool is_atomic_fragment = false; @@ -569,24 +642,29 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION || is_atomic_fragment) { - // this is a regular unfragmented packet or an atomic fragment - if (!ip6_get_port - (vm, b0, ip0, b0->current_length, - &(vnet_buffer (b0)->ip.reass.ip_proto), - &(vnet_buffer (b0)->ip.reass.l4_src_port), - &(vnet_buffer (b0)->ip.reass.l4_dst_port), - &(vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags), - &(vnet_buffer (b0)->ip.reass.tcp_ack_number), - &(vnet_buffer (b0)->ip.reass.tcp_seq_number))) + void *l4_hdr; + // this is a regular unfragmented packet or an atomic + // fragment + if (!ip6_get_port ( + vm, b0, ip0, b0->current_length, + &(vnet_buffer (b0)->ip.reass.ip_proto), + &(vnet_buffer (b0)->ip.reass.l4_src_port), + &(vnet_buffer (b0)->ip.reass.l4_dst_port), + &(vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags), + &(vnet_buffer (b0)->ip.reass.tcp_ack_number), + &(vnet_buffer (b0)->ip.reass.tcp_seq_number), &l4_hdr)) { error0 = IP6_ERROR_REASS_UNSUPP_IP_PROTO; b0->error = node->errors[error0]; next0 = IP6_SV_REASSEMBLY_NEXT_DROP; goto packet_enqueue; } + if (a.extended) + ip6_sv_reass_reset_vnet_buffer2 (b0); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0; vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; - next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index : - IP6_SV_REASSEMBLY_NEXT_INPUT; + next0 = a.custom_next ? 
vnet_buffer (b0)->ip.reass.next_index : + IP6_SV_REASSEMBLY_NEXT_INPUT; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip6_sv_reass_add_trace ( @@ -604,9 +682,10 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (0 == ip6_frag_hdr_offset (frag_hdr)) { // first fragment - verify upper-layer is present - if (!ip6_sv_reass_verify_upper_layer_present (node, b0, - &hdr_chain)) + if (!ip6_sv_reass_verify_upper_layer_present (b0, &hdr_chain)) { + error0 = IP6_ERROR_REASS_MISSING_UPPER; + b0->error = node->errors[error0]; next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR; goto packet_enqueue; } @@ -614,6 +693,8 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (!ip6_sv_reass_verify_fragment_multiple_8 (vm, b0, frag_hdr) || !ip6_sv_reass_verify_packet_size_lt_64k (vm, b0, frag_hdr)) { + error0 = IP6_ERROR_REASS_INVALID_FRAG_LEN; + b0->error = node->errors[error0]; next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR; goto packet_enqueue; } @@ -625,7 +706,7 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, kv.k.as_u64[1] = ip0->src_address.as_u64[1]; kv.k.as_u64[2] = ip0->dst_address.as_u64[0]; kv.k.as_u64[3] = ip0->dst_address.as_u64[1]; - if (custom_context) + if (a.custom_context) kv.k.as_u64[4] = (u64) *context << 32 | (u64) frag_hdr->identification; else @@ -644,7 +725,7 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, next0 = IP6_SV_REASSEMBLY_NEXT_HANDOFF; vnet_buffer (b0)->ip.reass.owner_thread_index = kv.v.thread_index; - if (custom_context) + if (a.custom_context) forward_context = 1; goto packet_enqueue; } @@ -657,10 +738,11 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, goto packet_enqueue; } - if (reass->is_complete) + if (ip6_sv_reass_is_complete (reass, a.extended)) { + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0; vnet_buffer (b0)->ip.reass.is_non_first_fragment = - ! !ip6_frag_hdr_offset (frag_hdr); + !!ip6_frag_hdr_offset (frag_hdr); vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = reass->icmp_type_or_tcp_flags; @@ -670,8 +752,12 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; - next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index : - IP6_SV_REASSEMBLY_NEXT_INPUT; + + if (a.extended) + ip6_sv_reass_set_vnet_buffer2_from_reass (vm, b0, reass); + + next0 = a.custom_next ? 
vnet_buffer (b0)->ip.reass.next_index : + IP6_SV_REASSEMBLY_NEXT_INPUT; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip6_sv_reass_add_trace ( @@ -682,7 +768,8 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } u32 counter = ~0; - switch (ip6_sv_reass_update (vm, node, rm, reass, bi0, frag_hdr)) + switch (ip6_sv_reass_update (vm, node, rm, reass, bi0, frag_hdr, + a.extended)) { case IP6_SV_REASS_RC_OK: /* nothing to do here */ @@ -703,55 +790,57 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (~0 != counter) { vlib_node_increment_counter (vm, node->node_index, counter, 1); - ip6_sv_reass_free (vm, rm, rt, reass); + ip6_sv_reass_free (vm, rm, rt, reass, true); goto next_packet; } - if (reass->is_complete) + if (ip6_sv_reass_is_complete (reass, a.extended)) { u32 idx; vec_foreach_index (idx, reass->cached_buffers) - { - u32 bi0 = vec_elt (reass->cached_buffers, idx); - if (0 == n_left_to_next) - { - vlib_put_next_frame (vm, node, next_index, - n_left_to_next); - vlib_get_next_frame (vm, node, next_index, to_next, - n_left_to_next); - } - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - b0 = vlib_get_buffer (vm, bi0); - if (is_feature) - { - vnet_feature_next (&next0, b0); - } - frag_hdr = - vlib_buffer_get_current (b0) + - vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset; - vnet_buffer (b0)->ip.reass.is_non_first_fragment = - ! !ip6_frag_hdr_offset (frag_hdr); - vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; - vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - reass->icmp_type_or_tcp_flags; - vnet_buffer (b0)->ip.reass.tcp_ack_number = - reass->tcp_ack_number; - vnet_buffer (b0)->ip.reass.tcp_seq_number = - reass->tcp_seq_number; - vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; - vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - ip6_sv_reass_add_trace ( - vm, node, reass, bi0, REASS_FRAGMENT_FORWARD, - reass->ip_proto, reass->l4_src_port, reass->l4_dst_port); - } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, bi0, - next0); - } + { + u32 bi0 = vec_elt (reass->cached_buffers, idx); + if (0 == n_left_to_next) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + if (a.is_feature || a.is_output_feature) + { + vnet_feature_next (&next0, b0); + } + frag_hdr = vlib_buffer_get_current (b0) + + vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = + !!ip6_frag_hdr_offset (frag_hdr); + vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + reass->icmp_type_or_tcp_flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + reass->tcp_ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + reass->tcp_seq_number; + vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; + vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; + if (a.extended) + ip6_sv_reass_set_vnet_buffer2_from_reass (vm, b0, reass); + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip6_sv_reass_add_trace ( + vm, node, reass, bi0, REASS_FRAGMENT_FORWARD, + reass->ip_proto, reass->l4_src_port, + reass->l4_dst_port); + } + vlib_validate_buffer_enqueue_x1 ( + vm, node, 
next_index, to_next, n_left_to_next, bi0, next0); + } vec_set_len (reass->cached_buffers, 0); // buffers are owned by frame now } @@ -761,12 +850,14 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, to_next[0] = bi0; to_next += 1; n_left_to_next -= 1; - if (is_feature && IP6_ERROR_NONE == error0) + if ((a.is_feature || a.is_output_feature) && + IP6_ERROR_NONE == error0 && + IP6_SV_REASSEMBLY_NEXT_HANDOFF != next0) { b0 = vlib_get_buffer (vm, bi0); vnet_feature_next (&next0, b0); } - if (custom_context && forward_context) + if (a.custom_context && forward_context) { if (to_next_aux) { @@ -783,7 +874,7 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, next_packet: from += 1; - if (custom_context) + if (a.custom_context) context += 1; n_left_from -= 1; } @@ -795,13 +886,20 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, return frame->n_vectors; } -VLIB_NODE_FN (ip6_sv_reass_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip6_sv_reass_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */, - false /* custom next */, - false /* custom context */); + /* + * Extended reassembly not supported for non-feature nodes. + */ + return ip6_sv_reassembly_inline (vm, node, frame, + (struct ip6_sv_reass_args){ + .is_feature = false, + .is_output_feature = false, + .custom_context = false, + .custom_next = false, + .extended = false, + }); } VLIB_REGISTER_NODE (ip6_sv_reass_node) = { @@ -820,13 +918,26 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node) = { }, }; -VLIB_NODE_FN (ip6_sv_reass_node_feature) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip6_sv_reass_node_feature) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */, - false /* custom next */, - false /* custom context */); + if (ip6_sv_reass_main.extended_refcount > 0) + return ip6_sv_reassembly_inline (vm, node, frame, + (struct ip6_sv_reass_args){ + .is_feature = true, + .is_output_feature = false, + .custom_context = false, + .custom_next = false, + .extended = true, + }); + return ip6_sv_reassembly_inline (vm, node, frame, + (struct ip6_sv_reass_args){ + .is_feature = true, + .is_output_feature = false, + .custom_context = false, + .custom_next = false, + .extended = false, + }); } VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = { @@ -846,18 +957,70 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = { }; VNET_FEATURE_INIT (ip6_sv_reassembly_feature) = { - .arc_name = "ip6-unicast", - .node_name = "ip6-sv-reassembly-feature", - .runs_before = VNET_FEATURES ("ip6-lookup"), - .runs_after = 0, + .arc_name = "ip6-unicast", + .node_name = "ip6-sv-reassembly-feature", + .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_after = 0, +}; + +VLIB_NODE_FN (ip6_sv_reass_node_output_feature) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + if (ip6_sv_reass_main.extended_refcount > 0) + return ip6_sv_reassembly_inline (vm, node, frame, + (struct ip6_sv_reass_args){ + .is_feature = false, + .is_output_feature = true, + .custom_context = false, + .custom_next = false, + .extended = true, + }); + return ip6_sv_reassembly_inline (vm, node, frame, + (struct ip6_sv_reass_args){ + .is_feature = false, + .is_output_feature = true, + .custom_context = false, + .custom_next = false, + .extended = false, + }); +} 
+ +VLIB_REGISTER_NODE (ip6_sv_reass_node_output_feature) = { + .name = "ip6-sv-reassembly-output-feature", + .vector_size = sizeof (u32), + .format_trace = format_ip6_sv_reass_trace, + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, + .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT, + .next_nodes = + { + [IP6_SV_REASSEMBLY_NEXT_INPUT] = "ip6-input", + [IP6_SV_REASSEMBLY_NEXT_DROP] = "ip6-drop", + [IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error", + [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reass-output-feature-hoff", + }, +}; + +VNET_FEATURE_INIT (ip6_sv_reassembly_output_feature) = { + .arc_name = "ip6-output", + .node_name = "ip6-sv-reassembly-output-feature", + .runs_after = 0, }; VLIB_NODE_FN (ip6_sv_reass_custom_context_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */, - true /* custom next */, - true /* custom context */); + /* + * Extended reassembly not supported for non-feature nodes. + */ + return ip6_sv_reassembly_inline (vm, node, frame, + (struct ip6_sv_reass_args){ + .is_feature = false, + .is_output_feature = false, + .custom_context = true, + .custom_next = true, + .extended = false, + }); } VLIB_REGISTER_NODE (ip6_sv_reass_custom_context_node) = { @@ -909,7 +1072,7 @@ typedef struct } ip6_rehash_cb_ctx; static int -ip6_rehash_cb (clib_bihash_kv_48_8_t * kv, void *_ctx) +ip6_rehash_cb (clib_bihash_kv_48_8_t *kv, void *_ctx) { ip6_rehash_cb_ctx *ctx = _ctx; if (clib_bihash_add_del_48_8 (ctx->new_hash, kv, 1)) @@ -936,8 +1099,8 @@ ip6_sv_reass_set (u32 timeout_ms, u32 max_reassemblies, u32 max_reassembly_length, u32 expire_walk_interval_ms) { u32 old_nbuckets = ip6_sv_reass_get_nbuckets (); - ip6_sv_reass_set_params (timeout_ms, max_reassemblies, - max_reassembly_length, expire_walk_interval_ms); + ip6_sv_reass_set_params (timeout_ms, max_reassemblies, max_reassembly_length, + expire_walk_interval_ms); vlib_process_signal_event (ip6_sv_reass_main.vlib_main, ip6_sv_reass_main.ip6_sv_reass_expire_node_idx, IP6_EVENT_CONFIG_CHANGED, 0); @@ -950,7 +1113,7 @@ ip6_sv_reass_set (u32 timeout_ms, u32 max_reassemblies, ctx.failure = 0; ctx.new_hash = &new_hash; clib_bihash_init_48_8 (&new_hash, "ip6-sv-reass", new_nbuckets, - new_nbuckets * 1024); + (uword) new_nbuckets * 1024); clib_bihash_foreach_key_value_pair_48_8 (&ip6_sv_reass_main.hash, ip6_rehash_cb, &ctx); if (ctx.failure) @@ -970,8 +1133,8 @@ ip6_sv_reass_set (u32 timeout_ms, u32 max_reassemblies, } vnet_api_error_t -ip6_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies, - u32 * max_reassembly_length, u32 * expire_walk_interval_ms) +ip6_sv_reass_get (u32 *timeout_ms, u32 *max_reassemblies, + u32 *max_reassembly_length, u32 *expire_walk_interval_ms) { *timeout_ms = ip6_sv_reass_main.timeout_ms; *max_reassemblies = ip6_sv_reass_main.max_reass_n; @@ -981,7 +1144,7 @@ ip6_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies, } static clib_error_t * -ip6_sv_reass_init_function (vlib_main_t * vm) +ip6_sv_reass_init_function (vlib_main_t *vm) { ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; clib_error_t *error = 0; @@ -994,11 +1157,11 @@ ip6_sv_reass_init_function (vlib_main_t * vm) vec_validate (rm->per_thread_data, vlib_num_workers ()); ip6_sv_reass_per_thread_t *rt; vec_foreach (rt, rm->per_thread_data) - { - clib_spinlock_init (&rt->lock); - pool_alloc (rt->pool, rm->max_reass_n); - rt->lru_first = rt->lru_last = ~0; - } + { + clib_spinlock_init (&rt->lock); + pool_alloc (rt->pool, rm->max_reass_n); + 
rt->lru_first = rt->lru_last = ~0; + } node = vlib_get_node_by_name (vm, (u8 *) "ip6-sv-reassembly-expire-walk"); ASSERT (node); @@ -1011,14 +1174,7 @@ ip6_sv_reass_init_function (vlib_main_t * vm) nbuckets = ip6_sv_reass_get_nbuckets (); clib_bihash_init_48_8 (&rm->hash, "ip6-sv-reass", nbuckets, - nbuckets * 1024); - - node = vlib_get_node_by_name (vm, (u8 *) "ip6-drop"); - ASSERT (node); - rm->ip6_drop_idx = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "ip6-icmp-error"); - ASSERT (node); - rm->ip6_icmp_error_idx = node->index; + (uword) nbuckets * 1024); if ((error = vlib_call_init_function (vm, ip_main_init))) return error; @@ -1026,6 +1182,8 @@ ip6_sv_reass_init_function (vlib_main_t * vm) rm->fq_index = vlib_frame_queue_main_init (ip6_sv_reass_node.index, 0); rm->fq_feature_index = vlib_frame_queue_main_init (ip6_sv_reass_node_feature.index, 0); + rm->fq_output_feature_index = + vlib_frame_queue_main_init (ip6_sv_reass_node_output_feature.index, 0); rm->fq_custom_context_index = vlib_frame_queue_main_init (ip6_sv_reass_custom_context_node.index, 0); @@ -1047,9 +1205,8 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm, while (true) { - vlib_process_wait_for_event_or_clock (vm, - (f64) rm->expire_walk_interval_ms - / (f64) MSEC_PER_SEC); + vlib_process_wait_for_event_or_clock ( + vm, (f64) rm->expire_walk_interval_ms / (f64) MSEC_PER_SEC); event_type = vlib_process_get_events (vm, &event_data); switch (event_type) @@ -1078,19 +1235,20 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm, clib_spinlock_lock (&rt->lock); vec_reset_length (pool_indexes_to_free); - pool_foreach_index (index, rt->pool) { - reass = pool_elt_at_index (rt->pool, index); - if (now > reass->last_heard + rm->timeout) - { - vec_add1 (pool_indexes_to_free, index); - } - } + pool_foreach_index (index, rt->pool) + { + reass = pool_elt_at_index (rt->pool, index); + if (now > reass->last_heard + rm->timeout) + { + vec_add1 (pool_indexes_to_free, index); + } + } int *i; - vec_foreach (i, pool_indexes_to_free) - { - ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); - ip6_sv_reass_free (vm, rm, rt, reass); - } + vec_foreach (i, pool_indexes_to_free) + { + ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); + ip6_sv_reass_free (vm, rm, rt, reass, true); + } clib_spinlock_unlock (&rt->lock); } @@ -1116,7 +1274,7 @@ VLIB_REGISTER_NODE (ip6_sv_reass_expire_node) = { }; static u8 * -format_ip6_sv_reass_key (u8 * s, va_list * args) +format_ip6_sv_reass_key (u8 *s, va_list *args) { ip6_sv_reass_key_t *key = va_arg (*args, ip6_sv_reass_key_t *); s = @@ -1127,35 +1285,34 @@ format_ip6_sv_reass_key (u8 * s, va_list * args) } static u8 * -format_ip6_sv_reass (u8 * s, va_list * args) +format_ip6_sv_reass (u8 *s, va_list *args) { vlib_main_t *vm = va_arg (*args, vlib_main_t *); ip6_sv_reass_t *reass = va_arg (*args, ip6_sv_reass_t *); - s = format (s, "ID: %lu, key: %U, trace_op_counter: %u\n", - reass->id, format_ip6_sv_reass_key, &reass->key, - reass->trace_op_counter); + s = format (s, "ID: %lu, key: %U, trace_op_counter: %u\n", reass->id, + format_ip6_sv_reass_key, &reass->key, reass->trace_op_counter); vlib_buffer_t *b; u32 *bip; u32 counter = 0; vec_foreach (bip, reass->cached_buffers) - { - u32 bi = *bip; - do - { - b = vlib_get_buffer (vm, bi); - s = format (s, " #%03u: bi: %u\n", counter, bi); - ++counter; - bi = b->next_buffer; - } - while (b->flags & VLIB_BUFFER_NEXT_PRESENT); - } + { + u32 bi = *bip; + do + { + b = vlib_get_buffer (vm, bi); + s = format (s, " #%03u: bi: %u\n", counter, bi); + ++counter; + 
bi = b->next_buffer; + } + while (b->flags & VLIB_BUFFER_NEXT_PRESENT); + } return s; } static clib_error_t * -show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input, - CLIB_UNUSED (vlib_cli_command_t * lmd)) +show_ip6_sv_reass (vlib_main_t *vm, unformat_input_t *input, + CLIB_UNUSED (vlib_cli_command_t *lmd)) { ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; @@ -1179,9 +1336,10 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input, clib_spinlock_lock (&rt->lock); if (details) { - pool_foreach (reass, rt->pool) { - vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass); - } + pool_foreach (reass, rt->pool) + { + vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass); + } } sum_reass_n += rt->reass_n; clib_spinlock_unlock (&rt->lock); @@ -1190,90 +1348,93 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "Current IP6 reassemblies count: %lu\n", (long unsigned) sum_reass_n); vlib_cli_output (vm, - "Maximum configured concurrent shallow virtual IP6 reassemblies per worker-thread: %lu\n", + "Maximum configured concurrent shallow virtual IP6 " + "reassemblies per worker-thread: %lu\n", (long unsigned) rm->max_reass_n); vlib_cli_output (vm, "Maximum configured amount of fragments per shallow " "virtual IP6 reassembly: %lu\n", (long unsigned) rm->max_reass_len); + vlib_cli_output ( + vm, "Maximum configured shallow virtual IP6 reassembly timeout: %lums\n", + (long unsigned) rm->timeout_ms); vlib_cli_output (vm, - "Maximum configured shallow virtual IP6 reassembly timeout: %lums\n", - (long unsigned) rm->timeout_ms); - vlib_cli_output (vm, - "Maximum configured shallow virtual IP6 reassembly expire walk interval: %lums\n", + "Maximum configured shallow virtual IP6 reassembly expire " + "walk interval: %lums\n", (long unsigned) rm->expire_walk_interval_ms); - vlib_cli_output (vm, "Buffers in use: %lu\n", - (long unsigned) sum_buffers_n); + vlib_cli_output (vm, "Buffers in use: %lu\n", (long unsigned) sum_buffers_n); return 0; } VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = { - .path = "show ip6-sv-reassembly", - .short_help = "show ip6-sv-reassembly [details]", - .function = show_ip6_sv_reass, + .path = "show ip6-sv-reassembly", + .short_help = "show ip6-sv-reassembly [details]", + .function = show_ip6_sv_reass, }; #ifndef CLIB_MARCH_VARIANT vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable) { - return ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, - enable_disable); + return ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, enable_disable); } #endif /* CLIB_MARCH_VARIANT */ -#define foreach_ip6_sv_reassembly_handoff_error \ -_(CONGESTION_DROP, "congestion drop") - +#define foreach_ip6_sv_reassembly_handoff_error \ + _ (CONGESTION_DROP, "congestion drop") typedef enum { -#define _(sym,str) IP6_SV_REASSEMBLY_HANDOFF_ERROR_##sym, +#define _(sym, str) IP6_SV_REASSEMBLY_HANDOFF_ERROR_##sym, foreach_ip6_sv_reassembly_handoff_error #undef _ IP6_SV_REASSEMBLY_HANDOFF_N_ERROR, } ip6_sv_reassembly_handoff_error_t; static char *ip6_sv_reassembly_handoff_error_strings[] = { -#define _(sym,string) string, +#define _(sym, string) string, foreach_ip6_sv_reassembly_handoff_error #undef _ }; typedef struct { - u32 next_worker_index; + u32 thread_index; } ip6_sv_reassembly_handoff_trace_t; static u8 * -format_ip6_sv_reassembly_handoff_trace (u8 * s, va_list * args) +format_ip6_sv_reassembly_handoff_trace (u8 *s, va_list *args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED 
(vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip6_sv_reassembly_handoff_trace_t *t = va_arg (*args, ip6_sv_reassembly_handoff_trace_t *); - s = - format (s, "ip6-sv-reassembly-handoff: next-worker %d", - t->next_worker_index); + s = format (s, "to thread-index: %u", t->thread_index); return s; } +struct ip6_sv_reass_hoff_args +{ + bool is_feature; + bool is_output_feature; + bool custom_context; +}; + always_inline uword ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *frame, bool is_feature, - bool custom_context) + vlib_frame_t *frame, + struct ip6_sv_reass_hoff_args a) { ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u32 n_enq, n_left_from, *from, *context; u16 thread_indices[VLIB_FRAME_SIZE], *ti; - u32 fq_index; from = vlib_frame_vector_args (frame); - if (custom_context) + if (a.custom_context) context = vlib_frame_aux_args (frame); n_left_from = frame->n_vectors; vlib_get_buffers (vm, from, bufs, n_left_from); @@ -1281,28 +1442,28 @@ ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node, b = bufs; ti = thread_indices; - fq_index = (is_feature) ? - rm->fq_feature_index : - (custom_context ? rm->fq_custom_context_index : rm->fq_index); + const u32 fq_index = a.is_output_feature ? rm->fq_output_feature_index : + a.is_feature ? rm->fq_feature_index : + a.custom_context ? rm->fq_custom_context_index : + rm->fq_index; while (n_left_from > 0) { ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index; - if (PREDICT_FALSE - ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b[0]->flags & VLIB_BUFFER_IS_TRACED))) + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b[0]->flags & VLIB_BUFFER_IS_TRACED))) { ip6_sv_reassembly_handoff_trace_t *t = vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->next_worker_index = ti[0]; + t->thread_index = ti[0]; } n_left_from -= 1; ti += 1; b += 1; } - if (custom_context) + if (a.custom_context) n_enq = vlib_buffer_enqueue_to_thread_with_aux ( vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1); else @@ -1310,18 +1471,20 @@ ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vm, node, fq_index, from, thread_indices, frame->n_vectors, 1); if (n_enq < frame->n_vectors) - vlib_node_increment_counter (vm, node->node_index, - IP6_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP, - frame->n_vectors - n_enq); + vlib_node_increment_counter ( + vm, node->node_index, IP6_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP, + frame->n_vectors - n_enq); return frame->n_vectors; } -VLIB_NODE_FN (ip6_sv_reassembly_handoff_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip6_sv_reassembly_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { return ip6_sv_reassembly_handoff_inline ( - vm, node, frame, false /* is_feature */, false /* custom_context */); + vm, node, frame, + (struct ip6_sv_reass_hoff_args){ .is_feature = false, + .is_output_feature = false, + .custom_context = false }); } VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = { @@ -1338,15 +1501,16 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = { }, }; - -VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { return ip6_sv_reassembly_handoff_inline ( - vm, node, frame, true /* 
is_feature */, false /* custom_context */); + vm, node, frame, + (struct ip6_sv_reass_hoff_args){ .is_feature = true, + .is_output_feature = false, + .custom_context = false }); } - VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = { .name = "ip6-sv-reass-feature-hoff", .vector_size = sizeof (u32), @@ -1361,11 +1525,38 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = { }, }; +VLIB_NODE_FN (ip6_sv_reassembly_output_feature_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip6_sv_reassembly_handoff_inline ( + vm, node, frame, + (struct ip6_sv_reass_hoff_args){ .is_feature = false, + .is_output_feature = true, + .custom_context = false }); +} + +VLIB_REGISTER_NODE (ip6_sv_reassembly_output_feature_handoff_node) = { + .name = "ip6-sv-reass-output-feature-hoff", + .vector_size = sizeof (u32), + .n_errors = ARRAY_LEN(ip6_sv_reassembly_handoff_error_strings), + .error_strings = ip6_sv_reassembly_handoff_error_strings, + .format_trace = format_ip6_sv_reassembly_handoff_trace, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + VLIB_NODE_FN (ip6_sv_reassembly_custom_context_handoff_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { return ip6_sv_reassembly_handoff_inline ( - vm, node, frame, false /* is_feature */, true /* custom_context */); + vm, node, frame, + (struct ip6_sv_reass_hoff_args){ .is_feature = false, + .is_output_feature = false, + .custom_context = true }); } VLIB_REGISTER_NODE (ip6_sv_reassembly_custom_context_handoff_node) = { @@ -1393,10 +1584,10 @@ ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) { if (!rm->feature_use_refcount_per_intf[sw_if_index]) { - ++rm->feature_use_refcount_per_intf[sw_if_index]; - return vnet_feature_enable_disable ("ip6-unicast", - "ip6-sv-reassembly-feature", - sw_if_index, 1, 0, 0); + int rv = vnet_feature_enable_disable ( + "ip6-unicast", "ip6-sv-reassembly-feature", sw_if_index, 1, 0, 0); + if (0 != rv) + return rv; } ++rm->feature_use_refcount_per_intf[sw_if_index]; } @@ -1404,8 +1595,35 @@ ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) { --rm->feature_use_refcount_per_intf[sw_if_index]; if (!rm->feature_use_refcount_per_intf[sw_if_index]) - return vnet_feature_enable_disable ("ip6-unicast", - "ip6-sv-reassembly-feature", + return vnet_feature_enable_disable ( + "ip6-unicast", "ip6-sv-reassembly-feature", sw_if_index, 0, 0, 0); + } + return 0; +} + +vnet_api_error_t +ip6_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) +{ + ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; + vec_validate (rm->output_feature_use_refcount_per_intf, sw_if_index); + if (is_enable) + { + if (!rm->output_feature_use_refcount_per_intf[sw_if_index]) + { + int rv = vnet_feature_enable_disable ( + "ip6-output", "ip6-sv-reassembly-output-feature", sw_if_index, 1, + 0, 0); + if (0 != rv) + return rv; + } + ++rm->output_feature_use_refcount_per_intf[sw_if_index]; + } + else + { + --rm->output_feature_use_refcount_per_intf[sw_if_index]; + if (!rm->output_feature_use_refcount_per_intf[sw_if_index]) + return vnet_feature_enable_disable ("ip6-output", + "ip6-sv-reassembly-output-feature", sw_if_index, 0, 0, 0); } return 0; @@ -1418,6 +1636,57 @@ ip6_sv_reass_custom_context_register_next_node (uword node_index) vlib_get_main (), ip6_sv_reassembly_custom_context_handoff_node.index, node_index); } + +void +ip6_sv_reass_enable_disable_extended (bool is_enable) +{ + if (is_enable) + 
++ip6_sv_reass_main.extended_refcount; + else + --ip6_sv_reass_main.extended_refcount; +} + +int +ip6_sv_reass_extended_lock (vlib_buffer_t *b, + struct ip6_sv_lock_unlock_args *a) +{ + ip6_sv_reass_per_thread_t *per_thread = + &ip6_sv_reass_main + .per_thread_data[vnet_buffer2 (b)->ip.reass.thread_index]; + + if (!vec_is_member (ip6_sv_reass_main.per_thread_data, per_thread)) + return -1; + + clib_spinlock_lock (&per_thread->lock); + if (pool_is_free_index (per_thread->pool, + vnet_buffer2 (b)->ip.reass.pool_index)) + goto fail; + + ip6_sv_reass_t *reass = pool_elt_at_index ( + per_thread->pool, vnet_buffer2 (b)->ip.reass.pool_index); + if (vnet_buffer2 (b)->ip.reass.id == reass->id) + { + *a->total_ip_payload_length = reass->total_ip_payload_length; + + *a->first_fragment_buffer_index = reass->first_fragment_clone_bi; + *a->first_fragment_total_ip_header_length = + reass->first_fragment_total_ip_header_length; + return 0; + } + +fail: + clib_spinlock_unlock (&per_thread->lock); + return -1; +} + +void +ip6_sv_reass_extended_unlock (vlib_buffer_t *b) +{ + ip6_sv_reass_per_thread_t *per_thread = + &ip6_sv_reass_main + .per_thread_data[vnet_buffer2 (b)->ip.reass.thread_index]; + clib_spinlock_unlock (&per_thread->lock); +} #endif /* diff --git a/src/vnet/ip/reass/ip6_sv_reass.h b/src/vnet/ip/reass/ip6_sv_reass.h index 7dc9df132dd..9220581ffd3 100644 --- a/src/vnet/ip/reass/ip6_sv_reass.h +++ b/src/vnet/ip/reass/ip6_sv_reass.h @@ -23,6 +23,7 @@ #ifndef __included_ip6_sv_reass_h__ #define __included_ip6_sv_reass_h__ +#include <stdbool.h> #include <vnet/api_errno.h> #include <vnet/vnet.h> @@ -42,6 +43,36 @@ vnet_api_error_t ip6_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies, vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable); +vnet_api_error_t +ip6_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, + int is_enable); + +/* + * Enable or disable extended reassembly. + * + * Extended reassembly means that fragments are cached until both first and + * last fragments are seen. Furthermore, first fragment buffer will be cloned + * and stored in reassembly context for later retrieval. + */ +void ip6_sv_reass_enable_disable_extended (bool is_enable); + +struct ip6_sv_lock_unlock_args +{ + u32 *total_ip_payload_length; + u32 *first_fragment_buffer_index; + u32 *first_fragment_total_ip_header_length; +}; + +/* + * Lock thread-level lock and fetch information from reassembly context. + * Uses vnet_buffer2 data filled by extended reassembly. + * + * Returns 0 on success, -1 otherwise. 
+ */ +int ip6_sv_reass_extended_lock (vlib_buffer_t *b, + struct ip6_sv_lock_unlock_args *a); + +void ip6_sv_reass_extended_unlock (vlib_buffer_t *b); int ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); uword ip6_sv_reass_custom_context_register_next_node (uword node_index); diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c index 48f7deadda3..6a25f6c583c 100644 --- a/src/vnet/ipsec/ipsec_input.c +++ b/src/vnet/ipsec/ipsec_input.c @@ -428,11 +428,12 @@ ipsec_ah_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, always_inline void ipsec_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, - esp_header_t *esp0, u32 thread_index, - ipsec_spd_t *spd0, vlib_buffer_t **b, - vlib_node_runtime_t *node, u64 *ipsec_bypassed, - u64 *ipsec_dropped, u64 *ipsec_matched, - u64 *ipsec_unprocessed, u16 *next) + udp_header_t *udp0, esp_header_t *esp0, + u32 thread_index, ipsec_spd_t *spd0, + vlib_buffer_t **b, vlib_node_runtime_t *node, + u64 *ipsec_bypassed, u64 *ipsec_dropped, + u64 *ipsec_matched, u64 *ipsec_unprocessed, + u16 *next) { ipsec_policy_t *p0 = NULL; @@ -445,17 +446,40 @@ ipsec_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, /* if flow cache is enabled, first search through flow cache for a * policy match for either protect, bypass or discard rules, in that - * order. if no match is found search_flow_cache is set to false (1) + * order. if no match is found search_flow_cache is set to false (0) * and we revert back to linear search */ - search_flow_cache = im->input_flow_cache_flag; + udp_or_esp: - if (esp0->spi == 0) + /* RFC5996 Section 2.23: "To tunnel IKE packets over UDP port 4500, the IKE + * header has four octets of zero prepended and the result immediately + * follows the UDP header. To tunnel ESP packets over UDP port 4500, the ESP + * header immediately follows the UDP header. Since the first four octets of + * the ESP header contain the SPI, and the SPI cannot validly be zero, it is + * always possible to distinguish ESP and IKE messages." + */ + + /* RFC3948 Section 2.1 UDP-Encapsulated ESP Header Format: + * "The UDP header is a standard [RFC0768] header, where + * - the Source Port and Destination Port MUST be the same as that used + * by IKE traffic, + * - the IPv4 UDP Checksum SHOULD be transmitted as a zero value, and + * - receivers MUST NOT depend on the UDP checksum being a zero value. + * The SPI field in the ESP header MUST NOT be a zero value." + */ + + /* + * UDP-IKEv2: UDP protocol, checksum != 0, SPI == 0 and port 500/4500 + * UDP-ESP: UDP protocol, checksum == 0, SPI != 0 and port 4500 + */ + if ((((udp0 != NULL) && (udp0->checksum == 0)) || (udp0 == NULL)) && + (esp0->spi == 0)) { - /* RFC 4303, section 2.1: The SPI value of zero (0 is reserved for - * local, implementation-specific use and MUST NOT be sent on the wire. + /* RFC4303 Section 2.1: "The SPI value of zero (0 is reserved for + * local, implementation-specific use and MUST NOT be sent on the + * wire." */ *ipsec_unprocessed += 1; next[0] = IPSEC_INPUT_NEXT_DROP; @@ -703,27 +727,30 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, udp_header_t *udp0 = NULL; udp0 = (udp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); - /* RFC5996 Section 2.23 "Port 4500 is reserved for + /* RFC5996 Section 2.23: "Port 4500 is reserved for * UDP-encapsulated ESP and IKE." 
+ * RFC5996 Section 3.1: "IKE messages use UDP ports 500 and/or 4500" */ - if (clib_host_to_net_u16 (4500) == udp0->dst_port) - { - esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t)); - - ipsec_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0, - b, node, &ipsec_bypassed, - &ipsec_dropped, &ipsec_matched, - &ipsec_unprocessed, next); - if (ipsec_bypassed > 0) - goto ipsec_bypassed; - } + if ((clib_host_to_net_u16 (500) == udp0->dst_port) || + (clib_host_to_net_u16 (4500) == udp0->dst_port)) + { + esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t)); + + ipsec_esp_packet_process (vm, im, ip0, udp0, esp0, thread_index, + spd0, b, node, &ipsec_bypassed, + &ipsec_dropped, &ipsec_matched, + &ipsec_unprocessed, next); + if (ipsec_bypassed > 0) + goto ipsec_bypassed; + } } else if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP)) { esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); - ipsec_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0, b, - node, &ipsec_bypassed, &ipsec_dropped, - &ipsec_matched, &ipsec_unprocessed, next); + ipsec_esp_packet_process (vm, im, ip0, NULL, esp0, thread_index, + spd0, b, node, &ipsec_bypassed, + &ipsec_dropped, &ipsec_matched, + &ipsec_unprocessed, next); if (ipsec_bypassed > 0) goto ipsec_bypassed; } diff --git a/src/vnet/pg/cli.c b/src/vnet/pg/cli.c index 3f2de2604b2..6cd9cbd3be5 100644 --- a/src/vnet/pg/cli.c +++ b/src/vnet/pg/cli.c @@ -672,7 +672,7 @@ create_pg_if_cmd_fn (vlib_main_t * vm, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "interface pg%u", &if_id)) + if (unformat (line_input, "pg%u", &if_id)) ; else if (unformat (line_input, "coalesce-enabled")) coalesce_enabled = 1; @@ -709,13 +709,60 @@ done: } VLIB_CLI_COMMAND (create_pg_if_cmd, static) = { - .path = "create packet-generator", + .path = "create packet-generator interface", .short_help = "create packet-generator interface <interface name>" " [gso-enabled gso-size <size> [coalesce-enabled]]" " [mode <ethernet | ip4 | ip6>]", .function = create_pg_if_cmd_fn, }; +static clib_error_t * +delete_pg_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + unformat_input_t _line_input, *line_input = &_line_input; + u32 sw_if_index = ~0; + int rv = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "Missing <interface>"); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else + { + return clib_error_create ("unknown input `%U'", + format_unformat_error, input); + } + } + unformat_free (line_input); + + if (sw_if_index == ~0) + return clib_error_return (0, + "please specify interface name or sw_if_index"); + + rv = pg_interface_delete (sw_if_index); + if (rv == VNET_API_ERROR_INVALID_SW_IF_INDEX) + return clib_error_return (0, "not a pg interface"); + else if (rv != 0) + return clib_error_return (0, "error on deleting pg interface"); + + return 0; +} + +VLIB_CLI_COMMAND (delete_pg_if_cmd, static) = { + .path = "delete packet-generator interface", + .short_help = "delete packet-generator interface {<interface name> | " + "sw_if_index <sw_idx>}", + .function = delete_pg_if_cmd_fn, +}; + /* Dummy init function so that we can be linked in. 
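With the corrected command path and the new delete command, a typical debug CLI sequence would look like the following; pg0 and the sw_if_index value are placeholders:

vpp# create packet-generator interface pg0 mode ethernet
vpp# delete packet-generator interface pg0
vpp# delete packet-generator interface sw_if_index 2

The same removal is also reachable programmatically through the new pg_delete_interface binary API message and pg_interface_delete(), shown further below.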
*/ static clib_error_t * pg_cli_init (vlib_main_t * vm) diff --git a/src/vnet/pg/pg.api b/src/vnet/pg/pg.api index 4f531fb1f5e..7c6fdcc97cf 100644 --- a/src/vnet/pg/pg.api +++ b/src/vnet/pg/pg.api @@ -18,7 +18,7 @@ This file defines packet-generator interface APIs. */ -option version = "2.0.0"; +option version = "2.1.0"; import "vnet/interface_types.api"; @@ -75,6 +75,18 @@ define pg_create_interface_v2_reply vl_api_interface_index_t sw_if_index; }; +/** \brief PacketGenerator delete interface request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface index +*/ +autoreply define pg_delete_interface +{ + u32 client_index; + u32 context; + vl_api_interface_index_t sw_if_index; +}; + /** \brief PacketGenerator interface enable/disable packet coalesce @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/pg/pg.h b/src/vnet/pg/pg.h index bede747428c..5e99d9af9f6 100644 --- a/src/vnet/pg/pg.h +++ b/src/vnet/pg/pg.h @@ -184,7 +184,11 @@ typedef struct pg_stream_t always_inline void pg_buffer_index_free (pg_buffer_index_t * bi) { + vlib_main_t *vm = vlib_get_main (); + word n_alloc; vec_free (bi->edits); + n_alloc = clib_fifo_elts (bi->buffer_fifo); + vlib_buffer_free (vm, bi->buffer_fifo, n_alloc); clib_fifo_free (bi->buffer_fifo); } @@ -396,6 +400,8 @@ u32 pg_interface_add_or_get (pg_main_t *pg, u32 stream_index, u8 gso_enabled, u32 gso_size, u8 coalesce_enabled, pg_interface_mode_t mode); +int pg_interface_delete (u32 sw_if_index); + always_inline pg_node_t * pg_get_node (uword node_index) { diff --git a/src/vnet/pg/pg_api.c b/src/vnet/pg/pg_api.c index e5d0a08a527..57fb40cdda4 100644 --- a/src/vnet/pg/pg_api.c +++ b/src/vnet/pg/pg_api.c @@ -63,18 +63,36 @@ vl_api_pg_create_interface_v2_t_handler (vl_api_pg_create_interface_v2_t *mp) } static void +vl_api_pg_delete_interface_t_handler (vl_api_pg_delete_interface_t *mp) +{ + vl_api_pg_delete_interface_reply_t *rmp; + pg_main_t *pg = &pg_main; + u32 sw_if_index = ~0; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + sw_if_index = ntohl (mp->sw_if_index); + + rv = pg_interface_delete (sw_if_index); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_PG_DELETE_INTERFACE_REPLY); +} + +static void vl_api_pg_interface_enable_disable_coalesce_t_handler (vl_api_pg_interface_enable_disable_coalesce_t * mp) { vl_api_pg_interface_enable_disable_coalesce_reply_t *rmp; + pg_main_t *pg = &pg_main; + vnet_main_t *vnm = vnet_get_main (); int rv = 0; VALIDATE_SW_IF_INDEX (mp); u32 sw_if_index = ntohl (mp->sw_if_index); - pg_main_t *pg = &pg_main; - vnet_main_t *vnm = vnet_get_main (); vnet_hw_interface_t *hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index); diff --git a/src/vnet/pg/stream.c b/src/vnet/pg/stream.c index cf3d37d5e9e..440e285031a 100644 --- a/src/vnet/pg/stream.c +++ b/src/vnet/pg/stream.c @@ -325,6 +325,50 @@ pg_interface_add_or_get (pg_main_t *pg, u32 if_id, u8 gso_enabled, return i; } +int +pg_interface_delete (u32 sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + pg_main_t *pm = &pg_main; + pg_interface_t *pi; + vnet_hw_interface_t *hw; + uword *p; + + hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index); + if (hw == NULL || pg_dev_class.index != hw->dev_class_index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + pi = pool_elt_at_index (pm->interfaces, hw->dev_instance); + + vnet_hw_interface_set_flags (vnm, 
pi->hw_if_index, 0); + vnet_sw_interface_set_flags (vnm, pi->sw_if_index, 0); + + if (pi->mode == PG_MODE_ETHERNET) + ethernet_delete_interface (vnm, pi->hw_if_index); + else + vnet_delete_hw_interface (vnm, pi->hw_if_index); + + pi->hw_if_index = ~0; + + if (pi->coalesce_enabled) + pg_interface_enable_disable_coalesce (pi, 0, ~0); + + if (vlib_num_workers ()) + { + clib_mem_free ((void *) pi->lockp); + pi->lockp = 0; + } + + vec_del1 (pm->if_index_by_sw_if_index, sw_if_index); + p = hash_get (pm->if_index_by_if_id, pi->id); + if (p) + hash_unset (pm->if_index_by_if_id, pi->id); + + clib_memset (pi, 0, sizeof (*pi)); + pool_put (pm->interfaces, pi); + return 0; +} + static void do_edit (pg_stream_t * stream, pg_edit_group_t * g, pg_edit_t * e, uword want_commit) @@ -571,18 +615,12 @@ void pg_stream_del (pg_main_t * pg, uword index) { pg_stream_t *s; - pg_buffer_index_t *bi; s = pool_elt_at_index (pg->streams, index); pg_stream_enable_disable (pg, s, /* want_enabled */ 0); hash_unset_mem (pg->stream_index_by_name, s->name); - vec_foreach (bi, s->buffer_indices) - { - clib_fifo_free (bi->buffer_fifo); - } - pg_stream_free (s); pool_put (pg->streams, s); } diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c index 7c63ada2774..5c52adb853f 100644 --- a/src/vnet/session/application.c +++ b/src/vnet/session/application.c @@ -175,8 +175,8 @@ app_listener_alloc_and_init (application_t * app, { session_type_t local_st; - local_st = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE, - sep->is_ip4); + local_st = + session_type_from_proto_and_ip (TRANSPORT_PROTO_CT, sep->is_ip4); ls = listen_session_alloc (0, local_st); ls->app_wrk_index = sep->app_wrk_index; lh = session_handle (ls); @@ -1430,7 +1430,7 @@ vnet_connect (vnet_connect_args_t *a) session_error_t rv; a->sep_ext.original_tp = a->sep_ext.transport_proto; - a->sep_ext.transport_proto = TRANSPORT_PROTO_NONE; + a->sep_ext.transport_proto = TRANSPORT_PROTO_CT; rv = app_worker_connect_session (client_wrk, &a->sep_ext, &a->sh); a->sep_ext.transport_proto = a->sep_ext.original_tp; if (!rv || rv != SESSION_E_LOCAL_CONNECT) diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index f175e4a58c6..d5656ff8341 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -396,6 +396,7 @@ typedef struct session_accepted_msg_ transport_endpoint_t lcl; transport_endpoint_t rmt; u8 flags; + /* TODO(fcoras) maybe refactor to pass as transport attr */ u32 original_dst_ip4; u16 original_dst_port; } __clib_packed session_accepted_msg_t; @@ -909,17 +910,63 @@ typedef struct app_sapi_msg_ } __clib_packed app_sapi_msg_t; static inline void -session_endpoint_alloc_ext_cfg (session_endpoint_cfg_t *sep_ext, - transport_endpt_ext_cfg_type_t type) +session_endpoint_init_ext_cfgs (session_endpoint_cfg_t *sep_ext, u32 len) { - transport_endpt_ext_cfg_t *cfg; - u32 cfg_size; + sep_ext->ext_cfgs.len = len; + sep_ext->ext_cfgs.data = clib_mem_alloc (len); + clib_memset (sep_ext->ext_cfgs.data, 0, len); +} + +static inline transport_endpt_ext_cfg_t * +session_endpoint_add_ext_cfg (session_endpoint_cfg_t *sep_ext, + transport_endpt_ext_cfg_type_t type, u16 len) +{ + transport_endpt_ext_cfg_t *ext_cfg; + + if (!sep_ext->ext_cfgs.len) + session_endpoint_init_ext_cfgs (sep_ext, + TRANSPORT_ENDPT_EXT_CFGS_CHUNK_SIZE); + + ASSERT (sep_ext->ext_cfgs.tail_offset + len + + TRANSPORT_ENDPT_EXT_CFG_HEADER_SIZE < + sep_ext->ext_cfgs.len); + ext_cfg = 
(transport_endpt_ext_cfg_t *) (sep_ext->ext_cfgs.data + + sep_ext->ext_cfgs.tail_offset); + ext_cfg->len = len; + ext_cfg->type = type; + sep_ext->ext_cfgs.tail_offset += len + TRANSPORT_ENDPT_EXT_CFG_HEADER_SIZE; + return ext_cfg; +} + +static inline transport_endpt_ext_cfg_t * +session_endpoint_get_ext_cfg (session_endpoint_cfg_t *sep_ext, + transport_endpt_ext_cfg_type_t type) +{ + transport_endpt_ext_cfg_t *ext_cfg; + + if (!sep_ext->ext_cfgs.len) + return 0; + + ext_cfg = (transport_endpt_ext_cfg_t *) sep_ext->ext_cfgs.data; + while ((u8 *) ext_cfg < + sep_ext->ext_cfgs.data + sep_ext->ext_cfgs.tail_offset) + { + if (ext_cfg->type == type) + return ext_cfg; + ext_cfg = (transport_endpt_ext_cfg_t *) (ext_cfg->data + ext_cfg->len); + } + return 0; +} - cfg_size = sizeof (transport_endpt_ext_cfg_t); - cfg = clib_mem_alloc (cfg_size); - clib_memset (cfg, 0, cfg_size); - cfg->type = type; - sep_ext->ext_cfg = cfg; +static inline void +session_endpoint_free_ext_cfgs (session_endpoint_cfg_t *sep_ext) +{ + if (!sep_ext->ext_cfgs.len) + return; + clib_mem_free (sep_ext->ext_cfgs.data); + sep_ext->ext_cfgs.len = 0; + sep_ext->ext_cfgs.tail_offset = 0; + sep_ext->ext_cfgs.data = 0; } #endif /* __included_uri_h__ */ diff --git a/src/vnet/session/application_local.c b/src/vnet/session/application_local.c index 3ac2ba4cfbc..afa39f6ded3 100644 --- a/src/vnet/session/application_local.c +++ b/src/vnet/session/application_local.c @@ -710,7 +710,7 @@ ct_accept_one (u32 thread_index, u32 ho_index) sct->c_is_ip4 = cct->c_is_ip4; clib_memcpy (&sct->c_lcl_ip, &cct->c_rmt_ip, sizeof (cct->c_rmt_ip)); sct->client_wrk = cct->client_wrk; - sct->c_proto = TRANSPORT_PROTO_NONE; + sct->c_proto = TRANSPORT_PROTO_CT; sct->client_opaque = cct->client_opaque; sct->actual_tp = cct->actual_tp; @@ -723,8 +723,8 @@ ct_accept_one (u32 thread_index, u32 ho_index) */ ss = session_alloc (thread_index); ll = listen_session_get (ll_index); - ss->session_type = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE, - sct->c_is_ip4); + ss->session_type = + session_type_from_proto_and_ip (TRANSPORT_PROTO_CT, sct->c_is_ip4); ss->connection_index = sct->c_c_index; ss->listener_handle = listen_session_get_handle (ll); session_set_state (ss, SESSION_STATE_CREATED); @@ -889,7 +889,7 @@ ct_connect (app_worker_t *client_wrk, session_t *ll, ho->client_opaque = sep->opaque; ho->client_wrk = client_wrk->wrk_index; ho->peer_index = ll->session_index; - ho->c_proto = TRANSPORT_PROTO_NONE; + ho->c_proto = TRANSPORT_PROTO_CT; ho->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; clib_memcpy (&ho->c_rmt_ip, &sep->ip, sizeof (sep->ip)); ho->flags |= CT_CONN_F_CLIENT; @@ -1425,9 +1425,9 @@ ct_session_tx (session_t * s) static clib_error_t * ct_transport_init (vlib_main_t * vm) { - transport_register_protocol (TRANSPORT_PROTO_NONE, &cut_thru_proto, + transport_register_protocol (TRANSPORT_PROTO_CT, &cut_thru_proto, FIB_PROTOCOL_IP4, ~0); - transport_register_protocol (TRANSPORT_PROTO_NONE, &cut_thru_proto, + transport_register_protocol (TRANSPORT_PROTO_CT, &cut_thru_proto, FIB_PROTOCOL_IP6, ~0); return 0; } diff --git a/src/vnet/session/application_namespace.c b/src/vnet/session/application_namespace.c index 8b06331d803..f5b70a9c4cf 100644 --- a/src/vnet/session/application_namespace.c +++ b/src/vnet/session/application_namespace.c @@ -52,6 +52,14 @@ app_namespace_get (u32 index) } app_namespace_t * +app_namespace_get_if_valid (u32 index) +{ + if (pool_is_free_index (app_namespace_pool, index)) + return 0; + return pool_elt_at_index (app_namespace_pool, 
index); +} + +app_namespace_t * app_namespace_get_from_id (const u8 *ns_id) { u32 index = app_namespace_index_from_id (ns_id); @@ -127,7 +135,7 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a) st = session_table_alloc (); session_table_init (st, FIB_PROTOCOL_MAX); st->is_local = 1; - st->appns_index = app_namespace_index (app_ns); + vec_add1 (st->appns_index, app_namespace_index (app_ns)); app_ns->local_table_index = session_table_index (st); if (a->sock_name) { @@ -173,8 +181,10 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a) if (app_ns->sock_name) vec_free (app_ns->sock_name); - session_lookup_table_cleanup (FIB_PROTOCOL_IP4, app_ns->ip4_fib_index); - session_lookup_table_cleanup (FIB_PROTOCOL_IP6, app_ns->ip6_fib_index); + session_lookup_table_cleanup (FIB_PROTOCOL_IP4, app_ns->ip4_fib_index, + ns_index); + session_lookup_table_cleanup (FIB_PROTOCOL_IP6, app_ns->ip6_fib_index, + ns_index); app_namespace_free (app_ns); } diff --git a/src/vnet/session/application_namespace.h b/src/vnet/session/application_namespace.h index b441e3c48f2..63ff7cc58a2 100644 --- a/src/vnet/session/application_namespace.h +++ b/src/vnet/session/application_namespace.h @@ -77,6 +77,7 @@ typedef struct _vnet_app_namespace_add_del_args app_namespace_t *app_namespace_alloc (const u8 *ns_id); app_namespace_t *app_namespace_get (u32 index); +app_namespace_t *app_namespace_get_if_valid (u32 index); app_namespace_t *app_namespace_get_from_id (const u8 *ns_id); u32 app_namespace_index (app_namespace_t * app_ns); const u8 *app_namespace_id (app_namespace_t * app_ns); diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api index f678f8bd80f..2805546db52 100644 --- a/src/vnet/session/session.api +++ b/src/vnet/session/session.api @@ -13,7 +13,7 @@ * limitations under the License. */ -option version = "4.0.2"; +option version = "4.0.3"; import "vnet/interface_types.api"; import "vnet/ip/ip_types.api"; @@ -411,6 +411,7 @@ autoreply define session_rule_add_del { */ define session_rules_dump { + option deprecated; u32 client_index; u32 context; }; @@ -434,6 +435,7 @@ define session_rules_dump */ define session_rules_details { + option deprecated; u32 context; vl_api_transport_proto_t transport_proto; vl_api_prefix_t lcl; @@ -446,6 +448,49 @@ define session_rules_details string tag[64]; }; +/** \brief Dump session rules + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + */ +define session_rules_v2_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief Session rules details + @param context - sender context, to match reply w/ request + @param transport_proto - transport protocol + @param is_ip4 - flag to indicate if ip addresses are ip4 or 6 + @param lcl_ip - local ip + @param lcl_plen - local prefix length + @param rmt_ip - remote ip + @param rmt_ple - remote prefix length + @param lcl_port - local port + @param rmt_port - remote port + @param action_index - the only action defined now is forward to + application with index action_index + @param scope - enum that indicates scope of the rule: global or local. 
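Compared with the old single ext_cfg pointer, the session_endpoint_add_ext_cfg() / session_endpoint_get_ext_cfg() / session_endpoint_free_ext_cfgs() helpers introduced in application_interface.h above let several typed configs share one data chunk. A connect-side sketch, assuming application_interface.h is included; the ckpair_index value is a made-up example:

session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
transport_endpt_ext_cfg_t *cfg;

cfg = session_endpoint_add_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO,
                                    sizeof (transport_endpt_crypto_cfg_t));
cfg->crypto.ckpair_index = 0; /* example certificate/key pair index */

/* consumers look configs up by type ... */
cfg = session_endpoint_get_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO);

/* ... and the owner releases the whole chunk once the args are consumed */
session_endpoint_free_ext_cfgs (&sep);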
+ If 0, default is global, 1 is global 2 is local, 3 is both + @param tag - tag + @param count - count of the number of appns_index + @param appns_index - application namespaces where rule is to be applied to + */ +define session_rules_v2_details +{ + u32 context; + vl_api_transport_proto_t transport_proto; + vl_api_prefix_t lcl; + vl_api_prefix_t rmt; + u16 lcl_port; + u16 rmt_port; + u32 action_index; + vl_api_session_rule_scope_t scope; + string tag[64]; + u32 count; + u32 appns_index[count]; +}; + autoreply define session_sdl_add_del { option deprecated; u32 client_index; @@ -500,6 +545,7 @@ define session_sdl_details */ define session_sdl_v2_dump { + option deprecated; u32 client_index; u32 context; }; @@ -514,6 +560,7 @@ define session_sdl_v2_dump */ define session_sdl_v2_details { + option deprecated; u32 context; vl_api_prefix_t rmt; u32 action_index; @@ -521,6 +568,35 @@ define session_sdl_v2_details string tag[64]; }; +/** \brief Dump session sdl v3 + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + */ +define session_sdl_v3_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief Session sdl details v3 + @param context - sender context, to match reply w/ request + @param rmt - remote prefix + @param action_index - the only action defined now is forward to + application with index action_index + @param tag - tag + @param count - count of the number of appns_index + @param appns_index - application namespaces where rule is to be applied to + */ +define session_sdl_v3_details +{ + u32 context; + vl_api_prefix_t rmt; + u32 action_index; + string tag[64]; + u32 count; + u32 appns_index[count]; +}; + /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index f0043f90c9a..c6df47b412b 100644 --- a/src/vnet/session/session_api.c +++ b/src/vnet/session/session_api.c @@ -1181,8 +1181,8 @@ vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp) ip_prefix_decode (&mp->lcl, &table_args->lcl); ip_prefix_decode (&mp->rmt, &table_args->rmt); - table_args->lcl_port = mp->lcl_port; - table_args->rmt_port = mp->rmt_port; + table_args->lcl_port = clib_net_to_host_u16 (mp->lcl_port); + table_args->rmt_port = clib_net_to_host_u16 (mp->rmt_port); table_args->action_index = clib_net_to_host_u32 (mp->action_index); table_args->is_add = mp->is_add; mp->tag[sizeof (mp->tag) - 1] = 0; @@ -1232,8 +1232,8 @@ send_session_rule_details4 (mma_rule_16_t * rule, u8 is_local, ip_prefix_encode (&lcl, &rmp->lcl); ip_prefix_encode (&rmt, &rmp->rmt); - rmp->lcl_port = match->lcl_port; - rmp->rmt_port = match->rmt_port; + rmp->lcl_port = clib_host_to_net_u16 (match->lcl_port); + rmp->rmt_port = clib_host_to_net_u16 (match->rmt_port); rmp->action_index = clib_host_to_net_u32 (rule->action_index); rmp->scope = is_local ? SESSION_RULE_SCOPE_API_LOCAL : SESSION_RULE_SCOPE_API_GLOBAL; @@ -1276,8 +1276,8 @@ send_session_rule_details6 (mma_rule_40_t * rule, u8 is_local, ip_prefix_encode (&lcl, &rmp->lcl); ip_prefix_encode (&rmt, &rmp->rmt); - rmp->lcl_port = match->lcl_port; - rmp->rmt_port = match->rmt_port; + rmp->lcl_port = clib_host_to_net_u16 (match->lcl_port); + rmp->rmt_port = clib_host_to_net_u16 (match->rmt_port); rmp->action_index = clib_host_to_net_u32 (rule->action_index); rmp->scope = is_local ? 
SESSION_RULE_SCOPE_API_LOCAL : SESSION_RULE_SCOPE_API_GLOBAL; @@ -1333,6 +1333,7 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp) vl_api_registration_t *reg; session_table_t *st; u8 tp; + u32 appns_index; reg = vl_api_client_index_to_registration (mp->client_index); if (!reg) @@ -1344,8 +1345,183 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp) { session_rules_table_t *srt = srtg_handle_to_srt (st->srtg_handle, tp); + appns_index = *vec_elt_at_index ( + st->appns_index, + vec_len (st->appns_index) - 1); send_session_rules_table_details ( srt, st->active_fib_proto, tp, st->is_local, + appns_index, reg, mp->context); + } + })); +} + +/* + * session_rules_v2_dunp handler + */ +static void +send_session_rule_v2_details4 (mma_rule_16_t *rule, u8 is_local, + u8 transport_proto, u32 *appns_index, u8 *tag, + vl_api_registration_t *reg, u32 context) +{ + vl_api_session_rules_v2_details_t *rmp = 0; + session_mask_or_match_4_t *match = + (session_mask_or_match_4_t *) &rule->match; + session_mask_or_match_4_t *mask = (session_mask_or_match_4_t *) &rule->mask; + fib_prefix_t lcl, rmt; + u32 i, appns_index_count = vec_len (appns_index); + + rmp = vl_msg_api_alloc (sizeof (*rmp) + + appns_index_count * sizeof (*appns_index)); + if (!rmp) + return; + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (REPLY_MSG_ID_BASE + VL_API_SESSION_RULES_V2_DETAILS); + rmp->context = context; + + rmp->count = clib_host_to_net_u32 (appns_index_count); + vec_foreach_index (i, appns_index) + { + u32 index = *vec_elt_at_index (appns_index, i); + rmp->appns_index[i] = clib_host_to_net_u32 (index); + } + + clib_memset (&lcl, 0, sizeof (lcl)); + clib_memset (&rmt, 0, sizeof (rmt)); + ip_set (&lcl.fp_addr, &match->lcl_ip, 1); + ip_set (&rmt.fp_addr, &match->rmt_ip, 1); + lcl.fp_len = ip4_mask_to_preflen (&mask->lcl_ip); + rmt.fp_len = ip4_mask_to_preflen (&mask->rmt_ip); + lcl.fp_proto = FIB_PROTOCOL_IP4; + rmt.fp_proto = FIB_PROTOCOL_IP4; + + ip_prefix_encode (&lcl, &rmp->lcl); + ip_prefix_encode (&rmt, &rmp->rmt); + rmp->lcl_port = clib_host_to_net_u16 (match->lcl_port); + rmp->rmt_port = clib_host_to_net_u16 (match->rmt_port); + rmp->action_index = clib_host_to_net_u32 (rule->action_index); + rmp->scope = + is_local ? 
SESSION_RULE_SCOPE_API_LOCAL : SESSION_RULE_SCOPE_API_GLOBAL; + rmp->transport_proto = api_session_transport_proto_encode (transport_proto); + if (tag) + { + clib_memcpy_fast (rmp->tag, tag, vec_len (tag)); + rmp->tag[vec_len (tag)] = 0; + } + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +send_session_rule_v2_details6 (mma_rule_40_t *rule, u8 is_local, + u8 transport_proto, u32 *appns_index, u8 *tag, + vl_api_registration_t *reg, u32 context) +{ + vl_api_session_rules_v2_details_t *rmp = 0; + session_mask_or_match_6_t *match = + (session_mask_or_match_6_t *) &rule->match; + session_mask_or_match_6_t *mask = (session_mask_or_match_6_t *) &rule->mask; + fib_prefix_t lcl, rmt; + u32 i, appns_index_count = vec_len (appns_index); + + rmp = vl_msg_api_alloc (sizeof (*rmp) + + appns_index_count * sizeof (*appns_index)); + if (!rmp) + return; + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (REPLY_MSG_ID_BASE + VL_API_SESSION_RULES_V2_DETAILS); + rmp->context = context; + + rmp->count = clib_host_to_net_u32 (appns_index_count); + vec_foreach_index (i, appns_index) + { + u32 index = *vec_elt_at_index (appns_index, i); + rmp->appns_index[i] = clib_host_to_net_u32 (index); + } + + clib_memset (&lcl, 0, sizeof (lcl)); + clib_memset (&rmt, 0, sizeof (rmt)); + ip_set (&lcl.fp_addr, &match->lcl_ip, 0); + ip_set (&rmt.fp_addr, &match->rmt_ip, 0); + lcl.fp_len = ip6_mask_to_preflen (&mask->lcl_ip); + rmt.fp_len = ip6_mask_to_preflen (&mask->rmt_ip); + lcl.fp_proto = FIB_PROTOCOL_IP6; + rmt.fp_proto = FIB_PROTOCOL_IP6; + + ip_prefix_encode (&lcl, &rmp->lcl); + ip_prefix_encode (&rmt, &rmp->rmt); + rmp->lcl_port = clib_host_to_net_u16 (match->lcl_port); + rmp->rmt_port = clib_host_to_net_u16 (match->rmt_port); + rmp->action_index = clib_host_to_net_u32 (rule->action_index); + rmp->scope = + is_local ? 
SESSION_RULE_SCOPE_API_LOCAL : SESSION_RULE_SCOPE_API_GLOBAL; + rmp->transport_proto = api_session_transport_proto_encode (transport_proto); + if (tag) + { + clib_memcpy_fast (rmp->tag, tag, vec_len (tag)); + rmp->tag[vec_len (tag)] = 0; + } + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +send_session_rules_table_v2_details (session_rules_table_t *srt, u8 fib_proto, + u8 tp, u8 is_local, u32 *appns_index, + vl_api_registration_t *reg, u32 context) +{ + mma_rule_16_t *rule16; + mma_rule_40_t *rule40; + mma_rules_table_16_t *srt16; + mma_rules_table_40_t *srt40; + u32 ri; + + if (is_local || fib_proto == FIB_PROTOCOL_IP4) + { + u8 *tag = 0; + srt16 = &srt->session_rules_tables_16; + pool_foreach (rule16, srt16->rules) + { + ri = mma_rules_table_rule_index_16 (srt16, rule16); + tag = session_rules_table_rule_tag (srt, ri, 1); + send_session_rule_v2_details4 (rule16, is_local, tp, appns_index, tag, + reg, context); + } + } + if (is_local || fib_proto == FIB_PROTOCOL_IP6) + { + u8 *tag = 0; + srt40 = &srt->session_rules_tables_40; + pool_foreach (rule40, srt40->rules) + { + ri = mma_rules_table_rule_index_40 (srt40, rule40); + tag = session_rules_table_rule_tag (srt, ri, 1); + send_session_rule_v2_details6 (rule40, is_local, tp, appns_index, tag, + reg, context); + } + } +} + +static void +vl_api_session_rules_v2_dump_t_handler (vl_api_session_rules_dump_t *mp) +{ + vl_api_registration_t *reg; + session_table_t *st; + u8 tp; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + session_table_foreach (st, ({ + if (st->srtg_handle != SESSION_SRTG_HANDLE_INVALID) + for (tp = 0; tp < TRANSPORT_N_PROTOS; tp++) + { + session_rules_table_t *srt = + srtg_handle_to_srt (st->srtg_handle, tp); + send_session_rules_table_v2_details ( + srt, st->active_fib_proto, tp, st->is_local, st->appns_index, reg, mp->context); } })); @@ -1355,7 +1531,7 @@ typedef struct session_sdl_table_walk_ctx_ { vl_api_registration_t *reg; u32 mp_context; - u32 appns_index; + u32 *appns_index; } session_sdl_table_walk_ctx; static void @@ -1365,7 +1541,8 @@ send_session_sdl_v2_details (u32 fei, ip46_address_t *rmt_ip, u16 fp_len, { session_sdl_table_walk_ctx *ctx = args; vl_api_registration_t *reg = ctx->reg; - u32 appns_index = ctx->appns_index; + u32 appns_index = + *vec_elt_at_index (ctx->appns_index, vec_len (ctx->appns_index) - 1); u32 context = ctx->mp_context; vl_api_session_sdl_v2_details_t *rmp = 0; fib_prefix_t rmt; @@ -1414,7 +1591,6 @@ vl_api_session_sdl_v2_dump_t_handler (vl_api_session_sdl_v2_dump_t *mp) if (st->srtg_handle != SESSION_SRTG_HANDLE_INVALID) { ctx.appns_index = st->appns_index; - if (st->active_fib_proto == FIB_PROTOCOL_IP4) session_sdl_table_walk4 (st->srtg_handle, send_session_sdl_v2_details, &ctx); @@ -1426,12 +1602,90 @@ vl_api_session_sdl_v2_dump_t_handler (vl_api_session_sdl_v2_dump_t *mp) } static void +send_session_sdl_v3_details (u32 fei, ip46_address_t *rmt_ip, u16 fp_len, + u32 action_index, u32 fp_proto, u8 *tag, + void *args) +{ + session_sdl_table_walk_ctx *ctx = args; + vl_api_registration_t *reg = ctx->reg; + u32 context = ctx->mp_context; + vl_api_session_sdl_v3_details_t *rmp = 0; + fib_prefix_t rmt; + u32 appns_index_count, appns_index, i; + + appns_index_count = vec_len (ctx->appns_index); + rmp = vl_msg_api_alloc (sizeof (*rmp) + + appns_index_count * sizeof (appns_index)); + if (!rmp) + return; + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SESSION_SDL_V3_DETAILS); + rmp->context = context; 
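/* Client-side note (illustrative, not part of the patch): both the new
 * session_rules_v2_details and session_sdl_v3_details replies end with a
 * variable-length appns_index[] array sized by 'count', and every element
 * is encoded in network byte order, so a consumer decodes roughly as:
 *
 *   u32 i, n = clib_net_to_host_u32 (mp->count);
 *   for (i = 0; i < n; i++)
 *     handle_appns (clib_net_to_host_u32 (mp->appns_index[i]));
 *
 * where handle_appns() stands in for whatever the client does with each
 * namespace index. */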
+ + rmp->count = clib_host_to_net_u32 (appns_index_count); + vec_foreach_index (i, ctx->appns_index) + { + appns_index = *vec_elt_at_index (ctx->appns_index, i); + rmp->appns_index[i] = clib_host_to_net_u32 (appns_index); + } + + clib_memset (&rmt, 0, sizeof (rmt)); + if (fp_proto == FIB_PROTOCOL_IP4) + ip_set (&rmt.fp_addr, &rmt_ip->ip4, 1); + else + ip_set (&rmt.fp_addr, &rmt_ip->ip6, 0); + rmt.fp_len = fp_len; + rmt.fp_proto = fp_proto, + + ip_prefix_encode (&rmt, &rmp->rmt); + rmp->action_index = clib_host_to_net_u32 (action_index); + + if (tag) + { + clib_memcpy_fast (rmp->tag, tag, vec_len (tag)); + rmp->tag[vec_len (tag)] = 0; + } + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +vl_api_session_sdl_v3_dump_t_handler (vl_api_session_sdl_v2_dump_t *mp) +{ + vl_api_registration_t *reg; + session_table_t *st; + session_sdl_table_walk_ctx ctx; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + ctx.reg = reg; + ctx.mp_context = mp->context; + + session_table_foreach ( + st, ({ + if (st->srtg_handle != SESSION_SRTG_HANDLE_INVALID) + { + ctx.appns_index = st->appns_index; + if (st->active_fib_proto == FIB_PROTOCOL_IP4) + session_sdl_table_walk4 (st->srtg_handle, + send_session_sdl_v3_details, &ctx); + else + session_sdl_table_walk6 (st->srtg_handle, + send_session_sdl_v3_details, &ctx); + } + })); +} + +static void send_session_sdl_details (u32 fei, ip46_address_t *lcl_ip, u16 fp_len, u32 action_index, u32 fp_proto, u8 *tag, void *args) { session_sdl_table_walk_ctx *ctx = args; vl_api_registration_t *reg = ctx->reg; - u32 appns_index = ctx->appns_index; + u32 appns_index = + *vec_elt_at_index (ctx->appns_index, vec_len (ctx->appns_index) - 1); u32 context = ctx->mp_context; vl_api_session_sdl_details_t *rmp = 0; fib_prefix_t lcl; @@ -1480,7 +1734,6 @@ vl_api_session_sdl_dump_t_handler (vl_api_session_sdl_dump_t *mp) if (st->srtg_handle != SESSION_SRTG_HANDLE_INVALID) { ctx.appns_index = st->appns_index; - if (st->active_fib_proto == FIB_PROTOCOL_IP4) session_sdl_table_walk4 (st->srtg_handle, send_session_sdl_details, &ctx); @@ -2209,6 +2462,10 @@ session_api_hookup (vlib_main_t *vm) am, REPLY_MSG_ID_BASE + VL_API_SESSION_SDL_V2_DUMP, 1); vl_api_set_msg_thread_safe ( am, REPLY_MSG_ID_BASE + VL_API_SESSION_SDL_V2_DETAILS, 1); + vl_api_set_msg_thread_safe ( + am, REPLY_MSG_ID_BASE + VL_API_SESSION_SDL_V3_DUMP, 1); + vl_api_set_msg_thread_safe ( + am, REPLY_MSG_ID_BASE + VL_API_SESSION_SDL_V3_DETAILS, 1); return 0; } diff --git a/src/vnet/session/session_input.c b/src/vnet/session/session_input.c index 73b777127fd..01be281d4f7 100644 --- a/src/vnet/session/session_input.c +++ b/src/vnet/session/session_input.c @@ -217,9 +217,7 @@ app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index, s->connection_index, s->thread_index); session_free (s); /* Notify app that it has data on the new session */ - s = session_get_from_handle (evt->as_u64[1]); - session_send_io_evt_to_thread (s->rx_fifo, - SESSION_IO_EVT_BUILTIN_RX); + session_program_rx_io_evt (evt->as_u64[1]); break; case SESSION_CTRL_EVT_TRANSPORT_CLOSED: s = session_get (evt->session_index, thread_index); @@ -257,6 +255,9 @@ app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index, app->cb_fns.del_segment_callback (app_wrk->wrk_index, evt->as_u64[1]); break; + case SESSION_CTRL_EVT_RPC: + ((void (*) (session_t * s)) (evt->rpc_args.fp)) (evt->rpc_args.arg); + break; default: clib_warning ("unexpected event: %u", evt->event_type); ASSERT (0); diff --git 
a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c index 0d580ba35c6..3a99c0b5aaf 100644 --- a/src/vnet/session/session_lookup.c +++ b/src/vnet/session/session_lookup.c @@ -1383,7 +1383,7 @@ session_lookup_connection (u32 fib_index, ip46_address_t * lcl, session_error_t vnet_session_rule_add_del (session_rule_add_del_args_t *args) { - app_namespace_t *app_ns = app_namespace_get (args->appns_index); + app_namespace_t *app_ns = app_namespace_get_if_valid (args->appns_index); session_table_t *st; u32 fib_index; u8 fib_proto; @@ -1404,6 +1404,8 @@ vnet_session_rule_add_del (session_rule_add_del_args_t *args) fib_proto = args->table_args.rmt.fp_proto; fib_index = app_namespace_get_fib_index (app_ns, fib_proto); st = session_table_get_for_fib_index (fib_proto, fib_index); + if (!st) + return SESSION_E_INVALID; session_rules_table_init (st, fib_proto); if ((rv = session_rules_table_add_del ( st->srtg_handle, args->transport_proto, &args->table_args))) @@ -1455,7 +1457,7 @@ session_lookup_set_tables_appns (app_namespace_t * app_ns) st = session_table_get_or_alloc (fp, fib_index); if (st) { - st->appns_index = app_namespace_index (app_ns); + vec_add1 (st->appns_index, app_namespace_index (app_ns)); session_lookup_fib_table_lock (fib_index, fp); } } @@ -1540,7 +1542,6 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { u32 proto = ~0, lcl_port, rmt_port, action = 0, lcl_plen = 0, rmt_plen = 0; - clib_error_t *error = 0; u32 appns_index, scope = 0; ip46_address_t lcl_ip, rmt_ip; u8 is_ip4 = 1, conn_set = 0; @@ -1549,10 +1550,12 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, app_namespace_t *app_ns; int rv; - session_cli_return_if_not_enabled (); - if (session_rule_table_is_enabled () == 0) - return clib_error_return (0, "session rule table engine is not enabled"); + { + vlib_cli_output (vm, "session rule table engine is not enabled"); + unformat_skip_line (input); + goto done; + } clib_memset (&lcl_ip, 0, sizeof (lcl_ip)); clib_memset (&rmt_ip, 0, sizeof (rmt_ip)); @@ -1594,8 +1597,8 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, ; else { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + vlib_cli_output (vm, "unknown input `%U'", format_unformat_error, + input); goto done; } } @@ -1654,12 +1657,12 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, .scope = scope, }; if ((rv = vnet_session_rule_add_del (&args))) - error = clib_error_return (0, "rule add del returned %u", rv); + vlib_cli_output (vm, "rule add del returned %d", rv); done: vec_free (ns_id); vec_free (tag); - return error; + return 0; } VLIB_CLI_COMMAND (session_rule_command, static) = @@ -1944,23 +1947,30 @@ session_lookup_init (void) } void -session_lookup_table_cleanup (u32 fib_proto, u32 fib_index) +session_lookup_table_cleanup (u32 fib_proto, u32 fib_index, u32 ns_index) { session_table_t *st; - u32 table_index; + u32 table_index, appns_index; + int i; session_lookup_fib_table_unlock (fib_index, fib_proto); + table_index = session_lookup_get_index_for_fib (fib_proto, fib_index); + st = session_table_get (table_index); + if (st == 0) + return; if (fib_index_to_lock_count[fib_proto][fib_index] == 0) { - table_index = session_lookup_get_index_for_fib (fib_proto, fib_index); - st = session_table_get (table_index); - if (st) - { - session_table_free (st, fib_proto); - if (vec_len (fib_index_to_table_index[fib_proto]) > fib_index) - 
fib_index_to_table_index[fib_proto][fib_index] = ~0; - } + session_table_free (st, fib_proto); + if (vec_len (fib_index_to_table_index[fib_proto]) > fib_index) + fib_index_to_table_index[fib_proto][fib_index] = ~0; } + else + vec_foreach_index (i, st->appns_index) + { + appns_index = *vec_elt_at_index (st->appns_index, i); + if (ns_index == appns_index) + vec_del1 (st->appns_index, i); + } } /* diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index 14b8005d5d0..4d86d409e98 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -136,7 +136,14 @@ session_mq_listen_handler (session_worker_t *wrk, session_evt_elt_t *elt) a->sep_ext.transport_flags = mp->flags; if (mp->ext_config) - a->sep_ext.ext_cfg = session_mq_get_ext_config (app, mp->ext_config); + { + transport_endpt_ext_cfg_t *ext_cfg = + session_mq_get_ext_config (app, mp->ext_config); + a->sep_ext.ext_cfgs.data = (u8 *) ext_cfg; + a->sep_ext.ext_cfgs.len = + ext_cfg->len + TRANSPORT_ENDPT_EXT_CFG_HEADER_SIZE; + a->sep_ext.ext_cfgs.tail_offset = a->sep_ext.ext_cfgs.len; + } if ((rv = vnet_listen (a))) session_worker_stat_error_inc (wrk, rv, 1); @@ -213,7 +220,14 @@ session_mq_connect_one (session_connect_msg_t *mp) a->wrk_map_index = mp->wrk_index; if (mp->ext_config) - a->sep_ext.ext_cfg = session_mq_get_ext_config (app, mp->ext_config); + { + transport_endpt_ext_cfg_t *ext_cfg = + session_mq_get_ext_config (app, mp->ext_config); + a->sep_ext.ext_cfgs.data = (u8 *) ext_cfg; + a->sep_ext.ext_cfgs.len = + ext_cfg->len + TRANSPORT_ENDPT_EXT_CFG_HEADER_SIZE; + a->sep_ext.ext_cfgs.tail_offset = a->sep_ext.ext_cfgs.len; + } if ((rv = vnet_connect (a))) { diff --git a/src/vnet/session/session_sdl.c b/src/vnet/session/session_sdl.c index 9505ba1689f..f1dfac4e1ab 100644 --- a/src/vnet/session/session_sdl.c +++ b/src/vnet/session/session_sdl.c @@ -244,13 +244,17 @@ session_sdl_table_init (session_table_t *st, u8 fib_proto) session_sdl_block_t *sdlb; u8 all = fib_proto > FIB_PROTOCOL_IP6 ? 
1 : 0; char name[80]; - app_namespace_t *app_ns = app_namespace_get (st->appns_index); + u32 appns_index; + app_namespace_t *app_ns; session_rules_table_group_t *srtg; /* Don't support local table */ if (st->is_local == 1) return; + appns_index = + *vec_elt_at_index (st->appns_index, vec_len (st->appns_index) - 1); + app_ns = app_namespace_get (appns_index); srtg = srtg_instance_alloc (st, 0); srt = srtg->session_rules; sdlb = &srt->sdl_block; @@ -460,18 +464,19 @@ session_sdl_command_fn (vlib_main_t *vm, unformat_input_t *input, u32 appns_index; app_namespace_t *app_ns; u32 rmt_plen = 0, action = 0; - clib_error_t *error = 0; ip46_address_t rmt_ip; u8 conn_set = 0; u8 fib_proto = -1, is_add = 1, *ns_id = 0; - u8 *tag = 0, tag_only = 0; + u8 *tag = 0; int rv; session_rule_add_del_args_t args; - session_cli_return_if_not_enabled (); - if (session_sdl_is_enabled () == 0) - return clib_error_return (0, "session sdl engine is not enabled"); + { + vlib_cli_output (vm, "session sdl engine is not enabled"); + unformat_skip_line (input); + goto done; + } while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -499,8 +504,8 @@ session_sdl_command_fn (vlib_main_t *vm, unformat_input_t *input, ; else { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + vlib_cli_output (vm, "unknown input `%U'", format_unformat_error, + input); goto done; } } @@ -536,13 +541,6 @@ session_sdl_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } - /* Delete with only tag entered. Try v4 first and then v6 if failed */ - if ((is_add == 0) && (fib_proto == (u8) ~0)) - { - fib_proto = FIB_PROTOCOL_IP4; - tag_only = 1; - } - memset (&args, 0, sizeof (args)); args.transport_proto = TRANSPORT_PROTO_TCP; args.table_args.rmt.fp_addr = rmt_ip; @@ -555,27 +553,12 @@ session_sdl_command_fn (vlib_main_t *vm, unformat_input_t *input, args.scope = SESSION_RULE_SCOPE_GLOBAL; if ((rv = vnet_session_rule_add_del (&args))) - { - /* Try tag only delete on v6 */ - if (rv && tag_only) - { - args.table_args.rmt.fp_proto = FIB_PROTOCOL_IP6; - args.table_args.lcl.fp_proto = FIB_PROTOCOL_IP6; - if ((rv = vnet_session_rule_add_del (&args))) - { - error = clib_error_return (0, "sdl add del returned %u", rv); - } - } - else - { - error = clib_error_return (0, "sdl add del returned %u", rv); - } - } + vlib_cli_output (vm, "sdl add del returned %d", rv); done: vec_free (ns_id); vec_free (tag); - return error; + return 0; } VLIB_CLI_COMMAND (session_sdl_command, static) = { diff --git a/src/vnet/session/session_table.c b/src/vnet/session/session_table.c index 5dafe0e633c..f3ec1f90bf6 100644 --- a/src/vnet/session/session_table.c +++ b/src/vnet/session/session_table.c @@ -79,6 +79,7 @@ session_table_free (session_table_t *slt, u8 fib_proto) clib_bihash_free_48_8 (&slt->v6_half_open_hash); } + vec_free (slt->appns_index); pool_put (lookup_tables, slt); } @@ -222,7 +223,17 @@ u8 * format_session_table (u8 *s, va_list *args) { session_table_t *st = va_arg (*args, session_table_t *); + u32 appns_index, i; + s = format (s, "appns index: "); + vec_foreach_index (i, st->appns_index) + { + appns_index = *vec_elt_at_index (st->appns_index, i); + if (i > 0) + s = format (s, ", "); + s = format (s, "%d", appns_index); + } + s = format (s, "\n"); if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash)) { s = format (s, "%U", format_bihash_16_8, &st->v4_session_hash, 0); diff --git a/src/vnet/session/session_table.h b/src/vnet/session/session_table.h index aae4a1c2af5..126e849beae 100644 --- 
a/src/vnet/session/session_table.h +++ b/src/vnet/session/session_table.h @@ -42,7 +42,7 @@ typedef struct _session_lookup_table u8 is_local; /** Namespace this table belongs to */ - u32 appns_index; + u32 *appns_index; /** For global tables only one fib proto is active. This is a * byproduct of fib table ids not necessarily being the same for @@ -77,7 +77,7 @@ session_table_t *_get_session_tables (); #define session_table_foreach(VAR, BODY) \ pool_foreach (VAR, _get_session_tables ()) BODY -void session_lookup_table_cleanup (u32 fib_proto, u32 fib_index); +void session_lookup_table_cleanup (u32 fib_proto, u32 fib_index, u32 ns_index); #endif /* SRC_VNET_SESSION_SESSION_TABLE_H_ */ /* diff --git a/src/vnet/session/session_test.c b/src/vnet/session/session_test.c index e248dd7010c..14f17761e75 100644 --- a/src/vnet/session/session_test.c +++ b/src/vnet/session/session_test.c @@ -382,6 +382,11 @@ vl_api_session_sdl_v2_details_t_handler (vl_api_session_sdl_v2_details_t *mp) { } +static void +vl_api_session_sdl_v3_details_t_handler (vl_api_session_sdl_v3_details_t *mp) +{ +} + static int api_session_sdl_dump (vat_main_t *vam) { @@ -394,6 +399,24 @@ api_session_sdl_v2_dump (vat_main_t *vam) return -1; } +static int +api_session_sdl_v3_dump (vat_main_t *vam) +{ + return -1; +} + +static void +vl_api_session_rules_v2_details_t_handler ( + vl_api_session_rules_v2_details_t *mp) +{ +} + +static int +api_session_rules_v2_dump (vat_main_t *vam) +{ + return -1; +} + #include <vnet/session/session.api_test.c> /* diff --git a/src/vnet/session/session_types.h b/src/vnet/session/session_types.h index 5e650727d61..935f8f189ee 100644 --- a/src/vnet/session/session_types.h +++ b/src/vnet/session/session_types.h @@ -77,7 +77,7 @@ typedef struct _session_endpoint_cfg u8 original_tp; u64 parent_handle; session_endpoint_cfg_flags_t flags; - transport_endpt_ext_cfg_t *ext_cfg; + transport_endpt_ext_cfgs_t ext_cfgs; } session_endpoint_cfg_t; #define SESSION_IP46_ZERO \ @@ -112,7 +112,8 @@ typedef struct _session_endpoint_cfg .peer = TRANSPORT_ENDPOINT_NULL, .transport_proto = 0, \ .app_wrk_index = ENDPOINT_INVALID_INDEX, \ .opaque = ENDPOINT_INVALID_INDEX, \ - .parent_handle = SESSION_INVALID_HANDLE, .ext_cfg = 0, \ + .parent_handle = SESSION_INVALID_HANDLE, \ + .ext_cfgs = TRANSPORT_ENDPT_EXT_CFGS_NULL, \ } #define session_endpoint_to_transport(_sep) ((transport_endpoint_t *)_sep) @@ -288,7 +289,7 @@ session_get_fib_proto (session_t * s) always_inline u8 session_has_transport (session_t * s) { - return (session_get_transport_proto (s) != TRANSPORT_PROTO_NONE); + return (session_get_transport_proto (s) != TRANSPORT_PROTO_CT); } static inline transport_service_type_t diff --git a/src/vnet/session/transport_types.h b/src/vnet/session/transport_types.h index b3469fa9fdb..f3b84998743 100644 --- a/src/vnet/session/transport_types.h +++ b/src/vnet/session/transport_types.h @@ -171,7 +171,7 @@ STATIC_ASSERT (sizeof (transport_connection_t) <= 128, #define foreach_transport_proto \ _ (TCP, "tcp", "T") \ _ (UDP, "udp", "U") \ - _ (NONE, "ct", "C") \ + _ (CT, "ct", "C") \ _ (TLS, "tls", "J") \ _ (QUIC, "quic", "Q") \ _ (DTLS, "dtls", "D") \ @@ -185,6 +185,8 @@ typedef enum _transport_proto #undef _ } transport_proto_t; +#define TRANSPORT_PROTO_NONE TRANSPORT_PROTO_CT + u8 *format_transport_proto (u8 * s, va_list * args); u8 *format_transport_proto_short (u8 * s, va_list * args); u8 *format_transport_flags (u8 *s, va_list *args); @@ -257,7 +259,8 @@ typedef enum transport_endpt_attr_flag_ _ (u64, next_output_node, 
NEXT_OUTPUT_NODE) \ _ (u16, mss, MSS) \ _ (u8, flags, FLAGS) \ - _ (u8, cc_algo, CC_ALGO) + _ (u8, cc_algo, CC_ALGO) \ + _ (transport_endpoint_t, ext_endpt, EXT_ENDPT) typedef enum transport_endpt_attr_type_ { @@ -281,6 +284,7 @@ typedef enum transport_endpt_ext_cfg_type_ { TRANSPORT_ENDPT_EXT_CFG_NONE, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + TRANSPORT_ENDPT_EXT_CFG_HTTP, } transport_endpt_ext_cfg_type_t; typedef struct transport_endpt_crypto_cfg_ @@ -297,10 +301,27 @@ typedef struct transport_endpt_ext_cfg_ union { transport_endpt_crypto_cfg_t crypto; + u32 opaque; /**< For general use */ u8 data[0]; }; } transport_endpt_ext_cfg_t; +#define TRANSPORT_ENDPT_EXT_CFG_HEADER_SIZE 4 + +typedef struct transport_endpt_ext_cfgs_ +{ + u32 len; /**< length of config data chunk */ + u32 tail_offset; /**< current tail in config data chunk */ + u8 *data; /**< start of config data chunk */ +} transport_endpt_ext_cfgs_t; + +#define TRANSPORT_ENDPT_EXT_CFGS_CHUNK_SIZE 512 + +#define TRANSPORT_ENDPT_EXT_CFGS_NULL \ + { \ + .len = 0, .tail_offset = 0, .data = 0, \ + } + typedef clib_bihash_24_8_t transport_endpoint_table_t; #define ENDPOINT_INVALID_INDEX ((u32)~0) diff --git a/src/vnet/srv6/sr.h b/src/vnet/srv6/sr.h index c2867eb7508..40edbbaf437 100644 --- a/src/vnet/srv6/sr.h +++ b/src/vnet/srv6/sr.h @@ -43,9 +43,22 @@ #define SR_BEHAVIOR_DX4 7 #define SR_BEHAVIOR_DT6 8 #define SR_BEHAVIOR_DT4 9 -#define SR_BEHAVIOR_END_UN_PERF 10 -#define SR_BEHAVIOR_END_UN 11 -#define SR_BEHAVIOR_LAST 12 /* Must always be the last one */ +/** + * SR_BEHAVIOR_LAST + * Not used anymore. Kept not to break the API. + * We use SR_BEHAVIOR_CURRENT_LAST going forward + * */ +#define SR_BEHAVIOR_LAST 10 +#define SR_BEHAVIOR_END_UN_PERF 11 +#define SR_BEHAVIOR_END_UN 12 +#define SR_BEHAVIOR_UA 13 + +/** + * SR_BEHAVIOR_CURRENT_LAST + * MUST be updated everytime we add new behaviors. + * MUST be set to value of last added behavior + 1. + * */ +#define SR_BEHAVIOR_CURRENT_LAST 14 #define SR_STEER_L2 2 #define SR_STEER_IPV4 4 @@ -164,7 +177,8 @@ typedef int (sr_plugin_callback_t) (ip6_sr_localsid_t * localsid); */ typedef struct { - u16 sr_localsid_function_number; /**< SR LocalSID plugin function (>SR_BEHAVIOR_LAST) */ + u16 sr_localsid_function_number; /**< SR LocalSID plugin function + (>SR_BEHAVIOR_CURRENT_LAST) */ u8 *function_name; /**< Function name. (key). 
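Plugin behaviors are numbered past the static range, so with SR_BEHAVIOR_CURRENT_LAST equal to 14 the first registered plugin gets function number 14, the second 15, and so on; mapping a localsid back to its plugin entry is the reverse subtraction. A short sketch of the arithmetic used in sr_localsid.c below:

/* registration side: pool index -> behavior number */
plugin->sr_localsid_function_number =
  (plugin - sm->plugin_functions) + SR_BEHAVIOR_CURRENT_LAST;

/* lookup side: behavior number -> pool index */
if (ls->behavior >= SR_BEHAVIOR_CURRENT_LAST)
  plugin = pool_elt_at_index (sm->plugin_functions,
			      ls->behavior - SR_BEHAVIOR_CURRENT_LAST);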
*/ diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c index a44c3098112..e546e1db0e7 100644 --- a/src/vnet/srv6/sr_api.c +++ b/src/vnet/srv6/sr_api.c @@ -39,6 +39,8 @@ static void vl_api_sr_localsid_add_del_t_handler { vl_api_sr_localsid_add_del_reply_t *rmp; int rv = 0; + int usid_len = 0; + u16 localsid_prefix_len = 128; ip46_address_t prefix; ip6_address_t localsid; /* @@ -46,21 +48,31 @@ static void vl_api_sr_localsid_add_del_t_handler * char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, u32 fib_table, * ip46_address_t *nh_addr, void *ls_plugin_mem) */ - if (mp->behavior == SR_BEHAVIOR_X || - mp->behavior == SR_BEHAVIOR_DX6 || - mp->behavior == SR_BEHAVIOR_DX4 || mp->behavior == SR_BEHAVIOR_DX2) + if (mp->behavior == SR_BEHAVIOR_X || mp->behavior == SR_BEHAVIOR_UA || + mp->behavior == SR_BEHAVIOR_DX6 || mp->behavior == SR_BEHAVIOR_DX4 || + mp->behavior == SR_BEHAVIOR_DX2) VALIDATE_SW_IF_INDEX (mp); + if (mp->behavior == SR_BEHAVIOR_END_UN_PERF || + mp->behavior == SR_BEHAVIOR_END_UN) + { + usid_len = 16; + localsid_prefix_len = 48; + } + + if (mp->behavior == SR_BEHAVIOR_UA) + { + usid_len = 16; + localsid_prefix_len = 64; + } + ip6_address_decode (mp->localsid, &localsid); ip_address_decode (&mp->nh_addr, &prefix); - rv = sr_cli_localsid (mp->is_del, - &localsid, 128, - mp->end_psp, - mp->behavior, - ntohl (mp->sw_if_index), - ntohl (mp->vlan_index), - ntohl (mp->fib_table), &prefix, 0, NULL); + rv = sr_cli_localsid (mp->is_del, &localsid, localsid_prefix_len, + mp->end_psp, mp->behavior, ntohl (mp->sw_if_index), + ntohl (mp->vlan_index), ntohl (mp->fib_table), &prefix, + usid_len, NULL); BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_SR_LOCALSID_ADD_DEL_REPLY); diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c index 62b1a271576..2172fa10ef1 100644 --- a/src/vnet/srv6/sr_localsid.c +++ b/src/vnet/srv6/sr_localsid.c @@ -100,10 +100,10 @@ sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, { /* Retrieve localsid */ ls = pool_elt_at_index (sm->localsids, p[0]); - if (ls->behavior >= SR_BEHAVIOR_LAST) + if (ls->behavior >= SR_BEHAVIOR_CURRENT_LAST) { - plugin = pool_elt_at_index (sm->plugin_functions, - ls->behavior - SR_BEHAVIOR_LAST); + plugin = pool_elt_at_index ( + sm->plugin_functions, ls->behavior - SR_BEHAVIOR_CURRENT_LAST); pref_length = plugin->prefix_length; } @@ -130,7 +130,7 @@ sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, || ls->behavior == SR_BEHAVIOR_DX4) adj_unlock (ls->nh_adj); - if (ls->behavior >= SR_BEHAVIOR_LAST) + if (ls->behavior >= SR_BEHAVIOR_CURRENT_LAST) { /* Callback plugin removal function */ rv = plugin->removal (ls); @@ -149,13 +149,13 @@ sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, if (is_del) return -2; - if (behavior >= SR_BEHAVIOR_LAST) - { - sr_localsid_fn_registration_t *plugin = 0; - plugin = - pool_elt_at_index (sm->plugin_functions, behavior - SR_BEHAVIOR_LAST); - pref_length = plugin->prefix_length; - } + if (behavior >= SR_BEHAVIOR_CURRENT_LAST) + { + sr_localsid_fn_registration_t *plugin = 0; + plugin = pool_elt_at_index (sm->plugin_functions, + behavior - SR_BEHAVIOR_CURRENT_LAST); + pref_length = plugin->prefix_length; + } if (localsid_prefix_len != 0) { @@ -213,6 +213,23 @@ sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, ls->usid_next_len = 16 - ls->usid_next_index; } break; + case SR_BEHAVIOR_UA: + if (usid_len) + { + int usid_width; + clib_memcpy (&ls->usid_block, localsid_addr, sizeof (ip6_address_t)); + + usid_width = pref_length - usid_len; + 
ip6_address_mask_from_width (&ls->usid_block_mask, usid_width); + + ls->usid_index = usid_width / 8; + ls->usid_len = usid_len / 8; + ls->usid_next_index = ls->usid_index + ls->usid_len; + ls->usid_next_len = 16 - ls->usid_next_index; + } + ls->sw_if_index = sw_if_index; + clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t)); + break; case SR_BEHAVIOR_X: ls->sw_if_index = sw_if_index; clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t)); @@ -241,13 +258,14 @@ sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, } /* Figure out the adjacency magic for Xconnect variants */ - if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX4 - || ls->behavior == SR_BEHAVIOR_DX6) + if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_UA || + ls->behavior == SR_BEHAVIOR_DX4 || ls->behavior == SR_BEHAVIOR_DX6) { adj_index_t nh_adj_index = ADJ_INDEX_INVALID; /* Retrieve the adjacency corresponding to the (OIF, next_hop) */ - if (ls->behavior == SR_BEHAVIOR_DX6 || ls->behavior == SR_BEHAVIOR_X) + if (ls->behavior == SR_BEHAVIOR_DX6 || ls->behavior == SR_BEHAVIOR_UA || + ls->behavior == SR_BEHAVIOR_X) nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, nh_addr, sw_if_index); @@ -272,17 +290,18 @@ sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, else if (ls->behavior == SR_BEHAVIOR_END_UN) dpo_set (&dpo, sr_localsid_un_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); - else if (ls->behavior == SR_BEHAVIOR_END_UN_PERF) + else if (ls->behavior == SR_BEHAVIOR_END_UN_PERF || + ls->behavior == SR_BEHAVIOR_UA) dpo_set (&dpo, sr_localsid_un_perf_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); - else if (ls->behavior > SR_BEHAVIOR_D_FIRST - && ls->behavior < SR_BEHAVIOR_LAST) + else if (ls->behavior > SR_BEHAVIOR_D_FIRST && + ls->behavior < SR_BEHAVIOR_CURRENT_LAST) dpo_set (&dpo, sr_localsid_d_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); - else if (ls->behavior >= SR_BEHAVIOR_LAST) + else if (ls->behavior >= SR_BEHAVIOR_CURRENT_LAST) { sr_localsid_fn_registration_t *plugin = 0; plugin = pool_elt_at_index (sm->plugin_functions, - ls->behavior - SR_BEHAVIOR_LAST); + ls->behavior - SR_BEHAVIOR_CURRENT_LAST); /* Copy the unformat memory result */ ls->plugin_mem = ls_plugin_mem; /* Callback plugin creation function */ @@ -389,6 +408,11 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, behavior = SR_BEHAVIOR_END_UN_PERF; else if (unformat (input, "un.flex %u", &usid_size)) behavior = SR_BEHAVIOR_END_UN; + else if (unformat (input, "ua %u %U %U", &usid_size, + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip6_address, &next_hop.ip6)) + behavior = SR_BEHAVIOR_UA; + else { /* Loop over all the plugin behavior format functions */ @@ -463,7 +487,7 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, behavior, sw_if_index, vlan_index, fib_index, &next_hop, usid_size, ls_plugin_mem); - if (behavior == SR_BEHAVIOR_END_UN_PERF) + if (behavior == SR_BEHAVIOR_END_UN_PERF || behavior == SR_BEHAVIOR_UA) { if (rv == 0) { @@ -507,17 +531,19 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, VLIB_CLI_COMMAND (sr_localsid_command, static) = { .path = "sr localsid", .short_help = "sr localsid (del) address XX:XX::YY:YY" - "(fib-table 8) behavior STRING", + "(fib-table 8) behavior STRING", .long_help = "Create SR LocalSID and binds it to a particular behavior\n" "Arguments:\n" "\tlocalSID IPv6_addr(128b) LocalSID IPv6 address\n" - "\t(fib-table X) Optional. 
VRF where to install SRv6 localsid\n" + "\t(fib-table X) Optional. VRF where to install SRv6 " + "localsid\n" "\tbehavior STRING Specifies the behavior\n" "\n\tBehaviors:\n" "\tEnd\t-> Endpoint.\n" "\tEnd.uN\t-> Endpoint with uSID.\n" - "\tEnd.X\t-> Endpoint with decapsulation and Layer-3 cross-connect.\n" + "\tuA\t-> Endpoint with uSID and Layer-3 cross-connect.\n" + "\tEnd.X\t-> Endpoint with Layer-3 cross-connect.\n" "\t\tParameters: '<iface> <ip6_next_hop>'\n" "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n" "\t\tParameters: '<iface>'\n" @@ -525,9 +551,11 @@ VLIB_CLI_COMMAND (sr_localsid_command, static) = { "\t\tParameters: '<iface> <ip6_next_hop>'\n" "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n" "\t\tParameters: '<iface> <ip4_next_hop>'\n" - "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n" + "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table " + "lookup.\n" "\t\tParameters: '<ip6_fib_table>'\n" - "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n" + "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table " + "lookup.\n" "\t\tParameters: '<ip4_fib_table>'\n", .function = sr_cli_localsid_command_fn, }; @@ -554,22 +582,30 @@ show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, switch (ls->behavior) { case SR_BEHAVIOR_END: - vlib_cli_output (vm, "\tAddress: \t%U\n\tBehavior: \tEnd", - format_ip6_address, &ls->localsid); + vlib_cli_output (vm, "\tAddress: \t%U/%u\n\tBehavior: \tEnd", + format_ip6_address, &ls->localsid, + ls->localsid_prefix_len); break; case SR_BEHAVIOR_END_UN: - vlib_cli_output (vm, - "\tAddress: \t%U\n\tBehavior: \tEnd (flex) [uSID:\t%U/%d, length: %d]", + vlib_cli_output (vm, "\tAddress: \t%U/%u\n\tBehavior: \tuN (flex)", format_ip6_address, &ls->localsid, - format_ip6_address, &ls->usid_block, - ls->usid_index * 8, ls->usid_len * 8); + ls->localsid_prefix_len); break; case SR_BEHAVIOR_END_UN_PERF: + vlib_cli_output ( + vm, "\tAddress: \t%U/%u\n\tBehavior: \tuN [End with uSID]", + format_ip6_address, &ls->localsid, ls->localsid_prefix_len, + ls->usid_len * 8); + break; + case SR_BEHAVIOR_UA: vlib_cli_output (vm, - "\tAddress: \t%U\n\tBehavior: \tEnd [uSID:\t%U/%d, length: %d]", + "\tAddress: \t%U/%u\n\tBehavior: \tuA [End with " + "uSID and Layer-3 cross-connect]" + "\n\tIface: \t%U\n\tNext hop: \t%U", format_ip6_address, &ls->localsid, - format_ip6_address, &ls->usid_block, - ls->usid_index * 8, ls->usid_len * 8); + ls->localsid_prefix_len, + format_vnet_sw_if_index_name, vnm, ls->sw_if_index, + format_ip6_address, &ls->next_hop.ip6); break; case SR_BEHAVIOR_X: vlib_cli_output (vm, @@ -636,11 +672,10 @@ show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, FIB_PROTOCOL_IP4)); break; default: - if (ls->behavior >= SR_BEHAVIOR_LAST) + if (ls->behavior >= SR_BEHAVIOR_CURRENT_LAST) { - sr_localsid_fn_registration_t *plugin = - pool_elt_at_index (sm->plugin_functions, - ls->behavior - SR_BEHAVIOR_LAST); + sr_localsid_fn_registration_t *plugin = pool_elt_at_index ( + sm->plugin_functions, ls->behavior - SR_BEHAVIOR_CURRENT_LAST); vlib_cli_output (vm, "\tAddress: \t%U/%u\n" "\tBehavior: \t%s (%s)\n\t%U", @@ -781,6 +816,9 @@ format_sr_localsid_trace (u8 * s, va_list * args) case SR_BEHAVIOR_DX4: s = format (s, "\tBehavior: Decapsulation with IPv4 L3 xconnect\n"); break; + case SR_BEHAVIOR_UA: + s = format (s, "\tBehavior: uSID and IPv6 L3 xconnect\n"); + break; case SR_BEHAVIOR_X: s = format (s, "\tBehavior: 
       break;
@@ -1031,7 +1069,8 @@ end_un_srh_processing (vlib_node_runtime_t * node,
 }
 
 static_always_inline void
-end_un_processing (ip6_header_t * ip0, ip6_sr_localsid_t * ls0)
+end_un_processing (vlib_node_runtime_t *node, vlib_buffer_t *b0,
+                   ip6_header_t *ip0, ip6_sr_localsid_t *ls0, u32 *next0)
 {
   u8 next_usid_index;
   u8 index;
@@ -1052,6 +1091,11 @@ end_un_processing (ip6_header_t * ip0, ip6_sr_localsid_t * ls0)
     {
       ip0->dst_address.as_u8[index] = 0;
     }
+  if (ls0->behavior == SR_BEHAVIOR_UA)
+    {
+      vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+      *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
+    }
   return;
 }
@@ -2141,10 +2185,10 @@ sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
             pool_elt_at_index (sm->localsids,
                                vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
-          end_un_processing (ip0, ls0);
-          end_un_processing (ip1, ls1);
-          end_un_processing (ip2, ls2);
-          end_un_processing (ip3, ls3);
+          end_un_processing (node, b0, ip0, ls0, &next0);
+          end_un_processing (node, b1, ip1, ls1, &next1);
+          end_un_processing (node, b2, ip2, ls2, &next2);
+          end_un_processing (node, b3, ip3, ls3, &next3);
           if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
             {
@@ -2232,7 +2276,7 @@ sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                                vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
           /* SRH processing */
-          end_un_processing (ip0, ls0);
+          end_un_processing (node, b0, ip0, ls0, &next0);
           if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
             {
@@ -2359,7 +2403,7 @@ sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name,
   clib_memset (plugin, 0, sizeof (*plugin));
   plugin->sr_localsid_function_number = (plugin - sm->plugin_functions);
-  plugin->sr_localsid_function_number += SR_BEHAVIOR_LAST;
+  plugin->sr_localsid_function_number += SR_BEHAVIOR_CURRENT_LAST;
   plugin->prefix_length = prefix_length;
   plugin->ls_format = ls_format;
   plugin->ls_unformat = ls_unformat;
@@ -2394,22 +2438,28 @@ show_sr_localsid_behaviors_command_fn (vlib_main_t * vm,
     {
      vec_add1 (plugins_vec, plugin);
     }
   /* Print static behaviors */
-  vlib_cli_output (vm, "Default behaviors:\n"
-                   "\tEnd\t-> Endpoint.\n"
-                   "\tEnd.X\t-> Endpoint with Layer-3 cross-connect.\n"
-                   "\t\tParameters: '<iface> <ip6_next_hop>'\n"
-                   "\tEnd.T\t-> Endpoint with specific IPv6 table lookup.\n"
-                   "\t\tParameters: '<fib_table>'\n"
-                   "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n"
-                   "\t\tParameters: '<iface>'\n"
-                   "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n"
-                   "\t\tParameters: '<iface> <ip6_next_hop>'\n"
-                   "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n"
-                   "\t\tParameters: '<iface> <ip4_next_hop>'\n"
-                   "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n"
-                   "\t\tParameters: '<ip6_fib_table>'\n"
-                   "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n"
-                   "\t\tParameters: '<ip4_fib_table>'\n");
+  vlib_cli_output (
+    vm,
+    "Default behaviors:\n"
+    "\tEnd\t-> Endpoint.\n"
+    "\tEnd.X\t-> Endpoint with Layer-3 cross-connect.\n"
+    "\tuN\t-> Endpoint with uSID.\n"
+    "\tuA\t-> Endpoint with uSID and Layer-3 cross-connect.\n"
+    "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+    "\tEnd.T\t-> Endpoint with specific IPv6 table lookup.\n"
+    "\t\tParameters: '<fib_table>'\n"
+    "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n"
+    "\t\tParameters: '<iface>'\n"
+    "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n"
+    "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+    "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n"
+    "\t\tParameters: '<iface> <ip4_next_hop>'\n"
+    "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table "
+    "lookup.\n"
+    "\t\tParameters: '<ip6_fib_table>'\n"
+    "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table "
+    "lookup.\n"
+    "\t\tParameters: '<ip4_fib_table>'\n");
   vlib_cli_output (vm, "Plugin behaviors:\n");
   for (i = 0; i < vec_len (plugins_vec); i++)
     {
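The end_un_processing() changes above extend the uSID "shift and forward" step to the new uA behavior. As a minimal standalone sketch (not part of the patch, and using hypothetical constants instead of the real ip6_sr_localsid_t fields), the address manipulation amounts to popping the active uSID and zero-filling the tail of the IPv6 destination address:

/*
 * Illustrative only: simplified uSID pop on a raw 16-byte IPv6 destination
 * address. USID_INDEX / USID_LEN are hypothetical stand-ins for the
 * usid_index / usid_len values carried by the localsid in the real code.
 */
#include <stdint.h>
#include <string.h>

#define USID_INDEX 4 /* assumed: byte offset of the active uSID */
#define USID_LEN   2 /* assumed: uSID size in bytes */

static void
usid_pop (uint8_t dst[16])
{
  /* shift the remainder of the address left over the consumed uSID */
  memmove (dst + USID_INDEX, dst + USID_INDEX + USID_LEN,
           16 - USID_INDEX - USID_LEN);
  /* zero-fill the freed tail bytes */
  memset (dst + 16 - USID_LEN, 0, USID_LEN);
}

For SR_BEHAVIOR_UA the hunk additionally stores the preresolved adjacency (ls0->nh_adj) into the buffer's VLIB_TX adjacency and forces the next node to SR_LOCALSID_NEXT_IP6_REWRITE, so the shifted packet is cross-connected toward the configured next hop rather than going through another FIB lookup.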
diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c
index 0aa88cc273e..a9114628f95 100644
--- a/src/vnet/srv6/sr_policy_rewrite.c
+++ b/src/vnet/srv6/sr_policy_rewrite.c
@@ -418,7 +418,7 @@ create_sl (ip6_sr_policy_t *sr_policy, ip6_address_t *sl,
     {
       plugin = pool_elt_at_index (sm->policy_plugin_functions,
-                                  sr_policy->plugin - SR_BEHAVIOR_LAST);
+                                  sr_policy->plugin - SR_BEHAVIOR_CURRENT_LAST);
       segment_list->plugin = sr_policy->plugin;
       segment_list->plugin_mem = sr_policy->plugin_mem;
@@ -828,7 +828,7 @@ sr_policy_del (ip6_address_t * bsid, u32 index)
       plugin = pool_elt_at_index (sm->policy_plugin_functions,
-                                  sr_policy->plugin - SR_BEHAVIOR_LAST);
+                                  sr_policy->plugin - SR_BEHAVIOR_CURRENT_LAST);
       plugin->removal (sr_policy);
       sr_policy->plugin = 0;
@@ -3499,7 +3499,7 @@ sr_policy_register_function (vlib_main_t * vm, u8 * fn_name,
   clib_memset (plugin, 0, sizeof (*plugin));
   plugin->sr_policy_function_number = (plugin - sm->policy_plugin_functions);
-  plugin->sr_policy_function_number += SR_BEHAVIOR_LAST;
+  plugin->sr_policy_function_number += SR_BEHAVIOR_CURRENT_LAST;
   plugin->prefix_length = prefix_length;
   plugin->ls_format = ls_format;
   plugin->ls_unformat = ls_unformat;
diff --git a/src/vnet/srv6/sr_types.api b/src/vnet/srv6/sr_types.api
index 967eab0bd5a..7bc22c1a0f4 100644
--- a/src/vnet/srv6/sr_types.api
+++ b/src/vnet/srv6/sr_types.api
@@ -35,7 +35,10 @@ enum sr_behavior : u8
   SR_BEHAVIOR_API_DX4 = 7,
   SR_BEHAVIOR_API_DT6 = 8,
   SR_BEHAVIOR_API_DT4 = 9,
-  SR_BEHAVIOR_API_LAST = 10, /* Must always be the last one */
+  SR_BEHAVIOR_API_LAST = 10, /* Not used. Kept not to break the API */
+  SR_BEHAVIOR_API_END_UN_PERF = 11 [backwards_compatible],
+  SR_BEHAVIOR_API_END_UN = 12 [backwards_compatible],
+  SR_BEHAVIOR_API_UA = 13 [backwards_compatible],
 };
 
 enum sr_steer : u8
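Two numbering conventions are touched here. On the binary API side, the new behaviors are appended after the retired SR_BEHAVIOR_API_LAST and marked [backwards_compatible], so the numeric values of existing behaviors stay stable on the wire. Internally, plugin-registered behaviors keep being numbered from the end of the built-in range, now named SR_BEHAVIOR_CURRENT_LAST, so a behavior id and a plugin pool index convert with a simple offset. A small sketch of that convention (the numeric value 14 is assumed for illustration, not taken from the patch):

/* Illustrative only: behavior id <-> plugin pool index offsetting. */
enum
{
  EXAMPLE_SR_BEHAVIOR_CURRENT_LAST = 14 /* assumed first id handed to plugins */
};

static inline unsigned
plugin_index_to_behavior (unsigned pool_index)
{
  /* id assigned when a plugin registers (see sr_*_register_function above) */
  return pool_index + EXAMPLE_SR_BEHAVIOR_CURRENT_LAST;
}

static inline unsigned
behavior_to_plugin_index (unsigned behavior)
{
  /* reverse mapping used by the show/del paths via pool_elt_at_index() */
  return behavior - EXAMPLE_SR_BEHAVIOR_CURRENT_LAST;
}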
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 70b5d28e0cc..cd3e4b7700c 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -2551,7 +2551,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
       tcp_connection_t *tc;
       tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
                                thread_index);
-      if (tc->state != TCP_STATE_TIME_WAIT)
+      if (!tc || tc->state != TCP_STATE_TIME_WAIT)
        {
          tcp_inc_counter (listen, TCP_ERROR_CREATE_EXISTS, 1);
          goto done;
diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c
index 8754b67ca31..12dcbb449e8 100644
--- a/src/vnet/tls/tls.c
+++ b/src/vnet/tls/tls.c
@@ -628,16 +628,18 @@ tls_connect (transport_endpoint_cfg_t * tep)
   application_t *app;
   tls_ctx_t *ctx;
   u32 ctx_index;
+  transport_endpt_ext_cfg_t *ext_cfg;
   int rv;
   sep = (session_endpoint_cfg_t *) tep;
-  if (!sep->ext_cfg)
+  ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+  if (!ext_cfg)
     return SESSION_E_NOEXTCFG;
   app_wrk = app_worker_get (sep->app_wrk_index);
   app = application_get (app_wrk->app_index);
-  ccfg = &sep->ext_cfg->crypto;
+  ccfg = &ext_cfg->crypto;
   engine_type = tls_get_engine_type (ccfg->crypto_engine, app->tls_engine);
   if (engine_type == CRYPTO_ENGINE_NONE)
     {
@@ -709,16 +711,18 @@ tls_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
   app_listener_t *al;
   tls_ctx_t *lctx;
   u32 lctx_index;
+  transport_endpt_ext_cfg_t *ext_cfg;
   int rv;
   sep = (session_endpoint_cfg_t *) tep;
-  if (!sep->ext_cfg)
+  ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+  if (!ext_cfg)
     return SESSION_E_NOEXTCFG;
   app_wrk = app_worker_get (sep->app_wrk_index);
   app = application_get (app_wrk->app_index);
-  ccfg = &sep->ext_cfg->crypto;
+  ccfg = &ext_cfg->crypto;
   engine_type = tls_get_engine_type (ccfg->crypto_engine, app->tls_engine);
   if (engine_type == CRYPTO_ENGINE_NONE)
     {
@@ -1115,16 +1119,18 @@ dtls_connect (transport_endpoint_cfg_t *tep)
   application_t *app;
   tls_ctx_t *ctx;
   u32 ctx_handle;
+  transport_endpt_ext_cfg_t *ext_cfg;
   int rv;
   sep = (session_endpoint_cfg_t *) tep;
-  if (!sep->ext_cfg)
+  ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+  if (!ext_cfg)
     return -1;
   app_wrk = app_worker_get (sep->app_wrk_index);
   app = application_get (app_wrk->app_index);
-  ccfg = &sep->ext_cfg->crypto;
+  ccfg = &ext_cfg->crypto;
   engine_type = tls_get_engine_type (ccfg->crypto_engine, app->tls_engine);
   if (engine_type == CRYPTO_ENGINE_NONE)
     {
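The TLS and DTLS hunks above all switch from dereferencing a single sep->ext_cfg pointer to asking the session layer for the crypto-typed extended config. A minimal sketch of the lookup pattern, assuming the VPP session-layer headers and using a hypothetical caller name (this function is not part of the patch):

#include <vnet/session/session.h>

static int
crypto_ext_cfg_lookup_sketch (transport_endpoint_cfg_t *tep)
{
  session_endpoint_cfg_t *sep = (session_endpoint_cfg_t *) tep;
  transport_endpt_ext_cfg_t *ext_cfg;

  /* fetch the crypto extension by its type instead of assuming it is the
   * only extension attached to the endpoint */
  ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
  if (!ext_cfg)
    return SESSION_E_NOEXTCFG;

  /* crypto parameters now come from the typed extension */
  return ext_cfg->crypto.crypto_engine;
}

Locating the crypto block by type rather than by position keeps these transports working when an endpoint carries more than one extended configuration.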
diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c
index 8deeb9e41ee..1fc055f8d50 100644
--- a/src/vnet/udp/udp.c
+++ b/src/vnet/udp/udp.c
@@ -467,16 +467,7 @@ udp_open_connection (transport_endpoint_cfg_t * rmt)
   uc->mss = rmt->mss ? rmt->mss : udp_default_mtu (um, uc->c_is_ip4);
   if (rmt->peer.sw_if_index != ENDPOINT_INVALID_INDEX)
     uc->sw_if_index = rmt->peer.sw_if_index;
-  uc->flags |= UDP_CONN_F_OWNS_PORT;
-  if (rmt->transport_flags & TRANSPORT_CFG_F_CONNECTED)
-    {
-      uc->flags |= UDP_CONN_F_CONNECTED;
-    }
-  else
-    {
-      clib_spinlock_init (&uc->rx_lock);
-      uc->c_flags |= TRANSPORT_CONNECTION_F_CLESS;
-    }
+  uc->flags |= UDP_CONN_F_OWNS_PORT | UDP_CONN_F_CONNECTED;
   if (!um->csum_offload)
     uc->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD;
   uc->next_node_index = rmt->next_node_index;
diff --git a/src/vnet/udp/udp_local.h b/src/vnet/udp/udp_local.h
index 16286824ef2..06c7b3f1758 100644
--- a/src/vnet/udp/udp_local.h
+++ b/src/vnet/udp/udp_local.h
@@ -18,42 +18,43 @@
 #include <vnet/vnet.h>
 
-#define foreach_udp4_dst_port \
-_ (53, dns) \
-_ (67, dhcp_to_server) \
-_ (68, dhcp_to_client) \
-_ (500, ikev2) \
-_ (2152, GTPU) \
-_ (3784, bfd4) \
-_ (3785, bfd_echo4) \
-_ (4341, lisp_gpe) \
-_ (4342, lisp_cp) \
-_ (4500, ipsec) \
-_ (4739, ipfix) \
-_ (4789, vxlan) \
-_ (4789, vxlan6) \
-_ (48879, vxlan_gbp) \
-_ (4790, VXLAN_GPE) \
-_ (6633, vpath_3) \
-_ (6081, geneve) \
-_ (53053, dns_reply)
+#define foreach_udp4_dst_port \
+  _ (53, dns) \
+  _ (67, dhcp_to_server) \
+  _ (68, dhcp_to_client) \
+  _ (500, ikev2) \
+  _ (2152, GTPU) \
+  _ (3784, bfd4) \
+  _ (3785, bfd_echo4) \
+  _ (4341, lisp_gpe) \
+  _ (4342, lisp_cp) \
+  _ (4500, ipsec) \
+  _ (4739, ipfix) \
+  _ (4784, bfd4_mh) \
+  _ (4789, vxlan) \
+  _ (4789, vxlan6) \
+  _ (48879, vxlan_gbp) \
+  _ (4790, VXLAN_GPE) \
+  _ (6633, vpath_3) \
+  _ (6081, geneve) \
+  _ (53053, dns_reply)
 
-
-#define foreach_udp6_dst_port \
-_ (53, dns6) \
-_ (547, dhcpv6_to_server) \
-_ (546, dhcpv6_to_client) \
-_ (2152, GTPU6) \
-_ (3784, bfd6) \
-_ (3785, bfd_echo6) \
-_ (4341, lisp_gpe6) \
-_ (4342, lisp_cp6) \
-_ (48879, vxlan6_gbp) \
-_ (4790, VXLAN6_GPE) \
-_ (6633, vpath6_3) \
-_ (6081, geneve6) \
-_ (8138, BIER) \
-_ (53053, dns_reply6)
+#define foreach_udp6_dst_port \
+  _ (53, dns6) \
+  _ (547, dhcpv6_to_server) \
+  _ (546, dhcpv6_to_client) \
+  _ (2152, GTPU6) \
+  _ (3784, bfd6) \
+  _ (3785, bfd_echo6) \
+  _ (4341, lisp_gpe6) \
+  _ (4342, lisp_cp6) \
+  _ (48879, vxlan6_gbp) \
+  _ (4784, bfd6_mh) \
+  _ (4790, VXLAN6_GPE) \
+  _ (6633, vpath6_3) \
+  _ (6081, geneve6) \
+  _ (8138, BIER) \
+  _ (53053, dns_reply6)
 
 typedef enum
 {
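The udp_local.h lists are X-macros: each _ (port, name) entry is expanded elsewhere in the header (the trailing "typedef enum" context above) into a symbolic destination-port constant, which is what the new bfd4_mh / bfd6_mh entries provide. Port 4784 is the destination port RFC 5883 assigns to multihop BFD, alongside 3784 for single-hop sessions (RFC 5881). A reduced, hypothetical expansion for illustration:

/* Illustrative only: how an X-macro port list of this shape typically expands. */
#define foreach_example_udp_dst_port                                          \
  _ (3784, bfd4)    /* single-hop BFD, RFC 5881 */                            \
  _ (4784, bfd4_mh) /* multihop BFD, RFC 5883 */

typedef enum
{
#define _(port, name) EXAMPLE_UDP_DST_PORT_##name = (port),
  foreach_example_udp_dst_port
#undef _
} example_udp_dst_port_t;

With the entries in place, the BFD code can register its multihop input nodes against a symbolic port constant the same way the existing single-hop ports are handled; the actual registration code is outside this hunk.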