Diffstat (limited to 'src')
157 files changed, 10047 insertions, 3278 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d5c7fd1c718..1fbb624b972 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -164,7 +164,7 @@ elseif (${CMAKE_BUILD_TYPE_LC} MATCHES "coverity") add_compile_options(-O0) add_compile_definitions(__COVERITY__) elseif (${CMAKE_BUILD_TYPE_LC} MATCHES "gcov") - add_compile_options(-O0 -fprofile-arcs -ftest-coverage) + add_compile_options(-O0 -fprofile-arcs -ftest-coverage -fprofile-update=prefer-atomic) add_compile_definitions(CLIB_DEBUG CLIB_GCOV) link_libraries(gcov) endif() diff --git a/src/plugins/af_xdp/af_xdp.api b/src/plugins/af_xdp/af_xdp.api index 4c2908e2037..20aa20b4d7d 100644 --- a/src/plugins/af_xdp/af_xdp.api +++ b/src/plugins/af_xdp/af_xdp.api @@ -33,96 +33,6 @@ enumflag af_xdp_flag : u8 /** \brief @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param host_if - Linux netdev interface name - @param name - new af_xdp interface name (optional) - @param rxq_num - number of receive queues. 65535 can be used as special value to request all available queues (optional) - @param rxq_size - receive queue size (optional) - @param txq_size - transmit queue size (optional) - @param mode - operation mode (optional) - @param flags - flags (optional) - @param prog - eBPF program path (optional) -*/ - -define af_xdp_create -{ - u32 client_index; - u32 context; - - string host_if[64]; - string name[64]; - u16 rxq_num [default=1]; - u16 rxq_size [default=0]; - u16 txq_size [default=0]; - vl_api_af_xdp_mode_t mode [default=0]; - vl_api_af_xdp_flag_t flags [default=0]; - string prog[256]; - option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [zero-copy|no-zero-copy] [no-syscall-lock]"; - option deprecated; -}; - -/** \brief - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param host_if - Linux netdev interface name - @param name - new af_xdp interface name (optional) - @param rxq_num - number of receive queues. 
65535 can be used as special value to request all available queues (optional) - @param rxq_size - receive queue size (optional) - @param txq_size - transmit queue size (optional) - @param mode - operation mode (optional) - @param flags - flags (optional) - @param prog - eBPF program path (optional) - @param namespace - netns of nic (optional) -*/ - -define af_xdp_create_v2 -{ - u32 client_index; - u32 context; - - string host_if[64]; - string name[64]; - u16 rxq_num [default=1]; - u16 rxq_size [default=0]; - u16 txq_size [default=0]; - vl_api_af_xdp_mode_t mode [default=0]; - vl_api_af_xdp_flag_t flags [default=0]; - string prog[256]; - string namespace[64]; - option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [netns ns] [zero-copy|no-zero-copy] [no-syscall-lock]"; - option deprecated; -}; - -/** \brief - @param context - sender context, to match reply w/ request - @param retval - return value for request - @param sw_if_index - software index for the new af_xdp interface -*/ - -define af_xdp_create_reply -{ - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; - option deprecated; -}; - -/** \brief - @param context - sender context, to match reply w/ request - @param retval - return value for request - @param sw_if_index - software index for the new af_xdp interface -*/ - -define af_xdp_create_v2_reply -{ - u32 context; - i32 retval; - vl_api_interface_index_t sw_if_index; - option deprecated; -}; - -/** \brief - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request @param sw_if_index - interface index */ diff --git a/src/plugins/af_xdp/api.c b/src/plugins/af_xdp/api.c index 3e9a3fe2578..9ead9856ff5 100644 --- a/src/plugins/af_xdp/api.c +++ b/src/plugins/af_xdp/api.c @@ -57,65 +57,6 @@ af_xdp_api_flags (vl_api_af_xdp_flag_t flags) } static void -vl_api_af_xdp_create_t_handler (vl_api_af_xdp_create_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - af_xdp_main_t *rm = &af_xdp_main; - vl_api_af_xdp_create_reply_t *rmp; - af_xdp_create_if_args_t args; - int rv; - - clib_memset (&args, 0, sizeof (af_xdp_create_if_args_t)); - - args.linux_ifname = mp->host_if[0] ? (char *) mp->host_if : 0; - args.name = mp->name[0] ? (char *) mp->name : 0; - args.prog = mp->prog[0] ? (char *) mp->prog : 0; - args.mode = af_xdp_api_mode (mp->mode); - args.flags = af_xdp_api_flags (mp->flags); - args.rxq_size = ntohs (mp->rxq_size); - args.txq_size = ntohs (mp->txq_size); - args.rxq_num = ntohs (mp->rxq_num); - - af_xdp_create_if (vm, &args); - rv = args.rv; - - REPLY_MACRO2 (VL_API_AF_XDP_CREATE_REPLY, - ({ rmp->sw_if_index = ntohl (args.sw_if_index); })); -} - -static void -vl_api_af_xdp_create_v2_t_handler (vl_api_af_xdp_create_v2_t *mp) -{ - vlib_main_t *vm = vlib_get_main (); - af_xdp_main_t *rm = &af_xdp_main; - vl_api_af_xdp_create_v2_reply_t *rmp; - af_xdp_create_if_args_t args; - int rv; - - clib_memset (&args, 0, sizeof (af_xdp_create_if_args_t)); - - args.linux_ifname = mp->host_if[0] ? (char *) mp->host_if : 0; - args.name = mp->name[0] ? (char *) mp->name : 0; - args.prog = mp->prog[0] ? (char *) mp->prog : 0; - args.netns = mp->namespace[0] ? 
(char *) mp->namespace : 0; - args.mode = af_xdp_api_mode (mp->mode); - args.flags = af_xdp_api_flags (mp->flags); - args.rxq_size = ntohs (mp->rxq_size); - args.txq_size = ntohs (mp->txq_size); - args.rxq_num = ntohs (mp->rxq_num); - - af_xdp_create_if (vm, &args); - rv = args.rv; - - /* clang-format off */ - REPLY_MACRO2 (VL_API_AF_XDP_CREATE_V2_REPLY, - ({ - rmp->sw_if_index = ntohl (args.sw_if_index); - })); - /* clang-format on */ -} - -static void vl_api_af_xdp_create_v3_t_handler (vl_api_af_xdp_create_v3_t *mp) { vlib_main_t *vm = vlib_get_main (); diff --git a/src/plugins/af_xdp/test_api.c b/src/plugins/af_xdp/test_api.c index 581697e341d..5f622adcb04 100644 --- a/src/plugins/af_xdp/test_api.c +++ b/src/plugins/af_xdp/test_api.c @@ -58,75 +58,7 @@ api_af_xdp_mode (af_xdp_mode_t mode) return ~0; } -/* af_xdp create API */ -static int -api_af_xdp_create (vat_main_t * vam) -{ - vl_api_af_xdp_create_t *mp; - af_xdp_create_if_args_t args; - int ret; - - if (!unformat_user (vam->input, unformat_af_xdp_create_if_args, &args)) - { - clib_warning ("unknown input `%U'", format_unformat_error, vam->input); - return -99; - } - - M (AF_XDP_CREATE, mp); - - snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s", - args.linux_ifname ? : ""); - snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name ? : ""); - mp->rxq_num = clib_host_to_net_u16 (args.rxq_num); - mp->rxq_size = clib_host_to_net_u16 (args.rxq_size); - mp->txq_size = clib_host_to_net_u16 (args.txq_size); - mp->mode = api_af_xdp_mode (args.mode); - if (args.flags & AF_XDP_CREATE_FLAGS_NO_SYSCALL_LOCK) - mp->flags |= AF_XDP_API_FLAGS_NO_SYSCALL_LOCK; - snprintf ((char *) mp->prog, sizeof (mp->prog), "%s", args.prog ? : ""); - - S (mp); - W (ret); - - return ret; -} - -/* af_xdp create v2 API */ -static int -api_af_xdp_create_v2 (vat_main_t *vam) -{ - vl_api_af_xdp_create_v2_t *mp; - af_xdp_create_if_args_t args; - int ret; - - if (!unformat_user (vam->input, unformat_af_xdp_create_if_args, &args)) - { - clib_warning ("unknown input `%U'", format_unformat_error, vam->input); - return -99; - } - - M (AF_XDP_CREATE, mp); - - snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s", - args.linux_ifname ?: ""); - snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name ?: ""); - snprintf ((char *) mp->namespace, sizeof (mp->namespace), "%s", - args.netns ?: ""); - mp->rxq_num = clib_host_to_net_u16 (args.rxq_num); - mp->rxq_size = clib_host_to_net_u16 (args.rxq_size); - mp->txq_size = clib_host_to_net_u16 (args.txq_size); - mp->mode = api_af_xdp_mode (args.mode); - if (args.flags & AF_XDP_CREATE_FLAGS_NO_SYSCALL_LOCK) - mp->flags |= AF_XDP_API_FLAGS_NO_SYSCALL_LOCK; - snprintf ((char *) mp->prog, sizeof (mp->prog), "%s", args.prog ?: ""); - - S (mp); - W (ret); - - return ret; -} - -/* af_xdp create v2 API */ +/* af_xdp create v3 API */ static int api_af_xdp_create_v3 (vat_main_t *vam) { @@ -140,7 +72,7 @@ api_af_xdp_create_v3 (vat_main_t *vam) return -99; } - M (AF_XDP_CREATE, mp); + M (AF_XDP_CREATE_V3, mp); snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s", args.linux_ifname ?: ""); @@ -160,45 +92,9 @@ api_af_xdp_create_v3 (vat_main_t *vam) return ret; } -/* af_xdp-create reply handler */ -static void -vl_api_af_xdp_create_reply_t_handler (vl_api_af_xdp_create_reply_t * mp) -{ - vat_main_t *vam = af_xdp_test_main.vat_main; - i32 retval = ntohl (mp->retval); - - if (retval == 0) - { - fformat (vam->ofp, "created af_xdp with sw_if_index %d\n", - ntohl (mp->sw_if_index)); - } - - vam->retval = retval; - 
vam->result_ready = 1; - vam->regenerate_interface_table = 1; -} - -/* af_xdp-create v2 reply handler */ -static void -vl_api_af_xdp_create_v2_reply_t_handler (vl_api_af_xdp_create_v2_reply_t *mp) -{ - vat_main_t *vam = af_xdp_test_main.vat_main; - i32 retval = ntohl (mp->retval); - - if (retval == 0) - { - fformat (vam->ofp, "created af_xdp with sw_if_index %d\n", - ntohl (mp->sw_if_index)); - } - - vam->retval = retval; - vam->result_ready = 1; - vam->regenerate_interface_table = 1; -} - /* af_xdp-create v3 reply handler */ static void -vl_api_af_xdp_create_v3_reply_t_handler (vl_api_af_xdp_create_v2_reply_t *mp) +vl_api_af_xdp_create_v3_reply_t_handler (vl_api_af_xdp_create_v3_reply_t *mp) { vat_main_t *vam = af_xdp_test_main.vat_main; i32 retval = mp->retval; diff --git a/src/plugins/crypto_native/FEATURE.yaml b/src/plugins/crypto_native/FEATURE.yaml index 06f26d4a8cf..d54816d673f 100644 --- a/src/plugins/crypto_native/FEATURE.yaml +++ b/src/plugins/crypto_native/FEATURE.yaml @@ -4,6 +4,9 @@ maintainer: Damjan Marion <damarion@cisco.com> features: - CBC(128, 192, 256) - GCM(128, 192, 256) + - CTR(128, 192, 256) + - SHA(224, 256) + - HMAC-SHA(224, 256) description: "An implementation of a native crypto-engine" state: production diff --git a/src/plugins/dev_armada/CMakeLists.txt b/src/plugins/dev_armada/CMakeLists.txt index f955a9baa91..e755e7bdd46 100644 --- a/src/plugins/dev_armada/CMakeLists.txt +++ b/src/plugins/dev_armada/CMakeLists.txt @@ -16,6 +16,7 @@ set(MUSDK_LINK_FLAGS "-Wl,--whole-archive,${MUSDK_LIB_DIR}/libmusdk.a,--no-whole add_vpp_plugin(dev_armada SOURCES plugin.c + pp2/counters.c pp2/init.c pp2/format.c pp2/port.c diff --git a/src/plugins/dev_armada/pp2/counters.c b/src/plugins/dev_armada/pp2/counters.c new file mode 100644 index 00000000000..a041138bc79 --- /dev/null +++ b/src/plugins/dev_armada/pp2/counters.c @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/counters.h> +#include <vnet/dev/bus/platform.h> +#include <vppinfra/ring.h> +#include <dev_armada/musdk.h> +#include <dev_armada/pp2/pp2.h> + +VLIB_REGISTER_LOG_CLASS (mvpp2_log, static) = { + .class_name = "armada", + .subclass_name = "pp2-counters", +}; + +typedef enum +{ + MVPP2_PORT_CTR_RX_BYTES, + MVPP2_PORT_CTR_RX_PACKETS, + MVPP2_PORT_CTR_RX_UCAST, + MVPP2_PORT_CTR_RX_ERRORS, + MVPP2_PORT_CTR_RX_FULLQ_DROPPED, + MVPP2_PORT_CTR_RX_BM_DROPPED, + MVPP2_PORT_CTR_RX_EARLY_DROPPED, + MVPP2_PORT_CTR_RX_FIFO_DROPPED, + MVPP2_PORT_CTR_RX_CLS_DROPPED, + + MVPP2_PORT_CTR_TX_BYTES, + MVPP2_PORT_CTR_TX_PACKETS, + MVPP2_PORT_CTR_TX_UCAST, + MVPP2_PORT_CTR_TX_ERRORS, +} mvpp2_port_counter_id_t; + +typedef enum +{ + MVPP2_RXQ_CTR_ENQ_DESC, + MVPP2_RXQ_CTR_DROP_FULLQ, + MVPP2_RXQ_CTR_DROP_EARLY, + MVPP2_RXQ_CTR_DROP_BM, +} mvpp2_rxq_counter_id_t; + +typedef enum +{ + MVPP2_TXQ_CTR_ENQ_DESC, + MVPP2_TXQ_CTR_ENQ_DEC_TO_DDR, + MVPP2_TXQ_CTR_ENQ_BUF_TO_DDR, + MVPP2_TXQ_CTR_DEQ_DESC, +} mvpp2_txq_counter_id_t; + +static vnet_dev_counter_t mvpp2_port_counters[] = { + VNET_DEV_CTR_RX_BYTES (MVPP2_PORT_CTR_RX_BYTES), + VNET_DEV_CTR_RX_PACKETS (MVPP2_PORT_CTR_RX_PACKETS), + VNET_DEV_CTR_RX_DROPS (MVPP2_PORT_CTR_RX_ERRORS), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_UCAST, RX, PACKETS, "unicast"), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_FULLQ_DROPPED, RX, PACKETS, + "fullq dropped"), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_BM_DROPPED, RX, PACKETS, + "bm dropped"), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_EARLY_DROPPED, RX, PACKETS, + "early dropped"), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_FIFO_DROPPED, RX, PACKETS, + "fifo dropped"), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_CLS_DROPPED, RX, PACKETS, + "cls dropped"), + + VNET_DEV_CTR_TX_BYTES (MVPP2_PORT_CTR_TX_BYTES), + VNET_DEV_CTR_TX_PACKETS (MVPP2_PORT_CTR_TX_PACKETS), + VNET_DEV_CTR_TX_DROPS (MVPP2_PORT_CTR_TX_ERRORS), + VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_TX_UCAST, TX, PACKETS, "unicast"), +}; + +static vnet_dev_counter_t mvpp2_rxq_counters[] = { + VNET_DEV_CTR_VENDOR (MVPP2_RXQ_CTR_ENQ_DESC, RX, DESCRIPTORS, "enqueued"), + VNET_DEV_CTR_VENDOR (MVPP2_RXQ_CTR_DROP_FULLQ, RX, PACKETS, "drop fullQ"), + VNET_DEV_CTR_VENDOR (MVPP2_RXQ_CTR_DROP_EARLY, RX, PACKETS, "drop early"), + VNET_DEV_CTR_VENDOR (MVPP2_RXQ_CTR_DROP_BM, RX, PACKETS, "drop BM"), +}; + +static vnet_dev_counter_t mvpp2_txq_counters[] = { + VNET_DEV_CTR_VENDOR (MVPP2_TXQ_CTR_ENQ_DESC, TX, DESCRIPTORS, "enqueued"), + VNET_DEV_CTR_VENDOR (MVPP2_TXQ_CTR_DEQ_DESC, TX, PACKETS, "dequeued"), + VNET_DEV_CTR_VENDOR (MVPP2_TXQ_CTR_ENQ_BUF_TO_DDR, TX, BUFFERS, + "enq to DDR"), + VNET_DEV_CTR_VENDOR (MVPP2_TXQ_CTR_ENQ_DEC_TO_DDR, TX, DESCRIPTORS, + "enq to DDR"), +}; + +void +mvpp2_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_port_add_counters (vm, port, mvpp2_port_counters, + ARRAY_LEN (mvpp2_port_counters)); + + foreach_vnet_dev_port_rx_queue (q, port) + vnet_dev_rx_queue_add_counters (vm, q, mvpp2_rxq_counters, + ARRAY_LEN (mvpp2_rxq_counters)); + + foreach_vnet_dev_port_tx_queue (q, port) + vnet_dev_tx_queue_add_counters (vm, q, mvpp2_txq_counters, + ARRAY_LEN (mvpp2_txq_counters)); +} + +void +mvpp2_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port) +{ + mvpp2_port_t *mp = vnet_dev_get_port_data (port); + struct pp2_ppio_statistics stats; + pp2_ppio_get_statistics (mp->ppio, &stats, 1); +} + +void +mvpp2_rxq_clear_counters (vlib_main_t *vm, 
vnet_dev_rx_queue_t *q) +{ + mvpp2_port_t *mp = vnet_dev_get_port_data (q->port); + struct pp2_ppio_inq_statistics stats; + pp2_ppio_inq_get_statistics (mp->ppio, 0, q->queue_id, &stats, 1); +} + +void +mvpp2_txq_clear_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *q) +{ + mvpp2_port_t *mp = vnet_dev_get_port_data (q->port); + struct pp2_ppio_inq_statistics stats; + pp2_ppio_inq_get_statistics (mp->ppio, 0, q->queue_id, &stats, 1); +} + +vnet_dev_rv_t +mvpp2_port_get_stats (vlib_main_t *vm, vnet_dev_port_t *port) +{ + mvpp2_port_t *mp = vnet_dev_get_port_data (port); + struct pp2_ppio_statistics stats; + pp2_ppio_get_statistics (mp->ppio, &stats, 0); + + foreach_vnet_dev_counter (c, port->counter_main) + { + switch (c->user_data) + { + case MVPP2_PORT_CTR_RX_BYTES: + vnet_dev_counter_value_update (vm, c, stats.rx_bytes); + break; + case MVPP2_PORT_CTR_RX_PACKETS: + vnet_dev_counter_value_update (vm, c, stats.rx_packets); + break; + case MVPP2_PORT_CTR_RX_UCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_unicast_packets); + break; + case MVPP2_PORT_CTR_RX_ERRORS: + vnet_dev_counter_value_update (vm, c, stats.rx_errors); + break; + case MVPP2_PORT_CTR_TX_BYTES: + vnet_dev_counter_value_update (vm, c, stats.tx_bytes); + break; + case MVPP2_PORT_CTR_TX_PACKETS: + vnet_dev_counter_value_update (vm, c, stats.tx_packets); + break; + case MVPP2_PORT_CTR_TX_UCAST: + vnet_dev_counter_value_update (vm, c, stats.tx_unicast_packets); + break; + case MVPP2_PORT_CTR_TX_ERRORS: + vnet_dev_counter_value_update (vm, c, stats.tx_errors); + break; + case MVPP2_PORT_CTR_RX_FULLQ_DROPPED: + vnet_dev_counter_value_update (vm, c, stats.rx_fullq_dropped); + break; + case MVPP2_PORT_CTR_RX_BM_DROPPED: + vnet_dev_counter_value_update (vm, c, stats.rx_bm_dropped); + break; + case MVPP2_PORT_CTR_RX_EARLY_DROPPED: + vnet_dev_counter_value_update (vm, c, stats.rx_early_dropped); + break; + case MVPP2_PORT_CTR_RX_FIFO_DROPPED: + vnet_dev_counter_value_update (vm, c, stats.rx_fifo_dropped); + break; + case MVPP2_PORT_CTR_RX_CLS_DROPPED: + vnet_dev_counter_value_update (vm, c, stats.rx_cls_dropped); + break; + + default: + ASSERT (0); + } + } + + foreach_vnet_dev_port_rx_queue (q, port) + { + struct pp2_ppio_inq_statistics stats; + pp2_ppio_inq_get_statistics (mp->ppio, 0, q->queue_id, &stats, 0); + + foreach_vnet_dev_counter (c, q->counter_main) + { + switch (c->user_data) + { + case MVPP2_RXQ_CTR_ENQ_DESC: + vnet_dev_counter_value_update (vm, c, stats.enq_desc); + break; + case MVPP2_RXQ_CTR_DROP_BM: + vnet_dev_counter_value_update (vm, c, stats.drop_bm); + break; + case MVPP2_RXQ_CTR_DROP_EARLY: + vnet_dev_counter_value_update (vm, c, stats.drop_early); + break; + case MVPP2_RXQ_CTR_DROP_FULLQ: + vnet_dev_counter_value_update (vm, c, stats.drop_fullq); + break; + default: + ASSERT (0); + } + } + } + + foreach_vnet_dev_port_tx_queue (q, port) + { + struct pp2_ppio_outq_statistics stats; + pp2_ppio_outq_get_statistics (mp->ppio, q->queue_id, &stats, 0); + + foreach_vnet_dev_counter (c, q->counter_main) + { + switch (c->user_data) + { + case MVPP2_TXQ_CTR_ENQ_DESC: + vnet_dev_counter_value_update (vm, c, stats.enq_desc); + break; + case MVPP2_TXQ_CTR_DEQ_DESC: + vnet_dev_counter_value_update (vm, c, stats.deq_desc); + break; + case MVPP2_TXQ_CTR_ENQ_BUF_TO_DDR: + vnet_dev_counter_value_update (vm, c, stats.enq_buf_to_ddr); + break; + case MVPP2_TXQ_CTR_ENQ_DEC_TO_DDR: + vnet_dev_counter_value_update (vm, c, stats.enq_dec_to_ddr); + break; + default: + ASSERT (0); + } + } + } + + return VNET_DEV_OK; +} diff --git 
a/src/plugins/dev_armada/pp2/format.c b/src/plugins/dev_armada/pp2/format.c index 37d482b5ce8..42c4114c512 100644 --- a/src/plugins/dev_armada/pp2/format.c +++ b/src/plugins/dev_armada/pp2/format.c @@ -152,25 +152,47 @@ format_mvpp2_rx_desc (u8 *s, va_list *args) s = format (s, " "); foreach_pp2_rx_desc_field; +#undef _ return s; } u8 * +format_mv_dsa_tag (u8 *s, va_list *args) +{ + mv_dsa_tag_t *tag = va_arg (*args, mv_dsa_tag_t *); + u32 cnt = 0; + +#define _(b, n) \ + if (#n[0] != '_') \ + s = format (s, "%s" #n " %u", cnt++ ? " " : "", tag->n); + foreach_mv_dsa_tag_field +#undef _ + return s; +} + +u8 * format_mvpp2_rx_trace (u8 *s, va_list *args) { vlib_main_t *vm = va_arg (*args, vlib_main_t *); vlib_node_t *node = va_arg (*args, vlib_node_t *); mvpp2_rx_trace_t *t = va_arg (*args, mvpp2_rx_trace_t *); vnet_main_t *vnm = vnet_get_main (); - u32 hw_if_index = t->rxq->port->intf.hw_if_index; - vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); u32 indent = format_get_indent (s); struct pp2_ppio_desc *d = &t->desc; - s = format (s, "pp2: %v (%d) next-node %U", hi->name, hw_if_index, - format_vlib_next_node_name, vm, node->index, t->rxq->next_index); + if (t->sw_if_index != CLIB_U32_MAX) + s = format (s, "pp2: %U (%d) next-node %U", format_vnet_sw_if_index_name, + vnm, t->sw_if_index, t->sw_if_index, + format_vlib_next_node_name, vm, node->index, t->next_index); + else + s = format (s, "pp2: next-node %U", format_vlib_next_node_name, vm, + node->index, t->next_index); + s = format (s, "\n%U%U", format_white_space, indent + 2, format_mvpp2_rx_desc, d); + if (t->dsa_tag.as_u32) + s = format (s, "\n%Udsa tag: %U", format_white_space, indent + 2, + format_mv_dsa_tag, &t->dsa_tag); return s; } diff --git a/src/plugins/dev_armada/pp2/init.c b/src/plugins/dev_armada/pp2/init.c index 38ff32d8f53..4333dbb352f 100644 --- a/src/plugins/dev_armada/pp2/init.c +++ b/src/plugins/dev_armada/pp2/init.c @@ -201,6 +201,7 @@ mvpp2_init (vlib_main_t *vm, vnet_dev_t *dev) vnet_dev_rv_t rv = VNET_DEV_OK; vnet_dev_bus_platform_device_data_t *dd = vnet_dev_get_bus_data (dev); clib_dt_node_t *sc; + clib_dt_node_t *sw = 0; int pp_id = -1; if (!clib_dt_node_is_compatible (dd->node, "marvell,armada-7k-pp22")) @@ -219,12 +220,55 @@ mvpp2_init (vlib_main_t *vm, vnet_dev_t *dev) if (pp_id < 0) return VNET_DEV_ERR_UNKNOWN_DEVICE; + foreach_clib_dt_tree_node (n, clib_dt_get_root_node (sc)) + if (clib_dt_node_is_compatible (n, "marvell,mv88e6190") || + clib_dt_node_is_compatible (n, "marvell,mv88e6393x")) + { + clib_dt_node_t *ports; + sw = n; + log_debug (dev, "found mv88e6190 compatible switch at %v", n->path); + ports = clib_dt_get_child_node (sw, "ports"); + foreach_clib_dt_child_node (pn, ports) + { + u32 reg = CLIB_U32_MAX; + char *label = "(no label)"; + clib_dt_property_t *p; + clib_dt_node_t *n; + + p = clib_dt_get_node_property_by_name (pn, "reg"); + if (p) + reg = clib_dt_property_get_u32 (p); + p = clib_dt_get_node_property_by_name (pn, "label"); + if (p) + label = clib_dt_property_get_string (p); + + log_debug (dev, "port %u label %s", reg, label); + + n = clib_dt_dereference_node (pn, "phy-handle"); + if (n) + log_debug (dev, " phy is %v", n->path); + + n = clib_dt_dereference_node (pn, "sfp"); + if (n) + log_debug (dev, " sfp is %v", n->path); + + n = clib_dt_dereference_node (pn, "ethernet"); + if (n) + log_debug (dev, " connected to %v", n->path); + + p = clib_dt_get_node_property_by_name (pn, "phy-mode"); + if (p) + log_debug (dev, " phy mode is %s", + clib_dt_property_get_string (p)); + 
} + } + if ((mvpp2_global_init (vm, dev)) != VNET_DEV_OK) return rv; md->pp_id = pp_id; - vec_foreach_pointer (cn, dd->node->child_nodes) + foreach_clib_dt_child_node (cn, dd->node) { clib_dt_property_t *p; char netdev_name[IFNAMSIZ]; @@ -234,10 +278,10 @@ mvpp2_init (vlib_main_t *vm, vnet_dev_t *dev) p = clib_dt_get_node_property_by_name (cn, "port-id"); - if (!clib_dt_proprerty_is_u32 (p)) + if (!clib_dt_property_is_u32 (p)) continue; - ppio_id = clib_dt_proprerty_get_u32 (p); + ppio_id = clib_dt_property_get_u32 (p); log_debug (dev, "found port with ppio id %u", ppio_id); if (pp2_ppio_available (md->pp_id, ppio_id) == 0) @@ -271,6 +315,28 @@ mvpp2_init (vlib_main_t *vm, vnet_dev_t *dev) .ppio_id = ppio_id, }; + if (sw) + { + clib_dt_node_t *ports = clib_dt_get_child_node (sw, "ports"); + if (ports) + foreach_clib_dt_child_node (sp, ports) + { + clib_dt_node_t *eth; + + eth = clib_dt_dereference_node (sp, "ethernet"); + + if (cn != eth) + continue; + + mvpp2_port.is_dsa = 1; + mvpp2_port.switch_node = sw; + mvpp2_port.switch_port_node = sp; + log_debug (dev, "port is connected to switch port %v", + sp->path); + break; + } + } + vnet_dev_port_add_args_t port_add_args = { .port = { .attr = { @@ -278,18 +344,26 @@ mvpp2_init (vlib_main_t *vm, vnet_dev_t *dev) .max_rx_queues = PP2_PPIO_MAX_NUM_INQS, .max_tx_queues = PP2_PPIO_MAX_NUM_OUTQS, .max_supported_rx_frame_size = 9216, + .caps.secondary_interfaces = mvpp2_port.is_dsa != 0, }, .ops = { .init = mvpp2_port_init, .deinit = mvpp2_port_deinit, .start = mvpp2_port_start, .stop = mvpp2_port_stop, + .add_sec_if = mvpp2_port_add_sec_if, + .del_sec_if = mvpp2_port_del_sec_if, .config_change = mvpp2_port_cfg_change, .config_change_validate = mvpp2_port_cfg_change_validate, .format_status = format_mvpp2_port_status, + .clear_counters = mvpp2_port_clear_counters, }, .data_size = sizeof (mvpp2_port_t), .initial_data = &mvpp2_port, + .sec_if_args = VNET_DEV_ARGS ( + VNET_DEV_ARG_UINT32 (MVPP2_SEC_IF_ARG_DSA_SWITCH, "dsa_switch", "DSA source switch ID", .max= 31), + VNET_DEV_ARG_UINT32 (MVPP2_SEC_IF_ARG_DSA_PORT, "dsa_port", "DSA source switch port ID", .max = 31) + ), }, .rx_node = &mvpp2_rx_node, .tx_node = &mvpp2_tx_node, @@ -302,6 +376,9 @@ mvpp2_init (vlib_main_t *vm, vnet_dev_t *dev) .max_size = 4096, .size_is_power_of_two = 1, }, + .ops = { + .clear_counters = mvpp2_rxq_clear_counters, + }, }, .tx_queue = { .config = { @@ -315,6 +392,7 @@ mvpp2_init (vlib_main_t *vm, vnet_dev_t *dev) .ops = { .alloc = mvpp2_txq_alloc, .free = mvpp2_txq_free, + .clear_counters = mvpp2_txq_clear_counters, }, }, }; diff --git a/src/plugins/dev_armada/pp2/port.c b/src/plugins/dev_armada/pp2/port.c index 8e785e5e0e4..63a212e80c2 100644 --- a/src/plugins/dev_armada/pp2/port.c +++ b/src/plugins/dev_armada/pp2/port.c @@ -23,6 +23,7 @@ mvpp2_port_init (vlib_main_t *vm, vnet_dev_port_t *port) mvpp2_device_t *md = vnet_dev_get_data (dev); mvpp2_port_t *mp = vnet_dev_get_port_data (port); vnet_dev_rv_t rv = VNET_DEV_OK; + vnet_dev_rx_queue_t *rxq0 = vnet_dev_get_port_rx_queue_by_id (port, 0); struct pp2_ppio_link_info li; char match[16]; int mrv; @@ -40,17 +41,17 @@ mvpp2_port_init (vlib_main_t *vm, vnet_dev_port_t *port) .tcs_params[0] = { .pkt_offset = 0, .num_in_qs = 1, - .inqs_params = &(struct pp2_ppio_inq_params) { .size = 512 }, - .pools[0][0] = md->thread[0].bpool, + .inqs_params = &(struct pp2_ppio_inq_params) { .size = rxq0->size }, + .pools[0][0] = md->thread[rxq0->rx_thread_index].bpool, }, }, }; - foreach_vnet_dev_port_rx_queue (q, port) + 
foreach_vnet_dev_port_tx_queue (q, port) { struct pp2_ppio_outqs_params *oqs = &ppio_params.outqs_params; - oqs->outqs_params[0].weight = 1; - oqs->outqs_params[0].size = q->size; + oqs->outqs_params[q->queue_id].weight = 1; + oqs->outqs_params[q->queue_id].size = q->size; oqs->num_outqs++; } @@ -75,6 +76,11 @@ mvpp2_port_init (vlib_main_t *vm, vnet_dev_port_t *port) log_debug (dev, "port %u %U", port->port_id, format_pp2_ppio_link_info, &li); + for (u32 i = 0; i < VLIB_FRAME_SIZE; i++) + mp->desc_ptrs[i] = mp->descs + i; + + mvpp2_port_add_counters (vm, port); + done: if (rv != VNET_DEV_OK) mvpp2_port_stop (vm, port); @@ -145,12 +151,13 @@ mvpp2_port_poll (vlib_main_t *vm, vnet_dev_port_t *port) } } - if (changes.change.any == 0) - return; - - mp->last_link_info = li; + if (changes.change.any) + { + mp->last_link_info = li; + vnet_dev_port_state_change (vm, port, changes); + } - vnet_dev_port_state_change (vm, port, changes); + mvpp2_port_get_stats (vm, port); } vnet_dev_rv_t @@ -203,6 +210,68 @@ mvpp2_port_stop (vlib_main_t *vm, vnet_dev_port_t *port) } vnet_dev_rv_t +mvpp2_port_add_sec_if (vlib_main_t *vm, vnet_dev_port_t *port, void *p) +{ + vnet_dev_port_interface_t *sif = p; + mvpp2_port_t *mp = vnet_dev_get_port_data (port); + u32 port_id = CLIB_U32_MAX, switch_id = 0, index; + + if (mp->is_dsa == 0) + return VNET_DEV_ERR_NOT_SUPPORTED; + + foreach_vnet_dev_args (a, sif) + { + switch (a->id) + { + case MVPP2_SEC_IF_ARG_DSA_PORT: + if (a->val_set) + port_id = vnet_dev_arg_get_uint32 (a); + break; + case MVPP2_SEC_IF_ARG_DSA_SWITCH: + switch_id = vnet_dev_arg_get_uint32 (a); + break; + default: + break; + } + } + + if (port_id == CLIB_U32_MAX) + { + log_err (port->dev, "missing dsa_port argument"); + return VNET_DEV_ERR_INVALID_ARG; + } + + log_debug (port->dev, "switch %u port %u", switch_id, port_id); + + mv_dsa_tag_t tag = { + .tag_type = MV_DSA_TAG_TYPE_FROM_CPU, + .src_port_or_lag = port_id, + .src_dev = switch_id, + }; + + index = switch_id << 5 | port_id; + + sif->user_data = tag.as_u32; + uword_bitmap_set_bits_at_index (mp->valid_dsa_src_bitmap, index, 1); + mp->dsa_to_sec_if[index] = sif->index; + return VNET_DEV_OK; +} + +vnet_dev_rv_t +mvpp2_port_del_sec_if (vlib_main_t *vm, vnet_dev_port_t *port, void *p) +{ + vnet_dev_port_interface_t *sif = p; + mvpp2_port_t *mp = vnet_dev_get_port_data (port); + mv_dsa_tag_t tag = { .as_u32 = sif->user_data }; + u32 index = tag.src_dev << 5 | tag.src_port_or_lag; + + log_debug (port->dev, "switch %u port %u", tag.src_dev, tag.src_port_or_lag); + + uword_bitmap_clear_bits_at_index (mp->valid_dsa_src_bitmap, index, 1); + return VNET_DEV_OK; +} + +vnet_dev_rv_t mvpp2_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_port_cfg_change_req_t *req) { @@ -211,6 +280,7 @@ mvpp2_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port, switch (req->type) { case VNET_DEV_PORT_CFG_PROMISC_MODE: + case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR: break; @@ -246,6 +316,19 @@ mvpp2_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, req->promisc); break; + case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: + clib_memcpy (&addr, req->addr.eth_mac, sizeof (addr)); + mrv = pp2_ppio_set_mac_addr (mp->ppio, addr); + if (mrv) + { + log_err (port->dev, "pp2_ppio_set_mac_addr: failed, rv %d", mrv); + rv = VNET_DEV_ERR_INTERNAL; + } + else + log_debug (port->dev, "pp2_ppio_set_mac_addr: %U added", + format_ethernet_address, &addr); + 
break; + case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: clib_memcpy (&addr, req->addr.eth_mac, sizeof (addr)); mrv = pp2_ppio_add_mac_addr (mp->ppio, addr); diff --git a/src/plugins/dev_armada/pp2/pp2.h b/src/plugins/dev_armada/pp2/pp2.h index 6b12dc737a7..160bfd20c5c 100644 --- a/src/plugins/dev_armada/pp2/pp2.h +++ b/src/plugins/dev_armada/pp2/pp2.h @@ -8,6 +8,7 @@ #include <vppinfra/clib.h> #include <vppinfra/error_bootstrap.h> #include <vppinfra/format.h> +#include <vppinfra/devicetree.h> #include <vnet/vnet.h> #include <vnet/dev/dev.h> @@ -29,6 +30,58 @@ #define MVPP2_NUM_BPOOLS 16 #define MVPP2_MAX_THREADS 4 #define MRVL_PP2_BUFF_BATCH_SZ 32 +#define MV_DSA_N_SRC 32 + +#define foreach_mv_dsa_tag_field \ + _ (12, vid) \ + _ (1, _zero13) \ + _ (3, pri) \ + _ (1, cfi_dei) \ + _ (1, _unused17) \ + _ (1, src_is_lag) \ + _ (5, src_port_or_lag) \ + _ (5, src_dev) \ + _ (1, src_tagged) \ + _ (2, tag_type) + +typedef enum +{ + MV_DSA_TAG_TYPE_TO_CPU = 0, + MV_DSA_TAG_TYPE_FROM_CPU = 1, + MV_DSA_TAG_TYPE_TO_SNIFFER = 2, + MV_DSA_TAG_TYPE_FORWARD = 3 +} mv_dsa_tag_type_t; + +typedef enum +{ + MVPP2_SEC_IF_ARG_DSA_SWITCH, + MVPP2_SEC_IF_ARG_DSA_PORT +} mvpp2_sec_if_args_t; + +typedef union +{ + struct + { +#define _(b, n) u32 (n) : (b); + foreach_mv_dsa_tag_field +#undef _ + }; + u32 as_u32; +} mv_dsa_tag_t; + +STATIC_ASSERT_SIZEOF (mv_dsa_tag_t, 4); + +static_always_inline mv_dsa_tag_t +mv_dsa_tag_read (void *p) +{ + return (mv_dsa_tag_t){ .as_u32 = clib_net_to_host_u32 (*(u32u *) p) }; +} + +static_always_inline void +mv_dsa_tag_write (void *p, mv_dsa_tag_t tag) +{ + ((mv_dsa_tag_t *) p)->as_u32 = clib_host_to_net_u32 (tag.as_u32); +} typedef struct { @@ -49,6 +102,13 @@ typedef struct struct pp2_ppio *ppio; u8 ppio_id; struct pp2_ppio_link_info last_link_info; + clib_dt_node_t *switch_node; + clib_dt_node_t *switch_port_node; + + struct pp2_ppio_desc descs[VLIB_FRAME_SIZE]; + struct pp2_ppio_desc *desc_ptrs[VLIB_FRAME_SIZE]; + uword valid_dsa_src_bitmap[1024 / uword_bits]; + u16 dsa_to_sec_if[1024]; } mvpp2_port_t; typedef struct @@ -65,21 +125,33 @@ typedef struct typedef struct { struct pp2_ppio_desc desc; - vnet_dev_rx_queue_t *rxq; + u32 sw_if_index; + u16 next_index; + mv_dsa_tag_t dsa_tag; } mvpp2_rx_trace_t; +/* counters.c */ +void mvpp2_port_add_counters (vlib_main_t *, vnet_dev_port_t *); +void mvpp2_port_clear_counters (vlib_main_t *, vnet_dev_port_t *); +void mvpp2_rxq_clear_counters (vlib_main_t *, vnet_dev_rx_queue_t *); +void mvpp2_txq_clear_counters (vlib_main_t *, vnet_dev_tx_queue_t *); +vnet_dev_rv_t mvpp2_port_get_stats (vlib_main_t *, vnet_dev_port_t *); + /* format.c */ format_function_t format_pp2_ppio_link_info; format_function_t format_mvpp2_port_status; format_function_t format_mvpp2_dev_info; format_function_t format_mvpp2_rx_trace; format_function_t format_mvpp2_rx_desc; +format_function_t format_mv_dsa_tag; /* port.c */ vnet_dev_port_op_t mvpp2_port_init; vnet_dev_port_op_no_rv_t mvpp2_port_deinit; vnet_dev_port_op_t mvpp2_port_start; vnet_dev_port_op_no_rv_t mvpp2_port_stop; +vnet_dev_port_op_with_ptr_t mvpp2_port_add_sec_if; +vnet_dev_port_op_with_ptr_t mvpp2_port_del_sec_if; vnet_dev_rv_t mvpp2_port_cfg_change (vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *); vnet_dev_rv_t @@ -128,6 +200,7 @@ typedef enum "pp2_bpool_get_num_buffs error") \ _ (BPOOL_PUT_BUFFS, bpool_put_buffs, ERROR, "pp2_bpool_put_buffs error") \ _ (BUFFER_ALLOC, buffer_alloc, ERROR, "buffer alloc error") \ + _ (UNKNOWN_DSA_SRC, unknown_dsa_src, ERROR, "unknown DSA source") \ 
_ (MAC_CE, mac_ce, ERROR, "MAC error (CRC error)") \ _ (MAC_OR, mac_or, ERROR, "overrun error") \ _ (MAC_RSVD, mac_rsvd, ERROR, "unknown MAC error") \ diff --git a/src/plugins/dev_armada/pp2/rx.c b/src/plugins/dev_armada/pp2/rx.c index 81101ef9313..5b0e8d35000 100644 --- a/src/plugins/dev_armada/pp2/rx.c +++ b/src/plugins/dev_armada/pp2/rx.c @@ -5,104 +5,219 @@ #include <vlib/vlib.h> #include <vnet/dev/dev.h> #include <vnet/ethernet/ethernet.h> +#include <vppinfra/vector/mask_compare.h> +#include <vppinfra/vector/compress.h> #include <dev_armada/pp2/pp2.h> -static_always_inline void -mvpp2_rx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, - vnet_dev_rx_queue_t *rxq, vlib_buffer_t *b0, uword *n_trace, - struct pp2_ppio_desc *d) +static_always_inline vlib_buffer_t * +desc_to_vlib_buffer (vlib_main_t *vm, struct pp2_ppio_desc *d) { - if (PREDICT_TRUE (vlib_trace_buffer (vm, node, rxq->next_index, b0, - /* follow_chain */ 0))) + return vlib_get_buffer (vm, pp2_ppio_inq_desc_get_cookie (d)); +} + +static_always_inline u64 +mrvl_pp2_rx_one_if (vlib_main_t *vm, vlib_node_runtime_t *node, + vnet_dev_rx_queue_t *rxq, + vnet_dev_rx_queue_if_rt_data_t *if_rt_data, + struct pp2_ppio_desc **desc_ptrs, u32 n_desc, + i32 current_data, i32 len_adj, mv_dsa_tag_t tag) +{ + vnet_main_t *vnm = vnet_get_main (); + u64 n_rx_bytes = 0; + vlib_buffer_t *b0, *b1; + u32 n_trace, n_left = n_desc; + u32 buffer_indices[VLIB_FRAME_SIZE], *bi = buffer_indices; + struct pp2_ppio_desc **dp = desc_ptrs; + u32 next_index = if_rt_data->next_index; + vlib_buffer_template_t bt = if_rt_data->buffer_template; + u32 sw_if_index = if_rt_data->sw_if_index; + + bt.current_data = current_data; + + for (; n_left >= 4; dp += 2, bi += 2, n_left -= 2) { - mvpp2_rx_trace_t *tr; - vlib_set_trace_count (vm, node, --(*n_trace)); - tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->desc = *d; - tr->rxq = rxq; + clib_prefetch_store (desc_to_vlib_buffer (vm, dp[2])); + clib_prefetch_store (desc_to_vlib_buffer (vm, dp[3])); + b0 = desc_to_vlib_buffer (vm, dp[0]); + b1 = desc_to_vlib_buffer (vm, dp[1]); + bi[0] = pp2_ppio_inq_desc_get_cookie (dp[0]); + bi[1] = pp2_ppio_inq_desc_get_cookie (dp[1]); + b0->template = bt; + b1->template = bt; + + n_rx_bytes += b0->current_length = + pp2_ppio_inq_desc_get_pkt_len (dp[0]) + len_adj; + n_rx_bytes += b1->current_length = + pp2_ppio_inq_desc_get_pkt_len (dp[1]) + len_adj; + } + + for (; n_left; dp++, bi++, n_left--) + { + b0 = desc_to_vlib_buffer (vm, dp[0]); + bi[0] = pp2_ppio_inq_desc_get_cookie (dp[0]); + b0->template = bt; + + n_rx_bytes += b0->current_length = + pp2_ppio_inq_desc_get_pkt_len (dp[0]) + len_adj; } + + /* trace */ + n_trace = vlib_get_trace_count (vm, node); + if (PREDICT_FALSE (n_trace > 0)) + { + for (u32 i = 0; i < n_desc && n_trace > 0; i++) + { + vlib_buffer_t *b = desc_to_vlib_buffer (vm, desc_ptrs[i]); + if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b, + /* follow_chain */ 0))) + { + mvpp2_rx_trace_t *tr; + tr = vlib_add_trace (vm, node, b, sizeof (*tr)); + tr->desc = *desc_ptrs[i]; + tr->next_index = next_index; + tr->sw_if_index = sw_if_index; + tr->dsa_tag = tag; + n_trace--; + } + } + vlib_set_trace_count (vm, node, n_trace); + } + vlib_buffer_enqueue_to_single_next (vm, node, buffer_indices, next_index, + n_desc); + + vlib_increment_combined_counter ( + vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, + vm->thread_index, sw_if_index, n_desc, n_rx_bytes); + + return n_rx_bytes; } static_always_inline uword mrvl_pp2_rx_inline 
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, vnet_dev_rx_queue_t *rxq) { - vnet_main_t *vnm = vnet_get_main (); vnet_dev_port_t *port = rxq->port; - vnet_dev_t *dev = port->dev; - mvpp2_device_t *md = vnet_dev_get_data (dev); mvpp2_port_t *mp = vnet_dev_get_port_data (port); - vlib_buffer_template_t bt = rxq->buffer_template; - u32 thread_index = vm->thread_index; - uword n_trace = vlib_get_trace_count (vm, node); - u32 next_index = rxq->next_index; - u32 n_rx_packets = 0, n_rx_bytes = 0; - struct pp2_hif *hif = md->hif[thread_index]; - struct pp2_ppio_desc descs[VLIB_FRAME_SIZE], *d; - struct pp2_bpool *bpool = md->thread[thread_index].bpool; - struct buff_release_entry *bre = md->thread[thread_index].bre; + mv_dsa_tag_t dsa_tags[VLIB_FRAME_SIZE]; u16 n_desc = VLIB_FRAME_SIZE; - u32 buffers[VLIB_FRAME_SIZE]; - u32 n_bufs, *bi, i; - vlib_buffer_t *b0, *b1; + vlib_buffer_t *b; + u32 i; if (PREDICT_FALSE ( - pp2_ppio_recv (mp->ppio, 0, rxq->queue_id, descs, &n_desc))) + pp2_ppio_recv (mp->ppio, 0, rxq->queue_id, mp->descs, &n_desc))) { vlib_error_count (vm, node->node_index, MVPP2_RX_NODE_CTR_PPIO_RECV, 1); - n_desc = 0; + return 0; } - n_rx_packets = n_desc; + if (mp->is_dsa) + { + for (i = 0; i < n_desc; i++) + { + b = desc_to_vlib_buffer (vm, mp->descs + i); + u8 *start = b->data; + mv_dsa_tag_t tag = mv_dsa_tag_read (start + 14); + dsa_tags[i] = tag; + clib_memmove (start + 6, start + 2, 12); + } - for (i = 0; i < n_desc; i++) - buffers[i] = pp2_ppio_inq_desc_get_cookie (descs + i); + vlib_frame_bitmap_t avail_bmp = {}; + vlib_frame_bitmap_init (avail_bmp, n_desc); + u32 n_avail = n_desc; - bt.current_data = 2; + while (n_avail) + { + vlib_frame_bitmap_t selected_bmp = {}; + struct pp2_ppio_desc *sel_descs[VLIB_FRAME_SIZE]; + mv_dsa_tag_t tag; + u32 n_sel, index; - for (d = descs, bi = buffers; n_desc >= 4; d += 2, bi += 2, n_desc -= 2) - { - /* prefetch */ - b0 = vlib_get_buffer (vm, bi[0]); - b1 = vlib_get_buffer (vm, bi[1]); - b0->template = bt; - b1->template = bt; + tag = dsa_tags[vlib_frame_bitmap_find_first_set (avail_bmp)]; + index = tag.src_dev << 5 | tag.src_port_or_lag; - n_rx_bytes += b0->current_length = pp2_ppio_inq_desc_get_pkt_len (d); - n_rx_bytes += b1->current_length = pp2_ppio_inq_desc_get_pkt_len (d + 1); + clib_mask_compare_u32 (tag.as_u32, (u32 *) dsa_tags, selected_bmp, + n_desc); + n_sel = vlib_frame_bitmap_count_set_bits (selected_bmp); + n_avail -= n_sel; - if (PREDICT_FALSE (n_trace > 0)) - { - mvpp2_rx_trace (vm, node, rxq, b0, &n_trace, d); - if (n_trace > 0) - mvpp2_rx_trace (vm, node, rxq, b1, &n_trace, d + 1); - } - } + if (uword_bitmap_is_bit_set (mp->valid_dsa_src_bitmap, index)) + { + clib_compress_u64 ((uword *) sel_descs, (uword *) mp->desc_ptrs, + selected_bmp, n_desc); + mrvl_pp2_rx_one_if (vm, node, rxq, + vnet_dev_get_rx_queue_sec_if_rt_data ( + rxq, mp->dsa_to_sec_if[index]), + sel_descs, n_sel, 6, -4, tag); + } + else + { + u32 n_free = 0, buffer_indices[VLIB_FRAME_SIZE]; - for (; n_desc; d++, bi++, n_desc--) - { - b0 = vlib_get_buffer (vm, bi[0]); - b0->template = bt; + foreach_vlib_frame_bitmap_set_bit_index (i, selected_bmp) + buffer_indices[n_free++] = + pp2_ppio_inq_desc_get_cookie (mp->descs + i); - n_rx_bytes += b0->current_length = pp2_ppio_inq_desc_get_pkt_len (d); + u32 n_trace = vlib_get_trace_count (vm, node); + if (PREDICT_FALSE (n_trace > 0)) + { + foreach_vlib_frame_bitmap_set_bit_index (i, selected_bmp) + { + vlib_buffer_t *b = + desc_to_vlib_buffer (vm, mp->descs + i); - if (PREDICT_FALSE (n_trace > 0)) - 
mvpp2_rx_trace (vm, node, rxq, b0, &n_trace, d); + if (PREDICT_TRUE (vlib_trace_buffer ( + vm, node, VNET_DEV_ETH_RX_PORT_NEXT_DROP, b, + /* follow_chain */ 0))) + { + mvpp2_rx_trace_t *tr; + tr = vlib_add_trace (vm, node, b, sizeof (*tr)); + tr->desc = mp->descs[i]; + tr->next_index = VNET_DEV_ETH_RX_PORT_NEXT_DROP; + tr->sw_if_index = CLIB_U32_MAX; + tr->dsa_tag = dsa_tags[i]; + n_trace--; + } + if (n_trace == 0) + break; + } + vlib_set_trace_count (vm, node, n_trace); + } + + vlib_buffer_free (vm, buffer_indices, n_free); + vlib_error_count (vm, node->node_index, + MVPP2_RX_NODE_CTR_UNKNOWN_DSA_SRC, 1); + } + } + } + else + { + mrvl_pp2_rx_one_if (vm, node, rxq, + vnet_dev_get_rx_queue_if_rt_data (rxq), + mp->desc_ptrs, n_desc, 2, 0, (mv_dsa_tag_t){}); } - vlib_buffer_enqueue_to_single_next (vm, node, buffers, next_index, - n_rx_packets); + return n_desc; +} - vlib_increment_combined_counter ( - vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - thread_index, port->intf.sw_if_index, n_rx_packets, n_rx_bytes); +static_always_inline void +mrvl_pp2_rx_refill (vlib_main_t *vm, vlib_node_runtime_t *node, + vnet_dev_rx_queue_t *rxq) +{ + vnet_dev_port_t *port = rxq->port; + vnet_dev_t *dev = port->dev; + mvpp2_device_t *md = vnet_dev_get_data (dev); + u32 thread_index = vm->thread_index; + struct pp2_hif *hif = md->hif[thread_index]; + struct pp2_bpool *bpool = md->thread[thread_index].bpool; + struct buff_release_entry *bre = md->thread[thread_index].bre; + u32 n_bufs, *bi; if (PREDICT_FALSE (pp2_bpool_get_num_buffs (bpool, &n_bufs))) { vlib_error_count (vm, node->node_index, MVPP2_RX_NODE_CTR_BPOOL_GET_NUM_BUFFS, 1); - goto done; + return; } n_bufs = rxq->size - n_bufs; @@ -110,18 +225,18 @@ mrvl_pp2_rx_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { u16 n_alloc, i; struct buff_release_entry *e = bre; + u32 buffer_indices[MRVL_PP2_BUFF_BATCH_SZ]; - n_alloc = vlib_buffer_alloc (vm, buffers, MRVL_PP2_BUFF_BATCH_SZ); - i = n_alloc; + n_alloc = vlib_buffer_alloc (vm, buffer_indices, MRVL_PP2_BUFF_BATCH_SZ); if (PREDICT_FALSE (n_alloc == 0)) { vlib_error_count (vm, node->node_index, MVPP2_RX_NODE_CTR_BUFFER_ALLOC, 1); - goto done; + return; } - for (bi = buffers; i--; e++, bi++) + for (i = n_alloc, bi = buffer_indices; i--; e++, bi++) { vlib_buffer_t *b = vlib_get_buffer (vm, bi[0]); @@ -129,23 +244,16 @@ mrvl_pp2_rx_inline (vlib_main_t *vm, vlib_node_runtime_t *node, e->buff.cookie = bi[0]; } - i = n_alloc; - if (PREDICT_FALSE (pp2_bpool_put_buffs (hif, bre, &i))) + if (PREDICT_FALSE (pp2_bpool_put_buffs (hif, bre, &n_alloc))) { vlib_error_count (vm, node->node_index, MVPP2_RX_NODE_CTR_BPOOL_PUT_BUFFS, 1); - vlib_buffer_free (vm, buffers, n_alloc); - goto done; + vlib_buffer_free (vm, buffer_indices, n_alloc); + return; } - if (PREDICT_FALSE (i != n_alloc)) - vlib_buffer_free (vm, buffers + i, n_alloc - i); - - n_bufs -= i; + n_bufs -= n_alloc; } - -done: - return n_rx_packets; } VNET_DEV_NODE_FN (mvpp2_rx_node) @@ -153,6 +261,9 @@ VNET_DEV_NODE_FN (mvpp2_rx_node) { u32 n_rx = 0; foreach_vnet_dev_rx_queue_runtime (rxq, node) - n_rx += mrvl_pp2_rx_inline (vm, node, frame, rxq); + { + n_rx += mrvl_pp2_rx_inline (vm, node, frame, rxq); + mrvl_pp2_rx_refill (vm, node, rxq); + } return n_rx; } diff --git a/src/plugins/dev_armada/pp2/tx.c b/src/plugins/dev_armada/pp2/tx.c index 1e6675c9746..583eec71d60 100644 --- a/src/plugins/dev_armada/pp2/tx.c +++ b/src/plugins/dev_armada/pp2/tx.c @@ -12,6 +12,7 @@ VNET_DEV_NODE_FN (mvpp2_tx_node) (vlib_main_t *vm, vlib_node_runtime_t 
*node, vlib_frame_t *frame) { vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node); + vnet_dev_instance_t *ins = vnet_dev_get_dev_instance (rt->dev_instance); vnet_dev_tx_queue_t *txq = rt->tx_queue; vnet_dev_port_t *port = txq->port; vnet_dev_t *dev = port->dev; @@ -27,6 +28,24 @@ VNET_DEV_NODE_FN (mvpp2_tx_node) struct pp2_ppio_desc descs[VLIB_FRAME_SIZE], *d = descs; u16 sz = txq->size; u16 mask = sz - 1; + i16 len_adj = 0; + + if (ins->is_primary_if == 0) + { + vnet_dev_port_interface_t *sif = + vnet_dev_port_get_sec_if_by_index (port, ins->sec_if_index); + + mv_dsa_tag_t tag = { .as_u32 = sif->user_data }; + + for (u32 i = 0; i < n_vectors; i++) + { + vlib_buffer_t *b = vlib_get_buffer (vm, buffers[i]); + u8 *start = vlib_buffer_get_current (b); + clib_memmove (start - 4, start, 12); + mv_dsa_tag_write (start + 8, tag); + } + len_adj = 4; + } if (mtq->n_enq) { @@ -51,9 +70,9 @@ VNET_DEV_NODE_FN (mvpp2_tx_node) u64 paddr = vlib_buffer_get_pa (vm, b0); pp2_ppio_outq_desc_reset (d); - pp2_ppio_outq_desc_set_phys_addr (d, paddr + b0->current_data); + pp2_ppio_outq_desc_set_phys_addr (d, paddr + b0->current_data - len_adj); pp2_ppio_outq_desc_set_pkt_offset (d, 0); - pp2_ppio_outq_desc_set_pkt_len (d, b0->current_length); + pp2_ppio_outq_desc_set_pkt_len (d, b0->current_length + len_adj); } buffers = vlib_frame_vector_args (frame); diff --git a/src/plugins/dev_ena/rx_node.c b/src/plugins/dev_ena/rx_node.c index 41fc5b8c943..51c6dbce84c 100644 --- a/src/plugins/dev_ena/rx_node.c +++ b/src/plugins/dev_ena/rx_node.c @@ -251,7 +251,6 @@ ena_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_dev_rx_queue_t *rxq) { ena_rxq_t *q = vnet_dev_get_rx_queue_data (rxq); - vnet_dev_port_t *port = rxq->port; vnet_main_t *vnm = vnet_get_main (); vlib_buffer_t *buffers[VLIB_FRAME_SIZE], **b; ena_rx_cdesc_status_t statuses[VLIB_FRAME_SIZE + 8]; @@ -260,13 +259,13 @@ ena_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u16 *csi; uword n_rx_packets = 0, n_rx_bytes = 0; vlib_frame_bitmap_t head_bmp = {}; - u32 sw_if_index = port->intf.sw_if_index; - u32 hw_if_index = port->intf.hw_if_index; + u32 sw_if_index = vnet_dev_get_rx_queue_if_sw_if_index (rxq); + u32 hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq); u32 n_trace, n_deq, n_left; u32 cq_next = q->cq_next; - u32 next_index = rxq->next_index; + u32 next_index = vnet_dev_get_rx_queue_if_next_index (rxq); vlib_frame_t *next_frame; - vlib_buffer_template_t bt = rxq->buffer_template; + vlib_buffer_template_t bt = vnet_dev_get_rx_queue_if_buffer_template (rxq); u32 *bi; int maybe_chained; diff --git a/src/plugins/dev_iavf/port.c b/src/plugins/dev_iavf/port.c index f1578fccb59..a0530822688 100644 --- a/src/plugins/dev_iavf/port.c +++ b/src/plugins/dev_iavf/port.c @@ -263,7 +263,7 @@ avf_msix_n_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 line) iavf_reg_write (ad, IAVF_VFINT_DYN_CTLN (line), dyn_ctln_enabled.as_u32); vlib_node_set_interrupt_pending (vlib_get_main_by_index (line), - port->intf.rx_node_index); + vnet_dev_get_port_rx_node_index (port)); } vnet_dev_rv_t diff --git a/src/plugins/dev_iavf/rx_node.c b/src/plugins/dev_iavf/rx_node.c index ee6d7e8def0..bf650f9bfb9 100644 --- a/src/plugins/dev_iavf/rx_node.c +++ b/src/plugins/dev_iavf/rx_node.c @@ -249,14 +249,14 @@ iavf_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u32 thr_idx = vlib_get_thread_index (); iavf_rt_data_t *rtd = vnet_dev_get_rt_temp_space (vm); iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq); - 
vlib_buffer_template_t bt = rxq->buffer_template; + vlib_buffer_template_t bt = vnet_dev_get_rx_queue_if_buffer_template (rxq); u32 n_trace, n_rx_packets = 0, n_rx_bytes = 0; u16 n_tail_desc = 0; u64 or_qw1 = 0; u32 *bi, *to_next, n_left_to_next; - u32 next_index = rxq->next_index; - u32 sw_if_index = port->intf.sw_if_index; - u32 hw_if_index = port->intf.hw_if_index; + u32 next_index = vnet_dev_get_rx_queue_if_next_index (rxq); + u32 sw_if_index = vnet_dev_get_rx_queue_if_sw_if_index (rxq); + u32 hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq); u16 next = arq->next; u16 size = rxq->size; u16 mask = size - 1; diff --git a/src/plugins/dev_octeon/CMakeLists.txt b/src/plugins/dev_octeon/CMakeLists.txt index c6271ecdfba..6109de57a7d 100644 --- a/src/plugins/dev_octeon/CMakeLists.txt +++ b/src/plugins/dev_octeon/CMakeLists.txt @@ -36,6 +36,7 @@ add_vpp_plugin(dev_octeon tx_node.c flow.c counter.c + crypto.c MULTIARCH_SOURCES rx_node.c diff --git a/src/plugins/dev_octeon/crypto.c b/src/plugins/dev_octeon/crypto.c new file mode 100644 index 00000000000..7d3790f3ec9 --- /dev/null +++ b/src/plugins/dev_octeon/crypto.c @@ -0,0 +1,1782 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include <vnet/dev/dev.h> +#include <vnet/devices/devices.h> +#include <dev_octeon/octeon.h> +#include <dev_octeon/crypto.h> +#include <base/roc_api.h> +#include <common.h> + +oct_crypto_main_t oct_crypto_main; +oct_crypto_dev_t oct_crypto_dev; + +VLIB_REGISTER_LOG_CLASS (oct_log, static) = { + .class_name = "octeon", + .subclass_name = "crypto", +}; + +static_always_inline void +oct_map_keyindex_to_session (oct_crypto_sess_t *sess, u32 key_index, u8 type) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_key_t *ckey; + + ckey = vec_elt_at_index (ocm->keys[type], key_index); + + ckey->sess = sess; + sess->key_index = key_index; +} + +static_always_inline oct_crypto_sess_t * +oct_crypto_session_alloc (vlib_main_t *vm, u8 type) +{ + extern oct_plt_init_param_t oct_plt_init_param; + oct_crypto_sess_t *addr = NULL; + oct_crypto_main_t *ocm; + oct_crypto_dev_t *ocd; + u32 size; + + ocm = &oct_crypto_main; + ocd = ocm->crypto_dev[type]; + + size = sizeof (oct_crypto_sess_t); + + addr = oct_plt_init_param.oct_plt_zmalloc (size, CLIB_CACHE_LINE_BYTES); + if (addr == NULL) + { + log_err (ocd->dev, "Failed to allocate crypto session memory"); + return NULL; + } + + return addr; +} + +static_always_inline i32 +oct_crypto_session_create (vlib_main_t *vm, vnet_crypto_key_index_t key_index, + int op_type) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_sess_t *session; + vnet_crypto_key_t *key; + oct_crypto_key_t *ckey; + + key = vnet_crypto_get_key (key_index); + + if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + { + /* + * Read crypto or integ key session. And map link key index to same. 
+ */ + if (key->index_crypto != UINT32_MAX) + { + ckey = vec_elt_at_index (ocm->keys[op_type], key->index_crypto); + session = ckey->sess; + } + else if (key->index_integ != UINT32_MAX) + { + ckey = vec_elt_at_index (ocm->keys[op_type], key->index_integ); + session = ckey->sess; + } + else + return -1; + } + else + { + session = oct_crypto_session_alloc (vm, op_type); + if (session == NULL) + return -1; + } + + oct_map_keyindex_to_session (session, key_index, op_type); + return 0; +} + +void +oct_crypto_key_del_handler (vlib_main_t *vm, vnet_crypto_key_index_t key_index) +{ + extern oct_plt_init_param_t oct_plt_init_param; + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_key_t *ckey_linked; + oct_crypto_key_t *ckey; + + vec_validate (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index); + + ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index); + if (ckey->sess) + { + /* + * If in case link algo is pointing to same sesison, reset the pointer. + */ + if (ckey->sess->key_index != key_index) + { + ckey_linked = vec_elt_at_index ( + ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], ckey->sess->key_index); + ckey_linked->sess = NULL; + } + oct_plt_init_param.oct_plt_free (ckey->sess); + ckey->sess = NULL; + } + + ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], key_index); + if (ckey->sess) + { + /* + * If in case link algo is pointing to same sesison, reset the pointer. + */ + if (ckey->sess->key_index != key_index) + { + ckey_linked = vec_elt_at_index ( + ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], ckey->sess->key_index); + ckey_linked->sess = NULL; + } + + oct_plt_init_param.oct_plt_free (ckey->sess); + ckey->sess = NULL; + } +} + +void +oct_crypto_key_add_handler (vlib_main_t *vm, vnet_crypto_key_index_t key_index) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + oct_crypto_key_t *ckey; + oct_crypto_dev_t *ocd = &oct_crypto_dev; + + vec_validate (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index); + ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index); + if (ckey->sess == NULL) + { + if (oct_crypto_session_create (vm, key_index, + VNET_CRYPTO_OP_TYPE_ENCRYPT)) + { + log_err (ocd->dev, "Unable to create crypto session"); + return; + } + } + + vec_validate (ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], key_index); + ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], key_index); + if (ckey->sess == NULL) + { + if (oct_crypto_session_create (vm, key_index, + VNET_CRYPTO_OP_TYPE_DECRYPT)) + { + log_err (ocd->dev, "Unable to create crypto session"); + return; + } + } +} + +void +oct_crypto_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop, + vnet_crypto_key_index_t idx) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + + if (kop == VNET_CRYPTO_KEY_OP_DEL) + { + oct_crypto_key_del_handler (vm, idx); + return; + } + oct_crypto_key_add_handler (vm, idx); + + ocm->started = 1; +} + +static_always_inline void +oct_crypto_session_free (vlib_main_t *vm, oct_crypto_sess_t *sess) +{ + extern oct_plt_init_param_t oct_plt_init_param; + + oct_plt_init_param.oct_plt_free (sess); + return; +} + +#ifdef PLATFORM_OCTEON9 +static inline void +oct_cpt_inst_submit (struct cpt_inst_s *inst, uint64_t lmtline, + uint64_t io_addr) +{ + uint64_t lmt_status; + + do + { + /* Copy CPT command to LMTLINE */ + roc_lmt_mov64 ((void *) lmtline, inst); + + /* + * Make sure compiler does not reorder memcpy and ldeor. + * LMTST transactions are always flushed from the write + * buffer immediately, a DMB is not required to push out + * LMTSTs. 
+ */ + asm volatile ("dmb oshst" : : : "memory"); + lmt_status = roc_lmt_submit_ldeor (io_addr); + } + while (lmt_status == 0); +} +#endif + +static_always_inline void +oct_crypto_burst_submit (oct_crypto_dev_t *crypto_dev, struct cpt_inst_s *inst, + u32 n_left) +{ + u64 lmt_base; + u64 io_addr; + u32 count; + +#ifdef PLATFORM_OCTEON9 + lmt_base = crypto_dev->lf.lmt_base; + io_addr = crypto_dev->lf.io_addr; + + for (count = 0; count < n_left; count++) + oct_cpt_inst_submit (inst + count, lmt_base, io_addr); +#else + u64 *lmt_line[OCT_MAX_LMT_SZ]; + u64 lmt_arg, core_lmt_id; + + lmt_base = crypto_dev->lmtline.lmt_base; + io_addr = crypto_dev->lmtline.io_addr; + + ROC_LMT_CPT_BASE_ID_GET (lmt_base, core_lmt_id); + + for (count = 0; count < 16; count++) + { + lmt_line[count] = OCT_CPT_LMT_GET_LINE_ADDR (lmt_base, count); + } + + while (n_left > OCT_MAX_LMT_SZ) + { + + /* + * Add a memory barrier so that LMTLINEs from the previous iteration + * can be reused for a subsequent transfer. + */ + asm volatile ("dmb oshst" ::: "memory"); + + lmt_arg = ROC_CN10K_CPT_LMT_ARG | (u64) core_lmt_id; + + for (count = 0; count < 16; count++) + { + roc_lmt_mov_seg ((void *) lmt_line[count], inst + count, + CPT_LMT_SIZE_COPY); + } + + /* Set number of LMTSTs, excluding the first */ + lmt_arg |= (OCT_MAX_LMT_SZ - 1) << 12; + + roc_lmt_submit_steorl (lmt_arg, io_addr); + + inst += OCT_MAX_LMT_SZ; + n_left -= OCT_MAX_LMT_SZ; + } + + if (n_left > 0) + { + /* + * Add a memory barrier so that LMTLINEs from the previous iteration + * can be reused for a subsequent transfer. + */ + asm volatile ("dmb oshst" ::: "memory"); + + lmt_arg = ROC_CN10K_CPT_LMT_ARG | (u64) core_lmt_id; + + for (count = 0; count < n_left; count++) + { + roc_lmt_mov_seg ((void *) lmt_line[count], inst + count, + CPT_LMT_SIZE_COPY); + } + + /* Set number of LMTSTs, excluding the first */ + lmt_arg |= (n_left - 1) << 12; + + roc_lmt_submit_steorl (lmt_arg, io_addr); + } +#endif +} + +static_always_inline uint32_t +oct_crypto_fill_sg_comp_from_iov (struct roc_sglist_comp *list, uint32_t i, + struct roc_se_iov_ptr *from, + uint32_t from_offset, uint32_t *psize, + struct roc_se_buf_ptr *extra_buf, + uint32_t extra_offset) +{ + uint32_t extra_len = extra_buf ? 
extra_buf->size : 0; + uint32_t size = *psize; + int32_t j; + + for (j = 0; j < from->buf_cnt; j++) + { + struct roc_sglist_comp *to = &list[i >> 2]; + uint32_t buf_sz = from->bufs[j].size; + void *vaddr = from->bufs[j].vaddr; + uint64_t e_vaddr; + uint32_t e_len; + + if (PREDICT_FALSE (from_offset)) + { + if (from_offset >= buf_sz) + { + from_offset -= buf_sz; + continue; + } + e_vaddr = (uint64_t) vaddr + from_offset; + e_len = clib_min ((buf_sz - from_offset), size); + from_offset = 0; + } + else + { + e_vaddr = (uint64_t) vaddr; + e_len = clib_min (buf_sz, size); + } + + to->u.s.len[i % 4] = clib_host_to_net_u16 (e_len); + to->ptr[i % 4] = clib_host_to_net_u64 (e_vaddr); + + if (extra_len && (e_len >= extra_offset)) + { + /* Break the data at given offset */ + uint32_t next_len = e_len - extra_offset; + uint64_t next_vaddr = e_vaddr + extra_offset; + + if (!extra_offset) + { + i--; + } + else + { + e_len = extra_offset; + size -= e_len; + to->u.s.len[i % 4] = clib_host_to_net_u16 (e_len); + } + + extra_len = clib_min (extra_len, size); + /* Insert extra data ptr */ + if (extra_len) + { + i++; + to = &list[i >> 2]; + to->u.s.len[i % 4] = clib_host_to_net_u16 (extra_len); + to->ptr[i % 4] = + clib_host_to_net_u64 ((uint64_t) extra_buf->vaddr); + size -= extra_len; + } + + next_len = clib_min (next_len, size); + /* insert the rest of the data */ + if (next_len) + { + i++; + to = &list[i >> 2]; + to->u.s.len[i % 4] = clib_host_to_net_u16 (next_len); + to->ptr[i % 4] = clib_host_to_net_u64 (next_vaddr); + size -= next_len; + } + extra_len = 0; + } + else + { + size -= e_len; + } + if (extra_offset) + extra_offset -= size; + i++; + + if (PREDICT_FALSE (!size)) + break; + } + + *psize = size; + return (uint32_t) i; +} + +static_always_inline u32 +oct_crypto_fill_sg2_comp_from_iov (struct roc_sg2list_comp *list, u32 i, + struct roc_se_iov_ptr *from, + u32 from_offset, u32 *psize, + struct roc_se_buf_ptr *extra_buf, + u32 extra_offset) +{ + u32 extra_len = extra_buf ? 
extra_buf->size : 0; + u32 size = *psize, buf_sz, e_len, next_len; + struct roc_sg2list_comp *to; + u64 e_vaddr, next_vaddr; + void *vaddr; + i32 j; + + for (j = 0; j < from->buf_cnt; j++) + { + to = &list[i / 3]; + buf_sz = from->bufs[j].size; + vaddr = from->bufs[j].vaddr; + + if (PREDICT_FALSE (from_offset)) + { + if (from_offset >= buf_sz) + { + from_offset -= buf_sz; + continue; + } + e_vaddr = (u64) vaddr + from_offset; + e_len = clib_min ((buf_sz - from_offset), size); + from_offset = 0; + } + else + { + e_vaddr = (u64) vaddr; + e_len = clib_min (buf_sz, size); + } + + to->u.s.len[i % 3] = (e_len); + to->ptr[i % 3] = (e_vaddr); + to->u.s.valid_segs = (i % 3) + 1; + + if (extra_len && (e_len >= extra_offset)) + { + /* Break the data at given offset */ + next_len = e_len - extra_offset; + next_vaddr = e_vaddr + extra_offset; + + if (!extra_offset) + i--; + else + { + e_len = extra_offset; + size -= e_len; + to->u.s.len[i % 3] = (e_len); + } + + extra_len = clib_min (extra_len, size); + /* Insert extra data ptr */ + if (extra_len) + { + i++; + to = &list[i / 3]; + to->u.s.len[i % 3] = (extra_len); + to->ptr[i % 3] = ((u64) extra_buf->vaddr); + to->u.s.valid_segs = (i % 3) + 1; + size -= extra_len; + } + + next_len = clib_min (next_len, size); + /* insert the rest of the data */ + if (next_len) + { + i++; + to = &list[i / 3]; + to->u.s.len[i % 3] = (next_len); + to->ptr[i % 3] = (next_vaddr); + to->u.s.valid_segs = (i % 3) + 1; + size -= next_len; + } + extra_len = 0; + } + else + size -= e_len; + + if (extra_offset) + extra_offset -= size; + + i++; + + if (PREDICT_FALSE (!size)) + break; + } + + *psize = size; + return (u32) i; +} + +static_always_inline uint32_t +oct_crypto_fill_sg_comp_from_buf (struct roc_sglist_comp *list, uint32_t i, + struct roc_se_buf_ptr *from) +{ + struct roc_sglist_comp *to = &list[i >> 2]; + + to->u.s.len[i % 4] = clib_host_to_net_u16 (from->size); + to->ptr[i % 4] = clib_host_to_net_u64 ((uint64_t) from->vaddr); + return ++i; +} + +static_always_inline uint32_t +oct_crypto_fill_sg_comp (struct roc_sglist_comp *list, uint32_t i, + uint64_t dma_addr, uint32_t size) +{ + struct roc_sglist_comp *to = &list[i >> 2]; + + to->u.s.len[i % 4] = clib_host_to_net_u16 (size); + to->ptr[i % 4] = clib_host_to_net_u64 (dma_addr); + return ++i; +} + +static_always_inline u32 +oct_crypto_fill_sg2_comp (struct roc_sg2list_comp *list, u32 index, + u64 dma_addr, u32 size) +{ + struct roc_sg2list_comp *to = &list[index / 3]; + + to->u.s.len[index % 3] = (size); + to->ptr[index % 3] = (dma_addr); + to->u.s.valid_segs = (index % 3) + 1; + return ++index; +} + +static_always_inline u32 +oct_crypto_fill_sg2_comp_from_buf (struct roc_sg2list_comp *list, u32 index, + struct roc_se_buf_ptr *from) +{ + struct roc_sg2list_comp *to = &list[index / 3]; + + to->u.s.len[index % 3] = (from->size); + to->ptr[index % 3] = ((u64) from->vaddr); + to->u.s.valid_segs = (index % 3) + 1; + return ++index; +} + +static_always_inline int __attribute__ ((unused)) +oct_crypto_sg_inst_prep (struct roc_se_fc_params *params, + struct cpt_inst_s *inst, uint64_t offset_ctrl, + const uint8_t *iv_s, int iv_len, uint8_t pack_iv, + uint8_t pdcp_alg_type, int32_t inputlen, + int32_t outputlen, uint32_t passthrough_len, + uint32_t req_flags, int pdcp_flag, int decrypt) +{ + struct roc_sglist_comp *gather_comp, *scatter_comp; + void *m_vaddr = params->meta_buf.vaddr; + struct roc_se_buf_ptr *aad_buf = NULL; + uint32_t mac_len = 0, aad_len = 0; + struct roc_se_ctx *se_ctx; + uint32_t i, g_size_bytes; + uint64_t 
*offset_vaddr; + uint32_t s_size_bytes; + uint8_t *in_buffer; + uint32_t size; + uint8_t *iv_d; + int ret = 0; + + se_ctx = params->ctx; + mac_len = se_ctx->mac_len; + + if (PREDICT_FALSE (req_flags & ROC_SE_VALID_AAD_BUF)) + { + /* We don't support both AAD and auth data separately */ + aad_len = params->aad_buf.size; + aad_buf = ¶ms->aad_buf; + } + + /* save space for iv */ + offset_vaddr = m_vaddr; + + m_vaddr = + (uint8_t *) m_vaddr + ROC_SE_OFF_CTRL_LEN + PLT_ALIGN_CEIL (iv_len, 8); + + inst->w4.s.opcode_major |= (uint64_t) ROC_DMA_MODE_SG; + + /* iv offset is 0 */ + *offset_vaddr = offset_ctrl; + + iv_d = ((uint8_t *) offset_vaddr + ROC_SE_OFF_CTRL_LEN); + + if (PREDICT_TRUE (iv_len)) + memcpy (iv_d, iv_s, iv_len); + + /* DPTR has SG list */ + + /* TODO Add error check if space will be sufficient */ + gather_comp = (struct roc_sglist_comp *) ((uint8_t *) m_vaddr + 8); + + /* + * Input Gather List + */ + i = 0; + + /* Offset control word followed by iv */ + + i = oct_crypto_fill_sg_comp (gather_comp, i, (uint64_t) offset_vaddr, + ROC_SE_OFF_CTRL_LEN + iv_len); + + /* Add input data */ + if (decrypt && (req_flags & ROC_SE_VALID_MAC_BUF)) + { + size = inputlen - iv_len - mac_len; + + if (PREDICT_TRUE (size)) + { + uint32_t aad_offset = aad_len ? passthrough_len : 0; + i = oct_crypto_fill_sg_comp_from_iov ( + gather_comp, i, params->src_iov, 0, &size, aad_buf, aad_offset); + if (PREDICT_FALSE (size)) + { + clib_warning ("Cryptodev: Insufficient buffer" + " space, size %d needed", + size); + return -1; + } + } + + if (mac_len) + i = + oct_crypto_fill_sg_comp_from_buf (gather_comp, i, ¶ms->mac_buf); + } + else + { + /* input data */ + size = inputlen - iv_len; + if (size) + { + uint32_t aad_offset = aad_len ? passthrough_len : 0; + i = oct_crypto_fill_sg_comp_from_iov ( + gather_comp, i, params->src_iov, 0, &size, aad_buf, aad_offset); + if (PREDICT_FALSE (size)) + { + clib_warning ("Cryptodev: Insufficient buffer space," + " size %d needed", + size); + return -1; + } + } + } + + in_buffer = m_vaddr; + ((uint16_t *) in_buffer)[0] = 0; + ((uint16_t *) in_buffer)[1] = 0; + ((uint16_t *) in_buffer)[2] = clib_host_to_net_u16 (i); + + g_size_bytes = ((i + 3) / 4) * sizeof (struct roc_sglist_comp); + /* + * Output Scatter List + */ + + i = 0; + scatter_comp = + (struct roc_sglist_comp *) ((uint8_t *) gather_comp + g_size_bytes); + + i = oct_crypto_fill_sg_comp ( + scatter_comp, i, (uint64_t) offset_vaddr + ROC_SE_OFF_CTRL_LEN, iv_len); + + /* Add output data */ + if ((!decrypt) && (req_flags & ROC_SE_VALID_MAC_BUF)) + { + size = outputlen - iv_len - mac_len; + if (size) + { + + uint32_t aad_offset = aad_len ? passthrough_len : 0; + + i = oct_crypto_fill_sg_comp_from_iov ( + scatter_comp, i, params->dst_iov, 0, &size, aad_buf, aad_offset); + if (PREDICT_FALSE (size)) + { + clib_warning ("Cryptodev: Insufficient buffer space," + " size %d needed", + size); + return -1; + } + } + + /* mac data */ + if (mac_len) + i = + oct_crypto_fill_sg_comp_from_buf (scatter_comp, i, ¶ms->mac_buf); + } + else + { + /* Output including mac */ + size = outputlen - iv_len; + + if (size) + { + uint32_t aad_offset = aad_len ? 
passthrough_len : 0; + + i = oct_crypto_fill_sg_comp_from_iov ( + scatter_comp, i, params->dst_iov, 0, &size, aad_buf, aad_offset); + + if (PREDICT_FALSE (size)) + { + clib_warning ("Cryptodev: Insufficient buffer space," + " size %d needed", + size); + return -1; + } + } + } + ((uint16_t *) in_buffer)[3] = clib_host_to_net_u16 (i); + s_size_bytes = ((i + 3) / 4) * sizeof (struct roc_sglist_comp); + + size = g_size_bytes + s_size_bytes + ROC_SG_LIST_HDR_SIZE; + + /* This is DPTR len in case of SG mode */ + inst->w4.s.dlen = size; + + if (PREDICT_FALSE (size > ROC_SG_MAX_DLEN_SIZE)) + { + clib_warning ( + "Cryptodev: Exceeds max supported components. Reduce segments"); + ret = -1; + } + + inst->dptr = (uint64_t) in_buffer; + return ret; +} + +static_always_inline int __attribute__ ((unused)) +oct_crypto_sg2_inst_prep (struct roc_se_fc_params *params, + struct cpt_inst_s *inst, u64 offset_ctrl, + const u8 *iv_s, int iv_len, u8 pack_iv, + u8 pdcp_alg_type, i32 inputlen, i32 outputlen, + u32 passthrough_len, u32 req_flags, int pdcp_flag, + int decrypt) +{ + u32 mac_len = 0, aad_len = 0, size, index, g_size_bytes; + struct roc_sg2list_comp *gather_comp, *scatter_comp; + void *m_vaddr = params->meta_buf.vaddr; + struct roc_se_buf_ptr *aad_buf = NULL; + union cpt_inst_w5 cpt_inst_w5; + union cpt_inst_w6 cpt_inst_w6; + u16 scatter_sz, gather_sz; + struct roc_se_ctx *se_ctx; + u64 *offset_vaddr; + int ret = 0; + u8 *iv_d; + + se_ctx = params->ctx; + mac_len = se_ctx->mac_len; + + if (PREDICT_FALSE (req_flags & ROC_SE_VALID_AAD_BUF)) + { + /* We don't support both AAD and auth data separately */ + aad_len = params->aad_buf.size; + aad_buf = ¶ms->aad_buf; + } + + /* save space for iv */ + offset_vaddr = m_vaddr; + + m_vaddr = (u8 *) m_vaddr + ROC_SE_OFF_CTRL_LEN + PLT_ALIGN_CEIL (iv_len, 8); + + inst->w4.s.opcode_major |= (u64) ROC_DMA_MODE_SG; + + /* This is DPTR len in case of SG mode */ + inst->w4.s.dlen = inputlen + ROC_SE_OFF_CTRL_LEN; + + /* iv offset is 0 */ + *offset_vaddr = offset_ctrl; + iv_d = ((u8 *) offset_vaddr + ROC_SE_OFF_CTRL_LEN); + + if (PREDICT_TRUE (iv_len)) + clib_memcpy (iv_d, iv_s, iv_len); + + /* DPTR has SG list */ + + gather_comp = (struct roc_sg2list_comp *) ((u8 *) m_vaddr); + + /* + * Input Gather List + */ + index = 0; + + /* Offset control word followed by iv */ + + index = oct_crypto_fill_sg2_comp (gather_comp, index, (u64) offset_vaddr, + ROC_SE_OFF_CTRL_LEN + iv_len); + + /* Add input data */ + if (decrypt && (req_flags & ROC_SE_VALID_MAC_BUF)) + { + size = inputlen - iv_len - mac_len; + if (size) + { + /* input data only */ + u32 aad_offset = aad_len ? passthrough_len : 0; + + index = oct_crypto_fill_sg2_comp_from_iov (gather_comp, index, + params->src_iov, 0, &size, + aad_buf, aad_offset); + + if (PREDICT_FALSE (size)) + { + clib_warning ("Cryptodev: Insufficient buffer" + " space, size %d needed", + size); + return -1; + } + } + + /* mac data */ + if (mac_len) + index = oct_crypto_fill_sg2_comp_from_buf (gather_comp, index, + ¶ms->mac_buf); + } + else + { + /* input data */ + size = inputlen - iv_len; + if (size) + { + u32 aad_offset = aad_len ? 
passthrough_len : 0; + + index = oct_crypto_fill_sg2_comp_from_iov (gather_comp, index, + params->src_iov, 0, &size, + aad_buf, aad_offset); + if (PREDICT_FALSE (size)) + { + clib_warning ("Cryptodev: Insufficient buffer space," + " size %d needed", + size); + return -1; + } + } + } + + gather_sz = (index + 2) / 3; + g_size_bytes = gather_sz * sizeof (struct roc_sg2list_comp); + + /* + * Output Scatter List + */ + + index = 0; + scatter_comp = + (struct roc_sg2list_comp *) ((u8 *) gather_comp + g_size_bytes); + + index = oct_crypto_fill_sg2_comp ( + scatter_comp, index, (u64) offset_vaddr + ROC_SE_OFF_CTRL_LEN, iv_len); + + /* Add output data */ + if ((!decrypt) && (req_flags & ROC_SE_VALID_MAC_BUF)) + { + size = outputlen - iv_len - mac_len; + if (size) + { + + u32 aad_offset = aad_len ? passthrough_len : 0; + + index = oct_crypto_fill_sg2_comp_from_iov (scatter_comp, index, + params->dst_iov, 0, &size, + aad_buf, aad_offset); + if (PREDICT_FALSE (size)) + { + clib_warning ("Cryptodev: Insufficient buffer space," + " size %d needed", + size); + return -1; + } + } + + /* mac data */ + if (mac_len) + index = oct_crypto_fill_sg2_comp_from_buf (scatter_comp, index, + ¶ms->mac_buf); + } + else + { + /* Output including mac */ + size = outputlen - iv_len; + if (size) + { + u32 aad_offset = aad_len ? passthrough_len : 0; + + index = oct_crypto_fill_sg2_comp_from_iov (scatter_comp, index, + params->dst_iov, 0, &size, + aad_buf, aad_offset); + + if (PREDICT_FALSE (size)) + { + clib_warning ("Cryptodev: Insufficient buffer space," + " size %d needed", + size); + return -1; + } + } + } + + scatter_sz = (index + 2) / 3; + + cpt_inst_w5.s.gather_sz = gather_sz; + cpt_inst_w6.s.scatter_sz = scatter_sz; + + cpt_inst_w5.s.dptr = (u64) gather_comp; + cpt_inst_w6.s.rptr = (u64) scatter_comp; + + inst->w5.u64 = cpt_inst_w5.u64; + inst->w6.u64 = cpt_inst_w6.u64; + + if (PREDICT_FALSE ((scatter_sz >> 4) || (gather_sz >> 4))) + { + clib_warning ( + "Cryptodev: Exceeds max supported components. Reduce segments"); + ret = -1; + } + + return ret; +} + +static_always_inline int +oct_crypto_cpt_hmac_prep (u32 flags, u64 d_offs, u64 d_lens, + struct roc_se_fc_params *fc_params, + struct cpt_inst_s *inst, u8 is_decrypt) +{ + u32 encr_data_len, auth_data_len, aad_len = 0; + i32 inputlen, outputlen, enc_dlen, auth_dlen; + u32 encr_offset, auth_offset, iv_offset = 0; + union cpt_inst_w4 cpt_inst_w4; + u32 cipher_type; + struct roc_se_ctx *se_ctx; + u32 passthrough_len = 0; + const u8 *src = NULL; + u64 offset_ctrl; + u8 iv_len = 16; + u8 op_minor; + u32 mac_len; + int ret; + + encr_offset = ROC_SE_ENCR_OFFSET (d_offs); + auth_offset = ROC_SE_AUTH_OFFSET (d_offs); + encr_data_len = ROC_SE_ENCR_DLEN (d_lens); + auth_data_len = ROC_SE_AUTH_DLEN (d_lens); + + if (PREDICT_FALSE (flags & ROC_SE_VALID_AAD_BUF)) + { + /* We don't support both AAD and auth data separately */ + auth_data_len = 0; + auth_offset = 0; + aad_len = fc_params->aad_buf.size; + } + + se_ctx = fc_params->ctx; + cipher_type = se_ctx->enc_cipher; + mac_len = se_ctx->mac_len; + cpt_inst_w4.u64 = se_ctx->template_w4.u64; + op_minor = cpt_inst_w4.s.opcode_minor; + + if (PREDICT_FALSE (flags & ROC_SE_VALID_AAD_BUF)) + { + /* + * When AAD is given, data above encr_offset is pass through + * Since AAD is given as separate pointer and not as offset, + * this is a special case as we need to fragment input data + * into passthrough + encr_data and then insert AAD in between. 
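+       * The recomputed offsets below therefore place the AAD between the
+       * passthrough region and the cipher data: auth_offset becomes
+       * passthrough_len + iv_len, encr_offset becomes passthrough_len +
+       * aad_len + iv_len, and the auth region covers both AAD and cipher
+       * data (auth_data_len = aad_len + encr_data_len).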
+ */ + passthrough_len = encr_offset; + auth_offset = passthrough_len + iv_len; + encr_offset = passthrough_len + aad_len + iv_len; + auth_data_len = aad_len + encr_data_len; + } + else + { + encr_offset += iv_len; + auth_offset += iv_len; + } + + auth_dlen = auth_offset + auth_data_len; + enc_dlen = encr_data_len + encr_offset; + + cpt_inst_w4.s.opcode_major = ROC_SE_MAJOR_OP_FC; + + if (is_decrypt) + { + cpt_inst_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_DECRYPT; + + if (auth_dlen > enc_dlen) + { + inputlen = auth_dlen + mac_len; + outputlen = auth_dlen; + } + else + { + inputlen = enc_dlen + mac_len; + outputlen = enc_dlen; + } + } + else + { + cpt_inst_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_ENCRYPT; + + /* Round up to 16 bytes alignment */ + if (PREDICT_FALSE (encr_data_len & 0xf)) + { + if (PREDICT_TRUE (cipher_type == ROC_SE_AES_CBC) || + (cipher_type == ROC_SE_DES3_CBC)) + enc_dlen = PLT_ALIGN_CEIL (encr_data_len, 8) + encr_offset; + } + + /* + * auth_dlen is larger than enc_dlen in Authentication cases + * like AES GMAC Authentication + */ + if (PREDICT_FALSE (auth_dlen > enc_dlen)) + { + inputlen = auth_dlen; + outputlen = auth_dlen + mac_len; + } + else + { + inputlen = enc_dlen; + outputlen = enc_dlen + mac_len; + } + } + + if (op_minor & ROC_SE_FC_MINOR_OP_HMAC_FIRST) + outputlen = enc_dlen; + + cpt_inst_w4.s.param1 = encr_data_len; + cpt_inst_w4.s.param2 = auth_data_len; + + if (PREDICT_FALSE ((encr_offset >> 16) || (iv_offset >> 8) || + (auth_offset >> 8))) + { + clib_warning ("Cryptodev: Offset not supported"); + clib_warning ( + "Cryptodev: enc_offset: %d, iv_offset : %d, auth_offset: %d", + encr_offset, iv_offset, auth_offset); + return -1; + } + + offset_ctrl = clib_host_to_net_u64 ( + ((u64) encr_offset << 16) | ((u64) iv_offset << 8) | ((u64) auth_offset)); + + src = fc_params->iv_buf; + + inst->w4.u64 = cpt_inst_w4.u64; + +#ifdef PLATFORM_OCTEON9 + ret = oct_crypto_sg_inst_prep (fc_params, inst, offset_ctrl, src, iv_len, 0, + 0, inputlen, outputlen, passthrough_len, + flags, 0, is_decrypt); +#else + ret = oct_crypto_sg2_inst_prep (fc_params, inst, offset_ctrl, src, iv_len, 0, + 0, inputlen, outputlen, passthrough_len, + flags, 0, is_decrypt); +#endif + + if (PREDICT_FALSE (ret)) + return -1; + + return 0; +} + +static_always_inline int +oct_crypto_fill_fc_params (oct_crypto_sess_t *sess, struct cpt_inst_s *inst, + const bool is_aead, u8 aad_length, u8 *payload, + vnet_crypto_async_frame_elt_t *elts, void *mdata, + u32 cipher_data_length, u32 cipher_data_offset, + u32 auth_data_length, u32 auth_data_offset, + vlib_buffer_t *b, u16 adj_len) +{ + struct roc_se_fc_params fc_params = { 0 }; + struct roc_se_ctx *ctx = &sess->cpt_ctx; + u64 d_offs = 0, d_lens = 0; + vlib_buffer_t *buffer = b; + u32 flags = 0, index = 0; + u8 op_minor = 0, cpt_op; + char src[SRC_IOV_SIZE]; + u32 *iv_buf; + + cpt_op = sess->cpt_op; + + if (is_aead) + { + flags |= ROC_SE_VALID_IV_BUF; + iv_buf = (u32 *) elts->iv; + iv_buf[3] = clib_host_to_net_u32 (0x1); + fc_params.iv_buf = elts->iv; + + d_offs = cipher_data_offset; + d_offs = d_offs << 16; + + d_lens = cipher_data_length; + d_lens = d_lens << 32; + + fc_params.aad_buf.vaddr = elts->aad; + fc_params.aad_buf.size = aad_length; + flags |= ROC_SE_VALID_AAD_BUF; + + if (sess->cpt_ctx.mac_len) + { + flags |= ROC_SE_VALID_MAC_BUF; + fc_params.mac_buf.size = sess->cpt_ctx.mac_len; + fc_params.mac_buf.vaddr = elts->tag; + } + } + else + { + op_minor = ctx->template_w4.s.opcode_minor; + + flags |= ROC_SE_VALID_IV_BUF; + + fc_params.iv_buf = elts->iv; + + 
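+      /*
+       * Pack offsets and lengths in the layout oct_crypto_cpt_hmac_prep ()
+       * decodes via ROC_SE_ENCR_OFFSET/ROC_SE_AUTH_OFFSET and
+       * ROC_SE_ENCR_DLEN/ROC_SE_AUTH_DLEN: cipher offset/length in the
+       * upper bits, auth offset/length in the lower bits.
+       */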
d_offs = cipher_data_offset; + d_offs = (d_offs << 16) | auth_data_offset; + + d_lens = cipher_data_length; + d_lens = (d_lens << 32) | auth_data_length; + + if (PREDICT_TRUE (sess->cpt_ctx.mac_len)) + { + if (!(op_minor & ROC_SE_FC_MINOR_OP_HMAC_FIRST)) + { + flags |= ROC_SE_VALID_MAC_BUF; + fc_params.mac_buf.size = sess->cpt_ctx.mac_len; + fc_params.mac_buf.vaddr = elts->digest; + } + } + } + + fc_params.ctx = &sess->cpt_ctx; + + fc_params.src_iov = (void *) src; + + fc_params.src_iov->bufs[index].vaddr = payload; + fc_params.src_iov->bufs[index].size = b->current_length - adj_len; + index++; + + while (buffer->flags & VLIB_BUFFER_NEXT_PRESENT) + { + buffer = vlib_get_buffer (vlib_get_main (), buffer->next_buffer); + fc_params.src_iov->bufs[index].vaddr = + buffer->data + buffer->current_data; + fc_params.src_iov->bufs[index].size = buffer->current_length; + index++; + } + + fc_params.src_iov->buf_cnt = index; + + fc_params.dst_iov = (void *) src; + + fc_params.meta_buf.vaddr = mdata; + fc_params.meta_buf.size = OCT_SCATTER_GATHER_BUFFER_SIZE; + + return oct_crypto_cpt_hmac_prep (flags, d_offs, d_lens, &fc_params, inst, + cpt_op); +} + +static_always_inline u64 +oct_cpt_inst_w7_get (oct_crypto_sess_t *sess, struct roc_cpt *roc_cpt) +{ + union cpt_inst_w7 inst_w7; + + inst_w7.u64 = 0; + inst_w7.s.cptr = (u64) &sess->cpt_ctx.se_ctx.fctx; + /* Set the engine group */ + inst_w7.s.egrp = roc_cpt->eng_grp[CPT_ENG_TYPE_IE]; + + return inst_w7.u64; +} + +static_always_inline i32 +oct_crypto_link_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, + u32 key_index, u8 type) +{ + vnet_crypto_key_t *crypto_key, *auth_key; + roc_se_cipher_type enc_type = 0; + roc_se_auth_type auth_type = 0; + vnet_crypto_key_t *key; + u32 digest_len = ~0; + i32 rv = 0; + + key = vnet_crypto_get_key (key_index); + + switch (key->async_alg) + { + case VNET_CRYPTO_ALG_AES_128_CBC_SHA1_TAG12: + case VNET_CRYPTO_ALG_AES_192_CBC_SHA1_TAG12: + case VNET_CRYPTO_ALG_AES_256_CBC_SHA1_TAG12: + enc_type = ROC_SE_AES_CBC; + auth_type = ROC_SE_SHA1_TYPE; + digest_len = 12; + break; + case VNET_CRYPTO_ALG_AES_128_CBC_SHA224_TAG14: + case VNET_CRYPTO_ALG_AES_192_CBC_SHA224_TAG14: + case VNET_CRYPTO_ALG_AES_256_CBC_SHA224_TAG14: + enc_type = ROC_SE_AES_CBC; + auth_type = ROC_SE_SHA2_SHA224; + digest_len = 14; + break; + case VNET_CRYPTO_ALG_AES_128_CBC_SHA256_TAG16: + case VNET_CRYPTO_ALG_AES_192_CBC_SHA256_TAG16: + case VNET_CRYPTO_ALG_AES_256_CBC_SHA256_TAG16: + enc_type = ROC_SE_AES_CBC; + auth_type = ROC_SE_SHA2_SHA256; + digest_len = 16; + break; + case VNET_CRYPTO_ALG_AES_128_CBC_SHA384_TAG24: + case VNET_CRYPTO_ALG_AES_192_CBC_SHA384_TAG24: + case VNET_CRYPTO_ALG_AES_256_CBC_SHA384_TAG24: + enc_type = ROC_SE_AES_CBC; + auth_type = ROC_SE_SHA2_SHA384; + digest_len = 24; + break; + case VNET_CRYPTO_ALG_AES_128_CBC_SHA512_TAG32: + case VNET_CRYPTO_ALG_AES_192_CBC_SHA512_TAG32: + case VNET_CRYPTO_ALG_AES_256_CBC_SHA512_TAG32: + enc_type = ROC_SE_AES_CBC; + auth_type = ROC_SE_SHA2_SHA512; + digest_len = 32; + break; + case VNET_CRYPTO_ALG_AES_128_CBC_MD5_TAG12: + case VNET_CRYPTO_ALG_AES_192_CBC_MD5_TAG12: + case VNET_CRYPTO_ALG_AES_256_CBC_MD5_TAG12: + enc_type = ROC_SE_AES_CBC; + auth_type = ROC_SE_MD5_TYPE; + digest_len = 12; + break; + case VNET_CRYPTO_ALG_AES_128_CTR_SHA1_TAG12: + case VNET_CRYPTO_ALG_AES_192_CTR_SHA1_TAG12: + case VNET_CRYPTO_ALG_AES_256_CTR_SHA1_TAG12: + enc_type = ROC_SE_AES_CTR; + auth_type = ROC_SE_SHA1_TYPE; + digest_len = 12; + break; + case VNET_CRYPTO_ALG_3DES_CBC_MD5_TAG12: + enc_type = 
ROC_SE_DES3_CBC; + auth_type = ROC_SE_MD5_TYPE; + digest_len = 12; + break; + case VNET_CRYPTO_ALG_3DES_CBC_SHA1_TAG12: + enc_type = ROC_SE_DES3_CBC; + auth_type = ROC_SE_SHA1_TYPE; + digest_len = 12; + break; + case VNET_CRYPTO_ALG_3DES_CBC_SHA224_TAG14: + enc_type = ROC_SE_DES3_CBC; + auth_type = ROC_SE_SHA2_SHA224; + digest_len = 14; + break; + case VNET_CRYPTO_ALG_3DES_CBC_SHA256_TAG16: + enc_type = ROC_SE_DES3_CBC; + auth_type = ROC_SE_SHA2_SHA256; + digest_len = 16; + break; + case VNET_CRYPTO_ALG_3DES_CBC_SHA384_TAG24: + enc_type = ROC_SE_DES3_CBC; + auth_type = ROC_SE_SHA2_SHA384; + digest_len = 24; + break; + case VNET_CRYPTO_ALG_3DES_CBC_SHA512_TAG32: + enc_type = ROC_SE_DES3_CBC; + auth_type = ROC_SE_SHA2_SHA512; + digest_len = 32; + break; + default: + clib_warning ( + "Cryptodev: Undefined link algo %u specified. Key index %u", + key->async_alg, key_index); + return -1; + } + + if (type == VNET_CRYPTO_OP_TYPE_ENCRYPT) + sess->cpt_ctx.ciph_then_auth = true; + else + sess->cpt_ctx.auth_then_ciph = true; + + sess->iv_length = 16; + sess->cpt_op = type; + + crypto_key = vnet_crypto_get_key (key->index_crypto); + rv = roc_se_ciph_key_set (&sess->cpt_ctx, enc_type, crypto_key->data, + vec_len (crypto_key->data)); + if (rv) + { + clib_warning ("Cryptodev: Error in setting cipher key for enc type %u", + enc_type); + return -1; + } + + auth_key = vnet_crypto_get_key (key->index_integ); + + rv = roc_se_auth_key_set (&sess->cpt_ctx, auth_type, auth_key->data, + vec_len (auth_key->data), digest_len); + if (rv) + { + clib_warning ("Cryptodev: Error in setting auth key for auth type %u", + auth_type); + return -1; + } + + return 0; +} + +static_always_inline i32 +oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, + u32 key_index, u8 type) +{ + vnet_crypto_key_t *key = vnet_crypto_get_key (key_index); + roc_se_cipher_type enc_type = 0; + roc_se_auth_type auth_type = 0; + u32 digest_len = ~0; + i32 rv = 0; + + switch (key->async_alg) + { + case VNET_CRYPTO_ALG_AES_128_GCM: + case VNET_CRYPTO_ALG_AES_192_GCM: + case VNET_CRYPTO_ALG_AES_256_GCM: + enc_type = ROC_SE_AES_GCM; + sess->aes_gcm = 1; + sess->iv_offset = 0; + sess->iv_length = 16; + sess->cpt_ctx.mac_len = 16; + sess->cpt_op = type; + digest_len = 16; + break; + case VNET_CRYPTO_ALG_CHACHA20_POLY1305: + enc_type = ROC_SE_CHACHA20; + auth_type = ROC_SE_POLY1305; + break; + default: + clib_warning ( + "Cryptodev: Undefined cipher algo %u specified. 
Key index %u", + key->async_alg, key_index); + return -1; + } + + rv = roc_se_ciph_key_set (&sess->cpt_ctx, enc_type, key->data, + vec_len (key->data)); + if (rv) + { + clib_warning ("Cryptodev: Error in setting cipher key for enc type %u", + enc_type); + return -1; + } + + rv = roc_se_auth_key_set (&sess->cpt_ctx, auth_type, NULL, 0, digest_len); + if (rv) + { + clib_warning ("Cryptodev: Error in setting auth key for auth type %u", + auth_type); + return -1; + } + + if (enc_type == ROC_SE_CHACHA20) + sess->cpt_ctx.template_w4.s.opcode_minor |= BIT (5); + + return 0; +} + +static_always_inline i32 +oct_crypto_session_init (vlib_main_t *vm, oct_crypto_sess_t *session, + vnet_crypto_key_index_t key_index, int op_type) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + vnet_crypto_key_t *key; + oct_crypto_dev_t *ocd; + i32 rv = 0; + + ocd = ocm->crypto_dev[op_type]; + + key = vnet_crypto_get_key (key_index); + + if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + rv = oct_crypto_link_session_update (vm, session, key_index, op_type); + else + rv = oct_crypto_aead_session_update (vm, session, key_index, op_type); + + if (rv) + { + oct_crypto_session_free (vm, session); + return -1; + } + + session->crypto_dev = ocd; + + session->cpt_inst_w7 = + oct_cpt_inst_w7_get (session, session->crypto_dev->roc_cpt); + + session->initialised = 1; + + return 0; +} + +static_always_inline void +oct_crypto_update_frame_error_status (vnet_crypto_async_frame_t *f, u32 index, + vnet_crypto_op_status_t s) +{ + u32 i; + + for (i = index; i < f->n_elts; i++) + f->elts[i].status = s; + + if (index == 0) + f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED; +} + +static_always_inline int +oct_crypto_enqueue_enc_dec (vlib_main_t *vm, vnet_crypto_async_frame_t *frame, + const u8 is_aead, u8 aad_len, const u8 type) +{ + u32 i, enq_tail, enc_auth_len, buffer_index, nb_infl_allowed; + struct cpt_inst_s inst[VNET_CRYPTO_FRAME_SIZE]; + u32 crypto_start_offset, integ_start_offset; + oct_crypto_main_t *ocm = &oct_crypto_main; + vnet_crypto_async_frame_elt_t *elts; + oct_crypto_dev_t *crypto_dev = NULL; + oct_crypto_inflight_req_t *infl_req; + oct_crypto_pending_queue_t *pend_q; + u64 dptr_start_ptr, curr_ptr; + oct_crypto_sess_t *sess; + u32 crypto_total_length; + oct_crypto_key_t *key; + vlib_buffer_t *buffer; + u16 adj_len; + int ret; + + /* GCM packets having 8 bytes of aad and 8 bytes of iv */ + u8 aad_iv = 8 + 8; + + pend_q = &ocm->pend_q[vlib_get_thread_index ()]; + + enq_tail = pend_q->enq_tail; + + nb_infl_allowed = pend_q->n_desc - pend_q->n_crypto_inflight; + if (PREDICT_FALSE (nb_infl_allowed == 0)) + { + oct_crypto_update_frame_error_status ( + frame, 0, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR); + return -1; + } + + infl_req = &pend_q->req_queue[enq_tail]; + infl_req->frame = frame; + + for (i = 0; i < frame->n_elts; i++) + { + elts = &frame->elts[i]; + buffer_index = frame->buffer_indices[i]; + key = vec_elt_at_index (ocm->keys[type], elts->key_index); + + if (PREDICT_FALSE (!key->sess)) + { + oct_crypto_update_frame_error_status ( + frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR); + return -1; + } + + sess = key->sess; + + if (PREDICT_FALSE (!sess->initialised)) + oct_crypto_session_init (vm, sess, elts->key_index, type); + + crypto_dev = sess->crypto_dev; + + clib_memset (inst + i, 0, sizeof (struct cpt_inst_s)); + + buffer = vlib_get_buffer (vm, buffer_index); + + if (is_aead) + { + dptr_start_ptr = + (u64) (buffer->data + (elts->crypto_start_offset - aad_iv)); + curr_ptr = (u64) (buffer->data + buffer->current_data); + 
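+	      /*
+	       * dptr_start_ptr points aad_iv (8 bytes AAD + 8 bytes IV) bytes
+	       * before the cipher payload; adj_len is the gap between the
+	       * buffer's current data pointer and that start, so the first
+	       * source gather segment begins at dptr_start_ptr and spans
+	       * current_length - adj_len bytes of the first buffer.
+	       */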
adj_len = (u16) (dptr_start_ptr - curr_ptr); + + crypto_total_length = elts->crypto_total_length; + crypto_start_offset = aad_iv; + integ_start_offset = 0; + + ret = oct_crypto_fill_fc_params ( + sess, inst + i, is_aead, aad_len, (u8 *) dptr_start_ptr, elts, + (oct_crypto_scatter_gather_t *) (infl_req->sg_data) + i, + crypto_total_length /* cipher_len */, + crypto_start_offset /* cipher_offset */, 0 /* auth_len */, + integ_start_offset /* auth_off */, buffer, adj_len); + if (PREDICT_FALSE (ret < 0)) + { + oct_crypto_update_frame_error_status ( + frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR); + return -1; + } + } + else + { + dptr_start_ptr = (u64) (buffer->data + elts->integ_start_offset); + + enc_auth_len = elts->crypto_total_length + elts->integ_length_adj; + + curr_ptr = (u64) (buffer->data + buffer->current_data); + adj_len = (u16) (dptr_start_ptr - curr_ptr); + + crypto_total_length = elts->crypto_total_length; + crypto_start_offset = + elts->crypto_start_offset - elts->integ_start_offset; + integ_start_offset = 0; + + ret = oct_crypto_fill_fc_params ( + sess, inst + i, is_aead, aad_len, (u8 *) dptr_start_ptr, elts, + (oct_crypto_scatter_gather_t *) (infl_req->sg_data) + i, + crypto_total_length /* cipher_len */, + crypto_start_offset /* cipher_offset */, + enc_auth_len /* auth_len */, integ_start_offset /* auth_off */, + buffer, adj_len); + if (PREDICT_FALSE (ret < 0)) + { + oct_crypto_update_frame_error_status ( + frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR); + return -1; + } + } + + inst[i].w7.u64 = sess->cpt_inst_w7; + inst[i].res_addr = (u64) &infl_req->res[i]; + } + + oct_crypto_burst_submit (crypto_dev, inst, frame->n_elts); + + infl_req->elts = frame->n_elts; + OCT_MOD_INC (pend_q->enq_tail, pend_q->n_desc); + pend_q->n_crypto_inflight++; + + return 0; +} + +int +oct_crypto_enqueue_linked_alg_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame) +{ + return oct_crypto_enqueue_enc_dec ( + vm, frame, 0 /* is_aead */, 0 /* aad_len */, VNET_CRYPTO_OP_TYPE_ENCRYPT); +} + +int +oct_crypto_enqueue_linked_alg_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame) +{ + return oct_crypto_enqueue_enc_dec ( + vm, frame, 0 /* is_aead */, 0 /* aad_len */, VNET_CRYPTO_OP_TYPE_DECRYPT); +} + +int +oct_crypto_enqueue_aead_aad_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame, u8 aad_len) +{ + return oct_crypto_enqueue_enc_dec (vm, frame, 1 /* is_aead */, aad_len, + VNET_CRYPTO_OP_TYPE_ENCRYPT); +} + +static_always_inline int +oct_crypto_enqueue_aead_aad_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame, u8 aad_len) +{ + return oct_crypto_enqueue_enc_dec (vm, frame, 1 /* is_aead */, aad_len, + VNET_CRYPTO_OP_TYPE_DECRYPT); +} + +int +oct_crypto_enqueue_aead_aad_8_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame) +{ + return oct_crypto_enqueue_aead_aad_enc (vm, frame, 8); +} + +int +oct_crypto_enqueue_aead_aad_12_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame) +{ + return oct_crypto_enqueue_aead_aad_enc (vm, frame, 12); +} + +int +oct_crypto_enqueue_aead_aad_0_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame) +{ + return oct_crypto_enqueue_aead_aad_enc (vm, frame, 0); +} + +int +oct_crypto_enqueue_aead_aad_8_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame) +{ + return oct_crypto_enqueue_aead_aad_dec (vm, frame, 8); +} + +int +oct_crypto_enqueue_aead_aad_12_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame) +{ + return oct_crypto_enqueue_aead_aad_dec (vm, frame, 12); +} + +int +oct_crypto_enqueue_aead_aad_0_dec (vlib_main_t 
*vm, + vnet_crypto_async_frame_t *frame) +{ + return oct_crypto_enqueue_aead_aad_dec (vm, frame, 0); +} + +vnet_crypto_async_frame_t * +oct_crypto_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed, + u32 *enqueue_thread_idx) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + u32 deq_head, status = VNET_CRYPTO_OP_STATUS_COMPLETED; + vnet_crypto_async_frame_elt_t *fe = NULL; + oct_crypto_inflight_req_t *infl_req; + oct_crypto_pending_queue_t *pend_q; + vnet_crypto_async_frame_t *frame; + volatile union cpt_res_s *res; + int i; + + pend_q = &ocm->pend_q[vlib_get_thread_index ()]; + + if (!pend_q->n_crypto_inflight) + return NULL; + + deq_head = pend_q->deq_head; + infl_req = &pend_q->req_queue[deq_head]; + frame = infl_req->frame; + + fe = frame->elts; + + for (i = infl_req->deq_elts; i < infl_req->elts; ++i) + { + res = &infl_req->res[i]; + + if (PREDICT_FALSE (res->cn10k.compcode == CPT_COMP_NOT_DONE)) + return NULL; + + if (PREDICT_FALSE (res->cn10k.uc_compcode)) + { + if (res->cn10k.uc_compcode == ROC_SE_ERR_GC_ICV_MISCOMPARE) + status = fe[i].status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC; + else + status = fe[i].status = VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR; + } + + infl_req->deq_elts++; + } + + clib_memset ((void *) infl_req->res, 0, + sizeof (union cpt_res_s) * VNET_CRYPTO_FRAME_SIZE); + + OCT_MOD_INC (pend_q->deq_head, pend_q->n_desc); + pend_q->n_crypto_inflight--; + + frame->state = status == VNET_CRYPTO_OP_STATUS_COMPLETED ? + VNET_CRYPTO_FRAME_STATE_SUCCESS : + VNET_CRYPTO_FRAME_STATE_ELT_ERROR; + + *nb_elts_processed = frame->n_elts; + *enqueue_thread_idx = frame->enqueue_thread_index; + + infl_req->deq_elts = 0; + infl_req->elts = 0; + + return frame; +} + +int +oct_init_crypto_engine_handlers (vlib_main_t *vm, vnet_dev_t *dev) +{ + u32 engine_index; + + engine_index = vnet_crypto_register_engine (vm, "oct_cryptodev", 100, + "OCT Cryptodev Engine"); + +#define _(n, k, t, a) \ + vnet_crypto_register_enqueue_handler ( \ + vm, engine_index, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC, \ + oct_crypto_enqueue_aead_aad_##a##_enc); \ + vnet_crypto_register_enqueue_handler ( \ + vm, engine_index, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC, \ + oct_crypto_enqueue_aead_aad_##a##_dec); + foreach_oct_crypto_aead_async_alg +#undef _ + +#define _(c, h, k, d) \ + vnet_crypto_register_enqueue_handler ( \ + vm, engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC, \ + oct_crypto_enqueue_linked_alg_enc); \ + vnet_crypto_register_enqueue_handler ( \ + vm, engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC, \ + oct_crypto_enqueue_linked_alg_dec); + foreach_oct_crypto_link_async_alg; +#undef _ + + vnet_crypto_register_dequeue_handler (vm, engine_index, + oct_crypto_frame_dequeue); + + vnet_crypto_register_key_handler (vm, engine_index, oct_crypto_key_handler); + + return 0; +} + +int +oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev) +{ + oct_crypto_main_t *ocm = &oct_crypto_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + extern oct_plt_init_param_t oct_plt_init_param; + oct_crypto_inflight_req_t *infl_req_queue; + u32 n_inflight_req; + int i, j = 0; + + ocm->pend_q = oct_plt_init_param.oct_plt_zmalloc ( + tm->n_vlib_mains * sizeof (oct_crypto_pending_queue_t), + CLIB_CACHE_LINE_BYTES); + if (ocm->pend_q == NULL) + { + log_err (dev, "Failed to allocate memory for crypto pending queue"); + return -1; + } + + /* + * Each pending queue will get number of cpt desc / number of cores. + * And that desc count is shared across inflight entries. 
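+   * For example, with OCT_CPT_LF_MAX_NB_DESC (128000) split across 4
+   * worker threads, each pending queue gets 32000 descriptors, i.e.
+   * 32000 / VNET_CRYPTO_FRAME_SIZE inflight frame slots.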
+ */ + n_inflight_req = + (OCT_CPT_LF_MAX_NB_DESC / tm->n_vlib_mains) / VNET_CRYPTO_FRAME_SIZE; + + for (i = 0; i < tm->n_vlib_mains; ++i) + { + ocm->pend_q[i].n_desc = n_inflight_req; + + ocm->pend_q[i].req_queue = oct_plt_init_param.oct_plt_zmalloc ( + ocm->pend_q[i].n_desc * sizeof (oct_crypto_inflight_req_t), + CLIB_CACHE_LINE_BYTES); + if (ocm->pend_q[i].req_queue == NULL) + { + log_err (dev, + "Failed to allocate memory for crypto inflight request"); + goto free; + } + + for (j = 0; j <= ocm->pend_q[i].n_desc; ++j) + { + infl_req_queue = &ocm->pend_q[i].req_queue[j]; + + infl_req_queue->sg_data = oct_plt_init_param.oct_plt_zmalloc ( + OCT_SCATTER_GATHER_BUFFER_SIZE * VNET_CRYPTO_FRAME_SIZE, + CLIB_CACHE_LINE_BYTES); + if (infl_req_queue->sg_data == NULL) + { + log_err (dev, "Failed to allocate crypto scatter gather memory"); + goto free; + } + } + } + return 0; +free: + for (; i >= 0; i--) + { + if (ocm->pend_q[i].req_queue == NULL) + continue; + for (; j >= 0; j--) + { + infl_req_queue = &ocm->pend_q[i].req_queue[j]; + + if (infl_req_queue->sg_data == NULL) + continue; + + oct_plt_init_param.oct_plt_free (infl_req_queue->sg_data); + } + oct_plt_init_param.oct_plt_free (ocm->pend_q[i].req_queue); + } + oct_plt_init_param.oct_plt_free (ocm->pend_q); + + return -1; +} diff --git a/src/plugins/dev_octeon/crypto.h b/src/plugins/dev_octeon/crypto.h new file mode 100644 index 00000000000..27e1f600c68 --- /dev/null +++ b/src/plugins/dev_octeon/crypto.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2024 Marvell. + * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#ifndef _CRYPTO_H_ +#define _CRYPTO_H_ +#include <vnet/crypto/crypto.h> +#include <vnet/ip/ip.h> + +#define OCT_MAX_N_CPT_DEV 2 + +#define OCT_CPT_LF_MAX_NB_DESC 128000 + +/* CRYPTO_ID, KEY_LENGTH_IN_BYTES, TAG_LEN, AAD_LEN */ +#define foreach_oct_crypto_aead_async_alg \ + _ (AES_128_GCM, 16, 16, 8) \ + _ (AES_128_GCM, 16, 16, 12) \ + _ (AES_192_GCM, 24, 16, 8) \ + _ (AES_192_GCM, 24, 16, 12) \ + _ (AES_256_GCM, 32, 16, 8) \ + _ (AES_256_GCM, 32, 16, 12) \ + _ (CHACHA20_POLY1305, 32, 16, 8) \ + _ (CHACHA20_POLY1305, 32, 16, 12) \ + _ (CHACHA20_POLY1305, 32, 16, 0) + +/* CRYPTO_ID, INTEG_ID, KEY_LENGTH_IN_BYTES, DIGEST_LEN */ +#define foreach_oct_crypto_link_async_alg \ + _ (AES_128_CBC, SHA1, 16, 12) \ + _ (AES_192_CBC, SHA1, 24, 12) \ + _ (AES_256_CBC, SHA1, 32, 12) \ + _ (AES_128_CBC, SHA256, 16, 16) \ + _ (AES_192_CBC, SHA256, 24, 16) \ + _ (AES_256_CBC, SHA256, 32, 16) \ + _ (AES_128_CBC, SHA384, 16, 24) \ + _ (AES_192_CBC, SHA384, 24, 24) \ + _ (AES_256_CBC, SHA384, 32, 24) \ + _ (AES_128_CBC, SHA512, 16, 32) \ + _ (AES_192_CBC, SHA512, 24, 32) \ + _ (AES_256_CBC, SHA512, 32, 32) \ + _ (AES_128_CBC, MD5, 16, 12) \ + _ (AES_192_CBC, MD5, 24, 12) \ + _ (AES_256_CBC, MD5, 32, 12) \ + _ (3DES_CBC, MD5, 24, 12) \ + _ (3DES_CBC, SHA1, 24, 12) \ + _ (3DES_CBC, SHA256, 24, 16) \ + _ (3DES_CBC, SHA384, 24, 24) \ + _ (3DES_CBC, SHA512, 24, 32) \ + _ (AES_128_CTR, SHA1, 16, 12) \ + _ (AES_192_CTR, SHA1, 24, 12) \ + _ (AES_256_CTR, SHA1, 32, 12) + +#define OCT_MOD_INC(i, l) ((i) == (l - 1) ? 
(i) = 0 : (i)++) + +#define OCT_SCATTER_GATHER_BUFFER_SIZE 1024 + +#define CPT_LMT_SIZE_COPY (sizeof (struct cpt_inst_s) / 16) +#define OCT_MAX_LMT_SZ 16 + +#define SRC_IOV_SIZE \ + (sizeof (struct roc_se_iov_ptr) + \ + (sizeof (struct roc_se_buf_ptr) * ROC_MAX_SG_CNT)) + +#define OCT_CPT_LMT_GET_LINE_ADDR(lmt_addr, lmt_num) \ + (void *) ((u64) (lmt_addr) + ((u64) (lmt_num) << ROC_LMT_LINE_SIZE_LOG2)) + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + struct roc_cpt *roc_cpt; + struct roc_cpt_lmtline lmtline; + struct roc_cpt_lf lf; + vnet_dev_t *dev; +} oct_crypto_dev_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + /** CPT opcode */ + u16 cpt_op : 4; + /** Flag for AES GCM */ + u16 aes_gcm : 1; + /** IV length in bytes */ + u8 iv_length; + /** Auth IV length in bytes */ + u8 auth_iv_length; + /** IV offset in bytes */ + u16 iv_offset; + /** Auth IV offset in bytes */ + u16 auth_iv_offset; + /** CPT inst word 7 */ + u64 cpt_inst_w7; + /* initialise as part of first packet */ + u8 initialised; + /* store link key index in case of linked algo */ + vnet_crypto_key_index_t key_index; + oct_crypto_dev_t *crypto_dev; + struct roc_se_ctx cpt_ctx; +} oct_crypto_sess_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + oct_crypto_sess_t *sess; + oct_crypto_dev_t *crypto_dev; +} oct_crypto_key_t; + +typedef struct oct_crypto_scatter_gather +{ + u8 buf[OCT_SCATTER_GATHER_BUFFER_SIZE]; +} oct_crypto_scatter_gather_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + /** Result data of all entries in the frame */ + volatile union cpt_res_s res[VNET_CRYPTO_FRAME_SIZE]; + /** Scatter gather data */ + void *sg_data; + /** Frame pointer */ + vnet_crypto_async_frame_t *frame; + /** Number of async elements in frame */ + u16 elts; + /** Next read entry in frame, when dequeue */ + u16 deq_elts; +} oct_crypto_inflight_req_t; + +typedef struct +{ + /** Array of pending request */ + oct_crypto_inflight_req_t *req_queue; + /** Number of inflight operations in queue */ + u32 n_crypto_inflight; + /** Tail of queue to be used for enqueue */ + u16 enq_tail; + /** Head of queue to be used for dequeue */ + u16 deq_head; + /** Number of descriptors */ + u16 n_desc; +} oct_crypto_pending_queue_t; + +typedef struct +{ + oct_crypto_dev_t *crypto_dev[OCT_MAX_N_CPT_DEV]; + oct_crypto_key_t *keys[VNET_CRYPTO_ASYNC_OP_N_TYPES]; + oct_crypto_pending_queue_t *pend_q; + int n_cpt; + u8 started; +} oct_crypto_main_t; + +extern oct_crypto_main_t oct_crypto_main; + +void oct_crypto_key_del_handler (vlib_main_t *vm, + vnet_crypto_key_index_t key_index); + +void oct_crypto_key_add_handler (vlib_main_t *vm, + vnet_crypto_key_index_t key_index); + +void oct_crypto_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop, + vnet_crypto_key_index_t idx); + +int oct_crypto_enqueue_linked_alg_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_linked_alg_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_8_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_12_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_0_enc (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_8_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_12_dec (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); +int oct_crypto_enqueue_aead_aad_0_dec (vlib_main_t *vm, + 
vnet_crypto_async_frame_t *frame); +vnet_crypto_async_frame_t *oct_crypto_frame_dequeue (vlib_main_t *vm, + u32 *nb_elts_processed, + u32 *enqueue_thread_idx); +int oct_init_crypto_engine_handlers (vlib_main_t *vm, vnet_dev_t *dev); +int oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev); +#endif /* _CRYPTO_H_ */ diff --git a/src/plugins/dev_octeon/flow.c b/src/plugins/dev_octeon/flow.c index 5bef25f5369..e86425ec85d 100644 --- a/src/plugins/dev_octeon/flow.c +++ b/src/plugins/dev_octeon/flow.c @@ -131,6 +131,7 @@ oct_flow_validate_params (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_port_cfg_type_t type, u32 flow_index, uword *priv_data) { + vnet_dev_port_interfaces_t *ifs = port->interfaces; vnet_flow_t *flow = vnet_get_flow (flow_index); u32 last_queue; u32 qid; @@ -151,11 +152,11 @@ oct_flow_validate_params (vlib_main_t *vm, vnet_dev_port_t *port, if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE) { qid = flow->redirect_queue; - if (qid > port->intf.num_rx_queues - 1 || qid < 0) + if (qid > ifs->num_rx_queues - 1 || qid < 0) { log_err (port->dev, "Given Q(%d) is invalid, supported range is %d-%d", qid, 0, - port->intf.num_rx_queues - 1); + ifs->num_rx_queues - 1); return VNET_DEV_ERR_NOT_SUPPORTED; } } @@ -163,12 +164,12 @@ oct_flow_validate_params (vlib_main_t *vm, vnet_dev_port_t *port, if (flow->actions & VNET_FLOW_ACTION_RSS) { last_queue = flow->queue_index + flow->queue_num; - if (last_queue > port->intf.num_rx_queues - 1) + if (last_queue > ifs->num_rx_queues - 1) { log_err (port->dev, "Given Q range(%d-%d) is invalid, supported range is %d-%d", flow->queue_index, flow->queue_index + flow->queue_num, 0, - port->intf.num_rx_queues - 1); + ifs->num_rx_queues - 1); return VNET_DEV_ERR_NOT_SUPPORTED; } } @@ -538,6 +539,7 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, struct roc_npc_item_info item_info[ROC_NPC_ITEM_TYPE_END] = {}; struct roc_npc_action actions[ROC_NPC_ITEM_TYPE_END] = {}; oct_port_t *oct_port = vnet_dev_get_port_data (port); + vnet_dev_port_interfaces_t *ifs = port->interfaces; ethernet_header_t eth_spec = {}, eth_mask = {}; sctp_header_t sctp_spec = {}, sctp_mask = {}; gtpu_header_t gtpu_spec = {}, gtpu_mask = {}; @@ -775,7 +777,7 @@ parse_flow_actions: log_err (port->dev, "RSS action has no queues"); return VNET_DEV_ERR_NOT_SUPPORTED; } - queues = clib_mem_alloc (sizeof (u16) * port->intf.num_rx_queues); + queues = clib_mem_alloc (sizeof (u16) * ifs->num_rx_queues); for (index = 0; index < flow->queue_num; index++) queues[index] = flow->queue_index++; diff --git a/src/plugins/dev_octeon/init.c b/src/plugins/dev_octeon/init.c index 2f0c82c1c01..99cadddfc24 100644 --- a/src/plugins/dev_octeon/init.c +++ b/src/plugins/dev_octeon/init.c @@ -10,6 +10,7 @@ #include <vnet/plugin/plugin.h> #include <vpp/app/version.h> #include <dev_octeon/octeon.h> +#include <dev_octeon/crypto.h> #include <base/roc_api.h> #include <common.h> @@ -54,7 +55,9 @@ static struct _ (0xa064, RVU_VF, "Marvell Octeon Resource Virtualization Unit VF"), _ (0xa0f8, LBK_VF, "Marvell Octeon Loopback Unit VF"), _ (0xa0f7, SDP_VF, "Marvell Octeon System DPI Packet Interface Unit VF"), - _ (0xa0f3, CPT_VF, "Marvell Octeon Cryptographic Accelerator Unit VF"), + _ (0xa0f3, O10K_CPT_VF, + "Marvell Octeon-10 Cryptographic Accelerator Unit VF"), + _ (0xa0fe, O9K_CPT_VF, "Marvell Octeon-9 Cryptographic Accelerator Unit VF"), #undef _ }; @@ -191,17 +194,113 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) return vnet_dev_port_add (vm, dev, 0, &port_add_args); } +static int 
+oct_conf_cpt (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd, + int nb_lf) +{ + struct roc_cpt *roc_cpt = ocd->roc_cpt; + int rrv; + + if ((rrv = roc_cpt_eng_grp_add (roc_cpt, CPT_ENG_TYPE_SE)) < 0) + { + log_err (dev, "Could not add CPT SE engines"); + return cnx_return_roc_err (dev, rrv, "roc_cpt_eng_grp_add"); + } + if ((rrv = roc_cpt_eng_grp_add (roc_cpt, CPT_ENG_TYPE_IE)) < 0) + { + log_err (dev, "Could not add CPT IE engines"); + return cnx_return_roc_err (dev, rrv, "roc_cpt_eng_grp_add"); + } + if (roc_cpt->eng_grp[CPT_ENG_TYPE_IE] != ROC_CPT_DFLT_ENG_GRP_SE_IE) + { + log_err (dev, "Invalid CPT IE engine group configuration"); + return -1; + } + if (roc_cpt->eng_grp[CPT_ENG_TYPE_SE] != ROC_CPT_DFLT_ENG_GRP_SE) + { + log_err (dev, "Invalid CPT SE engine group configuration"); + return -1; + } + if ((rrv = roc_cpt_dev_configure (roc_cpt, nb_lf, false, 0)) < 0) + { + log_err (dev, "could not configure crypto device %U", + format_vlib_pci_addr, roc_cpt->pci_dev->addr); + return cnx_return_roc_err (dev, rrv, "roc_cpt_dev_configure"); + } + return 0; +} + +static vnet_dev_rv_t +oct_conf_cpt_queue (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd) +{ + struct roc_cpt *roc_cpt = ocd->roc_cpt; + struct roc_cpt_lmtline *cpt_lmtline; + struct roc_cpt_lf *cpt_lf; + int rrv; + + cpt_lf = &ocd->lf; + cpt_lmtline = &ocd->lmtline; + + cpt_lf->nb_desc = OCT_CPT_LF_MAX_NB_DESC; + cpt_lf->lf_id = 0; + if ((rrv = roc_cpt_lf_init (roc_cpt, cpt_lf)) < 0) + return cnx_return_roc_err (dev, rrv, "roc_cpt_lf_init"); + + roc_cpt_iq_enable (cpt_lf); + + if ((rrv = roc_cpt_lmtline_init (roc_cpt, cpt_lmtline, 0) < 0)) + return cnx_return_roc_err (dev, rrv, "roc_cpt_lmtline_init"); + + return 0; +} + static vnet_dev_rv_t oct_init_cpt (vlib_main_t *vm, vnet_dev_t *dev) { + oct_crypto_main_t *ocm = &oct_crypto_main; + extern oct_plt_init_param_t oct_plt_init_param; oct_device_t *cd = vnet_dev_get_data (dev); + oct_crypto_dev_t *ocd = NULL; int rrv; - struct roc_cpt cpt = { - .pci_dev = &cd->plt_pci_dev, - }; - if ((rrv = roc_cpt_dev_init (&cpt))) + if (ocm->n_cpt == OCT_MAX_N_CPT_DEV || ocm->started) + return VNET_DEV_ERR_NOT_SUPPORTED; + + ocd = oct_plt_init_param.oct_plt_zmalloc (sizeof (oct_crypto_dev_t), + CLIB_CACHE_LINE_BYTES); + + ocd->roc_cpt = oct_plt_init_param.oct_plt_zmalloc (sizeof (struct roc_cpt), + CLIB_CACHE_LINE_BYTES); + ocd->roc_cpt->pci_dev = &cd->plt_pci_dev; + + ocd->dev = dev; + + if ((rrv = roc_cpt_dev_init (ocd->roc_cpt))) return cnx_return_roc_err (dev, rrv, "roc_cpt_dev_init"); + + if ((rrv = oct_conf_cpt (vm, dev, ocd, 1))) + return rrv; + + if ((rrv = oct_conf_cpt_queue (vm, dev, ocd))) + return rrv; + + if (!ocm->n_cpt) + { + /* + * Initialize s/w queues, which are common across multiple + * crypto devices + */ + oct_conf_sw_queue (vm, dev); + + ocm->crypto_dev[0] = ocd; + } + + ocm->crypto_dev[1] = ocd; + + oct_init_crypto_engine_handlers (vm, dev); + + ocm->n_cpt++; + return VNET_DEV_OK; } @@ -256,7 +355,8 @@ oct_init (vlib_main_t *vm, vnet_dev_t *dev) case OCT_DEVICE_TYPE_SDP_VF: return oct_init_nix (vm, dev); - case OCT_DEVICE_TYPE_CPT_VF: + case OCT_DEVICE_TYPE_O10K_CPT_VF: + case OCT_DEVICE_TYPE_O9K_CPT_VF: return oct_init_cpt (vm, dev); default: diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h index a87a5e3e1ed..ccf8f62880d 100644 --- a/src/plugins/dev_octeon/octeon.h +++ b/src/plugins/dev_octeon/octeon.h @@ -30,7 +30,8 @@ typedef enum OCT_DEVICE_TYPE_RVU_VF, OCT_DEVICE_TYPE_LBK_VF, OCT_DEVICE_TYPE_SDP_VF, - OCT_DEVICE_TYPE_CPT_VF, 
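+  /*
+   * The CPT VF device type is split per silicon generation; init.c maps
+   * PCI ID 0xa0f3 to the Octeon-10 CPT VF and 0xa0fe to the Octeon-9 CPT VF.
+   */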
+ OCT_DEVICE_TYPE_O10K_CPT_VF, + OCT_DEVICE_TYPE_O9K_CPT_VF, } __clib_packed oct_device_type_t; typedef struct @@ -41,7 +42,6 @@ typedef struct u8 full_duplex : 1; u32 speed; struct plt_pci_device plt_pci_dev; - struct roc_cpt cpt; struct roc_nix *nix; } oct_device_t; @@ -102,7 +102,6 @@ typedef struct u64 aura_handle; u64 io_addr; void *lmt_addr; - oct_npa_batch_alloc_cl128_t *ba_buffer; u8 ba_first_cl; u8 ba_num_cl; diff --git a/src/plugins/dev_octeon/port.c b/src/plugins/dev_octeon/port.c index 528683fa3c7..f8a7d6ba7db 100644 --- a/src/plugins/dev_octeon/port.c +++ b/src/plugins/dev_octeon/port.c @@ -129,6 +129,7 @@ oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port) vnet_dev_t *dev = port->dev; oct_device_t *cd = vnet_dev_get_data (dev); oct_port_t *cp = vnet_dev_get_port_data (port); + vnet_dev_port_interfaces_t *ifs = port->interfaces; u8 mac_addr[PLT_ETHER_ADDR_LEN]; struct roc_nix *nix = cd->nix; vnet_dev_rv_t rv; @@ -136,14 +137,14 @@ oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port) log_debug (dev, "port init: port %u", port->port_id); - if ((rrv = roc_nix_lf_alloc (nix, port->intf.num_rx_queues, - port->intf.num_tx_queues, rxq_cfg))) + if ((rrv = roc_nix_lf_alloc (nix, ifs->num_rx_queues, ifs->num_tx_queues, + rxq_cfg))) { oct_port_deinit (vm, port); return oct_roc_err ( dev, rrv, "roc_nix_lf_alloc(nb_rxq = %u, nb_txq = %d, rxq_cfg=0x%lx) failed", - port->intf.num_rx_queues, port->intf.num_tx_queues, rxq_cfg); + ifs->num_rx_queues, ifs->num_tx_queues, rxq_cfg); } cp->lf_allocated = 1; @@ -428,6 +429,7 @@ oct_port_start (vlib_main_t *vm, vnet_dev_port_t *port) { vnet_dev_t *dev = port->dev; oct_device_t *cd = vnet_dev_get_data (dev); + oct_port_t *cp = vnet_dev_get_port_data (port); struct roc_nix *nix = cd->nix; struct roc_nix_eeprom_info eeprom_info = {}; vnet_dev_rv_t rv; @@ -451,6 +453,12 @@ oct_port_start (vlib_main_t *vm, vnet_dev_port_t *port) goto done; } + if ((rrv = roc_npc_mcam_enable_all_entries (&cp->npc, true))) + { + rv = oct_roc_err (dev, rrv, "roc_npc_mcam_enable_all_entries() failed"); + goto done; + } + vnet_dev_poll_port_add (vm, port, 0.5, oct_port_poll); if (roc_nix_eeprom_info_get (nix, &eeprom_info) == 0) @@ -469,6 +477,7 @@ oct_port_stop (vlib_main_t *vm, vnet_dev_port_t *port) { vnet_dev_t *dev = port->dev; oct_device_t *cd = vnet_dev_get_data (dev); + oct_port_t *cp = vnet_dev_get_port_data (port); struct roc_nix *nix = cd->nix; int rrv; @@ -476,6 +485,14 @@ oct_port_stop (vlib_main_t *vm, vnet_dev_port_t *port) vnet_dev_poll_port_remove (vm, port, oct_port_poll); + /* Disable all the NPC entries */ + rrv = roc_npc_mcam_enable_all_entries (&cp->npc, false); + if (rrv) + { + oct_roc_err (dev, rrv, "roc_npc_mcam_enable_all_entries() failed"); + return; + } + rrv = roc_nix_npc_rx_ena_dis (nix, false); if (rrv) { @@ -575,6 +592,10 @@ oct_port_add_del_eth_addr (vlib_main_t *vm, vnet_dev_port_t *port, rv = oct_roc_err (dev, rrv, "roc_nix_mac_addr_set() failed"); } } + + rrv = roc_nix_rss_default_setup (nix, default_rss_flowkey); + if (rrv) + rv = oct_roc_err (dev, rrv, "roc_nix_rss_default_setup() failed"); } } diff --git a/src/plugins/dev_octeon/roc_helper.c b/src/plugins/dev_octeon/roc_helper.c index 16e0a871a9d..c1166b654cf 100644 --- a/src/plugins/dev_octeon/roc_helper.c +++ b/src/plugins/dev_octeon/roc_helper.c @@ -75,13 +75,12 @@ oct_drv_physmem_alloc (vlib_main_t *vm, u32 size, u32 align) if (align) { - /* Force cache line alloc in case alignment is less than cache line */ - align = align < CLIB_CACHE_LINE_BYTES ? 
CLIB_CACHE_LINE_BYTES : align; + /* Force ROC align alloc in case alignment is less than ROC align */ + align = align < ROC_ALIGN ? ROC_ALIGN : align; mem = vlib_physmem_alloc_aligned_on_numa (vm, size, align, 0); } else - mem = - vlib_physmem_alloc_aligned_on_numa (vm, size, CLIB_CACHE_LINE_BYTES, 0); + mem = vlib_physmem_alloc_aligned_on_numa (vm, size, ROC_ALIGN, 0); if (!mem) return NULL; diff --git a/src/plugins/dev_octeon/rx_node.c b/src/plugins/dev_octeon/rx_node.c index b057c4d7047..833227eeea8 100644 --- a/src/plugins/dev_octeon/rx_node.c +++ b/src/plugins/dev_octeon/rx_node.c @@ -103,7 +103,7 @@ oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, vnet_dev_rx_queue_t *rxq, u32 n) { oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); - vlib_buffer_template_t bt = rxq->buffer_template; + vlib_buffer_template_t bt = vnet_dev_get_rx_queue_if_buffer_template (rxq); u32 b0_err_flags = 0, b1_err_flags = 0; u32 b2_err_flags = 0, b3_err_flags = 0; u32 n_left, err_flags = 0; @@ -347,9 +347,9 @@ oct_rx_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, oct_nix_rx_cqe_desc_t *descs = crq->cq.desc_base; oct_nix_lf_cq_op_status_t status; oct_rx_node_ctx_t _ctx = { - .next_index = rxq->next_index, - .sw_if_index = port->intf.sw_if_index, - .hw_if_index = port->intf.hw_if_index, + .next_index = vnet_dev_get_rx_queue_if_next_index(rxq), + .sw_if_index = vnet_dev_get_rx_queue_if_sw_if_index (rxq), + .hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq), }, *ctx = &_ctx; /* get head and tail from NIX_LF_CQ_OP_STATUS */ diff --git a/src/plugins/dhcp/client.c b/src/plugins/dhcp/client.c index 8fa67c616b2..d81d2935577 100644 --- a/src/plugins/dhcp/client.c +++ b/src/plugins/dhcp/client.c @@ -1153,7 +1153,9 @@ dhcp_client_set_command_fn (vlib_main_t * vm, a->is_add = is_add; a->sw_if_index = sw_if_index; a->hostname = hostname; - a->client_identifier = format (0, "vpp 1.1%c", 0); + a->client_identifier = + format (0, "%U", format_ethernet_address, + vnet_sw_interface_get_hw_address (vnet_get_main (), sw_if_index)); a->set_broadcast_flag = set_broadcast_flag; /* diff --git a/src/plugins/dhcp/dhcp4_proxy_node.c b/src/plugins/dhcp/dhcp4_proxy_node.c index 2b49d49bb7f..740ae8043e0 100644 --- a/src/plugins/dhcp/dhcp4_proxy_node.c +++ b/src/plugins/dhcp/dhcp4_proxy_node.c @@ -321,7 +321,8 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, o->length += id_len + 5; } - len = o->length + 3; + /* 2 bytes for option header 82+len */ + len = o->length + 2; b0->current_length += len; /* Fix IP header length and checksum */ old_l0 = ip0->length; diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h index a069fbe3818..2440439989f 100644 --- a/src/plugins/dpdk/device/dpdk.h +++ b/src/plugins/dpdk/device/dpdk.h @@ -131,7 +131,6 @@ typedef struct u32 interface_number_from_port_id : 1; u32 use_intel_phdr_cksum : 1; u32 int_unmaskable : 1; - vlib_simple_counter_main_t *xstats_counters; } dpdk_driver_t; dpdk_driver_t *dpdk_driver_find (const char *name, const char **desc); @@ -211,6 +210,8 @@ typedef struct struct rte_eth_stats last_stats; struct rte_eth_xstat *xstats; f64 time_last_stats_update; + vlib_simple_counter_main_t xstats_counters; + u32 *xstats_symlinks; /* mac address */ u8 *default_mac_address; diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h index e5b5a35df80..794953da55e 100644 --- a/src/plugins/dpdk/device/dpdk_priv.h +++ b/src/plugins/dpdk/device/dpdk_priv.h @@ -54,39 +54,28 @@ dpdk_get_xstats (dpdk_device_t *xd, u32 thread_index) { int 
ret; int i; - int len; if (!(xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)) return; - if (xd->driver == 0) - return; - len = rte_eth_xstats_get (xd->port_id, NULL, 0); - if (len < 0) - return; - - vec_validate (xd->xstats, len - 1); - ret = rte_eth_xstats_get (xd->port_id, xd->xstats, len); - if (ret < 0 || ret > len) + ret = rte_eth_xstats_get (xd->port_id, xd->xstats, vec_len (xd->xstats)); + if (ret < 0) { - /* Failed, expand vector and try again on next time around the track. */ - vec_validate (xd->xstats, ret - 1); - vec_set_len (xd->xstats, 0); dpdk_log_warn ("rte_eth_xstats_get(%d) failed: %d", xd->port_id, ret); return; } - if (len == vec_len (xd->driver->xstats_counters)) + else if (ret != vec_len (xd->xstats)) { - vec_foreach_index (i, xd->xstats) - { - vlib_set_simple_counter (&xd->driver->xstats_counters[i], - thread_index, xd->sw_if_index, - xd->xstats[i].value); - } + dpdk_log_warn ( + "rte_eth_xstats_get(%d) returned %d/%d stats. Resetting counters.", + xd->port_id, ret, vec_len (xd->xstats)); + dpdk_counters_xstats_init (xd); + return; } - else + + vec_foreach_index (i, xd->xstats) { - dpdk_log_warn ("rte_eth_xstats_get vector size mismatch (%d/%d", len, - vec_len (xd->driver->xstats_counters)); + vlib_set_simple_counter (&xd->xstats_counters, thread_index, i, + xd->xstats[i].value); } } diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index fa1b234874d..ec9e6045de7 100644 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -227,71 +227,71 @@ dpdk_find_startup_config (struct rte_eth_dev_info *di) } /* - * Initialise or refresh the xstats counters for a device + * Initialise the xstats counters for a device */ void dpdk_counters_xstats_init (dpdk_device_t *xd) { int len, ret, i; struct rte_eth_xstat_name *xstats_names = 0; - char *name; - dpdk_driver_t *dr = xd->driver; - /* Only support xstats for supported drivers */ - if (!dr) - return; + if (vec_len (xd->xstats_symlinks) > 0) + { + /* xstats already initialized. Reset counters */ + vec_foreach_index (i, xd->xstats_symlinks) + { + vlib_stats_remove_entry (xd->xstats_symlinks[i]); + } + } + else + { + xd->xstats_counters.stat_segment_name = + (char *) format (0, "/if/xstats/%d%c", xd->sw_if_index, 0); + xd->xstats_counters.counters = 0; + } len = rte_eth_xstats_get_names (xd->port_id, 0, 0); if (len < 0) { - dpdk_log_err ("[%u] rte_eth_xstats_get_names failed: %d", xd->port_id, - len); - return; - } - /* Counters for this driver is already initialised */ - if (vec_len (dr->xstats_counters) == len) - { - vec_foreach_index (i, dr->xstats_counters) - { - vlib_validate_simple_counter (&dr->xstats_counters[i], - xd->sw_if_index); - vlib_zero_simple_counter (&dr->xstats_counters[i], xd->sw_if_index); - } + dpdk_log_err ("[%u] rte_eth_xstats_get_names failed: %d. DPDK xstats " + "not configured.", + xd->port_id, len); return; } - /* Same driver, different interface, different length of counter array. 
*/ - ASSERT (vec_len (dr->xstats_counters) == 0); + vlib_validate_simple_counter (&xd->xstats_counters, len); + vlib_zero_simple_counter (&xd->xstats_counters, len); vec_validate (xstats_names, len - 1); + vec_validate (xd->xstats, len - 1); + vec_validate (xd->xstats_symlinks, len - 1); ret = rte_eth_xstats_get_names (xd->port_id, xstats_names, len); if (ret >= 0 && ret <= len) { - vec_validate (dr->xstats_counters, len - 1); vec_foreach_index (i, xstats_names) { - name = (char *) format (0, "/if/%s/%s%c", dr->drivers->name, - xstats_names[i].name, 0); - /* There is a bug in the ENA driver where the xstats names are not * unique. */ - if (vlib_stats_find_entry_index (name) != STAT_SEGMENT_INDEX_INVALID) + xd->xstats_symlinks[i] = vlib_stats_add_symlink ( + xd->xstats_counters.stats_entry_index, i, "/interfaces/%U/%s%c", + format_vnet_sw_if_index_name, vnet_get_main (), xd->sw_if_index, + xstats_names[i].name, 0); + if (xd->xstats_symlinks[i] == STAT_SEGMENT_INDEX_INVALID) { - vec_free (name); - name = (char *) format (0, "/if/%s/%s_%d%c", dr->drivers->name, - xstats_names[i].name, i, 0); + xd->xstats_symlinks[i] = vlib_stats_add_symlink ( + xd->xstats_counters.stats_entry_index, i, + "/interfaces/%U/%s_%d%c", format_vnet_sw_if_index_name, + vnet_get_main (), xd->sw_if_index, xstats_names[i].name, i, 0); } - - dr->xstats_counters[i].name = name; - dr->xstats_counters[i].stat_segment_name = name; - dr->xstats_counters[i].counters = 0; - vlib_validate_simple_counter (&dr->xstats_counters[i], - xd->sw_if_index); - vlib_zero_simple_counter (&dr->xstats_counters[i], xd->sw_if_index); - vec_free (name); } } + else + { + dpdk_log_err ("[%u] rte_eth_xstats_get_names failed: %d. DPDK xstats " + "not configured.", + xd->port_id, ret); + } vec_free (xstats_names); } @@ -503,6 +503,14 @@ dpdk_lib_init (dpdk_main_t * dm) else if (dr && dr->n_tx_desc) xd->conf.n_tx_desc = dr->n_tx_desc; + if (xd->conf.n_tx_desc > di.tx_desc_lim.nb_max) + { + dpdk_log_warn ("[%u] Configured number of TX descriptors (%u) is " + "bigger than maximum supported (%u)", + port_id, xd->conf.n_tx_desc, di.tx_desc_lim.nb_max); + xd->conf.n_tx_desc = di.tx_desc_lim.nb_max; + } + dpdk_log_debug ( "[%u] n_rx_queues: %u n_tx_queues: %u n_rx_desc: %u n_tx_desc: %u", port_id, xd->conf.n_rx_queues, xd->conf.n_tx_queues, diff --git a/src/plugins/hs_apps/CMakeLists.txt b/src/plugins/hs_apps/CMakeLists.txt index ba03e393f44..eae100949d4 100644 --- a/src/plugins/hs_apps/CMakeLists.txt +++ b/src/plugins/hs_apps/CMakeLists.txt @@ -21,7 +21,7 @@ add_vpp_plugin(hs_apps hs_apps.c http_cli.c http_client_cli.c - http_simple_post.c + http_client.c http_tps.c proxy.c test_builtins.c diff --git a/src/plugins/hs_apps/echo_client.c b/src/plugins/hs_apps/echo_client.c index 8dec5d86824..d5edffbd02e 100644 --- a/src/plugins/hs_apps/echo_client.c +++ b/src/plugins/hs_apps/echo_client.c @@ -946,15 +946,16 @@ ec_connect_rpc (void *args) a->api_context = ci; if (needs_crypto) { - session_endpoint_alloc_ext_cfg (&a->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - a->sep_ext.ext_cfg->crypto.ckpair_index = ecm->ckpair_index; + transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = ecm->ckpair_index; } rv = vnet_connect (a); if (needs_crypto) - clib_mem_free (a->sep_ext.ext_cfg); + session_endpoint_free_ext_cfgs (&a->sep_ext); if (rv) { diff --git a/src/plugins/hs_apps/echo_server.c b/src/plugins/hs_apps/echo_server.c index 
756a1cc3451..b981e775b57 100644 --- a/src/plugins/hs_apps/echo_server.c +++ b/src/plugins/hs_apps/echo_server.c @@ -591,6 +591,7 @@ echo_server_listen () i32 rv; echo_server_main_t *esm = &echo_server_main; vnet_listen_args_t _args = {}, *args = &_args; + int needs_crypto; if ((rv = parse_uri (esm->server_uri, &args->sep_ext))) { @@ -598,11 +599,14 @@ echo_server_listen () } args->app_index = esm->app_index; args->sep_ext.port = hs_make_data_port (args->sep_ext.port); - if (echo_client_transport_needs_crypto (args->sep_ext.transport_proto)) + needs_crypto = + echo_client_transport_needs_crypto (args->sep_ext.transport_proto); + if (needs_crypto) { - session_endpoint_alloc_ext_cfg (&args->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - args->sep_ext.ext_cfg->crypto.ckpair_index = esm->ckpair_index; + transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &args->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = esm->ckpair_index; } if (args->sep_ext.transport_proto == TRANSPORT_PROTO_UDP) @@ -612,8 +616,8 @@ echo_server_listen () rv = vnet_listen (args); esm->listener_handle = args->handle; - if (args->sep_ext.ext_cfg) - clib_mem_free (args->sep_ext.ext_cfg); + if (needs_crypto) + session_endpoint_free_ext_cfgs (&args->sep_ext); return rv; } diff --git a/src/plugins/hs_apps/http_cli.c b/src/plugins/hs_apps/http_cli.c index 18b57f6c29d..dfa90f9eced 100644 --- a/src/plugins/hs_apps/http_cli.c +++ b/src/plugins/hs_apps/http_cli.c @@ -74,6 +74,10 @@ typedef struct /* pool of uri maps */ hcs_uri_map_t *uri_map_pool; + + /* for appns */ + u8 *appns_id; + u64 appns_secret; } hcs_main_t; static hcs_main_t hcs_main; @@ -402,7 +406,7 @@ hcs_ts_rx_callback (session_t *ts) } if (is_encoded) { - u8 *decoded = http_percent_decode (args.buf); + u8 *decoded = http_percent_decode (args.buf, vec_len (args.buf)); vec_free (args.buf); args.buf = decoded; } @@ -597,6 +601,11 @@ hcs_attach () hcm->fifo_size ? 
hcm->fifo_size : 32 << 10; a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos; + if (hcm->appns_id) + { + a->namespace_id = hcm->appns_id; + a->options[APP_OPTIONS_NAMESPACE_SECRET] = hcm->appns_secret; + } if (vnet_application_attach (a)) { @@ -651,9 +660,10 @@ hcs_listen () if (need_crypto) { - session_endpoint_alloc_ext_cfg (&a->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - a->sep_ext.ext_cfg->crypto.ckpair_index = hcm->ckpair_index; + transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = hcm->ckpair_index; } rv = vnet_listen (a); @@ -667,15 +677,25 @@ hcs_listen () } if (need_crypto) - clib_mem_free (a->sep_ext.ext_cfg); + session_endpoint_free_ext_cfgs (&a->sep_ext); return rv; } +static void +hcs_detach () +{ + vnet_app_detach_args_t _a, *a = &_a; + hcs_main_t *hcm = &hcs_main; + a->app_index = hcm->app_index; + a->api_client_index = APP_INVALID_INDEX; + hcm->app_index = ~0; + vnet_application_detach (a); +} + static int hcs_unlisten () { - session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; hcs_main_t *hcm = &hcs_main; vnet_unlisten_args_t _a, *a = &_a; char *uri; @@ -688,9 +708,6 @@ hcs_unlisten () uri = (char *) hcm->uri; ASSERT (uri); - if (parse_uri (uri, &sep)) - return -1; - value = hash_get_mem (hcm->index_by_uri, uri); if (value) { @@ -700,8 +717,11 @@ hcs_unlisten () rv = vnet_unlisten (a); if (rv == 0) { + hash_unset_mem (hcm->index_by_uri, uri); vec_free (map->uri); pool_put (hcm->uri_map_pool, map); + if (pool_elts (hcm->uri_map_pool) == 0) + hcs_detach (); } } else @@ -710,17 +730,6 @@ hcs_unlisten () return rv; } -static void -hcs_detach () -{ - vnet_app_detach_args_t _a, *a = &_a; - hcs_main_t *hcm = &hcs_main; - a->app_index = hcm->app_index; - a->api_client_index = APP_INVALID_INDEX; - hcm->app_index = ~0; - vnet_application_detach (a); -} - static int hcs_create (vlib_main_t *vm) { @@ -776,6 +785,10 @@ hcs_create_command_fn (vlib_main_t *vm, unformat_input_t *input, hcm->fifo_size <<= 10; else if (unformat (line_input, "uri %_%v%_", &hcm->uri)) ; + else if (unformat (line_input, "appns %_%v%_", &hcm->appns_id)) + ; + else if (unformat (line_input, "secret %lu", &hcm->appns_secret)) + ; else if (unformat (line_input, "listener")) { if (unformat (line_input, "add")) @@ -804,31 +817,30 @@ hcs_create_command_fn (vlib_main_t *vm, unformat_input_t *input, start_server: if (hcm->uri == 0) - hcm->uri = format (0, "tcp://0.0.0.0/80%c", 0); + hcm->uri = format (0, "tcp://0.0.0.0/80"); if (hcm->app_index != (u32) ~0) { + if (hcm->appns_id && (listener_add != ~0)) + { + error = clib_error_return ( + 0, "appns must not be specified for listener add/del"); + goto done; + } if (listener_add == 1) { if (hcs_listen ()) - { - error = clib_error_return (0, "failed to start listening %v", - hcm->uri); - goto done; - } - else - goto done; + error = + clib_error_return (0, "failed to start listening %v", hcm->uri); + goto done; } else if (listener_add == 0) { - if (hcs_unlisten () != 0) - { - error = - clib_error_return (0, "failed to stop listening %v", hcm->uri); - goto done; - } - else - goto done; + rv = hcs_unlisten (); + if (rv != 0) + error = clib_error_return ( + 0, "failed to stop listening %v, rv = %d", hcm->uri, rv); + goto done; } else { @@ -855,6 +867,7 @@ start_server: } done: + vec_free (hcm->appns_id); vec_free (hcm->uri); return error; } @@ -863,7 +876,7 @@ 
VLIB_CLI_COMMAND (hcs_create_command, static) = { .path = "http cli server", .short_help = "http cli server [uri <uri>] [fifo-size <nbytes>] " "[private-segment-size <nMG>] [prealloc-fifos <n>] " - "[listener <add|del>]", + "[listener <add|del>] [appns <app-ns> secret <appns-secret>]", .function = hcs_create_command_fn, }; diff --git a/src/plugins/hs_apps/http_client.c b/src/plugins/hs_apps/http_client.c new file mode 100644 index 00000000000..05a87ec7de8 --- /dev/null +++ b/src/plugins/hs_apps/http_client.c @@ -0,0 +1,743 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2024 Cisco Systems, Inc. + */ + +#include <vnet/session/application.h> +#include <vnet/session/application_interface.h> +#include <vnet/session/session.h> +#include <http/http.h> +#include <http/http_header_names.h> +#include <http/http_content_types.h> +#include <http/http_status_codes.h> +#include <vppinfra/unix.h> + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u32 session_index; + u32 thread_index; + u32 vpp_session_index; + u64 to_recv; + u8 is_closed; +} hc_session_t; + +typedef struct +{ + hc_session_t *sessions; + u32 thread_index; + vlib_main_t *vlib_main; +} hc_worker_t; + +typedef struct +{ + u32 app_index; + u32 cli_node_index; + u8 attached; + u8 *uri; + session_endpoint_cfg_t connect_sep; + u8 *target; + u8 *headers_buf; + u8 *data; + u64 data_offset; + hc_worker_t *wrk; + u8 *resp_headers; + u8 *http_response; + u8 *response_status; + http_header_ht_t *custom_header; + u8 is_file; + u8 use_ptr; + u8 *filename; + bool verbose; + f64 timeout; + http_req_method_t req_method; +} hc_main_t; + +typedef enum +{ + HC_CONNECT_FAILED = 1, + HC_TRANSPORT_CLOSED, + HC_REPLY_RECEIVED, +} hc_cli_signal_t; + +static hc_main_t hc_main; + +static inline hc_worker_t * +hc_worker_get (u32 thread_index) +{ + return &hc_main.wrk[thread_index]; +} + +static inline hc_session_t * +hc_session_get (u32 session_index, u32 thread_index) +{ + hc_worker_t *wrk = hc_worker_get (thread_index); + wrk->vlib_main = vlib_get_main_by_index (thread_index); + return pool_elt_at_index (wrk->sessions, session_index); +} + +static void +hc_ho_session_free (u32 hs_index) +{ + hc_worker_t *wrk = hc_worker_get (0); + pool_put_index (wrk->sessions, hs_index); +} + +static hc_session_t * +hc_session_alloc (hc_worker_t *wrk) +{ + hc_session_t *s; + + pool_get_zero (wrk->sessions, s); + s->session_index = s - wrk->sessions; + s->thread_index = wrk->thread_index; + + return s; +} + +static int +hc_session_connected_callback (u32 app_index, u32 hc_session_index, + session_t *s, session_error_t err) +{ + hc_main_t *hcm = &hc_main; + hc_session_t *hc_session, *new_hc_session; + hc_worker_t *wrk; + http_msg_t msg; + u64 to_send; + u32 n_enq; + u8 n_segs; + int rv; + http_header_ht_t *header; + http_header_t *req_headers = 0; + u32 new_hc_index; + + HTTP_DBG (1, "ho hc_index: %d", hc_session_index); + + if (err) + { + clib_warning ("hc_session_index[%d] connected error: %U", + hc_session_index, format_session_error, err); + vlib_process_signal_event_mt (hcm->wrk->vlib_main, hcm->cli_node_index, + HC_CONNECT_FAILED, 0); + return -1; + } + + hc_session = hc_session_get (hc_session_index, 0); + wrk = hc_worker_get (s->thread_index); + new_hc_session = hc_session_alloc (wrk); + new_hc_index = new_hc_session->session_index; + clib_memcpy_fast (new_hc_session, hc_session, sizeof (*hc_session)); + hc_session->vpp_session_index = s->session_index; + + new_hc_session->session_index = new_hc_index; + new_hc_session->thread_index = 
s->thread_index; + new_hc_session->vpp_session_index = s->session_index; + HTTP_DBG (1, "new hc_index: %d", new_hc_session->session_index); + s->opaque = new_hc_index; + + if (hcm->req_method == HTTP_REQ_POST) + { + if (hcm->is_file) + http_add_header ( + &req_headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM)); + else + http_add_header ( + &req_headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_content_type_token (HTTP_CONTENT_APP_X_WWW_FORM_URLENCODED)); + } + + vec_foreach (header, hcm->custom_header) + http_add_header (&req_headers, (const char *) header->name, + vec_len (header->name), (const char *) header->value, + vec_len (header->value)); + + hcm->headers_buf = http_serialize_headers (req_headers); + vec_free (req_headers); + + msg.method_type = hcm->req_method; + if (hcm->req_method == HTTP_REQ_POST) + msg.data.body_len = vec_len (hcm->data); + else + msg.data.body_len = 0; + + msg.type = HTTP_MSG_REQUEST; + /* request target */ + msg.data.target_form = HTTP_TARGET_ORIGIN_FORM; + msg.data.target_path_len = vec_len (hcm->target); + /* custom headers */ + msg.data.headers_len = vec_len (hcm->headers_buf); + /* total length */ + msg.data.len = + msg.data.target_path_len + msg.data.headers_len + msg.data.body_len; + + if (hcm->use_ptr) + { + uword target = pointer_to_uword (hcm->target); + uword headers = pointer_to_uword (hcm->headers_buf); + uword body = pointer_to_uword (hcm->data); + msg.data.type = HTTP_MSG_DATA_PTR; + svm_fifo_seg_t segs[4] = { + { (u8 *) &msg, sizeof (msg) }, + { (u8 *) &target, sizeof (target) }, + { (u8 *) &headers, sizeof (headers) }, + { (u8 *) &body, sizeof (body) }, + }; + + n_segs = (hcm->req_method == HTTP_REQ_GET) ? 3 : 4; + rv = svm_fifo_enqueue_segments (s->tx_fifo, segs, n_segs, + 0 /* allow partial */); + if (hcm->req_method == HTTP_REQ_POST) + ASSERT (rv == (sizeof (msg) + sizeof (target) + sizeof (headers) + + sizeof (body))); + else + ASSERT (rv == (sizeof (msg) + sizeof (target) + sizeof (headers))); + goto done; + } + + msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.target_path_offset = 0; + msg.data.headers_offset = msg.data.target_path_len; + msg.data.body_offset = msg.data.headers_offset + msg.data.headers_len; + + rv = svm_fifo_enqueue (s->tx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + + rv = svm_fifo_enqueue (s->tx_fifo, vec_len (hcm->target), hcm->target); + ASSERT (rv == vec_len (hcm->target)); + + rv = svm_fifo_enqueue (s->tx_fifo, vec_len (hcm->headers_buf), + hcm->headers_buf); + ASSERT (rv == msg.data.headers_len); + + if (hcm->req_method == HTTP_REQ_POST) + { + to_send = vec_len (hcm->data); + n_enq = clib_min (svm_fifo_size (s->tx_fifo), to_send); + + rv = svm_fifo_enqueue (s->tx_fifo, n_enq, hcm->data); + if (rv < to_send) + { + hcm->data_offset = (rv > 0) ? 
rv : 0; + svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + } + } + +done: + if (svm_fifo_set_event (s->tx_fifo)) + session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); + + return 0; +} + +static void +hc_session_disconnect_callback (session_t *s) +{ + hc_main_t *hcm = &hc_main; + vnet_disconnect_args_t _a = { 0 }, *a = &_a; + int rv; + + a->handle = session_handle (s); + a->app_index = hcm->app_index; + if ((rv = vnet_disconnect_session (a))) + clib_warning ("warning: disconnect returned: %U", format_session_error, + rv); +} + +static void +hc_session_transport_closed_callback (session_t *s) +{ + hc_main_t *hcm = &hc_main; + vlib_process_signal_event_mt (hcm->wrk->vlib_main, hcm->cli_node_index, + HC_TRANSPORT_CLOSED, 0); +} + +static void +hc_ho_cleanup_callback (session_t *ts) +{ + HTTP_DBG (1, "ho hc_index: %d:", ts->opaque); + hc_ho_session_free (ts->opaque); +} + +static void +hc_session_reset_callback (session_t *s) +{ + hc_main_t *hcm = &hc_main; + hc_session_t *hc_session; + vnet_disconnect_args_t _a = { 0 }, *a = &_a; + int rv; + + hc_session = hc_session_get (s->opaque, s->thread_index); + hc_session->is_closed = 1; + + a->handle = session_handle (s); + a->app_index = hcm->app_index; + if ((rv = vnet_disconnect_session (a))) + clib_warning ("warning: disconnect returned: %U", format_session_error, + rv); +} + +static int +hc_rx_callback (session_t *s) +{ + hc_main_t *hcm = &hc_main; + hc_session_t *hc_session; + http_msg_t msg; + int rv; + + hc_session = hc_session_get (s->opaque, s->thread_index); + + if (hc_session->is_closed) + { + clib_warning ("hc_session_index[%d] is closed", s->opaque); + return -1; + } + + if (hc_session->to_recv == 0) + { + rv = svm_fifo_dequeue (s->rx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + + if (msg.type != HTTP_MSG_REPLY) + { + clib_warning ("unexpected msg type %d", msg.type); + return -1; + } + + if (msg.data.headers_len) + { + http_header_table_t *ht; + vec_validate (hcm->resp_headers, msg.data.headers_len - 1); + rv = svm_fifo_peek (s->rx_fifo, msg.data.headers_offset, + msg.data.headers_len, hcm->resp_headers); + + ASSERT (rv == msg.data.headers_len); + HTTP_DBG (1, (char *) hcm->resp_headers); + + if (http_parse_headers (hcm->resp_headers, &ht)) + { + clib_warning ("invalid headers received"); + return -1; + } + http_free_header_table (ht); + + hcm->response_status = + format (0, "%U", format_http_status_code, msg.code); + } + + if (msg.data.body_len == 0) + { + svm_fifo_dequeue_drop_all (s->rx_fifo); + goto done; + } + + /* drop everything up to body */ + svm_fifo_dequeue_drop (s->rx_fifo, msg.data.body_offset); + hc_session->to_recv = msg.data.body_len; + if (msg.code != HTTP_STATUS_OK && hc_session->to_recv == 0) + { + goto done; + } + vec_validate (hcm->http_response, msg.data.body_len - 1); + vec_reset_length (hcm->http_response); + } + + u32 max_deq = svm_fifo_max_dequeue (s->rx_fifo); + + u32 n_deq = clib_min (hc_session->to_recv, max_deq); + u32 curr = vec_len (hcm->http_response); + rv = svm_fifo_dequeue (s->rx_fifo, n_deq, hcm->http_response + curr); + if (rv < 0) + { + clib_warning ("app dequeue(n=%d) failed; rv = %d", n_deq, rv); + return -1; + } + + ASSERT (rv == n_deq); + vec_set_len (hcm->http_response, curr + n_deq); + ASSERT (hc_session->to_recv >= rv); + hc_session->to_recv -= rv; + +done: + if (hc_session->to_recv == 0) + { + hc_session_disconnect_callback (s); + vlib_process_signal_event_mt (hcm->wrk->vlib_main, hcm->cli_node_index, + HC_REPLY_RECEIVED, 0); + } + + return 0; 
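[Editor's aside, not part of the patch] The hc_rx_callback hunk above reassembles an HTTP reply body that may arrive across several rx notifications: to_recv starts at msg.data.body_len and is decremented on every callback until it reaches zero, at which point the CLI process is signalled. Below is a minimal sketch of that resumable-read step, using only calls already present in this patch (svm_fifo_max_dequeue, svm_fifo_dequeue, vec_set_len, clib_min); the helper name and signature are hypothetical, not part of the change.

/* Editor's sketch, hypothetical helper: drain up to *to_recv body bytes
 * from an rx fifo into a vector, as hc_rx_callback does inline. Assumes
 * the caller reserved body_len bytes of vector capacity up front
 * (vec_validate followed by vec_reset_length, as in the hunk above), so
 * the dequeue can safely write at offset vec_len. */
#include <vnet/session/session.h>

static int
hc_drain_body_sketch (svm_fifo_t *rx_fifo, u8 **body, u64 *to_recv)
{
  u32 max_deq = svm_fifo_max_dequeue (rx_fifo);
  u32 n_deq = clib_min (*to_recv, max_deq);
  u32 curr = vec_len (*body);
  int rv;

  if (n_deq == 0)
    return 0;

  rv = svm_fifo_dequeue (rx_fifo, n_deq, *body + curr);
  if (rv < 0)
    return rv;

  /* account for what was actually dequeued; caller re-enters on the next
   * rx notification until *to_recv reaches zero */
  vec_set_len (*body, curr + rv);
  *to_recv -= rv;
  return 0;
}
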
+} + +static int +hc_tx_callback (session_t *s) +{ + hc_main_t *hcm = &hc_main; + u64 to_send; + int rv; + + to_send = vec_len (hcm->data) - hcm->data_offset; + rv = svm_fifo_enqueue (s->tx_fifo, to_send, hcm->data + hcm->data_offset); + + if (rv <= 0) + { + svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + return 0; + } + + if (rv < to_send) + { + hcm->data_offset += rv; + svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + } + + if (svm_fifo_set_event (s->tx_fifo)) + session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); + + return 0; +} + +static session_cb_vft_t hc_session_cb_vft = { + .session_connected_callback = hc_session_connected_callback, + .session_disconnect_callback = hc_session_disconnect_callback, + .session_transport_closed_callback = hc_session_transport_closed_callback, + .session_reset_callback = hc_session_reset_callback, + .builtin_app_rx_callback = hc_rx_callback, + .builtin_app_tx_callback = hc_tx_callback, + .half_open_cleanup_callback = hc_ho_cleanup_callback, +}; + +static clib_error_t * +hc_attach () +{ + hc_main_t *hcm = &hc_main; + vnet_app_attach_args_t _a, *a = &_a; + u64 options[18]; + int rv; + + clib_memset (a, 0, sizeof (*a)); + clib_memset (options, 0, sizeof (options)); + + a->api_client_index = APP_INVALID_INDEX; + a->name = format (0, "http_client"); + a->session_cb_vft = &hc_session_cb_vft; + a->options = options; + a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; + + if ((rv = vnet_application_attach (a))) + return clib_error_return (0, "attach returned: %U", format_session_error, + rv); + + hcm->app_index = a->app_index; + vec_free (a->name); + hcm->attached = 1; + + return 0; +} + +static int +hc_connect_rpc (void *rpc_args) +{ + vnet_connect_args_t *a = rpc_args; + int rv; + + rv = vnet_connect (a); + if (rv > 0) + clib_warning (0, "connect returned: %U", format_session_error, rv); + + vec_free (a); + return rv; +} + +static void +hc_connect () +{ + hc_main_t *hcm = &hc_main; + vnet_connect_args_t *a = 0; + hc_worker_t *wrk; + hc_session_t *hc_session; + + vec_validate (a, 0); + clib_memset (a, 0, sizeof (a[0])); + + clib_memcpy (&a->sep_ext, &hcm->connect_sep, sizeof (hcm->connect_sep)); + a->app_index = hcm->app_index; + + /* allocate http session on main thread */ + wrk = hc_worker_get (0); + hc_session = hc_session_alloc (wrk); + a->api_context = hc_session->session_index; + + session_send_rpc_evt_to_thread_force (transport_cl_thread (), hc_connect_rpc, + a); +} + +static clib_error_t * +hc_run (vlib_main_t *vm) +{ + hc_main_t *hcm = &hc_main; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + u32 num_threads; + hc_worker_t *wrk; + uword event_type, *event_data = 0; + clib_error_t *err; + FILE *file_ptr; + + num_threads = 1 /* main thread */ + vtm->n_threads; + vec_validate (hcm->wrk, num_threads - 1); + vec_foreach (wrk, hcm->wrk) + wrk->thread_index = wrk - hcm->wrk; + + if ((err = hc_attach ())) + return clib_error_return (0, "http client attach: %U", format_clib_error, + err); + + hc_connect (); + + vlib_process_wait_for_event_or_clock (vm, hcm->timeout); + event_type = vlib_process_get_events (vm, &event_data); + switch (event_type) + { + case ~0: + err = clib_error_return (0, "error: timeout"); + break; + case HC_CONNECT_FAILED: + err = clib_error_return (0, "error: failed to connect"); + break; + case HC_TRANSPORT_CLOSED: + err = clib_error_return (0, "error: transport closed"); + break; + case HC_REPLY_RECEIVED: + if (hcm->filename) + { + file_ptr = + fopen ((char *) format (0, 
"/tmp/%v", hcm->filename), "w"); + if (file_ptr == NULL) + { + vlib_cli_output (vm, "couldn't open file %v", hcm->filename); + } + else + { + fprintf (file_ptr, "< %s\n< %s\n< %s", hcm->response_status, + hcm->resp_headers, hcm->http_response); + fclose (file_ptr); + vlib_cli_output (vm, "file saved (/tmp/%v)", hcm->filename); + } + } + if (hcm->verbose) + vlib_cli_output (vm, "< %v\n< %v", hcm->response_status, + hcm->resp_headers); + vlib_cli_output (vm, "<\n%v", hcm->http_response); + + break; + default: + err = clib_error_return (0, "error: unexpected event %d", event_type); + break; + } + + vec_free (event_data); + return err; +} + +static int +hc_detach () +{ + hc_main_t *hcm = &hc_main; + vnet_app_detach_args_t _da, *da = &_da; + int rv; + + if (!hcm->attached) + return 0; + + da->app_index = hcm->app_index; + da->api_client_index = APP_INVALID_INDEX; + rv = vnet_application_detach (da); + hcm->attached = 0; + hcm->app_index = APP_INVALID_INDEX; + + return rv; +} + +static void +hcc_worker_cleanup (hc_worker_t *wrk) +{ + pool_free (wrk->sessions); +} + +static void +hc_cleanup () +{ + hc_main_t *hcm = &hc_main; + hc_worker_t *wrk; + http_header_ht_t *header; + + vec_foreach (wrk, hcm->wrk) + hcc_worker_cleanup (wrk); + + vec_free (hcm->uri); + vec_free (hcm->target); + vec_free (hcm->headers_buf); + vec_free (hcm->data); + vec_free (hcm->resp_headers); + vec_free (hcm->http_response); + vec_free (hcm->response_status); + vec_free (hcm->wrk); + vec_free (hcm->filename); + vec_foreach (header, hcm->custom_header) + { + vec_free (header->name); + vec_free (header->value); + } + vec_free (hcm->custom_header); +} + +static clib_error_t * +hc_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + hc_main_t *hcm = &hc_main; + clib_error_t *err = 0; + unformat_input_t _line_input, *line_input = &_line_input; + u8 *path = 0; + u8 *file_data; + http_header_ht_t new_header; + u8 *name; + u8 *value; + int rv; + hcm->timeout = 10; + + if (hcm->attached) + return clib_error_return (0, "failed: already running!"); + + hcm->use_ptr = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "expected required arguments"); + + hcm->req_method = + (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) && + unformat (line_input, "post") ? 
+ HTTP_REQ_POST : + HTTP_REQ_GET; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "uri %s", &hcm->uri)) + ; + else if (unformat (line_input, "data %v", &hcm->data)) + hcm->is_file = 0; + else if (unformat (line_input, "target %s", &hcm->target)) + ; + else if (unformat (line_input, "file %s", &path)) + hcm->is_file = 1; + else if (unformat (line_input, "use-ptr")) + hcm->use_ptr = 1; + else if (unformat (line_input, "save-to %s", &hcm->filename)) + { + if (strstr ((char *) hcm->filename, "..") || + strchr ((char *) hcm->filename, '/')) + { + err = clib_error_return ( + 0, "illegal characters in filename '%v'", hcm->filename); + goto done; + } + } + else if (unformat (line_input, "header %v:%v", &name, &value)) + { + new_header.name = name; + new_header.value = value; + vec_add1 (hcm->custom_header, new_header); + } + else if (unformat (line_input, "verbose")) + hcm->verbose = true; + else if (unformat (line_input, "timeout %f", &hcm->timeout)) + ; + else + { + err = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (!hcm->uri) + { + err = clib_error_return (0, "URI not defined"); + goto done; + } + if (!hcm->target) + { + err = clib_error_return (0, "target not defined"); + goto done; + } + if (!hcm->data && hcm->req_method == HTTP_REQ_POST) + { + if (path) + { + err = clib_file_contents ((char *) path, &file_data); + if (err) + goto done; + hcm->data = file_data; + } + else + { + err = clib_error_return (0, "data not defined"); + goto done; + } + } + + if ((rv = parse_uri ((char *) hcm->uri, &hcm->connect_sep))) + { + err = + clib_error_return (0, "URI parse error: %U", format_session_error, rv); + goto done; + } + + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; + vlib_worker_thread_barrier_sync (vm); + vnet_session_enable_disable (vm, &args); + vlib_worker_thread_barrier_release (vm); + + hcm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index; + + err = hc_run (vm); + + if ((rv = hc_detach ())) + { + /* don't override last error */ + if (!err) + err = clib_error_return (0, "detach returned: %U", + format_session_error, rv); + else + clib_warning ("warning: detach returned: %U", format_session_error, + rv); + } + +done: + vec_free (path); + hc_cleanup (); + unformat_free (line_input); + return err; +} + +VLIB_CLI_COMMAND (hc_command, static) = { + .path = "http client", + .short_help = "[post] uri http://<ip-addr> target <origin-form> " + "[data <form-urlencoded> | file <file-path>] [use-ptr] " + "[save-to <filename>] [header <Key:Value>] [verbose] " + "[timeout <seconds> (default = 10)]", + .function = hc_command_fn, + .is_mp_safe = 1, +}; + +static clib_error_t * +hc_main_init () +{ + hc_main_t *hcm = &hc_main; + hcm->app_index = APP_INVALID_INDEX; + return 0; +} + +VLIB_INIT_FUNCTION (hc_main_init); diff --git a/src/plugins/hs_apps/http_client_cli.c b/src/plugins/hs_apps/http_client_cli.c index 4b8ef173bd9..861af7f03e2 100644 --- a/src/plugins/hs_apps/http_client_cli.c +++ b/src/plugins/hs_apps/http_client_cli.c @@ -98,6 +98,13 @@ hcc_session_get (u32 hs_index, u32 thread_index) return pool_elt_at_index (wrk->sessions, hs_index); } +static void +hcc_ho_session_free (u32 hs_index) +{ + hcc_worker_t *wrk = hcc_worker_get (0); + pool_put_index (wrk->sessions, hs_index); +} + static int hcc_ts_accept_callback (session_t *ts) { @@ -125,9 +132,10 @@ hcc_ts_connected_callback (u32 app_index, u32 
hc_index, session_t *as, hcc_worker_t *wrk; http_msg_t msg; u8 *headers_buf; + u32 new_hs_index; int rv; - HCC_DBG ("hc_index: %d", hc_index); + HCC_DBG ("ho hc_index: %d", hc_index); if (err) { @@ -138,19 +146,22 @@ hcc_ts_connected_callback (u32 app_index, u32 hc_index, session_t *as, return -1; } - /* TODO delete half open session once the support is added in http layer */ hs = hcc_session_get (hc_index, 0); wrk = hcc_worker_get (as->thread_index); new_hs = hcc_session_alloc (wrk); + new_hs_index = new_hs->session_index; clib_memcpy_fast (new_hs, hs, sizeof (*hs)); + new_hs->session_index = new_hs_index; + new_hs->thread_index = as->thread_index; + new_hs->vpp_session_index = as->session_index; + HCC_DBG ("new hc_index: %d", new_hs->session_index); + as->opaque = new_hs_index; - hs->vpp_session_index = as->session_index; - - http_add_header (&hs->req_headers, + http_add_header (&new_hs->req_headers, http_header_name_token (HTTP_HEADER_ACCEPT), http_content_type_token (HTTP_CONTENT_TEXT_HTML)); - headers_buf = http_serialize_headers (hs->req_headers); - vec_free (hs->req_headers); + headers_buf = http_serialize_headers (new_hs->req_headers); + vec_free (new_hs->req_headers); msg.type = HTTP_MSG_REQUEST; msg.method_type = HTTP_REQ_GET; @@ -300,6 +311,13 @@ hcc_ts_transport_closed (session_t *s) HCC_TRANSPORT_CLOSED, 0); } +static void +hcc_ho_cleanup_callback (session_t *ts) +{ + HCC_DBG ("ho hc_index: %d:", ts->opaque); + hcc_ho_session_free (ts->opaque); +} + static session_cb_vft_t hcc_session_cb_vft = { .session_accept_callback = hcc_ts_accept_callback, .session_disconnect_callback = hcc_ts_disconnect_callback, @@ -308,6 +326,7 @@ static session_cb_vft_t hcc_session_cb_vft = { .builtin_app_tx_callback = hcc_ts_tx_callback, .session_reset_callback = hcc_ts_reset_callback, .session_transport_closed_callback = hcc_ts_transport_closed, + .half_open_cleanup_callback = hcc_ho_cleanup_callback, }; static clib_error_t * @@ -362,6 +381,7 @@ hcc_connect_rpc (void *rpc_args) if (rv) clib_warning (0, "connect returned: %U", format_session_error, rv); + session_endpoint_free_ext_cfgs (&a->sep_ext); vec_free (a); return rv; } @@ -380,6 +400,7 @@ hcc_connect () hcc_main_t *hcm = &hcc_main; hcc_worker_t *wrk; hcc_session_t *hs; + transport_endpt_ext_cfg_t *ext_cfg; vec_validate (a, 0); clib_memset (a, 0, sizeof (a[0])); @@ -387,6 +408,11 @@ hcc_connect () clib_memcpy (&a->sep_ext, &hcm->connect_sep, sizeof (hcm->connect_sep)); a->app_index = hcm->app_index; + /* set http (response) timeout to 10 seconds */ + ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque)); + ext_cfg->opaque = 10; + /* allocate http session on main thread */ wrk = hcc_worker_get (0); hs = hcc_session_alloc (wrk); @@ -407,7 +433,7 @@ hcc_run (vlib_main_t *vm, int print_output) hcc_worker_t *wrk; num_threads = 1 /* main thread */ + vtm->n_threads; - vec_validate (hcm->wrk, num_threads); + vec_validate (hcm->wrk, num_threads - 1); vec_foreach (wrk, hcm->wrk) { wrk->thread_index = wrk - hcm->wrk; diff --git a/src/plugins/hs_apps/http_simple_post.c b/src/plugins/hs_apps/http_simple_post.c deleted file mode 100644 index 6212eac1c97..00000000000 --- a/src/plugins/hs_apps/http_simple_post.c +++ /dev/null @@ -1,581 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright(c) 2024 Cisco Systems, Inc. 
- */ - -#include <vnet/session/application.h> -#include <vnet/session/application_interface.h> -#include <vnet/session/session.h> -#include <http/http.h> -#include <http/http_header_names.h> -#include <http/http_content_types.h> -#include <vppinfra/unix.h> - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u32 session_index; - u32 thread_index; - u32 vpp_session_index; - u8 is_closed; -} hsp_session_t; - -typedef struct -{ - hsp_session_t *sessions; - u32 thread_index; -} hsp_worker_t; - -typedef struct -{ - u32 app_index; - vlib_main_t *vlib_main; - u32 cli_node_index; - u8 attached; - u8 *uri; - session_endpoint_cfg_t connect_sep; - u8 *target; - u8 *headers_buf; - u8 *data; - u64 data_offset; - hsp_worker_t *wrk; - u8 *http_response; - u8 is_file; - u8 use_ptr; -} hsp_main_t; - -typedef enum -{ - HSP_CONNECT_FAILED = 1, - HSP_TRANSPORT_CLOSED, - HSP_REPLY_RECEIVED, -} hsp_cli_signal_t; - -static hsp_main_t hsp_main; - -static inline hsp_worker_t * -hsp_worker_get (u32 thread_index) -{ - return &hsp_main.wrk[thread_index]; -} - -static inline hsp_session_t * -hsp_session_get (u32 session_index, u32 thread_index) -{ - hsp_worker_t *wrk = hsp_worker_get (thread_index); - return pool_elt_at_index (wrk->sessions, session_index); -} - -static hsp_session_t * -hsp_session_alloc (hsp_worker_t *wrk) -{ - hsp_session_t *s; - - pool_get_zero (wrk->sessions, s); - s->session_index = s - wrk->sessions; - s->thread_index = wrk->thread_index; - - return s; -} - -static int -hsp_session_connected_callback (u32 app_index, u32 hsp_session_index, - session_t *s, session_error_t err) -{ - hsp_main_t *hspm = &hsp_main; - hsp_session_t *hsp_session, *new_hsp_session; - hsp_worker_t *wrk; - http_header_t *headers = 0; - http_msg_t msg; - u64 to_send; - u32 n_enq; - int rv; - - if (err) - { - clib_warning ("hsp_session_index[%d] connected error: %U", - hsp_session_index, format_session_error, err); - vlib_process_signal_event_mt (hspm->vlib_main, hspm->cli_node_index, - HSP_CONNECT_FAILED, 0); - return -1; - } - - hsp_session = hsp_session_get (hsp_session_index, 0); - wrk = hsp_worker_get (s->thread_index); - new_hsp_session = hsp_session_alloc (wrk); - clib_memcpy_fast (new_hsp_session, hsp_session, sizeof (*hsp_session)); - hsp_session->vpp_session_index = s->session_index; - - if (hspm->is_file) - { - http_add_header ( - &headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE), - http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM)); - } - else - { - http_add_header ( - &headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE), - http_content_type_token (HTTP_CONTENT_APP_X_WWW_FORM_URLENCODED)); - } - hspm->headers_buf = http_serialize_headers (headers); - vec_free (headers); - - msg.type = HTTP_MSG_REQUEST; - msg.method_type = HTTP_REQ_POST; - /* request target */ - msg.data.target_form = HTTP_TARGET_ORIGIN_FORM; - msg.data.target_path_len = vec_len (hspm->target); - /* custom headers */ - msg.data.headers_len = vec_len (hspm->headers_buf); - /* request body */ - msg.data.body_len = vec_len (hspm->data); - /* total length */ - msg.data.len = - msg.data.target_path_len + msg.data.headers_len + msg.data.body_len; - - if (hspm->use_ptr) - { - uword target = pointer_to_uword (hspm->target); - uword headers = pointer_to_uword (hspm->headers_buf); - uword body = pointer_to_uword (hspm->data); - msg.data.type = HTTP_MSG_DATA_PTR; - svm_fifo_seg_t segs[4] = { - { (u8 *) &msg, sizeof (msg) }, - { (u8 *) &target, sizeof (target) }, - { (u8 *) &headers, sizeof (headers) }, - { (u8 *) &body, 
sizeof (body) }, - }; - - rv = - svm_fifo_enqueue_segments (s->tx_fifo, segs, 4, 0 /* allow partial */); - ASSERT (rv == (sizeof (msg) + sizeof (target) + sizeof (headers) + - sizeof (body))); - goto done; - } - - msg.data.type = HTTP_MSG_DATA_INLINE; - msg.data.target_path_offset = 0; - msg.data.headers_offset = msg.data.target_path_len; - msg.data.body_offset = msg.data.headers_offset + msg.data.headers_len; - - rv = svm_fifo_enqueue (s->tx_fifo, sizeof (msg), (u8 *) &msg); - ASSERT (rv == sizeof (msg)); - - rv = svm_fifo_enqueue (s->tx_fifo, vec_len (hspm->target), hspm->target); - ASSERT (rv == vec_len (hspm->target)); - - rv = svm_fifo_enqueue (s->tx_fifo, vec_len (hspm->headers_buf), - hspm->headers_buf); - ASSERT (rv == msg.data.headers_len); - - to_send = vec_len (hspm->data); - n_enq = clib_min (svm_fifo_size (s->tx_fifo), to_send); - - rv = svm_fifo_enqueue (s->tx_fifo, n_enq, hspm->data); - - if (rv < to_send) - { - hspm->data_offset = (rv > 0) ? rv : 0; - svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - } - -done: - if (svm_fifo_set_event (s->tx_fifo)) - session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); - - return 0; -} - -static void -hsp_session_disconnect_callback (session_t *s) -{ - hsp_main_t *hspm = &hsp_main; - vnet_disconnect_args_t _a = { 0 }, *a = &_a; - int rv; - - a->handle = session_handle (s); - a->app_index = hspm->app_index; - if ((rv = vnet_disconnect_session (a))) - clib_warning ("warning: disconnect returned: %U", format_session_error, - rv); -} - -static void -hsp_session_transport_closed_callback (session_t *s) -{ - hsp_main_t *hspm = &hsp_main; - - vlib_process_signal_event_mt (hspm->vlib_main, hspm->cli_node_index, - HSP_TRANSPORT_CLOSED, 0); -} - -static void -hsp_session_reset_callback (session_t *s) -{ - hsp_main_t *hspm = &hsp_main; - hsp_session_t *hsp_session; - vnet_disconnect_args_t _a = { 0 }, *a = &_a; - int rv; - - hsp_session = hsp_session_get (s->opaque, s->thread_index); - hsp_session->is_closed = 1; - - a->handle = session_handle (s); - a->app_index = hspm->app_index; - if ((rv = vnet_disconnect_session (a))) - clib_warning ("warning: disconnect returned: %U", format_session_error, - rv); -} - -static int -hsp_rx_callback (session_t *s) -{ - hsp_main_t *hspm = &hsp_main; - hsp_session_t *hsp_session; - http_msg_t msg; - int rv; - - hsp_session = hsp_session_get (s->opaque, s->thread_index); - - if (hsp_session->is_closed) - { - clib_warning ("hsp_session_index[%d] is closed", s->opaque); - return -1; - } - - rv = svm_fifo_dequeue (s->rx_fifo, sizeof (msg), (u8 *) &msg); - ASSERT (rv == sizeof (msg)); - - if (msg.type != HTTP_MSG_REPLY) - { - clib_warning ("unexpected msg type %d", msg.type); - return -1; - } - - svm_fifo_dequeue_drop_all (s->rx_fifo); - - if (msg.code == HTTP_STATUS_OK) - hspm->http_response = format (0, "request success"); - else - hspm->http_response = format (0, "request failed"); - - hsp_session_disconnect_callback (s); - vlib_process_signal_event_mt (hspm->vlib_main, hspm->cli_node_index, - HSP_REPLY_RECEIVED, 0); - return 0; -} - -static int -hsp_tx_callback (session_t *s) -{ - hsp_main_t *hspm = &hsp_main; - u64 to_send; - u32 n_enq; - int rv; - - to_send = vec_len (hspm->data) - hspm->data_offset; - n_enq = clib_min (svm_fifo_size (s->tx_fifo), to_send); - - rv = svm_fifo_enqueue (s->tx_fifo, n_enq, hspm->data + hspm->data_offset); - - if (rv <= 0) - { - svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - return 0; - } - - if (rv < to_send) - { - hspm->data_offset += rv; - 
svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - } - - if (svm_fifo_set_event (s->tx_fifo)) - session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); - - return 0; -} - -static session_cb_vft_t hsp_session_cb_vft = { - .session_connected_callback = hsp_session_connected_callback, - .session_disconnect_callback = hsp_session_disconnect_callback, - .session_transport_closed_callback = hsp_session_transport_closed_callback, - .session_reset_callback = hsp_session_reset_callback, - .builtin_app_rx_callback = hsp_rx_callback, - .builtin_app_tx_callback = hsp_tx_callback, -}; - -static clib_error_t * -hsp_attach () -{ - hsp_main_t *hspm = &hsp_main; - vnet_app_attach_args_t _a, *a = &_a; - u64 options[18]; - int rv; - - clib_memset (a, 0, sizeof (*a)); - clib_memset (options, 0, sizeof (options)); - - a->api_client_index = APP_INVALID_INDEX; - a->name = format (0, "http_simple_post"); - a->session_cb_vft = &hsp_session_cb_vft; - a->options = options; - a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; - - if ((rv = vnet_application_attach (a))) - return clib_error_return (0, "attach returned: %U", format_session_error, - rv); - - hspm->app_index = a->app_index; - vec_free (a->name); - hspm->attached = 1; - - return 0; -} - -static int -hsp_connect_rpc (void *rpc_args) -{ - vnet_connect_args_t *a = rpc_args; - int rv; - - rv = vnet_connect (a); - if (rv) - clib_warning (0, "connect returned: %U", format_session_error, rv); - - vec_free (a); - return rv; -} - -static void -hsp_connect () -{ - hsp_main_t *hspm = &hsp_main; - vnet_connect_args_t *a = 0; - hsp_worker_t *wrk; - hsp_session_t *hsp_session; - - vec_validate (a, 0); - clib_memset (a, 0, sizeof (a[0])); - - clib_memcpy (&a->sep_ext, &hspm->connect_sep, sizeof (hspm->connect_sep)); - a->app_index = hspm->app_index; - - /* allocate http session on main thread */ - wrk = hsp_worker_get (0); - hsp_session = hsp_session_alloc (wrk); - a->api_context = hsp_session->session_index; - - session_send_rpc_evt_to_thread_force (transport_cl_thread (), - hsp_connect_rpc, a); -} - -static clib_error_t * -hsp_run (vlib_main_t *vm) -{ - hsp_main_t *hspm = &hsp_main; - vlib_thread_main_t *vtm = vlib_get_thread_main (); - u32 num_threads; - hsp_worker_t *wrk; - uword event_type, *event_data = 0; - clib_error_t *err; - - num_threads = 1 /* main thread */ + vtm->n_threads; - vec_validate (hspm->wrk, num_threads); - vec_foreach (wrk, hspm->wrk) - wrk->thread_index = wrk - hspm->wrk; - - if ((err = hsp_attach ())) - return clib_error_return (0, "http simple post attach: %U", - format_clib_error, err); - - hsp_connect (); - - vlib_process_wait_for_event_or_clock (vm, 10); - event_type = vlib_process_get_events (vm, &event_data); - switch (event_type) - { - case ~0: - err = clib_error_return (0, "error: timeout"); - break; - case HSP_CONNECT_FAILED: - err = clib_error_return (0, "error: failed to connect"); - break; - case HSP_TRANSPORT_CLOSED: - err = clib_error_return (0, "error: transport closed"); - break; - case HSP_REPLY_RECEIVED: - vlib_cli_output (vm, "%v", hspm->http_response); - break; - default: - err = clib_error_return (0, "error: unexpected event %d", event_type); - break; - } - - vec_free (event_data); - return err; -} - -static int -hsp_detach () -{ - hsp_main_t *hspm = &hsp_main; - vnet_app_detach_args_t _da, *da = &_da; - int rv; - - if (!hspm->attached) - return 0; - - da->app_index = hspm->app_index; - da->api_client_index = APP_INVALID_INDEX; - rv = vnet_application_detach (da); - hspm->attached = 0; - 
hspm->app_index = APP_INVALID_INDEX; - - return rv; -} - -static void -hcc_worker_cleanup (hsp_worker_t *wrk) -{ - pool_free (wrk->sessions); -} - -static void -hsp_cleanup () -{ - hsp_main_t *hspm = &hsp_main; - hsp_worker_t *wrk; - - vec_foreach (wrk, hspm->wrk) - hcc_worker_cleanup (wrk); - - vec_free (hspm->uri); - vec_free (hspm->target); - vec_free (hspm->headers_buf); - vec_free (hspm->data); - vec_free (hspm->http_response); - vec_free (hspm->wrk); -} - -static clib_error_t * -hsp_command_fn (vlib_main_t *vm, unformat_input_t *input, - vlib_cli_command_t *cmd) -{ - hsp_main_t *hspm = &hsp_main; - clib_error_t *err = 0; - unformat_input_t _line_input, *line_input = &_line_input; - u8 *path = 0; - u8 *file_data; - int rv; - - if (hspm->attached) - return clib_error_return (0, "failed: already running!"); - - hspm->use_ptr = 0; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return clib_error_return (0, "expected required arguments"); - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "uri %s", &hspm->uri)) - ; - else if (unformat (line_input, "data %v", &hspm->data)) - hspm->is_file = 0; - else if (unformat (line_input, "target %s", &hspm->target)) - ; - else if (unformat (line_input, "file %s", &path)) - hspm->is_file = 1; - else if (unformat (line_input, "use-ptr")) - hspm->use_ptr = 1; - else - { - err = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (!hspm->uri) - { - err = clib_error_return (0, "URI not defined"); - goto done; - } - if (!hspm->target) - { - err = clib_error_return (0, "target not defined"); - goto done; - } - if (!hspm->data) - { - if (path) - { - err = clib_file_contents ((char *) path, &file_data); - if (err) - goto done; - hspm->data = file_data; - } - else - { - err = clib_error_return (0, "data not defined"); - goto done; - } - } - - if ((rv = parse_uri ((char *) hspm->uri, &hspm->connect_sep))) - { - err = - clib_error_return (0, "URI parse error: %U", format_session_error, rv); - goto done; - } - - session_enable_disable_args_t args = { .is_en = 1, - .rt_engine_type = - RT_BACKEND_ENGINE_RULE_TABLE }; - vlib_worker_thread_barrier_sync (vm); - vnet_session_enable_disable (vm, &args); - vlib_worker_thread_barrier_release (vm); - - hspm->cli_node_index = - vlib_get_current_process (vm)->node_runtime.node_index; - - err = hsp_run (vm); - - if ((rv = hsp_detach ())) - { - /* don't override last error */ - if (!err) - err = clib_error_return (0, "detach returned: %U", - format_session_error, rv); - else - clib_warning ("warning: detach returned: %U", format_session_error, - rv); - } - -done: - hsp_cleanup (); - unformat_free (line_input); - return err; -} - -VLIB_CLI_COMMAND (hsp_command, static) = { - .path = "http post", - .short_help = "uri http://<ip-addr> target <origin-form> " - "[data <form-urlencoded> | file <file-path>] [use-ptr]", - .function = hsp_command_fn, - .is_mp_safe = 1, -}; - -static clib_error_t * -hsp_main_init (vlib_main_t *vm) -{ - hsp_main_t *hspm = &hsp_main; - - hspm->app_index = APP_INVALID_INDEX; - hspm->vlib_main = vm; - return 0; -} - -VLIB_INIT_FUNCTION (hsp_main_init); diff --git a/src/plugins/hs_apps/http_tps.c b/src/plugins/hs_apps/http_tps.c index cdeafa5d54a..a40a31caf63 100644 --- a/src/plugins/hs_apps/http_tps.c +++ b/src/plugins/hs_apps/http_tps.c @@ -641,15 +641,16 @@ hts_start_listen (hts_main_t *htm, session_endpoint_cfg_t *sep, u8 *uri, if (need_crypto) { - 
session_endpoint_alloc_ext_cfg (&a->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - a->sep_ext.ext_cfg->crypto.ckpair_index = htm->ckpair_index; + transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = htm->ckpair_index; } rv = vnet_listen (a); if (need_crypto) - clib_mem_free (a->sep_ext.ext_cfg); + session_endpoint_free_ext_cfgs (&a->sep_ext); if (rv) return rv; diff --git a/src/plugins/hs_apps/proxy.c b/src/plugins/hs_apps/proxy.c index c7e7b2a653c..d7fe6fb54df 100644 --- a/src/plugins/hs_apps/proxy.c +++ b/src/plugins/hs_apps/proxy.c @@ -19,50 +19,145 @@ #include <vnet/session/application_interface.h> #include <hs_apps/proxy.h> #include <vnet/tcp/tcp.h> +#include <http/http.h> +#include <http/http_header_names.h> proxy_main_t proxy_main; #define TCP_MSS 1460 -typedef struct +static proxy_session_side_ctx_t * +proxy_session_side_ctx_alloc (proxy_worker_t *wrk) { - session_endpoint_cfg_t sep; - u32 app_index; - u32 api_context; -} proxy_connect_args_t; + proxy_session_side_ctx_t *ctx; + + pool_get_zero (wrk->ctx_pool, ctx); + ctx->sc_index = ctx - wrk->ctx_pool; + ctx->ps_index = ~0; + + return ctx; +} static void -proxy_cb_fn (void *data, u32 data_len) +proxy_session_side_ctx_free (proxy_worker_t *wrk, + proxy_session_side_ctx_t *ctx) { - proxy_connect_args_t *pa = (proxy_connect_args_t *) data; - vnet_connect_args_t a; + pool_put (wrk->ctx_pool, ctx); +} - clib_memset (&a, 0, sizeof (a)); - a.api_context = pa->api_context; - a.app_index = pa->app_index; - clib_memcpy (&a.sep_ext, &pa->sep, sizeof (pa->sep)); - vnet_connect (&a); - if (a.sep_ext.ext_cfg) - clib_mem_free (a.sep_ext.ext_cfg); +static proxy_session_side_ctx_t * +proxy_session_side_ctx_get (proxy_worker_t *wrk, u32 ctx_index) +{ + return pool_elt_at_index (wrk->ctx_pool, ctx_index); } static void -proxy_call_main_thread (vnet_connect_args_t * a) +proxy_send_http_resp (session_t *s, http_status_code_t sc, + http_header_t *resp_headers) { - if (vlib_get_thread_index () == 0) + http_msg_t msg; + int rv; + u8 *headers_buf = 0; + + if (vec_len (resp_headers)) { - vnet_connect (a); - if (a->sep_ext.ext_cfg) - clib_mem_free (a->sep_ext.ext_cfg); + headers_buf = http_serialize_headers (resp_headers); + msg.data.len = msg.data.headers_len = vec_len (headers_buf); } else + msg.data.len = msg.data.headers_len = 0; + + msg.type = HTTP_MSG_REPLY; + msg.code = sc; + msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.headers_offset = 0; + msg.data.body_len = 0; + msg.data.body_offset = 0; + rv = svm_fifo_enqueue (s->tx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + if (msg.data.headers_len) { - proxy_connect_args_t args; - args.api_context = a->api_context; - args.app_index = a->app_index; - clib_memcpy (&args.sep, &a->sep_ext, sizeof (a->sep_ext)); - vl_api_rpc_call_main_thread (proxy_cb_fn, (u8 *) & args, sizeof (args)); + rv = svm_fifo_enqueue (s->tx_fifo, vec_len (headers_buf), headers_buf); + ASSERT (rv == vec_len (headers_buf)); + vec_free (headers_buf); } + + if (svm_fifo_set_event (s->tx_fifo)) + session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); +} + +static void +proxy_do_connect (vnet_connect_args_t *a) +{ + ASSERT (session_vlib_thread_is_cl_thread ()); + vnet_connect (a); + session_endpoint_free_ext_cfgs (&a->sep_ext); +} + +static void +proxy_handle_connects_rpc (void *args) +{ + u32 thread_index = pointer_to_uword (args), n_connects = 0, n_pending; + proxy_worker_t 
*wrk; + u32 max_connects; + + wrk = proxy_worker_get (thread_index); + + clib_spinlock_lock (&wrk->pending_connects_lock); + + n_pending = clib_fifo_elts (wrk->pending_connects); + max_connects = clib_min (32, n_pending); + vec_validate (wrk->burst_connects, max_connects); + + while (n_connects < max_connects) + clib_fifo_sub1 (wrk->pending_connects, wrk->burst_connects[n_connects++]); + + clib_spinlock_unlock (&wrk->pending_connects_lock); + + /* Do connects without locking pending_connects */ + n_connects = 0; + while (n_connects < max_connects) + { + proxy_do_connect (&wrk->burst_connects[n_connects]); + n_connects += 1; + } + + /* More work to do, program rpc */ + if (max_connects < n_pending) + session_send_rpc_evt_to_thread_force ( + transport_cl_thread (), proxy_handle_connects_rpc, + uword_to_pointer ((uword) thread_index, void *)); +} + +static void +proxy_program_connect (vnet_connect_args_t *a) +{ + u32 connects_thread = transport_cl_thread (), thread_index, n_pending; + proxy_worker_t *wrk; + + thread_index = vlib_get_thread_index (); + + /* If already on first worker, handle request */ + if (thread_index == connects_thread) + { + proxy_do_connect (a); + return; + } + + /* If not on first worker, queue request */ + wrk = proxy_worker_get (thread_index); + + clib_spinlock_lock (&wrk->pending_connects_lock); + + clib_fifo_add1 (wrk->pending_connects, *a); + n_pending = clib_fifo_elts (wrk->pending_connects); + + clib_spinlock_unlock (&wrk->pending_connects_lock); + + if (n_pending == 1) + session_send_rpc_evt_to_thread_force ( + connects_thread, proxy_handle_connects_rpc, + uword_to_pointer ((uword) thread_index, void *)); } static proxy_session_t * @@ -85,16 +180,6 @@ proxy_session_get (u32 ps_index) return pool_elt_at_index (pm->sessions, ps_index); } -static inline proxy_session_t * -proxy_session_get_if_valid (u32 ps_index) -{ - proxy_main_t *pm = &proxy_main; - - if (pool_is_free_index (pm->sessions, ps_index)) - return 0; - return pool_elt_at_index (pm->sessions, ps_index); -} - static void proxy_session_free (proxy_session_t *ps) { @@ -115,7 +200,7 @@ proxy_session_postponed_free_rpc (void *arg) clib_spinlock_lock_if_init (&pm->sessions_lock); ps = proxy_session_get (ps_index); - segment_manager_dealloc_fifos (ps->server_rx_fifo, ps->server_tx_fifo); + segment_manager_dealloc_fifos (ps->po.rx_fifo, ps->po.tx_fifo); proxy_session_free (ps); clib_spinlock_unlock_if_init (&pm->sessions_lock); @@ -126,54 +211,79 @@ proxy_session_postponed_free_rpc (void *arg) static void proxy_session_postponed_free (proxy_session_t *ps) { - session_send_rpc_evt_to_thread (ps->po_thread_index, + /* Passive open session handle has been invalidated so we don't have thread + * index at this point */ + session_send_rpc_evt_to_thread (ps->po.rx_fifo->master_thread_index, proxy_session_postponed_free_rpc, uword_to_pointer (ps->ps_index, void *)); } static void +proxy_session_close_po (proxy_session_t *ps) +{ + vnet_disconnect_args_t _a = {}, *a = &_a; + proxy_main_t *pm = &proxy_main; + + ASSERT (!vlib_num_workers () || + CLIB_SPINLOCK_IS_LOCKED (&pm->sessions_lock)); + + a->handle = ps->po.session_handle; + a->app_index = pm->server_app_index; + vnet_disconnect_session (a); + + ps->po_disconnected = 1; +} + +static void +proxy_session_close_ao (proxy_session_t *ps) +{ + vnet_disconnect_args_t _a = {}, *a = &_a; + proxy_main_t *pm = &proxy_main; + + ASSERT (!vlib_num_workers () || + CLIB_SPINLOCK_IS_LOCKED (&pm->sessions_lock)); + + a->handle = ps->ao.session_handle; + a->app_index = 
pm->active_open_app_index; + vnet_disconnect_session (a); + + ps->ao_disconnected = 1; +} + +static void proxy_try_close_session (session_t * s, int is_active_open) { proxy_main_t *pm = &proxy_main; - proxy_session_t *ps = 0; - vnet_disconnect_args_t _a, *a = &_a; + proxy_session_side_ctx_t *sc; + proxy_session_t *ps; + proxy_worker_t *wrk; + + wrk = proxy_worker_get (s->thread_index); + sc = proxy_session_side_ctx_get (wrk, s->opaque); clib_spinlock_lock_if_init (&pm->sessions_lock); - ps = proxy_session_get (s->opaque); + ps = proxy_session_get (sc->ps_index); if (is_active_open) { - a->handle = ps->vpp_active_open_handle; - a->app_index = pm->active_open_app_index; - vnet_disconnect_session (a); - ps->ao_disconnected = 1; + proxy_session_close_ao (ps); if (!ps->po_disconnected) { - ASSERT (ps->vpp_server_handle != SESSION_INVALID_HANDLE); - a->handle = ps->vpp_server_handle; - a->app_index = pm->server_app_index; - vnet_disconnect_session (a); - ps->po_disconnected = 1; + ASSERT (ps->po.session_handle != SESSION_INVALID_HANDLE); + proxy_session_close_po (ps); } } else { - a->handle = ps->vpp_server_handle; - a->app_index = pm->server_app_index; - vnet_disconnect_session (a); - ps->po_disconnected = 1; + proxy_session_close_po (ps); if (!ps->ao_disconnected && !ps->active_open_establishing) { /* Proxy session closed before active open */ - if (ps->vpp_active_open_handle != SESSION_INVALID_HANDLE) - { - a->handle = ps->vpp_active_open_handle; - a->app_index = pm->active_open_app_index; - vnet_disconnect_session (a); - } + if (ps->ao.session_handle != SESSION_INVALID_HANDLE) + proxy_session_close_ao (ps); ps->ao_disconnected = 1; } } @@ -181,29 +291,63 @@ proxy_try_close_session (session_t * s, int is_active_open) } static void +proxy_try_side_ctx_cleanup (session_t *s) +{ + proxy_main_t *pm = &proxy_main; + proxy_session_t *ps; + proxy_session_side_ctx_t *sc; + proxy_worker_t *wrk; + + wrk = proxy_worker_get (s->thread_index); + sc = proxy_session_side_ctx_get (wrk, s->opaque); + if (sc->state == PROXY_SC_S_CREATED) + return; + + clib_spinlock_lock_if_init (&pm->sessions_lock); + + ps = proxy_session_get (sc->ps_index); + + if (!ps->po_disconnected) + proxy_session_close_po (ps); + + if (!ps->ao_disconnected) + proxy_session_close_ao (ps); + + clib_spinlock_unlock_if_init (&pm->sessions_lock); +} + +static void proxy_try_delete_session (session_t * s, u8 is_active_open) { proxy_main_t *pm = &proxy_main; proxy_session_t *ps = 0; + proxy_session_side_ctx_t *sc; + proxy_worker_t *wrk; + u32 ps_index; + + wrk = proxy_worker_get (s->thread_index); + sc = proxy_session_side_ctx_get (wrk, s->opaque); + ps_index = sc->ps_index; + + proxy_session_side_ctx_free (wrk, sc); clib_spinlock_lock_if_init (&pm->sessions_lock); - ps = proxy_session_get (s->opaque); + ps = proxy_session_get (ps_index); if (is_active_open) { - ps->vpp_active_open_handle = SESSION_INVALID_HANDLE; + ps->ao.session_handle = SESSION_INVALID_HANDLE; /* Revert master thread index change on connect notification */ - ps->server_rx_fifo->master_thread_index = ps->po_thread_index; + ps->po.rx_fifo->master_thread_index = + ps->po.tx_fifo->master_thread_index; /* Passive open already cleaned up */ - if (ps->vpp_server_handle == SESSION_INVALID_HANDLE) + if (ps->po.session_handle == SESSION_INVALID_HANDLE) { - ASSERT (s->rx_fifo->refcnt == 1); - /* The two sides of the proxy on different threads */ - if (ps->po_thread_index != s->thread_index) + if (ps->po.tx_fifo->master_thread_index != s->thread_index) { /* This is not the right thread 
to delete the fifos */ s->rx_fifo = 0; @@ -211,14 +355,17 @@ proxy_try_delete_session (session_t * s, u8 is_active_open) proxy_session_postponed_free (ps); } else - proxy_session_free (ps); + { + ASSERT (s->rx_fifo->refcnt == 1); + proxy_session_free (ps); + } } } else { - ps->vpp_server_handle = SESSION_INVALID_HANDLE; + ps->po.session_handle = SESSION_INVALID_HANDLE; - if (ps->vpp_active_open_handle == SESSION_INVALID_HANDLE) + if (ps->ao.session_handle == SESSION_INVALID_HANDLE) { if (!ps->active_open_establishing) proxy_session_free (ps); @@ -275,16 +422,26 @@ static int proxy_accept_callback (session_t * s) { proxy_main_t *pm = &proxy_main; + proxy_session_side_ctx_t *sc; proxy_session_t *ps; + proxy_worker_t *wrk; + transport_proto_t tp = session_get_transport_proto (s); + + wrk = proxy_worker_get (s->thread_index); + sc = proxy_session_side_ctx_alloc (wrk); + s->opaque = sc->sc_index; clib_spinlock_lock_if_init (&pm->sessions_lock); ps = proxy_session_alloc (); - ps->vpp_server_handle = session_handle (s); - ps->vpp_active_open_handle = SESSION_INVALID_HANDLE; - ps->po_thread_index = s->thread_index; - s->opaque = ps->ps_index; + ps->po.session_handle = session_handle (s); + ps->po.rx_fifo = s->rx_fifo; + ps->po.tx_fifo = s->tx_fifo; + + ps->ao.session_handle = SESSION_INVALID_HANDLE; + sc->ps_index = ps->ps_index; + sc->is_http = tp == TRANSPORT_PROTO_HTTP ? 1 : 0; clib_spinlock_unlock_if_init (&pm->sessions_lock); @@ -325,98 +482,166 @@ proxy_transport_needs_crypto (transport_proto_t proto) return proto == TRANSPORT_PROTO_TLS; } -static int -proxy_rx_callback (session_t * s) +static void +proxy_session_start_connect (proxy_session_side_ctx_t *sc, session_t *s) { + int actual_transfer __attribute__ ((unused)); + vnet_connect_args_t _a = {}, *a = &_a; proxy_main_t *pm = &proxy_main; - u32 thread_index = vlib_get_thread_index (); - svm_fifo_t *ao_tx_fifo; + u32 max_dequeue, ps_index; proxy_session_t *ps; - - ASSERT (s->thread_index == thread_index); + transport_proto_t tp = session_get_transport_proto (s); clib_spinlock_lock_if_init (&pm->sessions_lock); - ps = proxy_session_get (s->opaque); + ps = proxy_session_get (sc->ps_index); - if (PREDICT_TRUE (ps->vpp_active_open_handle != SESSION_INVALID_HANDLE)) + /* maybe we were already here */ + if (ps->active_open_establishing) { clib_spinlock_unlock_if_init (&pm->sessions_lock); + return; + } - ao_tx_fifo = s->rx_fifo; + ps->active_open_establishing = 1; + ps_index = ps->ps_index; - /* - * Send event for active open tx fifo - */ - if (svm_fifo_set_event (ao_tx_fifo)) + clib_spinlock_unlock_if_init (&pm->sessions_lock); + + if (tp == TRANSPORT_PROTO_HTTP) + { + http_msg_t msg; + u8 *target_buf = 0; + http_uri_t target_uri; + http_header_t *resp_headers = 0; + session_endpoint_cfg_t target_sep = SESSION_ENDPOINT_CFG_NULL; + int rv; + + rv = svm_fifo_dequeue (s->rx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + + if (msg.type != HTTP_MSG_REQUEST) + { + proxy_send_http_resp (s, HTTP_STATUS_INTERNAL_ERROR, 0); + return; + } + if (msg.method_type != HTTP_REQ_CONNECT) + { + http_add_header (&resp_headers, + http_header_name_token (HTTP_HEADER_ALLOW), + http_token_lit ("CONNECT")); + proxy_send_http_resp (s, HTTP_STATUS_METHOD_NOT_ALLOWED, + resp_headers); + vec_free (resp_headers); + return; + } + + if (msg.data.target_form != HTTP_TARGET_AUTHORITY_FORM || + msg.data.target_path_len == 0) { - u32 ao_thread_index = ao_tx_fifo->master_thread_index; - u32 ao_session_index = ao_tx_fifo->shr->master_session_index; - if 
(session_send_io_evt_to_thread_custom (&ao_session_index, - ao_thread_index, - SESSION_IO_EVT_TX)) - clib_warning ("failed to enqueue tx evt"); + proxy_send_http_resp (s, HTTP_STATUS_BAD_REQUEST, 0); + return; } - if (svm_fifo_max_enqueue (ao_tx_fifo) <= TCP_MSS) - svm_fifo_add_want_deq_ntf (ao_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + /* read target uri */ + target_buf = vec_new (u8, msg.data.target_path_len); + rv = svm_fifo_peek (s->rx_fifo, msg.data.target_path_offset, + msg.data.target_path_len, target_buf); + ASSERT (rv == msg.data.target_path_len); + svm_fifo_dequeue_drop (s->rx_fifo, msg.data.len); + rv = http_parse_authority_form_target (target_buf, &target_uri); + vec_free (target_buf); + if (rv) + { + proxy_send_http_resp (s, HTTP_STATUS_BAD_REQUEST, 0); + return; + } + target_sep.is_ip4 = target_uri.is_ip4; + target_sep.ip = target_uri.ip; + target_sep.port = target_uri.port; + target_sep.transport_proto = TRANSPORT_PROTO_TCP; + clib_memcpy (&a->sep_ext, &target_sep, sizeof (target_sep)); } else { - vnet_connect_args_t _a, *a = &_a; - svm_fifo_t *tx_fifo, *rx_fifo; - u32 max_dequeue, ps_index; - int actual_transfer __attribute__ ((unused)); + max_dequeue = svm_fifo_max_dequeue_cons (s->rx_fifo); + if (PREDICT_FALSE (max_dequeue == 0)) + return; - /* maybe we were already here */ - if (ps->active_open_establishing) - { - clib_spinlock_unlock_if_init (&pm->sessions_lock); - return 0; - } + max_dequeue = clib_min (pm->rcv_buffer_size, max_dequeue); + actual_transfer = + svm_fifo_peek (s->rx_fifo, 0 /* relative_offset */, max_dequeue, + pm->rx_buf[s->thread_index]); - rx_fifo = s->rx_fifo; - tx_fifo = s->tx_fifo; + /* Expectation is that here actual data just received is parsed and based + * on its contents, the destination and parameters of the connect to the + * upstream are decided + */ - ASSERT (rx_fifo->master_thread_index == thread_index); - ASSERT (tx_fifo->master_thread_index == thread_index); + clib_memcpy (&a->sep_ext, &pm->client_sep, sizeof (pm->client_sep)); + } - max_dequeue = svm_fifo_max_dequeue_cons (s->rx_fifo); + a->api_context = ps_index; + a->app_index = pm->active_open_app_index; - if (PREDICT_FALSE (max_dequeue == 0)) + if (proxy_transport_needs_crypto (a->sep.transport_proto)) + { + transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = pm->ckpair_index; + } + + proxy_program_connect (a); +} + +static int +proxy_rx_callback (session_t *s) +{ + proxy_session_side_ctx_t *sc; + svm_fifo_t *ao_tx_fifo; + proxy_session_t *ps; + proxy_worker_t *wrk; + + ASSERT (s->thread_index == vlib_get_thread_index ()); + + wrk = proxy_worker_get (s->thread_index); + sc = proxy_session_side_ctx_get (wrk, s->opaque); + + if (PREDICT_FALSE (sc->state < PROXY_SC_S_ESTABLISHED)) + { + proxy_main_t *pm = &proxy_main; + + if (sc->state == PROXY_SC_S_CREATED) { - clib_spinlock_unlock_if_init (&pm->sessions_lock); + proxy_session_start_connect (sc, s); + sc->state = PROXY_SC_S_CONNECTING; return 0; } - max_dequeue = clib_min (pm->rcv_buffer_size, max_dequeue); - actual_transfer = svm_fifo_peek (rx_fifo, 0 /* relative_offset */ , - max_dequeue, pm->rx_buf[thread_index]); + clib_spinlock_lock_if_init (&pm->sessions_lock); - /* $$$ your message in this space: parse url, etc. 
*/ + ps = proxy_session_get (sc->ps_index); + sc->pair = ps->ao; - clib_memset (a, 0, sizeof (*a)); + clib_spinlock_unlock_if_init (&pm->sessions_lock); - ps->server_rx_fifo = rx_fifo; - ps->server_tx_fifo = tx_fifo; - ps->active_open_establishing = 1; - ps_index = ps->ps_index; + if (sc->pair.session_handle == SESSION_INVALID_HANDLE) + return 0; - clib_spinlock_unlock_if_init (&pm->sessions_lock); + sc->state = PROXY_SC_S_ESTABLISHED; + } - clib_memcpy (&a->sep_ext, &pm->client_sep, sizeof (pm->client_sep)); - a->api_context = ps_index; - a->app_index = pm->active_open_app_index; + ao_tx_fifo = s->rx_fifo; - if (proxy_transport_needs_crypto (a->sep.transport_proto)) - { - session_endpoint_alloc_ext_cfg (&a->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - a->sep_ext.ext_cfg->crypto.ckpair_index = pm->ckpair_index; - } + /* + * Send event for active open tx fifo + */ + if (svm_fifo_set_event (ao_tx_fifo)) + session_program_tx_io_evt (sc->pair.session_handle, SESSION_IO_EVT_TX); - proxy_call_main_thread (a); - } + if (svm_fifo_max_enqueue (ao_tx_fifo) <= TCP_MSS) + svm_fifo_add_want_deq_ntf (ao_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); return 0; } @@ -437,8 +662,8 @@ proxy_force_ack (void *handlep) static int proxy_tx_callback (session_t * proxy_s) { - proxy_main_t *pm = &proxy_main; - proxy_session_t *ps; + proxy_session_side_ctx_t *sc; + proxy_worker_t *wrk; u32 min_free; min_free = clib_min (svm_fifo_size (proxy_s->tx_fifo) >> 3, 128 << 10); @@ -448,21 +673,17 @@ proxy_tx_callback (session_t * proxy_s) return 0; } - clib_spinlock_lock_if_init (&pm->sessions_lock); - - ps = proxy_session_get (proxy_s->opaque); - - if (ps->vpp_active_open_handle == SESSION_INVALID_HANDLE) - goto unlock; + wrk = proxy_worker_get (proxy_s->thread_index); + sc = proxy_session_side_ctx_get (wrk, proxy_s->opaque); + if (sc->state < PROXY_SC_S_ESTABLISHED) + return 0; /* Force ack on active open side to update rcv wnd. Make sure it's done on * the right thread */ - void *arg = uword_to_pointer (ps->vpp_active_open_handle, void *); - session_send_rpc_evt_to_thread (ps->server_rx_fifo->master_thread_index, - proxy_force_ack, arg); - -unlock: - clib_spinlock_unlock_if_init (&pm->sessions_lock); + void *arg = uword_to_pointer (sc->pair.session_handle, void *); + session_send_rpc_evt_to_thread ( + session_thread_from_handle (sc->pair.session_handle), proxy_force_ack, + arg); return 0; } @@ -471,7 +692,10 @@ static void proxy_cleanup_callback (session_t * s, session_cleanup_ntf_t ntf) { if (ntf == SESSION_CLEANUP_TRANSPORT) - return; + { + proxy_try_side_ctx_cleanup (s); + return; + } proxy_try_delete_session (s, 0 /* is_active_open */ ); } @@ -497,10 +721,17 @@ active_open_alloc_session_fifos (session_t *s) clib_spinlock_lock_if_init (&pm->sessions_lock); + /* Active open opaque is pointing at proxy session */ ps = proxy_session_get (s->opaque); - txf = ps->server_rx_fifo; - rxf = ps->server_tx_fifo; + if (ps->po_disconnected) + { + clib_spinlock_unlock_if_init (&pm->sessions_lock); + return SESSION_E_ALLOC; + } + + txf = ps->po.rx_fifo; + rxf = ps->po.tx_fifo; /* * Reset the active-open tx-fifo master indices so the active-open session @@ -531,31 +762,43 @@ active_open_connected_callback (u32 app_index, u32 opaque, { proxy_main_t *pm = &proxy_main; proxy_session_t *ps; - u8 thread_index = vlib_get_thread_index (); - - /* - * Setup proxy session handle. 
- */ - clib_spinlock_lock_if_init (&pm->sessions_lock); - - ps = proxy_session_get (opaque); + proxy_worker_t *wrk; + proxy_session_side_ctx_t *sc; + session_t *po_s; + transport_proto_t tp; /* Connection failed */ if (err) { - vnet_disconnect_args_t _a, *a = &_a; + clib_spinlock_lock_if_init (&pm->sessions_lock); - a->handle = ps->vpp_server_handle; - a->app_index = pm->server_app_index; - vnet_disconnect_session (a); - ps->po_disconnected = 1; - } - else - { - ps->vpp_active_open_handle = session_handle (s); - ps->active_open_establishing = 0; + ps = proxy_session_get (opaque); + po_s = session_get_from_handle (ps->po.session_handle); + tp = session_get_transport_proto (po_s); + if (tp == TRANSPORT_PROTO_HTTP) + { + proxy_send_http_resp (po_s, HTTP_STATUS_BAD_GATEWAY, 0); + } + ps->ao_disconnected = 1; + proxy_session_close_po (ps); + + clib_spinlock_unlock_if_init (&pm->sessions_lock); + + return 0; } + wrk = proxy_worker_get (s->thread_index); + + clib_spinlock_lock_if_init (&pm->sessions_lock); + + ps = proxy_session_get (opaque); + + ps->ao.rx_fifo = s->rx_fifo; + ps->ao.tx_fifo = s->tx_fifo; + ps->ao.session_handle = session_handle (s); + + ps->active_open_establishing = 0; + /* Passive open session was already closed! */ if (ps->po_disconnected) { @@ -565,21 +808,136 @@ active_open_connected_callback (u32 app_index, u32 opaque, return -1; } - s->opaque = opaque; + po_s = session_get_from_handle (ps->po.session_handle); + tp = session_get_transport_proto (po_s); + + sc = proxy_session_side_ctx_alloc (wrk); + sc->pair = ps->po; + sc->ps_index = ps->ps_index; clib_spinlock_unlock_if_init (&pm->sessions_lock); - /* - * Send event for active open tx fifo - */ - ASSERT (s->thread_index == thread_index); - if (svm_fifo_set_event (s->tx_fifo)) - session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX); + sc->state = PROXY_SC_S_ESTABLISHED; + s->opaque = sc->sc_index; + sc->is_http = tp == TRANSPORT_PROTO_HTTP ? 
1 : 0; + + if (tp == TRANSPORT_PROTO_HTTP) + { + proxy_send_http_resp (po_s, HTTP_STATUS_OK, 0); + } + else + { + /* + * Send event for active open tx fifo + */ + ASSERT (s->thread_index == vlib_get_thread_index ()); + if (svm_fifo_set_event (s->tx_fifo)) + session_program_tx_io_evt (session_handle (s), SESSION_IO_EVT_TX); + } return 0; } static void +active_open_migrate_po_fixup_rpc (void *arg) +{ + u32 ps_index = pointer_to_uword (arg); + proxy_session_side_ctx_t *po_sc; + proxy_main_t *pm = &proxy_main; + session_handle_t po_sh; + proxy_worker_t *wrk; + proxy_session_t *ps; + session_t *po_s; + + wrk = proxy_worker_get (vlib_get_thread_index ()); + + clib_spinlock_lock_if_init (&pm->sessions_lock); + + ps = proxy_session_get (ps_index); + + po_s = session_get_from_handle (ps->po.session_handle); + po_s->rx_fifo = ps->po.rx_fifo; + po_s->tx_fifo = ps->po.tx_fifo; + + po_sc = proxy_session_side_ctx_get (wrk, po_s->opaque); + po_sc->pair = ps->ao; + po_sh = ps->po.session_handle; + + clib_spinlock_unlock_if_init (&pm->sessions_lock); + + session_program_tx_io_evt (po_sh, SESSION_IO_EVT_TX); +} + +static void +active_open_migrate_rpc (void *arg) +{ + u32 ps_index = pointer_to_uword (arg); + proxy_main_t *pm = &proxy_main; + proxy_session_side_ctx_t *sc; + proxy_worker_t *wrk; + proxy_session_t *ps; + session_t *s; + + wrk = proxy_worker_get (vlib_get_thread_index ()); + sc = proxy_session_side_ctx_alloc (wrk); + + clib_spinlock_lock_if_init (&pm->sessions_lock); + + ps = proxy_session_get (ps_index); + sc->ps_index = ps->ps_index; + + s = session_get_from_handle (ps->ao.session_handle); + s->opaque = sc->sc_index; + s->flags &= ~SESSION_F_IS_MIGRATING; + + /* Fixup passive open session because of migration and zc */ + ps->ao.rx_fifo = ps->po.tx_fifo = s->rx_fifo; + ps->ao.tx_fifo = ps->po.rx_fifo = s->tx_fifo; + + ps->po.tx_fifo->shr->master_session_index = + session_index_from_handle (ps->po.session_handle); + ps->po.tx_fifo->master_thread_index = + session_thread_from_handle (ps->po.session_handle); + + sc->pair = ps->po; + + clib_spinlock_unlock_if_init (&pm->sessions_lock); + + session_send_rpc_evt_to_thread ( + session_thread_from_handle (sc->pair.session_handle), + active_open_migrate_po_fixup_rpc, uword_to_pointer (sc->ps_index, void *)); +} + +static void +active_open_migrate_callback (session_t *s, session_handle_t new_sh) +{ + proxy_main_t *pm = &proxy_main; + proxy_session_side_ctx_t *sc; + proxy_session_t *ps; + proxy_worker_t *wrk; + + wrk = proxy_worker_get (s->thread_index); + sc = proxy_session_side_ctx_get (wrk, s->opaque); + + /* NOTE: this is just an example. ZC makes this migration rather + * tedious. 
Probably better approaches could be found */ + clib_spinlock_lock_if_init (&pm->sessions_lock); + + ps = proxy_session_get (sc->ps_index); + ps->ao.session_handle = new_sh; + ps->ao.rx_fifo = 0; + ps->ao.tx_fifo = 0; + + clib_spinlock_unlock_if_init (&pm->sessions_lock); + + session_send_rpc_evt_to_thread (session_thread_from_handle (new_sh), + active_open_migrate_rpc, + uword_to_pointer (sc->ps_index, void *)); + + proxy_session_side_ctx_free (wrk, sc); +} + +static void active_open_reset_callback (session_t * s) { proxy_try_close_session (s, 1 /* is_active_open */ ); @@ -625,8 +983,8 @@ active_open_rx_callback (session_t * s) static int active_open_tx_callback (session_t * ao_s) { - proxy_main_t *pm = &proxy_main; - proxy_session_t *ps; + proxy_session_side_ctx_t *sc; + proxy_worker_t *wrk; u32 min_free; min_free = clib_min (svm_fifo_size (ao_s->tx_fifo) >> 3, 128 << 10); @@ -636,22 +994,27 @@ active_open_tx_callback (session_t * ao_s) return 0; } - clib_spinlock_lock_if_init (&pm->sessions_lock); - - ps = proxy_session_get_if_valid (ao_s->opaque); - if (!ps) - goto unlock; + wrk = proxy_worker_get (ao_s->thread_index); + sc = proxy_session_side_ctx_get (wrk, ao_s->opaque); - if (ps->vpp_server_handle == SESSION_INVALID_HANDLE) - goto unlock; - - /* Force ack on proxy side to update rcv wnd */ - void *arg = uword_to_pointer (ps->vpp_server_handle, void *); - session_send_rpc_evt_to_thread ( - session_thread_from_handle (ps->vpp_server_handle), proxy_force_ack, arg); + if (sc->state < PROXY_SC_S_ESTABLISHED) + return 0; -unlock: - clib_spinlock_unlock_if_init (&pm->sessions_lock); + if (sc->is_http) + { + /* notify HTTP transport */ + session_t *po = session_get_from_handle (sc->pair.session_handle); + session_send_io_evt_to_thread_custom ( + &po->session_index, po->thread_index, SESSION_IO_EVT_RX); + } + else + { + /* Force ack on proxy side to update rcv wnd */ + void *arg = uword_to_pointer (sc->pair.session_handle, void *); + session_send_rpc_evt_to_thread ( + session_thread_from_handle (sc->pair.session_handle), proxy_force_ack, + arg); + } return 0; } @@ -668,6 +1031,7 @@ active_open_cleanup_callback (session_t * s, session_cleanup_ntf_t ntf) static session_cb_vft_t active_open_clients = { .session_reset_callback = active_open_reset_callback, .session_connected_callback = active_open_connected_callback, + .session_migrate_callback = active_open_migrate_callback, .session_accept_callback = active_open_create_callback, .session_disconnect_callback = active_open_disconnect_callback, .session_cleanup_callback = active_open_cleanup_callback, @@ -760,22 +1124,33 @@ proxy_server_listen () { proxy_main_t *pm = &proxy_main; vnet_listen_args_t _a, *a = &_a; - int rv; + int rv, need_crypto; clib_memset (a, 0, sizeof (*a)); a->app_index = pm->server_app_index; clib_memcpy (&a->sep_ext, &pm->server_sep, sizeof (pm->server_sep)); - if (proxy_transport_needs_crypto (a->sep.transport_proto)) + /* Make sure listener is marked connected for transports like udp */ + a->sep_ext.transport_flags = TRANSPORT_CFG_F_CONNECTED; + need_crypto = proxy_transport_needs_crypto (a->sep.transport_proto); + if (need_crypto) { - session_endpoint_alloc_ext_cfg (&a->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - a->sep_ext.ext_cfg->crypto.ckpair_index = pm->ckpair_index; + transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = pm->ckpair_index; + } + /* set http timeout for connect-proxy 
*/ + if (pm->server_sep.transport_proto == TRANSPORT_PROTO_HTTP) + { + transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque)); + ext_cfg->opaque = pm->idle_timeout; } rv = vnet_listen (a); - if (a->sep_ext.ext_cfg) - clib_mem_free (a->sep_ext.ext_cfg); + if (need_crypto) + session_endpoint_free_ext_cfgs (&a->sep_ext); return rv; } @@ -801,15 +1176,25 @@ proxy_server_create (vlib_main_t * vm) { vlib_thread_main_t *vtm = vlib_get_thread_main (); proxy_main_t *pm = &proxy_main; + proxy_worker_t *wrk; u32 num_threads; int i; + if (vlib_num_workers ()) + clib_spinlock_init (&pm->sessions_lock); + num_threads = 1 /* main thread */ + vtm->n_threads; vec_validate (pm->rx_buf, num_threads - 1); for (i = 0; i < num_threads; i++) vec_validate (pm->rx_buf[i], pm->rcv_buffer_size); + vec_validate (pm->workers, vlib_num_workers ()); + vec_foreach (wrk, pm->workers) + { + clib_spinlock_init (&wrk->pending_connects_lock); + } + proxy_server_add_ckpair (); if (proxy_server_attach ()) @@ -817,11 +1202,6 @@ proxy_server_create (vlib_main_t * vm) clib_warning ("failed to attach server app"); return -1; } - if (proxy_server_listen ()) - { - clib_warning ("failed to start listening"); - return -1; - } if (active_open_attach ()) { clib_warning ("failed to attach active open app"); @@ -853,9 +1233,6 @@ proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input, pm->private_segment_count = 0; pm->segment_size = 512 << 20; - if (vlib_num_workers ()) - clib_spinlock_init (&pm->sessions_lock); - if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -887,6 +1264,8 @@ proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input, vec_add1 (server_uri, 0); else if (unformat (line_input, "client-uri %s", &client_uri)) vec_add1 (client_uri, 0); + else if (unformat (line_input, "idle-timeout %d", &pm->idle_timeout)) + ; else { error = clib_error_return (0, "unknown input `%U'", @@ -901,38 +1280,45 @@ proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input, default_server_uri); server_uri = format (0, "%s%c", default_server_uri, 0); } - if (!client_uri) - { - clib_warning ("No client-uri provided, Using default: %s", - default_client_uri); - client_uri = format (0, "%s%c", default_client_uri, 0); - } - if (parse_uri ((char *) server_uri, &pm->server_sep)) { error = clib_error_return (0, "Invalid server uri %v", server_uri); goto done; } - if (parse_uri ((char *) client_uri, &pm->client_sep)) + + /* http proxy get target within request */ + if (pm->server_sep.transport_proto != TRANSPORT_PROTO_HTTP) { - error = clib_error_return (0, "Invalid client uri %v", client_uri); - goto done; + if (!client_uri) + { + clib_warning ("No client-uri provided, Using default: %s", + default_client_uri); + client_uri = format (0, "%s%c", default_client_uri, 0); + } + if (parse_uri ((char *) client_uri, &pm->client_sep)) + { + error = clib_error_return (0, "Invalid client uri %v", client_uri); + goto done; + } } - session_enable_disable_args_t args = { .is_en = 1, - .rt_engine_type = - RT_BACKEND_ENGINE_RULE_TABLE }; - vnet_session_enable_disable (vm, &args); - - rv = proxy_server_create (vm); - switch (rv) + if (pm->server_app_index == APP_INVALID_INDEX) { - case 0: - break; - default: - error = clib_error_return (0, "server_create returned %d", rv); + session_enable_disable_args_t args = { .is_en = 1, + .rt_engine_type = + RT_BACKEND_ENGINE_RULE_TABLE }; + vnet_session_enable_disable (vm, 
&args);
+      rv = proxy_server_create (vm);
+      if (rv)
+	{
+	  error = clib_error_return (0, "server_create returned %d", rv);
+	  goto done;
+	}
     }
 
+  if (proxy_server_listen ())
+    error = clib_error_return (0, "failed to start listening");
+
 done:
   unformat_free (line_input);
   vec_free (client_uri);
@@ -940,14 +1326,14 @@ done:
   return error;
 }
 
-VLIB_CLI_COMMAND (proxy_create_command, static) =
-{
+VLIB_CLI_COMMAND (proxy_create_command, static) = {
   .path = "test proxy server",
-  .short_help = "test proxy server [server-uri <tcp://ip/port>]"
-		"[client-uri <tcp://ip/port>][fifo-size <nn>[k|m]]"
-		"[max-fifo-size <nn>[k|m]][high-watermark <nn>]"
-		"[low-watermark <nn>][rcv-buf-size <nn>][prealloc-fifos <nn>]"
-		"[private-segment-size <mem>][private-segment-count <nn>]",
+  .short_help = "test proxy server [server-uri <proto://ip/port>]"
+		"[client-uri <tcp://ip/port>][fifo-size <nn>[k|m]]"
+		"[max-fifo-size <nn>[k|m]][high-watermark <nn>]"
+		"[low-watermark <nn>][rcv-buf-size <nn>][prealloc-fifos <nn>]"
+		"[private-segment-size <mem>][private-segment-count <nn>]"
+		"[idle-timeout <nn>]",
   .function = proxy_server_create_command_fn,
 };
 
@@ -957,6 +1343,8 @@ proxy_main_init (vlib_main_t * vm)
   proxy_main_t *pm = &proxy_main;
   pm->server_client_index = ~0;
   pm->active_open_client_index = ~0;
+  pm->server_app_index = APP_INVALID_INDEX;
+  pm->idle_timeout = 600; /* connect-proxy default idle timeout 10 minutes */
 
   return 0;
 }
diff --git a/src/plugins/hs_apps/proxy.h b/src/plugins/hs_apps/proxy.h
index 26f4de2f729..75567e4c1ba 100644
--- a/src/plugins/hs_apps/proxy.h
+++ b/src/plugins/hs_apps/proxy.h
@@ -26,23 +26,57 @@
 #include <vnet/session/session.h>
 #include <vnet/session/application_interface.h>
 
+#define foreach_proxy_session_side_state                                     \
+  _ (CREATED, "created")                                                      \
+  _ (CONNECTING, "connecting")                                                \
+  _ (ESTABLISHED, "established")                                              \
+  _ (CLOSED, "closed")
+
+typedef enum proxy_session_side_state_
+{
+#define _(sym, str) PROXY_SC_S_##sym,
+  foreach_proxy_session_side_state
+#undef _
+} proxy_session_side_state_t;
+typedef struct proxy_session_side_
+{
+  session_handle_t session_handle;
+  svm_fifo_t *rx_fifo;
+  svm_fifo_t *tx_fifo;
+} proxy_session_side_t;
+
+typedef struct proxy_session_side_ctx_
+{
+  proxy_session_side_t pair;
+  proxy_session_side_state_t state;
+  u32 sc_index;
+  u32 ps_index;
+  u8 is_http;
+} proxy_session_side_ctx_t;
+
 typedef struct
 {
-  svm_fifo_t *server_rx_fifo;
-  svm_fifo_t *server_tx_fifo;
+  proxy_session_side_t po; /**< passive open side */
+  proxy_session_side_t ao; /**< active open side */
 
-  session_handle_t vpp_server_handle;
-  session_handle_t vpp_active_open_handle;
   volatile int active_open_establishing;
   volatile int po_disconnected;
   volatile int ao_disconnected;
 
   u32 ps_index;
-  u32 po_thread_index;
 } proxy_session_t;
 
+typedef struct proxy_worker_
+{
+  proxy_session_side_ctx_t *ctx_pool;
+  clib_spinlock_t pending_connects_lock;
+  vnet_connect_args_t *pending_connects;
+  vnet_connect_args_t *burst_connects;
+} proxy_worker_t;
+
 typedef struct
 {
+  proxy_worker_t *workers;	 /**< per-thread data */
   proxy_session_t *sessions;	 /**< session pool, shared */
   clib_spinlock_t sessions_lock; /**< lock for session pool */
   u8 **rx_buf;			 /**< intermediate rx buffers */
@@ -63,6 +97,7 @@ typedef struct
   u32 private_segment_count;	/**< Number of private fifo segs */
   u64 segment_size;		/**< size of fifo segs */
   u8 prealloc_fifos;		/**< Request fifo preallocation */
+  u32 idle_timeout;		/**< connect-proxy timeout for idle connections */
   int rcv_buffer_size;
   session_endpoint_cfg_t server_sep;
session_endpoint_cfg_t client_sep; @@ -75,6 +110,13 @@ typedef struct extern proxy_main_t proxy_main; +static inline proxy_worker_t * +proxy_worker_get (u32 thread_index) +{ + proxy_main_t *pm = &proxy_main; + return vec_elt_at_index (pm->workers, thread_index); +} + #endif /* __included_proxy_h__ */ /* diff --git a/src/plugins/hs_apps/sapi/vpp_echo_common.c b/src/plugins/hs_apps/sapi/vpp_echo_common.c index 5ce04d1b75b..09ba583cf78 100644 --- a/src/plugins/hs_apps/sapi/vpp_echo_common.c +++ b/src/plugins/hs_apps/sapi/vpp_echo_common.c @@ -330,8 +330,8 @@ format_transport_proto (u8 * s, va_list * args) case TRANSPORT_PROTO_UDP: s = format (s, "UDP"); break; - case TRANSPORT_PROTO_NONE: - s = format (s, "NONE"); + case TRANSPORT_PROTO_CT: + s = format (s, "CT"); break; case TRANSPORT_PROTO_TLS: s = format (s, "TLS"); diff --git a/src/plugins/hs_apps/test_builtins.c b/src/plugins/hs_apps/test_builtins.c index 631c1f1a8a2..c314e71b5df 100644 --- a/src/plugins/hs_apps/test_builtins.c +++ b/src/plugins/hs_apps/test_builtins.c @@ -16,6 +16,7 @@ typedef struct tb_main_ tw_timer_elt_t *delayed_resps; tw_timer_wheel_2t_1w_2048sl_t tw; hss_session_send_fn send_data; + u8 *test_data; } tb_main_t; static tb_main_t tb_main; @@ -51,7 +52,7 @@ VLIB_REGISTER_NODE (test_builtins_timer_process_node) = { }; static void -send_data_to_hss (hss_session_handle_t sh, u8 *data) +send_data_to_hss (hss_session_handle_t sh, u8 *data, u8 free_vec_data) { tb_main_t *tbm = &tb_main; hss_url_handler_args_t args = {}; @@ -61,7 +62,7 @@ send_data_to_hss (hss_session_handle_t sh, u8 *data) args.data_len = vec_len (data); args.ct = HTTP_CONTENT_TEXT_PLAIN; args.sc = HTTP_STATUS_OK; - args.free_vec_data = 1; + args.free_vec_data = free_vec_data; tbm->send_data (&args); } @@ -73,7 +74,7 @@ handle_get_test1 (hss_url_handler_args_t *args) clib_warning ("get request on test1"); data = format (0, "hello"); - send_data_to_hss (args->sh, data); + send_data_to_hss (args->sh, data, 1); return HSS_URL_HANDLER_ASYNC; } @@ -85,7 +86,7 @@ handle_get_test2 (hss_url_handler_args_t *args) clib_warning ("get request on test2"); data = format (0, "some data"); - send_data_to_hss (args->sh, data); + send_data_to_hss (args->sh, data, 1); return HSS_URL_HANDLER_ASYNC; } @@ -105,7 +106,7 @@ delayed_resp_cb (u32 *expired_timers) e = pool_elt_at_index (tbm->delayed_resps, pool_index); clib_warning ("sending delayed data"); data = format (0, "delayed data"); - send_data_to_hss (e->sh, data); + send_data_to_hss (e->sh, data, 1); pool_put (tbm->delayed_resps, e); } } @@ -128,7 +129,15 @@ handle_get_test_delayed (hss_url_handler_args_t *args) static hss_url_handler_rc_t handle_post_test3 (hss_url_handler_args_t *args) { - send_data_to_hss (args->sh, 0); + send_data_to_hss (args->sh, 0, 0); + return HSS_URL_HANDLER_ASYNC; +} + +static hss_url_handler_rc_t +handle_get_64bytes (hss_url_handler_args_t *args) +{ + tb_main_t *tbm = &tb_main; + send_data_to_hss (args->sh, tbm->test_data, 0); return HSS_URL_HANDLER_ASYNC; } @@ -148,10 +157,14 @@ test_builtins_init (vlib_main_t *vm) return; } + tbm->test_data = format ( + 0, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); + (*fp) (handle_get_test1, "test1", HTTP_REQ_GET); (*fp) (handle_get_test2, "test2", HTTP_REQ_GET); (*fp) (handle_get_test_delayed, "test_delayed", HTTP_REQ_GET); (*fp) (handle_post_test3, "test3", HTTP_REQ_POST); + (*fp) (handle_get_64bytes, "64B", HTTP_REQ_GET); tbm->send_data = vlib_get_plugin_symbol ("http_static_plugin.so", "hss_session_send_data"); diff --git 
a/src/plugins/hs_apps/vcl/vcl_test_server.c b/src/plugins/hs_apps/vcl/vcl_test_server.c index 5de53173784..008539f2585 100644 --- a/src/plugins/hs_apps/vcl/vcl_test_server.c +++ b/src/plugins/hs_apps/vcl/vcl_test_server.c @@ -416,36 +416,41 @@ static void vcl_test_init_endpoint_addr (vcl_test_server_main_t * vsm) { struct sockaddr_storage *servaddr = &vsm->servaddr; - memset (servaddr, 0, sizeof (*servaddr)); if (vsm->server_cfg.address_ip6) { struct sockaddr_in6 *server_addr = (struct sockaddr_in6 *) servaddr; - server_addr->sin6_family = AF_INET6; - server_addr->sin6_addr = in6addr_any; - server_addr->sin6_port = htons (vsm->server_cfg.port); + vsm->server_cfg.endpt.is_ip4 = 0; + vsm->server_cfg.endpt.ip = (uint8_t *) &server_addr->sin6_addr; + vsm->server_cfg.endpt.port = htons (vsm->server_cfg.port); } else { struct sockaddr_in *server_addr = (struct sockaddr_in *) servaddr; - server_addr->sin_family = AF_INET; - server_addr->sin_addr.s_addr = htonl (INADDR_ANY); - server_addr->sin_port = htons (vsm->server_cfg.port); + vsm->server_cfg.endpt.is_ip4 = 1; + vsm->server_cfg.endpt.ip = (uint8_t *) &server_addr->sin_addr; + vsm->server_cfg.endpt.port = htons (vsm->server_cfg.port); } +} + +static void +vcl_test_clear_endpoint_addr (vcl_test_server_main_t *vsm) +{ + struct sockaddr_storage *servaddr = &vsm->servaddr; + + memset (&vsm->servaddr, 0, sizeof (vsm->servaddr)); if (vsm->server_cfg.address_ip6) { struct sockaddr_in6 *server_addr = (struct sockaddr_in6 *) servaddr; - vsm->server_cfg.endpt.is_ip4 = 0; - vsm->server_cfg.endpt.ip = (uint8_t *) &server_addr->sin6_addr; - vsm->server_cfg.endpt.port = (uint16_t) server_addr->sin6_port; + server_addr->sin6_family = AF_INET6; + server_addr->sin6_addr = in6addr_any; } else { struct sockaddr_in *server_addr = (struct sockaddr_in *) servaddr; - vsm->server_cfg.endpt.is_ip4 = 1; - vsm->server_cfg.endpt.ip = (uint8_t *) &server_addr->sin_addr; - vsm->server_cfg.endpt.port = (uint16_t) server_addr->sin_port; + server_addr->sin_family = AF_INET; + server_addr->sin_addr.s_addr = htonl (INADDR_ANY); } } @@ -456,9 +461,10 @@ vcl_test_server_process_opts (vcl_test_server_main_t * vsm, int argc, int v, c; vsm->server_cfg.proto = VPPCOM_PROTO_TCP; + vcl_test_clear_endpoint_addr (vsm); opterr = 0; - while ((c = getopt (argc, argv, "6DLsw:hp:S")) != -1) + while ((c = getopt (argc, argv, "6DLsw:hp:SB:")) != -1) switch (c) { case '6': @@ -469,7 +475,22 @@ vcl_test_server_process_opts (vcl_test_server_main_t * vsm, int argc, if (vppcom_unformat_proto (&vsm->server_cfg.proto, optarg)) vtwrn ("Invalid vppcom protocol %s, defaulting to TCP", optarg); break; - + case 'B': + if (vsm->server_cfg.address_ip6) + { + if (inet_pton ( + AF_INET6, optarg, + &((struct sockaddr_in6 *) &vsm->servaddr)->sin6_addr) != 1) + vtwrn ("couldn't parse ipv6 addr %s", optarg); + } + else + { + if (inet_pton ( + AF_INET, optarg, + &((struct sockaddr_in *) &vsm->servaddr)->sin_addr) != 1) + vtwrn ("couldn't parse ipv4 addr %s", optarg); + } + break; case 'D': vsm->server_cfg.proto = VPPCOM_PROTO_UDP; break; diff --git a/src/plugins/http/CMakeLists.txt b/src/plugins/http/CMakeLists.txt index c51a7dce36d..075b8d6817b 100644 --- a/src/plugins/http/CMakeLists.txt +++ b/src/plugins/http/CMakeLists.txt @@ -16,5 +16,9 @@ add_vpp_plugin(http http.c http_buffer.c http_timer.c - http_test.c +) + +add_vpp_plugin(http_unittest + SOURCES + test/http_test.c ) diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c index 4f741c2e6b4..1ea5a08fbf6 100644 --- a/src/plugins/http/http.c +++ 
b/src/plugins/http/http.c @@ -36,57 +36,52 @@ const http_buffer_type_t msg_to_buf_type[] = { }; static u8 * -format_http_state (u8 *s, va_list *va) +format_http_req_state (u8 *s, va_list *va) { - http_state_t state = va_arg (*va, http_state_t); + http_req_state_t state = va_arg (*va, http_req_state_t); + u8 *t = 0; switch (state) { - case HTTP_STATE_IDLE: - return format (s, "idle"); - case HTTP_STATE_WAIT_APP_METHOD: - return format (s, "wait app method"); - case HTTP_STATE_WAIT_SERVER_REPLY: - return format (s, "wait server reply"); - case HTTP_STATE_CLIENT_IO_MORE_DATA: - return format (s, "client io more data"); - case HTTP_STATE_WAIT_CLIENT_METHOD: - return format (s, "wait client method"); - case HTTP_STATE_WAIT_APP_REPLY: - return format (s, "wait app reply"); - case HTTP_STATE_APP_IO_MORE_DATA: - return format (s, "app io more data"); - default: - break; - } - return format (s, "unknown"); -} - -#define http_state_change(_hc, _state) \ +#define _(n, s, str) \ + case HTTP_REQ_STATE_##s: \ + t = (u8 *) str; \ + break; + foreach_http_req_state +#undef _ + default : return format (s, "unknown"); + } + return format (s, "%s", t); +} + +#define http_req_state_change(_hc, _state) \ do \ { \ - HTTP_DBG (1, "changing http state %U -> %U", format_http_state, \ - (_hc)->http_state, format_http_state, _state); \ - (_hc)->http_state = _state; \ + HTTP_DBG (1, "changing http req state: %U -> %U", \ + format_http_req_state, (_hc)->req_state, \ + format_http_req_state, _state); \ + ASSERT ((_hc)->req_state != HTTP_REQ_STATE_TUNNEL); \ + (_hc)->req_state = _state; \ } \ while (0) -static inline int -http_state_is_tx_valid (http_conn_t *hc) +static u8 * +format_http_conn_state (u8 *s, va_list *args) { - http_state_t state = hc->http_state; - return (state == HTTP_STATE_APP_IO_MORE_DATA || - state == HTTP_STATE_WAIT_APP_REPLY || - state == HTTP_STATE_WAIT_APP_METHOD); -} + http_conn_t *hc = va_arg (*args, http_conn_t *); + u8 *t = 0; -static inline int -http_state_is_rx_valid (http_conn_t *hc) -{ - http_state_t state = hc->http_state; - return (state == HTTP_STATE_WAIT_SERVER_REPLY || - state == HTTP_STATE_CLIENT_IO_MORE_DATA || - state == HTTP_STATE_WAIT_CLIENT_METHOD); + switch (hc->state) + { +#define _(s, str) \ + case HTTP_CONN_STATE_##s: \ + t = (u8 *) str; \ + break; + foreach_http_conn_state +#undef _ + default : return format (s, "unknown"); + } + return format (s, "%s", t); } static inline http_worker_t * @@ -117,6 +112,15 @@ http_conn_get_w_thread (u32 hc_index, u32 thread_index) return pool_elt_at_index (wrk->conn_pool, hc_index); } +static inline http_conn_t * +http_conn_get_w_thread_if_valid (u32 hc_index, u32 thread_index) +{ + http_worker_t *wrk = http_worker_get (thread_index); + if (pool_is_free_index (wrk->conn_pool, hc_index)) + return 0; + return pool_elt_at_index (wrk->conn_pool, hc_index); +} + void http_conn_free (http_conn_t *hc) { @@ -124,6 +128,35 @@ http_conn_free (http_conn_t *hc) pool_put (wrk->conn_pool, hc); } +static inline http_conn_t * +http_ho_conn_get (u32 ho_hc_index) +{ + http_main_t *hm = &http_main; + return pool_elt_at_index (hm->ho_conn_pool, ho_hc_index); +} + +void +http_ho_conn_free (http_conn_t *ho_hc) +{ + http_main_t *hm = &http_main; + pool_put (hm->ho_conn_pool, ho_hc); +} + +static inline u32 +http_ho_conn_alloc (void) +{ + http_main_t *hm = &http_main; + http_conn_t *hc; + + pool_get_aligned_safe (hm->ho_conn_pool, hc, CLIB_CACHE_LINE_BYTES); + clib_memset (hc, 0, sizeof (*hc)); + hc->h_hc_index = hc - hm->ho_conn_pool; + hc->h_pa_session_handle = 
SESSION_INVALID_HANDLE; + hc->h_tc_session_handle = SESSION_INVALID_HANDLE; + hc->timeout = HTTP_CONN_TIMEOUT; + return hc->h_hc_index; +} + static u32 http_listener_alloc (void) { @@ -132,6 +165,7 @@ http_listener_alloc (void) pool_get_zero (hm->listener_pool, lhc); lhc->c_c_index = lhc - hm->listener_pool; + lhc->timeout = HTTP_CONN_TIMEOUT; return lhc->c_c_index; } @@ -167,20 +201,47 @@ http_disconnect_transport (http_conn_t *hc) } static void +http_conn_invalidate_timer_cb (u32 hs_handle) +{ + http_conn_t *hc; + + hc = + http_conn_get_w_thread_if_valid (hs_handle & 0x00FFFFFF, hs_handle >> 24); + + HTTP_DBG (1, "hc [%u]%x", hs_handle >> 24, hs_handle & 0x00FFFFFF); + if (!hc) + { + HTTP_DBG (1, "already deleted"); + return; + } + + hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; + hc->pending_timer = 1; +} + +static void http_conn_timeout_cb (void *hc_handlep) { http_conn_t *hc; uword hs_handle; hs_handle = pointer_to_uword (hc_handlep); - hc = http_conn_get_w_thread (hs_handle & 0x00FFFFFF, hs_handle >> 24); + hc = + http_conn_get_w_thread_if_valid (hs_handle & 0x00FFFFFF, hs_handle >> 24); - HTTP_DBG (1, "terminate thread %d index %d hs %llx", hs_handle >> 24, - hs_handle & 0x00FFFFFF, hc); + HTTP_DBG (1, "hc [%u]%x", hs_handle >> 24, hs_handle & 0x00FFFFFF); if (!hc) - return; + { + HTTP_DBG (1, "already deleted"); + return; + } + + if (!hc->pending_timer) + { + HTTP_DBG (1, "timer not pending"); + return; + } - hc->timer_handle = ~0; session_transport_closing_notify (&hc->connection); http_disconnect_transport (hc); } @@ -200,6 +261,7 @@ http_ts_accept_callback (session_t *ts) hc_index = http_conn_alloc_w_thread (ts->thread_index); hc = http_conn_get_w_thread (hc_index, ts->thread_index); clib_memcpy_fast (hc, lhc, sizeof (*lhc)); + hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; hc->c_thread_index = ts->thread_index; hc->h_hc_index = hc_index; @@ -207,7 +269,7 @@ http_ts_accept_callback (session_t *ts) hc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; hc->state = HTTP_CONN_STATE_ESTABLISHED; - http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD); + http_req_state_change (hc, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD); ts->session_state = SESSION_STATE_READY; ts->opaque = hc_index; @@ -232,6 +294,7 @@ http_ts_accept_callback (session_t *ts) if ((rv = app_worker_init_accepted (as))) { HTTP_DBG (1, "failed to allocate fifos"); + hc->h_pa_session_handle = SESSION_INVALID_HANDLE; session_free (as); return rv; } @@ -273,7 +336,7 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts, app_worker_t *app_wrk; int rv; - ho_hc = http_conn_get_w_thread (ho_hc_index, 0); + ho_hc = http_ho_conn_get (ho_hc_index); ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING); if (err) @@ -291,12 +354,13 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts, clib_memcpy_fast (hc, ho_hc, sizeof (*hc)); + hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; hc->c_thread_index = ts->thread_index; hc->h_tc_session_handle = session_handle (ts); hc->c_c_index = new_hc_index; hc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; hc->state = HTTP_CONN_STATE_ESTABLISHED; - http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD); + http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_METHOD); ts->session_state = SESSION_STATE_READY; ts->opaque = new_hc_index; @@ -312,8 +376,8 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts, as->session_type = session_type_from_proto_and_ip ( TRANSPORT_PROTO_HTTP, session_type_is_ip4 (ts->session_type)); - HTTP_DBG (1, 
"half-open hc index %d, hc index %d", ho_hc_index, - new_hc_index); + HTTP_DBG (1, "half-open hc index %x, hc [%u]%x", ho_hc_index, + ts->thread_index, new_hc_index); app_wrk = app_worker_get (hc->h_pa_wrk_index); if (!app_wrk) @@ -359,7 +423,7 @@ http_ts_reset_callback (session_t *ts) hc->state = HTTP_CONN_STATE_CLOSED; http_buffer_free (&hc->tx_buf); - http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD); + http_req_state_change (hc, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD); session_transport_reset_notify (&hc->connection); http_disconnect_transport (hc); @@ -378,9 +442,9 @@ static const char *http_error_template = "HTTP/1.1 %s\r\n" */ static const char *http_response_template = "HTTP/1.1 %s\r\n" "Date: %U GMT\r\n" - "Server: %v\r\n" - "Content-Length: %llu\r\n" - "%s"; + "Server: %v\r\n"; + +static const char *content_len_template = "Content-Length: %llu\r\n"; /** * http request boilerplate @@ -433,7 +497,7 @@ http_send_error (http_conn_t *hc, http_status_code_t ec) now = clib_timebase_now (&hm->timebase); data = format (0, http_error_template, http_status_code_str[ec], format_clib_timebase_time, now); - HTTP_DBG (1, "%v", data); + HTTP_DBG (3, "%v", data); http_send_data (hc, data, vec_len (data)); vec_free (data); } @@ -604,7 +668,7 @@ http_parse_request_line (http_conn_t *hc, http_status_code_t *ec) *ec = HTTP_STATUS_BAD_REQUEST; return -1; } - HTTP_DBG (0, "request line length: %d", i); + HTTP_DBG (2, "request line length: %d", i); hc->control_data_len = i + 2; next_line_offset = hc->control_data_len; @@ -636,18 +700,25 @@ http_parse_request_line (http_conn_t *hc, http_status_code_t *ec) hc->method = HTTP_REQ_POST; hc->target_path_offset = method_offset + 5; } + else if (!memcmp (hc->rx_buf + method_offset, "CONNECT ", 8)) + { + HTTP_DBG (0, "CONNECT method"); + hc->method = HTTP_REQ_CONNECT; + hc->target_path_offset = method_offset + 8; + hc->is_tunnel = 1; + } else { - if (hc->rx_buf[method_offset] - 'A' <= 'Z' - hc->rx_buf[method_offset]) + if (hc->rx_buf[method_offset] - 'A' <= 'Z' - 'A') { - clib_warning ("not method name: %8v", hc->rx_buf); - *ec = HTTP_STATUS_BAD_REQUEST; + clib_warning ("method not implemented: %8v", hc->rx_buf); + *ec = HTTP_STATUS_NOT_IMPLEMENTED; return -1; } else { - clib_warning ("method not implemented: %8v", hc->rx_buf); - *ec = HTTP_STATUS_NOT_IMPLEMENTED; + clib_warning ("not method name: %8v", hc->rx_buf); + *ec = HTTP_STATUS_BAD_REQUEST; return -1; } } @@ -679,9 +750,9 @@ http_parse_request_line (http_conn_t *hc, http_status_code_t *ec) } /* parse request-target */ - HTTP_DBG (0, "http at %d", i); + HTTP_DBG (2, "http at %d", i); target_len = i - hc->target_path_offset; - HTTP_DBG (0, "target_len %d", target_len); + HTTP_DBG (2, "target_len %d", target_len); if (target_len < 1) { clib_warning ("request-target not present"); @@ -697,10 +768,10 @@ http_parse_request_line (http_conn_t *hc, http_status_code_t *ec) *ec = HTTP_STATUS_BAD_REQUEST; return -1; } - HTTP_DBG (0, "request-target path length: %u", hc->target_path_len); - HTTP_DBG (0, "request-target path offset: %u", hc->target_path_offset); - HTTP_DBG (0, "request-target query length: %u", hc->target_query_len); - HTTP_DBG (0, "request-target query offset: %u", hc->target_query_offset); + HTTP_DBG (2, "request-target path length: %u", hc->target_path_len); + HTTP_DBG (2, "request-target path offset: %u", hc->target_path_offset); + HTTP_DBG (2, "request-target query length: %u", hc->target_query_len); + HTTP_DBG (2, "request-target query offset: %u", hc->target_query_offset); /* set buffer 
offset to nex line start */ hc->rx_buf_offset = next_line_offset; @@ -742,7 +813,7 @@ http_parse_status_line (http_conn_t *hc) clib_warning ("status line incomplete"); return -1; } - HTTP_DBG (0, "status line length: %d", i); + HTTP_DBG (2, "status line length: %d", i); if (i < 12) { clib_warning ("status line too short (%d)", i); @@ -824,7 +895,7 @@ http_identify_headers (http_conn_t *hc, http_status_code_t *ec) (hc->rx_buf[hc->rx_buf_offset + 1] == '\n')) { /* just another CRLF -> no headers */ - HTTP_DBG (0, "no headers"); + HTTP_DBG (2, "no headers"); hc->headers_len = 0; hc->control_data_len += 2; return 0; @@ -841,8 +912,8 @@ http_identify_headers (http_conn_t *hc, http_status_code_t *ec) hc->headers_offset = hc->rx_buf_offset; hc->headers_len = i - hc->rx_buf_offset + 2; hc->control_data_len += (hc->headers_len + 2); - HTTP_DBG (0, "headers length: %u", hc->headers_len); - HTTP_DBG (0, "headers offset: %u", hc->headers_offset); + HTTP_DBG (2, "headers length: %u", hc->headers_len); + HTTP_DBG (2, "headers offset: %u", hc->headers_offset); return 0; } @@ -850,16 +921,20 @@ http_identify_headers (http_conn_t *hc, http_status_code_t *ec) static int http_identify_message_body (http_conn_t *hc, http_status_code_t *ec) { - unformat_input_t input; - int i, len; - u8 *line; - u64 body_len; + int i, value_len; + u8 *end, *p, *value_start; + u64 body_len = 0, digit; hc->body_len = 0; if (hc->headers_len == 0) { - HTTP_DBG (0, "no header, no message-body"); + HTTP_DBG (2, "no header, no message-body"); + return 0; + } + if (hc->is_tunnel) + { + HTTP_DBG (2, "tunnel, no message-body"); return 0; } @@ -870,7 +945,7 @@ http_identify_message_body (http_conn_t *hc, http_status_code_t *ec) "Content-Length:"); if (i < 0) { - HTTP_DBG (0, "Content-Length header not present, no message-body"); + HTTP_DBG (2, "Content-Length header not present, no message-body"); return 0; } hc->rx_buf_offset = i + 15; @@ -882,37 +957,83 @@ http_identify_message_body (http_conn_t *hc, http_status_code_t *ec) *ec = HTTP_STATUS_BAD_REQUEST; return -1; } - len = i - hc->rx_buf_offset; - if (len < 1) + value_len = i - hc->rx_buf_offset; + if (value_len < 1) { clib_warning ("invalid header, content length value missing"); *ec = HTTP_STATUS_BAD_REQUEST; return -1; } - line = vec_new (u8, len); - clib_memcpy (line, hc->rx_buf + hc->rx_buf_offset, len); - HTTP_DBG (0, "%v", line); + end = hc->rx_buf + hc->rx_buf_offset + value_len; + p = hc->rx_buf + hc->rx_buf_offset; + /* skip leading whitespace */ + while (1) + { + if (p == end) + { + clib_warning ("value not found"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + else if (*p != ' ' && *p != '\t') + { + break; + } + p++; + value_len--; + } + value_start = p; + /* skip trailing whitespace */ + p = value_start + value_len - 1; + while (*p == ' ' || *p == '\t') + { + p--; + value_len--; + } - unformat_init_vector (&input, line); - if (!unformat (&input, "%llu", &body_len)) + if (value_len < 1) { - clib_warning ("failed to unformat content length value"); + clib_warning ("value not found"); *ec = HTTP_STATUS_BAD_REQUEST; return -1; } - unformat_free (&input); + + p = value_start; + for (i = 0; i < value_len; i++) + { + /* check for digit */ + if (!isdigit (*p)) + { + clib_warning ("expected digit"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + digit = *p - '0'; + u64 new_body_len = body_len * 10 + digit; + /* check for overflow */ + if (new_body_len < body_len) + { + clib_warning ("too big number, overflow"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + 
body_len = new_body_len; + p++; + } + hc->body_len = body_len; hc->body_offset = hc->headers_offset + hc->headers_len + 2; - HTTP_DBG (0, "body length: %llu", hc->body_len); - HTTP_DBG (0, "body offset: %u", hc->body_offset); + HTTP_DBG (2, "body length: %llu", hc->body_len); + HTTP_DBG (2, "body offset: %u", hc->body_offset); return 0; } static http_sm_result_t -http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp) +http_req_state_wait_transport_reply (http_conn_t *hc, + transport_send_params_t *sp) { int rv; http_msg_t msg = {}; @@ -931,7 +1052,7 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp) return HTTP_SM_STOP; } - HTTP_DBG (0, "%v", hc->rx_buf); + HTTP_DBG (3, "%v", hc->rx_buf); if (vec_len (hc->rx_buf) < 8) { @@ -955,6 +1076,7 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp) * if there is some space send also portion of body */ as = session_get_from_handle (hc->h_pa_session_handle); max_enq = svm_fifo_max_enqueue (as->rx_fifo); + max_enq -= sizeof (msg); if (max_enq < hc->control_data_len) { clib_warning ("not enough room for control data in app's rx fifo"); @@ -984,12 +1106,12 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp) if (hc->to_recv == 0) { /* all sent, we are done */ - http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD); + http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_METHOD); } else { /* stream rest of the response body */ - http_state_change (hc, HTTP_STATE_CLIENT_IO_MORE_DATA); + http_req_state_change (hc, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA); } app_wrk = app_worker_get_if_valid (as->app_wrk_index); @@ -1006,7 +1128,8 @@ error: } static http_sm_result_t -http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp) +http_req_state_wait_transport_method (http_conn_t *hc, + transport_send_params_t *sp) { http_status_code_t ec; app_worker_t *app_wrk; @@ -1022,7 +1145,7 @@ http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp) if (rv) return HTTP_SM_STOP; - HTTP_DBG (0, "%v", hc->rx_buf); + HTTP_DBG (3, "%v", hc->rx_buf); if (vec_len (hc->rx_buf) < 8) { @@ -1084,13 +1207,13 @@ http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp) /* drop everything, we do not support pipelining */ http_read_message_drop_all (hc); /* all sent, we are done */ - http_state_change (hc, HTTP_STATE_WAIT_APP_REPLY); + http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_REPLY); } else { http_read_message_drop (hc, len); /* stream rest of the response body */ - http_state_change (hc, HTTP_STATE_CLIENT_IO_MORE_DATA); + http_req_state_change (hc, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA); } app_wrk = app_worker_get_if_valid (as->app_wrk_index); @@ -1109,7 +1232,7 @@ error: } static http_sm_result_t -http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) +http_req_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) { http_main_t *hm = &http_main; u8 *response; @@ -1120,6 +1243,7 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) http_msg_t msg; int rv; http_sm_result_t sm_result = HTTP_SM_ERROR; + http_req_state_t next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD; as = session_get_from_handle (hc->h_pa_session_handle); @@ -1157,11 +1281,21 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) /* Date */ format_clib_timebase_time, now, /* Server */ - hc->app_name, - /* Length */ - msg.data.body_len, - /* Any headers from app? 
*/ - msg.data.headers_len ? "" : "\r\n"); + hc->app_name); + + /* RFC9110 9.3.6: A server MUST NOT send Content-Length header field in a + * 2xx (Successful) response to CONNECT. */ + if (hc->is_tunnel && http_status_code_str[msg.code][0] == '2') + { + ASSERT (msg.data.body_len == 0); + next_state = HTTP_REQ_STATE_TUNNEL; + /* cleanup some stuff we don't need anymore in tunnel mode */ + http_conn_timer_stop (hc); + vec_free (hc->rx_buf); + http_buffer_free (&hc->tx_buf); + } + else + response = format (response, content_len_template, msg.data.body_len); /* Add headers from app (if any) */ if (msg.data.headers_len) @@ -1184,7 +1318,12 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) ASSERT (rv == msg.data.headers_len); } } - HTTP_DBG (0, "%v", response); + else + { + /* No headers from app */ + response = format (response, "\r\n"); + } + HTTP_DBG (3, "%v", response); sent = http_send_data (hc, response, vec_len (response)); if (sent != vec_len (response)) @@ -1201,30 +1340,30 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) /* Start sending the actual data */ http_buffer_init (&hc->tx_buf, msg_to_buf_type[msg.data.type], as->tx_fifo, msg.data.body_len); - http_state_change (hc, HTTP_STATE_APP_IO_MORE_DATA); + next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA; sm_result = HTTP_SM_CONTINUE; } else { /* No response body, we are done */ - http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD); sm_result = HTTP_SM_STOP; } + http_req_state_change (hc, next_state); + ASSERT (sp->max_burst_size >= sent); sp->max_burst_size -= sent; return sm_result; error: http_send_error (hc, sc); - http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD); session_transport_closing_notify (&hc->connection); http_disconnect_transport (hc); return HTTP_SM_STOP; } static http_sm_result_t -http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) +http_req_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) { http_msg_t msg; session_t *as; @@ -1232,7 +1371,7 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) u32 sent; int rv; http_sm_result_t sm_result = HTTP_SM_ERROR; - http_state_t next_state; + http_req_state_t next_state; as = session_get_from_handle (hc->h_pa_session_handle); @@ -1292,7 +1431,7 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) /* Any headers from app? */ msg.data.headers_len ? 
"" : "\r\n"); - next_state = HTTP_STATE_WAIT_SERVER_REPLY; + next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY; sm_result = HTTP_SM_STOP; } else if (msg.method_type == HTTP_REQ_POST) @@ -1323,7 +1462,7 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) http_buffer_init (&hc->tx_buf, msg_to_buf_type[msg.data.type], as->tx_fifo, msg.data.body_len); - next_state = HTTP_STATE_APP_IO_MORE_DATA; + next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA; sm_result = HTTP_SM_CONTINUE; } else @@ -1353,7 +1492,7 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) ASSERT (rv == msg.data.headers_len); } } - HTTP_DBG (0, "%v", request); + HTTP_DBG (3, "%v", request); sent = http_send_data (hc, request, vec_len (request)); if (sent != vec_len (request)) @@ -1363,7 +1502,7 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) goto error; } - http_state_change (hc, next_state); + http_req_state_change (hc, next_state); goto done; error: @@ -1379,7 +1518,8 @@ done: } static http_sm_result_t -http_state_client_io_more_data (http_conn_t *hc, transport_send_params_t *sp) +http_req_state_transport_io_more_data (http_conn_t *hc, + transport_send_params_t *sp) { session_t *as, *ts; app_worker_t *app_wrk; @@ -1426,18 +1566,18 @@ http_state_client_io_more_data (http_conn_t *hc, transport_send_params_t *sp) clib_warning ("http protocol error: received more data than expected"); session_transport_closing_notify (&hc->connection); http_disconnect_transport (hc); - http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD); + http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_METHOD); return HTTP_SM_ERROR; } hc->to_recv -= rv; HTTP_DBG (1, "drained %d from ts; remains %lu", rv, hc->to_recv); /* Finished transaction: - * server back to HTTP_STATE_WAIT_APP_REPLY - * client to HTTP_STATE_WAIT_APP_METHOD */ + * server back to HTTP_REQ_STATE_WAIT_APP_REPLY + * client to HTTP_REQ_STATE_WAIT_APP_METHOD */ if (hc->to_recv == 0) - http_state_change (hc, hc->is_server ? HTTP_STATE_WAIT_APP_REPLY : - HTTP_STATE_WAIT_APP_METHOD); + http_req_state_change (hc, hc->is_server ? HTTP_REQ_STATE_WAIT_APP_REPLY : + HTTP_REQ_STATE_WAIT_APP_METHOD); app_wrk = app_worker_get_if_valid (as->app_wrk_index); if (app_wrk) @@ -1450,7 +1590,7 @@ http_state_client_io_more_data (http_conn_t *hc, transport_send_params_t *sp) } static http_sm_result_t -http_state_app_io_more_data (http_conn_t *hc, transport_send_params_t *sp) +http_req_state_app_io_more_data (http_conn_t *hc, transport_send_params_t *sp) { u32 max_send = 64 << 10, n_segs; http_buffer_t *hb = &hc->tx_buf; @@ -1492,37 +1632,157 @@ http_state_app_io_more_data (http_conn_t *hc, transport_send_params_t *sp) session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX_FLUSH); /* Finished transaction: - * server back to HTTP_STATE_WAIT_METHOD - * client to HTTP_STATE_WAIT_SERVER_REPLY */ - http_state_change (hc, hc->is_server ? HTTP_STATE_WAIT_CLIENT_METHOD : - HTTP_STATE_WAIT_SERVER_REPLY); + * server back to HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD + * client to HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY */ + http_req_state_change (hc, hc->is_server ? 
+ HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD : + HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY); http_buffer_free (&hc->tx_buf); } return HTTP_SM_STOP; } +static http_sm_result_t +http_req_state_tunnel_rx (http_conn_t *hc, transport_send_params_t *sp) +{ + u32 max_deq, max_enq, max_read, n_segs = 2; + svm_fifo_seg_t segs[n_segs]; + int n_written = 0; + session_t *as, *ts; + app_worker_t *app_wrk; + + HTTP_DBG (1, "tunnel received data from client"); + + as = session_get_from_handle (hc->h_pa_session_handle); + ts = session_get_from_handle (hc->h_tc_session_handle); + + max_deq = svm_fifo_max_dequeue (ts->rx_fifo); + if (PREDICT_FALSE (max_deq == 0)) + { + HTTP_DBG (1, "max_deq == 0"); + return HTTP_SM_STOP; + } + max_enq = svm_fifo_max_enqueue (as->rx_fifo); + if (max_enq == 0) + { + HTTP_DBG (1, "app's rx fifo full"); + svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + return HTTP_SM_STOP; + } + max_read = clib_min (max_enq, max_deq); + svm_fifo_segments (ts->rx_fifo, 0, segs, &n_segs, max_read); + n_written = svm_fifo_enqueue_segments (as->rx_fifo, segs, n_segs, 0); + ASSERT (n_written > 0); + HTTP_DBG (1, "transfered %u bytes", n_written); + svm_fifo_dequeue_drop (ts->rx_fifo, n_written); + app_wrk = app_worker_get_if_valid (as->app_wrk_index); + if (app_wrk) + app_worker_rx_notify (app_wrk, as); + if (svm_fifo_max_dequeue_cons (ts->rx_fifo)) + session_program_rx_io_evt (session_handle (ts)); + + return HTTP_SM_STOP; +} + +static http_sm_result_t +http_req_state_tunnel_tx (http_conn_t *hc, transport_send_params_t *sp) +{ + u32 max_deq, max_enq, max_read, n_segs = 2; + svm_fifo_seg_t segs[n_segs]; + session_t *as, *ts; + int n_written = 0; + + HTTP_DBG (1, "tunnel received data from target"); + + as = session_get_from_handle (hc->h_pa_session_handle); + ts = session_get_from_handle (hc->h_tc_session_handle); + + max_deq = svm_fifo_max_dequeue_cons (as->tx_fifo); + if (PREDICT_FALSE (max_deq == 0)) + { + HTTP_DBG (1, "max_deq == 0"); + goto check_fifo; + } + max_enq = svm_fifo_max_enqueue_prod (ts->tx_fifo); + if (max_enq == 0) + { + HTTP_DBG (1, "ts tx fifo full"); + goto check_fifo; + } + max_read = clib_min (max_enq, max_deq); + max_read = clib_min (max_read, sp->max_burst_size); + svm_fifo_segments (as->tx_fifo, 0, segs, &n_segs, max_read); + n_written = svm_fifo_enqueue_segments (ts->tx_fifo, segs, n_segs, 0); + ASSERT (n_written > 0); + HTTP_DBG (1, "transfered %u bytes", n_written); + sp->bytes_dequeued += n_written; + sp->max_burst_size -= n_written; + svm_fifo_dequeue_drop (as->tx_fifo, n_written); + if (svm_fifo_set_event (ts->tx_fifo)) + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); + +check_fifo: + /* Deschedule and wait for deq notification if ts fifo is almost full */ + if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH) + { + svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + transport_connection_deschedule (&hc->connection); + sp->flags |= TRANSPORT_SND_F_DESCHED; + } + + return HTTP_SM_STOP; +} + typedef http_sm_result_t (*http_sm_handler) (http_conn_t *, transport_send_params_t *sp); -static http_sm_handler state_funcs[HTTP_N_STATES] = { - 0, /* idle state */ - http_state_wait_app_method, - http_state_wait_client_method, - http_state_wait_server_reply, - http_state_wait_app_reply, - http_state_client_io_more_data, - http_state_app_io_more_data, +static http_sm_handler tx_state_funcs[HTTP_REQ_N_STATES] = { + 0, /* idle */ + http_req_state_wait_app_method, + 0, /* wait transport reply */ + 0, /* transport io more data */ + 0, /* wait 
transport method */ + http_req_state_wait_app_reply, + http_req_state_app_io_more_data, + http_req_state_tunnel_tx, }; -static void -http_req_run_state_machine (http_conn_t *hc, transport_send_params_t *sp) +static_always_inline int +http_req_state_is_tx_valid (http_conn_t *hc) +{ + return tx_state_funcs[hc->req_state] ? 1 : 0; +} + +static http_sm_handler rx_state_funcs[HTTP_REQ_N_STATES] = { + 0, /* idle */ + 0, /* wait app method */ + http_req_state_wait_transport_reply, + http_req_state_transport_io_more_data, + http_req_state_wait_transport_method, + 0, /* wait app reply */ + 0, /* app io more data */ + http_req_state_tunnel_rx, +}; + +static_always_inline int +http_req_state_is_rx_valid (http_conn_t *hc) +{ + return rx_state_funcs[hc->req_state] ? 1 : 0; +} + +static_always_inline void +http_req_run_state_machine (http_conn_t *hc, transport_send_params_t *sp, + u8 is_tx) { http_sm_result_t res; do { - res = state_funcs[hc->http_state](hc, sp); + if (is_tx) + res = tx_state_funcs[hc->req_state](hc, sp); + else + res = rx_state_funcs[hc->req_state](hc, sp); if (res == HTTP_SM_ERROR) { HTTP_DBG (1, "error in state machine %d", res); @@ -1540,24 +1800,29 @@ http_ts_rx_callback (session_t *ts) { http_conn_t *hc; + HTTP_DBG (1, "hc [%u]%x", ts->thread_index, ts->opaque); + hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); - if (!hc) + + if (hc->state == HTTP_CONN_STATE_CLOSED) { - clib_warning ("http connection not found (ts %d)", ts->opaque); - return -1; + HTTP_DBG (1, "conn closed"); + svm_fifo_dequeue_drop_all (ts->rx_fifo); + return 0; } - if (!http_state_is_rx_valid (hc)) + if (!http_req_state_is_rx_valid (hc)) { - if (hc->state != HTTP_CONN_STATE_CLOSED) - clib_warning ("app data req state '%U' session state %u", - format_http_state, hc->http_state, hc->state); - svm_fifo_dequeue_drop_all (ts->tx_fifo); + clib_warning ("hc [%u]%x invalid rx state: http req state " + "'%U', session state '%U'", + ts->thread_index, ts->opaque, format_http_req_state, + hc->req_state, format_http_conn_state, hc); + svm_fifo_dequeue_drop_all (ts->rx_fifo); return 0; } HTTP_DBG (1, "run state machine"); - http_req_run_state_machine (hc, 0); + http_req_run_state_machine (hc, 0, 0); if (hc->state == HTTP_CONN_STATE_TRANSPORT_CLOSED) { @@ -1573,6 +1838,7 @@ http_ts_builtin_tx_callback (session_t *ts) http_conn_t *hc; hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); + HTTP_DBG (1, "transport connection reschedule"); transport_connection_reschedule (&hc->connection); return 0; @@ -1587,17 +1853,15 @@ http_ts_cleanup_callback (session_t *ts, session_cleanup_ntf_t ntf) return; hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); - if (!hc) - { - clib_warning ("no http connection for %u", ts->session_index); - return; - } - HTTP_DBG (1, "going to free session %x", ts->opaque); + + HTTP_DBG (1, "going to free hc [%u]%x", ts->thread_index, ts->opaque); vec_free (hc->rx_buf); http_buffer_free (&hc->tx_buf); - http_conn_timer_stop (hc); + + if (hc->pending_timer == 0) + http_conn_timer_stop (hc); session_transport_delete_notify (&hc->connection); @@ -1609,6 +1873,16 @@ http_ts_cleanup_callback (session_t *ts, session_cleanup_ntf_t ntf) http_conn_free (hc); } +static void +http_ts_ho_cleanup_callback (session_t *ts) +{ + http_conn_t *ho_hc; + HTTP_DBG (1, "half open: %x", ts->opaque); + ho_hc = http_ho_conn_get (ts->opaque); + session_half_open_delete_notify (&ho_hc->connection); + http_ho_conn_free (ho_hc); +} + int http_add_segment_callback (u32 client_index, u64 segment_handle) { @@ 
-1628,6 +1902,7 @@ static session_cb_vft_t http_app_cb_vft = { .session_connected_callback = http_ts_connected_callback, .session_reset_callback = http_ts_reset_callback, .session_cleanup_callback = http_ts_cleanup_callback, + .half_open_cleanup_callback = http_ts_ho_cleanup_callback, .add_segment_callback = http_add_segment_callback, .del_segment_callback = http_del_segment_callback, .builtin_app_rx_callback = http_ts_rx_callback, @@ -1679,7 +1954,7 @@ http_transport_enable (vlib_main_t *vm, u8 is_en) clib_timebase_init (&hm->timebase, 0 /* GMT */, CLIB_TIMEBASE_DAYLIGHT_NONE, &vm->clib_time /* share the system clock */); - http_timers_init (vm, http_conn_timeout_cb); + http_timers_init (vm, http_conn_timeout_cb, http_conn_invalidate_timer_cb); hm->is_init = 1; return 0; @@ -1695,6 +1970,8 @@ http_transport_connect (transport_endpoint_cfg_t *tep) http_conn_t *hc; int error; u32 hc_index; + session_t *ho; + transport_endpt_ext_cfg_t *ext_cfg; app_worker_t *app_wrk = app_worker_get (sep->app_wrk_index); clib_memset (cargs, 0, sizeof (*cargs)); @@ -1704,13 +1981,20 @@ http_transport_connect (transport_endpoint_cfg_t *tep) app = application_get (app_wrk->app_index); cargs->sep_ext.ns_index = app->ns_index; - hc_index = http_conn_alloc_w_thread (0 /* ts->thread_index */); - hc = http_conn_get_w_thread (hc_index, 0); + hc_index = http_ho_conn_alloc (); + hc = http_ho_conn_get (hc_index); hc->h_pa_wrk_index = sep->app_wrk_index; hc->h_pa_app_api_ctx = sep->opaque; hc->state = HTTP_CONN_STATE_CONNECTING; cargs->api_context = hc_index; + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_HTTP); + if (ext_cfg) + { + HTTP_DBG (1, "app set timeout %u", ext_cfg->opaque); + hc->timeout = ext_cfg->opaque; + } + hc->is_server = 0; if (vec_len (app->name)) @@ -1730,6 +2014,15 @@ http_transport_connect (transport_endpoint_cfg_t *tep) if ((error = vnet_connect (cargs))) return error; + ho = session_alloc_for_half_open (&hc->connection); + ho->app_wrk_index = app_wrk->wrk_index; + ho->ho_index = app_worker_add_half_open (app_wrk, session_handle (ho)); + ho->opaque = sep->opaque; + ho->session_type = + session_type_from_proto_and_ip (TRANSPORT_PROTO_HTTP, sep->is_ip4); + hc->h_tc_session_handle = cargs->sh; + hc->c_s_index = ho->session_index; + return 0; } @@ -1741,11 +2034,12 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) http_main_t *hm = &http_main; session_endpoint_cfg_t *sep; app_worker_t *app_wrk; - transport_proto_t tp; + transport_proto_t tp = TRANSPORT_PROTO_TCP; app_listener_t *al; application_t *app; http_conn_t *lhc; u32 lhc_index; + transport_endpt_ext_cfg_t *ext_cfg; sep = (session_endpoint_cfg_t *) tep; @@ -1755,7 +2049,13 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) args->app_index = hm->app_index; args->sep_ext = *sep; args->sep_ext.ns_index = app->ns_index; - tp = sep->ext_cfg ? 
TRANSPORT_PROTO_TLS : TRANSPORT_PROTO_TCP; + + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + if (ext_cfg) + { + HTTP_DBG (1, "app set tls"); + tp = TRANSPORT_PROTO_TLS; + } args->sep_ext.transport_proto = tp; if (vnet_listen (args)) @@ -1764,6 +2064,13 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) lhc_index = http_listener_alloc (); lhc = http_listener_get (lhc_index); + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_HTTP); + if (ext_cfg && ext_cfg->opaque) + { + HTTP_DBG (1, "app set timeout %u", ext_cfg->opaque); + lhc->timeout = ext_cfg->opaque; + } + /* Grab transport connection listener and link to http listener */ lhc->h_tc_session_handle = args->handle; al = app_listener_get_w_handle (lhc->h_tc_session_handle); @@ -1815,7 +2122,7 @@ http_transport_close (u32 hc_index, u32 thread_index) session_t *as; http_conn_t *hc; - HTTP_DBG (1, "App disconnecting %x", hc_index); + HTTP_DBG (1, "App disconnecting [%u]%x", thread_index, hc_index); hc = http_conn_get_w_thread (hc_index, thread_index); if (hc->state == HTTP_CONN_STATE_CONNECTING) @@ -1865,23 +2172,26 @@ http_app_tx_callback (void *session, transport_send_params_t *sp) u32 max_burst_sz, sent; http_conn_t *hc; - HTTP_DBG (1, "app session conn index %x", as->connection_index); + HTTP_DBG (1, "hc [%u]%x", as->thread_index, as->connection_index); hc = http_conn_get_w_thread (as->connection_index, as->thread_index); - if (!http_state_is_tx_valid (hc)) + + max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS; + sp->max_burst_size = max_burst_sz; + + if (!http_req_state_is_tx_valid (hc)) { - if (hc->state != HTTP_CONN_STATE_CLOSED) - clib_warning ("app data req state '%U' session state %u", - format_http_state, hc->http_state, hc->state); + clib_warning ("hc [%u]%x invalid tx state: http req state " + "'%U', session state '%U'", + as->thread_index, as->connection_index, + format_http_req_state, hc->req_state, + format_http_conn_state, hc); svm_fifo_dequeue_drop_all (as->tx_fifo); return 0; } - max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS; - sp->max_burst_size = max_burst_sz; - HTTP_DBG (1, "run state machine"); - http_req_run_state_machine (hc, sp); + http_req_run_state_machine (hc, sp, 1); if (hc->state == HTTP_CONN_STATE_APP_CLOSED) { @@ -1894,6 +2204,18 @@ http_app_tx_callback (void *session, transport_send_params_t *sp) return sent > 0 ? 
clib_max (sent / TRANSPORT_PACER_MIN_MSS, 1) : 0; } +static int +http_app_rx_evt_cb (transport_connection_t *tc) +{ + http_conn_t *hc = (http_conn_t *) tc; + HTTP_DBG (1, "hc [%u]%x", vlib_get_thread_index (), hc->h_hc_index); + + if (hc->req_state == HTTP_REQ_STATE_TUNNEL) + http_req_state_tunnel_rx (hc, 0); + + return 0; +} + static void http_transport_get_endpoint (u32 hc_index, u32 thread_index, transport_endpoint_t *tep, u8 is_lcl) @@ -1936,36 +2258,6 @@ format_http_listener (u8 *s, va_list *args) } static u8 * -format_http_conn_state (u8 *s, va_list *args) -{ - http_conn_t *hc = va_arg (*args, http_conn_t *); - - switch (hc->state) - { - case HTTP_CONN_STATE_LISTEN: - s = format (s, "LISTEN"); - break; - case HTTP_CONN_STATE_CONNECTING: - s = format (s, "CONNECTING"); - break; - case HTTP_CONN_STATE_ESTABLISHED: - s = format (s, "ESTABLISHED"); - break; - case HTTP_CONN_STATE_TRANSPORT_CLOSED: - s = format (s, "TRANSPORT_CLOSED"); - break; - case HTTP_CONN_STATE_APP_CLOSED: - s = format (s, "APP_CLOSED"); - break; - case HTTP_CONN_STATE_CLOSED: - s = format (s, "CLOSED"); - break; - } - - return s; -} - -static u8 * format_http_transport_connection (u8 *s, va_list *args) { u32 tc_index = va_arg (*args, u32); @@ -2002,18 +2294,61 @@ format_http_transport_listener (u8 *s, va_list *args) return s; } +static u8 * +format_http_transport_half_open (u8 *s, va_list *args) +{ + u32 ho_index = va_arg (*args, u32); + u32 __clib_unused thread_index = va_arg (*args, u32); + u32 __clib_unused verbose = va_arg (*args, u32); + http_conn_t *ho_hc; + session_t *tcp_ho; + + ho_hc = http_ho_conn_get (ho_index); + tcp_ho = session_get_from_handle (ho_hc->h_tc_session_handle); + + s = format (s, "[%d:%d][H] half-open app_wrk %u ts %d:%d", + ho_hc->c_thread_index, ho_hc->c_s_index, ho_hc->h_pa_wrk_index, + tcp_ho->thread_index, tcp_ho->session_index); + return s; +} + +static transport_connection_t * +http_transport_get_ho (u32 ho_hc_index) +{ + http_conn_t *ho_hc; + + HTTP_DBG (1, "half open: %x", ho_hc_index); + ho_hc = http_ho_conn_get (ho_hc_index); + return &ho_hc->connection; +} + +static void +http_transport_cleanup_ho (u32 ho_hc_index) +{ + http_conn_t *ho_hc; + + HTTP_DBG (1, "half open: %x", ho_hc_index); + ho_hc = http_ho_conn_get (ho_hc_index); + session_cleanup_half_open (ho_hc->h_tc_session_handle); + http_ho_conn_free (ho_hc); +} + static const transport_proto_vft_t http_proto = { .enable = http_transport_enable, .connect = http_transport_connect, .start_listen = http_start_listen, .stop_listen = http_stop_listen, .close = http_transport_close, + .cleanup_ho = http_transport_cleanup_ho, .custom_tx = http_app_tx_callback, + .app_rx_evt = http_app_rx_evt_cb, .get_connection = http_transport_get_connection, .get_listener = http_transport_get_listener, + .get_half_open = http_transport_get_ho, .get_transport_endpoint = http_transport_get_endpoint, .format_connection = format_http_transport_connection, .format_listener = format_http_transport_listener, + .format_half_open = format_http_transport_half_open, .transport_options = { .name = "http", .short_name = "H", diff --git a/src/plugins/http/http.h b/src/plugins/http/http.h index 5f74edb5e47..b293c125465 100644 --- a/src/plugins/http/http.h +++ b/src/plugins/http/http.h @@ -59,32 +59,44 @@ typedef struct #define http_token_lit(s) (s), sizeof (s) - 1 +#define foreach_http_conn_state \ + _ (LISTEN, "listen") \ + _ (CONNECTING, "connecting") \ + _ (ESTABLISHED, "established") \ + _ (TRANSPORT_CLOSED, "transport-closed") \ + _ (APP_CLOSED, 
"app-closed") \ + _ (CLOSED, "closed") + typedef enum http_conn_state_ { - HTTP_CONN_STATE_LISTEN, - HTTP_CONN_STATE_CONNECTING, - HTTP_CONN_STATE_ESTABLISHED, - HTTP_CONN_STATE_TRANSPORT_CLOSED, - HTTP_CONN_STATE_APP_CLOSED, - HTTP_CONN_STATE_CLOSED +#define _(s, str) HTTP_CONN_STATE_##s, + foreach_http_conn_state +#undef _ } http_conn_state_t; -typedef enum http_state_ +#define foreach_http_req_state \ + _ (0, IDLE, "idle") \ + _ (1, WAIT_APP_METHOD, "wait app method") \ + _ (2, WAIT_TRANSPORT_REPLY, "wait transport reply") \ + _ (3, TRANSPORT_IO_MORE_DATA, "transport io more data") \ + _ (4, WAIT_TRANSPORT_METHOD, "wait transport method") \ + _ (5, WAIT_APP_REPLY, "wait app reply") \ + _ (6, APP_IO_MORE_DATA, "app io more data") \ + _ (7, TUNNEL, "tunnel") + +typedef enum http_req_state_ { - HTTP_STATE_IDLE = 0, - HTTP_STATE_WAIT_APP_METHOD, - HTTP_STATE_WAIT_CLIENT_METHOD, - HTTP_STATE_WAIT_SERVER_REPLY, - HTTP_STATE_WAIT_APP_REPLY, - HTTP_STATE_CLIENT_IO_MORE_DATA, - HTTP_STATE_APP_IO_MORE_DATA, - HTTP_N_STATES, -} http_state_t; +#define _(n, s, str) HTTP_REQ_STATE_##s = n, + foreach_http_req_state +#undef _ + HTTP_REQ_N_STATES +} http_req_state_t; typedef enum http_req_method_ { HTTP_REQ_GET = 0, HTTP_REQ_POST, + HTTP_REQ_CONNECT, } http_req_method_t; typedef enum http_msg_type_ @@ -388,6 +400,8 @@ typedef struct http_tc_ http_conn_state_t state; u32 timer_handle; + u32 timeout; + u8 pending_timer; u8 *app_name; u8 *host; u8 is_server; @@ -395,7 +409,7 @@ typedef struct http_tc_ /* * Current request */ - http_state_t http_state; + http_req_state_t req_state; http_req_method_t method; u8 *rx_buf; u32 rx_buf_offset; @@ -413,6 +427,7 @@ typedef struct http_tc_ u32 body_offset; u64 body_len; u16 status_code; + u8 is_tunnel; } http_conn_t; typedef struct http_worker_ @@ -424,6 +439,7 @@ typedef struct http_main_ { http_worker_t *wrk; http_conn_t *listener_pool; + http_conn_t *ho_conn_pool; u32 app_index; clib_timebase_t timebase; @@ -444,9 +460,10 @@ typedef struct http_main_ } http_main_t; always_inline int -_validate_target_syntax (u8 *target, int is_query, int *is_encoded) +_validate_target_syntax (u8 *target, u32 len, int is_query, int *is_encoded) { - int i, encoded = 0; + int encoded = 0; + u32 i; static uword valid_chars[4] = { /* !$&'()*+,-./0123456789:;= */ @@ -457,7 +474,7 @@ _validate_target_syntax (u8 *target, int is_query, int *is_encoded) 0x0000000000000000, }; - for (i = 0; i < vec_len (target); i++) + for (i = 0; i < len; i++) { if (clib_bitmap_get_no_check (valid_chars, target[i])) continue; @@ -468,7 +485,7 @@ _validate_target_syntax (u8 *target, int is_query, int *is_encoded) /* pct-encoded = "%" HEXDIG HEXDIG */ if (target[i] == '%') { - if ((i + 2) > vec_len (target)) + if ((i + 2) >= len) return -1; if (!isxdigit (target[i + 1]) || !isxdigit (target[i + 2])) return -1; @@ -487,7 +504,7 @@ _validate_target_syntax (u8 *target, int is_query, int *is_encoded) /** * An "absolute-path" rule validation (RFC9110 section 4.1). * - * @param path Target path to validate. + * @param path Vector of target path to validate. * @param is_encoded Return flag that indicates if percent-encoded (optional). * * @return @c 0 on success. @@ -495,13 +512,13 @@ _validate_target_syntax (u8 *target, int is_query, int *is_encoded) always_inline int http_validate_abs_path_syntax (u8 *path, int *is_encoded) { - return _validate_target_syntax (path, 0, is_encoded); + return _validate_target_syntax (path, vec_len (path), 0, is_encoded); } /** * A "query" rule validation (RFC3986 section 2.1). 
* - * @param query Target query to validate. + * @param query Vector of target query to validate. * @param is_encoded Return flag that indicates if percent-encoded (optional). * * @return @c 0 on success. @@ -509,7 +526,7 @@ http_validate_abs_path_syntax (u8 *path, int *is_encoded) always_inline int http_validate_query_syntax (u8 *query, int *is_encoded) { - return _validate_target_syntax (query, 1, is_encoded); + return _validate_target_syntax (query, vec_len (query), 1, is_encoded); } #define htoi(x) (isdigit (x) ? (x - '0') : (tolower (x) - 'a' + 10)) @@ -518,18 +535,19 @@ http_validate_query_syntax (u8 *query, int *is_encoded) * Decode percent-encoded data. * * @param src Data to decode. + * @param len Length of data to decode. * * @return New vector with decoded data. * * The caller is always responsible to free the returned vector. */ always_inline u8 * -http_percent_decode (u8 *src) +http_percent_decode (u8 *src, u32 len) { - int i; + u32 i; u8 *decoded_uri = 0; - for (i = 0; i < vec_len (src); i++) + for (i = 0; i < len; i++) { if (src[i] == '%') { @@ -974,6 +992,233 @@ http_serialize_authority_form_target (http_uri_t *authority) return s; } +typedef enum http_url_scheme_ +{ + HTTP_URL_SCHEME_HTTP, + HTTP_URL_SCHEME_HTTPS, +} http_url_scheme_t; + +typedef struct +{ + http_url_scheme_t scheme; + u16 port; + u32 host_offset; + u32 host_len; + u32 path_offset; + u32 path_len; + u8 host_is_ip6; +} http_url_t; + +always_inline int +_parse_port (u8 **pos, u8 *end, u16 *port) +{ + u32 value = 0; + u8 *p = *pos; + + if (!isdigit (*p)) + return -1; + value = *p - '0'; + p++; + + while (p != end) + { + if (!isdigit (*p)) + break; + value = value * 10 + *p - '0'; + if (value > CLIB_U16_MAX) + return -1; + p++; + } + *pos = p; + *port = clib_host_to_net_u16 ((u16) value); + return 0; +} + +/** + * An "absolute-form" URL parsing. + * + * @param url Vector of target URL to validate. + * @param parsed Parsed URL metadata in case of success. + * + * @return @c 0 on success. + */ +always_inline int +http_parse_absolute_form (u8 *url, http_url_t *parsed) +{ + u8 *token_start, *token_end, *end; + int is_encoded = 0; + + static uword valid_chars[4] = { + /* -.0123456789 */ + 0x03ff600000000000, + /* ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz */ + 0x07fffffe07fffffe, + 0x0000000000000000, + 0x0000000000000000, + }; + + if (vec_len (url) < 9) + { + clib_warning ("uri too short"); + return -1; + } + + clib_memset (parsed, 0, sizeof (*parsed)); + + end = url + vec_len (url); + + /* parse scheme */ + if (!memcmp (url, "http:// ", 7)) + { + parsed->scheme = HTTP_URL_SCHEME_HTTP; + parsed->port = clib_host_to_net_u16 (80); + parsed->host_offset = 7; + } + else if (!memcmp (url, "https:// ", 8)) + { + parsed->scheme = HTTP_URL_SCHEME_HTTPS; + parsed->port = clib_host_to_net_u16 (443); + parsed->host_offset = 8; + } + else + { + clib_warning ("invalid scheme"); + return -1; + } + token_start = url + parsed->host_offset; + + /* parse host */ + if (*token_start == '[') + /* IPv6 address */ + { + parsed->host_is_ip6 = 1; + parsed->host_offset++; + token_end = ++token_start; + while (1) + { + if (token_end == end) + { + clib_warning ("invalid host, IPv6 addr not terminated with ']'"); + return -1; + } + else if (*token_end == ']') + { + parsed->host_len = token_end - token_start; + token_start = token_end + 1; + break; + } + else if (*token_end != ':' && *token_end != '.' 
&& + !isxdigit (*token_end)) + { + clib_warning ("invalid character '%u'", *token_end); + return -1; + } + token_end++; + } + } + else + { + token_end = token_start; + while (token_end != end && *token_end != ':' && *token_end != '/') + { + if (!clib_bitmap_get_no_check (valid_chars, *token_end)) + { + clib_warning ("invalid character '%u'", *token_end); + return -1; + } + token_end++; + } + parsed->host_len = token_end - token_start; + token_start = token_end; + } + + if (!parsed->host_len) + { + clib_warning ("zero length host"); + return -1; + } + + /* parse port, if any */ + if (token_start != end && *token_start == ':') + { + token_end = ++token_start; + if (_parse_port (&token_end, end, &parsed->port)) + { + clib_warning ("invalid port"); + return -1; + } + token_start = token_end; + } + + if (token_start == end) + return 0; + + token_start++; /* drop leading slash */ + parsed->path_offset = token_start - url; + parsed->path_len = end - token_start; + + if (parsed->path_len) + return _validate_target_syntax (token_start, parsed->path_len, 0, + &is_encoded); + + return 0; +} + +/** + * Parse target host and port of UDP tunnel over HTTP. + * + * @param path Path in format "{target_host}/{target_port}/". + * @param path_len Length of given path. + * @param parsed Parsed target in case of success.. + * + * @return @c 0 on success. + * + * @note Only IPv4 literals and IPv6 literals supported. + */ +always_inline int +http_parse_masque_host_port (u8 *path, u32 path_len, http_uri_t *parsed) +{ + u8 *p, *end, *decoded_host; + u32 host_len; + unformat_input_t input; + + p = path; + end = path + path_len; + clib_memset (parsed, 0, sizeof (*parsed)); + + while (p != end && *p != '/') + p++; + + host_len = p - path; + if (!host_len || (host_len == path_len) || (host_len + 1 == path_len)) + return -1; + decoded_host = http_percent_decode (path, host_len); + unformat_init_vector (&input, decoded_host); + if (unformat (&input, "%U", unformat_ip4_address, &parsed->ip.ip4)) + parsed->is_ip4 = 1; + else if (unformat (&input, "%U", unformat_ip6_address, &parsed->ip.ip6)) + parsed->is_ip4 = 0; + else + { + unformat_free (&input); + clib_warning ("unsupported target_host format"); + return -1; + } + unformat_free (&input); + + p++; + if (_parse_port (&p, end, &parsed->port)) + { + clib_warning ("invalid port"); + return -1; + } + + if (p == end || *p != '/') + return -1; + + return 0; +} + #endif /* SRC_PLUGINS_HTTP_HTTP_H_ */ /* diff --git a/src/plugins/http/http_plugin.rst b/src/plugins/http/http_plugin.rst index 56da3a810b9..f86c796bd83 100644 --- a/src/plugins/http/http_plugin.rst +++ b/src/plugins/http/http_plugin.rst @@ -16,7 +16,8 @@ Usage The plugin exposes following inline functions: ``http_validate_abs_path_syntax``, ``http_validate_query_syntax``, ``http_percent_decode``, ``http_path_remove_dot_segments``, ``http_parse_headers``, ``http_get_header``, -``http_free_header_table``, ``http_add_header``, ``http_serialize_headers``. +``http_free_header_table``, ``http_add_header``, ``http_serialize_headers``, ``http_parse_authority_form_target``, +``http_serialize_authority_form_target``, ``http_parse_absolute_form``, ``http_parse_masque_host_port``. It relies on the hoststack constructs and uses ``http_msg_data_t`` data structure for passing metadata to/from applications. 
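As a quick illustration of the two new URL parsers added by this patch, a CONNECT-style handler could combine them roughly as sketched below. ``http_parse_absolute_form`` and ``http_parse_masque_host_port`` are the inline helpers defined in http.h above; the wrapper function, its prefix handling and its error paths are illustrative assumptions, not code from this patch.

.. code-block:: C

   /* Sketch: parse an absolute-form request target, then extract the UDP
    * tunnel host/port from a MASQUE-style path
    * "/.well-known/masque/udp/{target_host}/{target_port}/". */
   static int
   example_parse_connect_target (u8 *url)
   {
     http_url_t parsed;
     http_uri_t udp_target;
     const u32 prefix_len = sizeof (".well-known/masque/udp/") - 1;

     if (http_parse_absolute_form (url, &parsed))
       return -1; /* invalid absolute-form URL */

     /* host and path come back as offset + length pairs into 'url' */
     if (parsed.path_len <= prefix_len ||
         memcmp (url + parsed.path_offset, ".well-known/masque/udp/",
                 prefix_len))
       return -1; /* not a MASQUE UDP tunnel request */

     if (http_parse_masque_host_port (url + parsed.path_offset + prefix_len,
                                      parsed.path_len - prefix_len,
                                      &udp_target))
       return -1; /* unsupported or malformed target_host/target_port */

     /* udp_target.ip and udp_target.port (network order) are now usable */
     return 0;
   }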
@@ -82,7 +83,7 @@ Example bellow validates "absolute-path" rule, as described in RFC9110 section 4 } if (is_encoded) { - u8 *decoded = http_percent_decode (target_path); + u8 *decoded = http_percent_decode (target_path, vec_len (target_path)); vec_free (target_path); target_path = decoded; } @@ -505,3 +506,32 @@ Now we can start reading body content, following block of code could be executed /* close the session if you don't want to send another request */ /* and update state machine... */ } + +HTTP timeout +^^^^^^^^^^^^ + +HTTP plugin sets session inactivity timeout by default to 60 seconds. +Client and server applications can pass custom timeout value (in seconds) using extended configuration when doing connect or start listening respectively. +You just need to add extended configuration to session endpoint configuration which is part of ``vnet_connect_args_t`` and ``vnet_listen_args_t``. +HTTP plugin use ``opaque`` member of ``transport_endpt_ext_cfg_t``, unsigned 32bit integer seems to be sufficient (allowing the timeout to be set up to 136 years). + +The example below sets HTTP session timeout to 30 seconds (server application): + +.. code-block:: C + + vnet_listen_args_t _a, *a = &_a; + session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; + transport_endpt_ext_cfg_t *ext_cfg; + int rv; + clib_memset (a, 0, sizeof (*a)); + clib_memcpy (&a->sep_ext, &sep, sizeof (sep)); + /* add new extended config entry */ + ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque)); + /* your custom timeout value in seconds */ + ext_cfg->opaque = 30; + /* rest of the settings omitted for brevity */ + rv = vnet_listen (a); + /* don't forget to free extended config */ + session_endpoint_free_ext_cfgs (&a->sep_ext); + /* ... */ diff --git a/src/plugins/http/http_test.c b/src/plugins/http/http_test.c deleted file mode 100644 index 1f2f21dd19a..00000000000 --- a/src/plugins/http/http_test.c +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright(c) 2024 Cisco Systems, Inc. - */ - -#include <http/http.h> - -static clib_error_t * -test_http_authority_command_fn (vlib_main_t *vm, unformat_input_t *input, - vlib_cli_command_t *cmd) -{ - u8 *target = 0; - http_uri_t authority; - int rv; - - if (!unformat (input, "%v", &target)) - return clib_error_return (0, "error: no input provided"); - - rv = http_parse_authority_form_target (target, &authority); - vec_free (target); - if (rv) - return clib_error_return (0, "error: parsing failed"); - - target = http_serialize_authority_form_target (&authority); - vlib_cli_output (vm, "%v", target); - vec_free (target); - - return 0; -} - -VLIB_CLI_COMMAND (test_http_authority_command) = { - .path = "test http authority-form", - .short_help = "test dns authority-form", - .function = test_http_authority_command_fn, -}; diff --git a/src/plugins/http/http_timer.c b/src/plugins/http/http_timer.c index 5ee8efc8551..580f31657a9 100644 --- a/src/plugins/http/http_timer.c +++ b/src/plugins/http/http_timer.c @@ -29,7 +29,15 @@ http_timer_process_expired_cb (u32 *expired_timers) { /* Get session handle. The first bit is the timer id */ hs_handle = expired_timers[i] & 0x7FFFFFFF; - session_send_rpc_evt_to_thread (hs_handle >> 24, twc->cb_fn, + twc->invalidate_cb (hs_handle); + } + for (i = 0; i < vec_len (expired_timers); i++) + { + /* Get session handle. 
The first bit is the timer id */ + hs_handle = expired_timers[i] & 0x7FFFFFFF; + HTTP_DBG (1, "rpc to hc [%u]%x", hs_handle >> 24, + hs_handle & 0x00FFFFFF); + session_send_rpc_evt_to_thread (hs_handle >> 24, twc->rpc_cb, uword_to_pointer (hs_handle, void *)); } } @@ -66,7 +74,8 @@ VLIB_REGISTER_NODE (http_timer_process_node) = { }; void -http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *cb_fn) +http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *rpc_cb, + http_conn_invalidate_timer_fn *invalidate_cb) { http_tw_ctx_t *twc = &http_tw_ctx; vlib_node_t *n; @@ -76,7 +85,8 @@ http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *cb_fn) tw_timer_wheel_init_2t_1w_2048sl (&twc->tw, http_timer_process_expired_cb, 1.0 /* timer interval */, ~0); clib_spinlock_init (&twc->tw_lock); - twc->cb_fn = cb_fn; + twc->rpc_cb = rpc_cb; + twc->invalidate_cb = invalidate_cb; vlib_node_set_state (vm, http_timer_process_node.index, VLIB_NODE_STATE_POLLING); diff --git a/src/plugins/http/http_timer.h b/src/plugins/http/http_timer.h index eec5a4595fe..43d20d004d8 100644 --- a/src/plugins/http/http_timer.h +++ b/src/plugins/http/http_timer.h @@ -19,34 +19,37 @@ #include <http/http.h> #include <vppinfra/tw_timer_2t_1w_2048sl.h> -#define HTTP_CONN_TIMEOUT 60 +#define HTTP_CONN_TIMEOUT 60 +#define HTTP_TIMER_HANDLE_INVALID ((u32) ~0) typedef void (http_conn_timeout_fn) (void *); +typedef void (http_conn_invalidate_timer_fn) (u32 hs_handle); typedef struct http_tw_ctx_ { tw_timer_wheel_2t_1w_2048sl_t tw; clib_spinlock_t tw_lock; - http_conn_timeout_fn *cb_fn; + http_conn_timeout_fn *rpc_cb; + http_conn_invalidate_timer_fn *invalidate_cb; } http_tw_ctx_t; extern http_tw_ctx_t http_tw_ctx; -void http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *cb_fn); +void http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *rpc_cb, + http_conn_invalidate_timer_fn *invalidate_cb); static inline void http_conn_timer_start (http_conn_t *hc) { http_tw_ctx_t *twc = &http_tw_ctx; u32 hs_handle; - u64 timeout; - timeout = HTTP_CONN_TIMEOUT; + ASSERT (hc->timer_handle == HTTP_TIMER_HANDLE_INVALID); hs_handle = hc->c_thread_index << 24 | hc->c_c_index; clib_spinlock_lock (&twc->tw_lock); hc->timer_handle = - tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, timeout); + tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, hc->timeout); clib_spinlock_unlock (&twc->tw_lock); } @@ -55,12 +58,13 @@ http_conn_timer_stop (http_conn_t *hc) { http_tw_ctx_t *twc = &http_tw_ctx; - if (hc->timer_handle == ~0) + hc->pending_timer = 0; + if (hc->timer_handle == HTTP_TIMER_HANDLE_INVALID) return; clib_spinlock_lock (&twc->tw_lock); tw_timer_stop_2t_1w_2048sl (&twc->tw, hc->timer_handle); - hc->timer_handle = ~0; + hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; clib_spinlock_unlock (&twc->tw_lock); } @@ -68,15 +72,17 @@ static inline void http_conn_timer_update (http_conn_t *hc) { http_tw_ctx_t *twc = &http_tw_ctx; - u64 timeout; - - if (hc->timer_handle == ~0) - return; - - timeout = HTTP_CONN_TIMEOUT; + u32 hs_handle; clib_spinlock_lock (&twc->tw_lock); - tw_timer_update_2t_1w_2048sl (&twc->tw, hc->timer_handle, timeout); + if (hc->timer_handle != HTTP_TIMER_HANDLE_INVALID) + tw_timer_update_2t_1w_2048sl (&twc->tw, hc->timer_handle, hc->timeout); + else + { + hs_handle = hc->c_thread_index << 24 | hc->c_c_index; + hc->timer_handle = + tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, hc->timeout); + } clib_spinlock_unlock (&twc->tw_lock); } diff --git a/src/plugins/http/test/http_test.c b/src/plugins/http/test/http_test.c new 
file mode 100644 index 00000000000..d4ac8f46f29 --- /dev/null +++ b/src/plugins/http/test/http_test.c @@ -0,0 +1,360 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2024 Cisco Systems, Inc. + */ + +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> +#include <http/http.h> + +#define HTTP_TEST_I(_cond, _comment, _args...) \ + ({ \ + int _evald = (_cond); \ + if (!(_evald)) \ + { \ + vlib_cli_output (vm, "FAIL:%d: " _comment "\n", __LINE__, ##_args); \ + } \ + else \ + { \ + vlib_cli_output (vm, "PASS:%d: " _comment "\n", __LINE__, ##_args); \ + } \ + _evald; \ + }) + +#define HTTP_TEST(_cond, _comment, _args...) \ + { \ + if (!HTTP_TEST_I (_cond, _comment, ##_args)) \ + { \ + return 1; \ + } \ + } + +static int +http_test_authority_form (vlib_main_t *vm) +{ + u8 *target = 0, *formated_target = 0; + http_uri_t authority; + int rv; + + target = format (0, "10.10.2.45:20"); + rv = http_parse_authority_form_target (target, &authority); + HTTP_TEST ((rv == 0), "'%v' should be valid", target); + formated_target = http_serialize_authority_form_target (&authority); + rv = vec_cmp (target, formated_target); + HTTP_TEST ((rv == 0), "'%v' should match '%v'", target, formated_target); + vec_free (target); + vec_free (formated_target); + + target = format (0, "[dead:beef::1234]:443"); + rv = http_parse_authority_form_target (target, &authority); + HTTP_TEST ((rv == 0), "'%v' should be valid", target); + formated_target = http_serialize_authority_form_target (&authority); + rv = vec_cmp (target, formated_target); + HTTP_TEST ((rv == 0), "'%v' should match '%v'", target, formated_target); + vec_free (target); + vec_free (formated_target); + + target = format (0, "example.com:80"); + rv = http_parse_authority_form_target (target, &authority); + HTTP_TEST ((rv != 0), "'%v' reg-name not supported", target); + vec_free (target); + + target = format (0, "10.10.2.45"); + rv = http_parse_authority_form_target (target, &authority); + HTTP_TEST ((rv != 0), "'%v' should be invalid", target); + vec_free (target); + + target = format (0, "1000.10.2.45:20"); + rv = http_parse_authority_form_target (target, &authority); + HTTP_TEST ((rv != 0), "'%v' should be invalid", target); + vec_free (target); + + target = format (0, "[xyz0::1234]:443"); + rv = http_parse_authority_form_target (target, &authority); + HTTP_TEST ((rv != 0), "'%v' should be invalid", target); + vec_free (target); + + return 0; +} + +static int +http_test_absolute_form (vlib_main_t *vm) +{ + u8 *url = 0; + http_url_t parsed_url; + int rv; + + url = format (0, "https://example.org/.well-known/masque/udp/1.2.3.4/123/"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv == 0), "'%v' should be valid", url); + HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTPS), + "scheme should be https"); + HTTP_TEST ((parsed_url.host_is_ip6 == 0), "host_is_ip6=%u should be 0", + parsed_url.host_is_ip6); + HTTP_TEST ((parsed_url.host_offset == strlen ("https://")), + "host_offset=%u should be %u", parsed_url.host_offset, + strlen ("https://")); + HTTP_TEST ((parsed_url.host_len == strlen ("example.org")), + "host_len=%u should be %u", parsed_url.host_len, + strlen ("example.org")); + HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 443), + "port=%u should be 443", clib_net_to_host_u16 (parsed_url.port)); + HTTP_TEST ((parsed_url.path_offset == strlen ("https://example.org/")), + "path_offset=%u should be %u", parsed_url.path_offset, + strlen ("https://example.org/")); + HTTP_TEST ( + (parsed_url.path_len == strlen 
(".well-known/masque/udp/1.2.3.4/123/")), + "path_len=%u should be %u", parsed_url.path_len, + strlen (".well-known/masque/udp/1.2.3.4/123/")); + vec_free (url); + + url = format (0, "http://vpp-example.org"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv == 0), "'%v' should be valid", url); + HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTP), + "scheme should be http"); + HTTP_TEST ((parsed_url.host_is_ip6 == 0), "host_is_ip6=%u should be 0", + parsed_url.host_is_ip6); + HTTP_TEST ((parsed_url.host_offset == strlen ("http://")), + "host_offset=%u should be %u", parsed_url.host_offset, + strlen ("http://")); + HTTP_TEST ((parsed_url.host_len == strlen ("vpp-example.org")), + "host_len=%u should be %u", parsed_url.host_len, + strlen ("vpp-example.org")); + HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 80), + "port=%u should be 80", clib_net_to_host_u16 (parsed_url.port)); + HTTP_TEST ((parsed_url.path_len == 0), "path_len=%u should be 0", + parsed_url.path_len); + vec_free (url); + + url = format (0, "http://1.2.3.4:8080/abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv == 0), "'%v' should be valid", url); + HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTP), + "scheme should be http"); + HTTP_TEST ((parsed_url.host_is_ip6 == 0), "host_is_ip6=%u should be 0", + parsed_url.host_is_ip6); + HTTP_TEST ((parsed_url.host_offset == strlen ("http://")), + "host_offset=%u should be %u", parsed_url.host_offset, + strlen ("http://")); + HTTP_TEST ((parsed_url.host_len == strlen ("1.2.3.4")), + "host_len=%u should be %u", parsed_url.host_len, + strlen ("1.2.3.4")); + HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 8080), + "port=%u should be 8080", clib_net_to_host_u16 (parsed_url.port)); + HTTP_TEST ((parsed_url.path_offset == strlen ("http://1.2.3.4:8080/")), + "path_offset=%u should be %u", parsed_url.path_offset, + strlen ("http://1.2.3.4:8080/")); + HTTP_TEST ((parsed_url.path_len == strlen ("abcd")), + "path_len=%u should be %u", parsed_url.path_len, strlen ("abcd")); + vec_free (url); + + url = format (0, "https://[dead:beef::1234]/abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv == 0), "'%v' should be valid", url); + HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTPS), + "scheme should be https"); + HTTP_TEST ((parsed_url.host_is_ip6 == 1), "host_is_ip6=%u should be 1", + parsed_url.host_is_ip6); + HTTP_TEST ((parsed_url.host_offset == strlen ("https://[")), + "host_offset=%u should be %u", parsed_url.host_offset, + strlen ("https://[")); + HTTP_TEST ((parsed_url.host_len == strlen ("dead:beef::1234")), + "host_len=%u should be %u", parsed_url.host_len, + strlen ("dead:beef::1234")); + HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 443), + "port=%u should be 443", clib_net_to_host_u16 (parsed_url.port)); + HTTP_TEST ((parsed_url.path_offset == strlen ("https://[dead:beef::1234]/")), + "path_offset=%u should be %u", parsed_url.path_offset, + strlen ("https://[dead:beef::1234]/")); + HTTP_TEST ((parsed_url.path_len == strlen ("abcd")), + "path_len=%u should be %u", parsed_url.path_len, strlen ("abcd")); + vec_free (url); + + url = format (0, "http://[::ffff:192.0.2.128]:8080/"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv == 0), "'%v' should be valid", url); + HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTP), + "scheme should be http"); + HTTP_TEST ((parsed_url.host_is_ip6 == 1), "host_is_ip6=%u should be 1", + parsed_url.host_is_ip6); + HTTP_TEST 
((parsed_url.host_offset == strlen ("http://[")), + "host_offset=%u should be %u", parsed_url.host_offset, + strlen ("http://[")); + HTTP_TEST ((parsed_url.host_len == strlen ("::ffff:192.0.2.128")), + "host_len=%u should be %u", parsed_url.host_len, + strlen ("::ffff:192.0.2.128")); + HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 8080), + "port=%u should be 8080", clib_net_to_host_u16 (parsed_url.port)); + HTTP_TEST ((parsed_url.path_len == 0), "path_len=%u should be 0", + parsed_url.path_len); + vec_free (url); + + url = format (0, "http://[dead:beef::1234/abc"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://[dead|beef::1234]/abc"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http:example.org:8080/abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "htt://example.org:8080/abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http:///abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://example.org:808080/abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://example.org/a%%3Xbcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://example.org/a%%3"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://example.org/a[b]cd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + url = format (0, "http://exa[m]ple.org/abcd"); + rv = http_parse_absolute_form (url, &parsed_url); + HTTP_TEST ((rv != 0), "'%v' should be invalid", url); + vec_free (url); + + return 0; +} + +static int +http_test_parse_masque_host_port (vlib_main_t *vm) +{ + u8 *path = 0; + http_uri_t target; + int rv; + + path = format (0, "10.10.2.45/443/"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv == 0), "'%v' should be valid", path); + HTTP_TEST ((target.is_ip4 == 1), "is_ip4=%d should be 1", target.is_ip4); + HTTP_TEST ((clib_net_to_host_u16 (target.port) == 443), + "port=%u should be 443", clib_net_to_host_u16 (target.port)); + HTTP_TEST ((target.ip.ip4.data[0] == 10 && target.ip.ip4.data[1] == 10 && + target.ip.ip4.data[2] == 2 && target.ip.ip4.data[3] == 45), + "target.ip=%U should be 10.10.2.45", format_ip4_address, + &target.ip.ip4); + vec_free (path); + + path = format (0, "dead%%3Abeef%%3A%%3A1234/80/"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv == 0), "'%v' should be valid", path); + HTTP_TEST ((target.is_ip4 == 0), "is_ip4=%d should be 0", target.is_ip4); + HTTP_TEST ((clib_net_to_host_u16 (target.port) == 80), + "port=%u should be 80", clib_net_to_host_u16 
(target.port)); + HTTP_TEST ((clib_net_to_host_u16 (target.ip.ip6.as_u16[0]) == 0xdead && + clib_net_to_host_u16 (target.ip.ip6.as_u16[1]) == 0xbeef && + target.ip.ip6.as_u16[2] == 0 && target.ip.ip6.as_u16[3] == 0 && + target.ip.ip6.as_u16[4] == 0 && target.ip.ip6.as_u16[5] == 0 && + target.ip.ip6.as_u16[6] == 0 && + clib_net_to_host_u16 (target.ip.ip6.as_u16[7]) == 0x1234), + "target.ip=%U should be dead:beef::1234", format_ip6_address, + &target.ip.ip6); + vec_free (path); + + path = format (0, "example.com/443/"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv != 0), "'%v' reg-name not supported", path); + vec_free (path); + + path = format (0, "10.10.2.45/443443/"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv != 0), "'%v' should be invalid", path); + vec_free (path); + + path = format (0, "/443/"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv != 0), "'%v' should be invalid", path); + vec_free (path); + + path = format (0, "10.10.2.45/"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv != 0), "'%v' should be invalid", path); + vec_free (path); + + path = format (0, "10.10.2.45"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv != 0), "'%v' should be invalid", path); + vec_free (path); + + path = format (0, "10.10.2.45/443"); + rv = http_parse_masque_host_port (path, vec_len (path), &target); + HTTP_TEST ((rv != 0), "'%v' should be invalid", path); + vec_free (path); + + return 0; +} + +static clib_error_t * +test_http_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + int res = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "authority-form")) + res = http_test_authority_form (vm); + else if (unformat (input, "absolute-form")) + res = http_test_absolute_form (vm); + else if (unformat (input, "parse-masque-host-port")) + res = http_test_parse_masque_host_port (vm); + else if (unformat (input, "all")) + { + if ((res = http_test_authority_form (vm))) + goto done; + if ((res = http_test_absolute_form (vm))) + goto done; + if ((res = http_test_parse_masque_host_port (vm))) + goto done; + } + else + break; + } + +done: + if (res) + return clib_error_return (0, "HTTP unit test failed"); + return 0; +} + +VLIB_CLI_COMMAND (test_http_command) = { + .path = "test http", + .short_help = "http unit tests", + .function = test_http_command_fn, +}; + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "HTTP - Unit Test", + .default_disabled = 1, +}; diff --git a/src/plugins/http_static/http_static.api b/src/plugins/http_static/http_static.api index dd4f513a420..60c0369848d 100644 --- a/src/plugins/http_static/http_static.api +++ b/src/plugins/http_static/http_static.api @@ -3,20 +3,21 @@ This file defines static http server control-plane API messages */ -option version = "2.2.0"; +option version = "2.3.0"; /** \brief Configure and enable the static http server @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param fifo_size - size (in bytes) of the session FIFOs @param cache_size_limit - size (in bytes) of the in-memory file data cache + @param max_age - how long a response is considered fresh (in seconds) @param prealloc_fifos - number of preallocated fifos (usually 0) @param private_segment_size - fifo segment size (usually 0) @param 
www_root - html root path @param uri - bind URI, defaults to "tcp://0.0.0.0/80" */ -autoreply define http_static_enable { +autoreply define http_static_enable_v2 { option deprecated; /* Client identifier, set from api_main.my_client_index */ @@ -27,6 +28,7 @@ autoreply define http_static_enable { /* Typical options */ u32 fifo_size; u32 cache_size_limit; + u32 max_age [default=600]; /* Unusual options */ u32 prealloc_fifos; u32 private_segment_size; @@ -43,13 +45,16 @@ autoreply define http_static_enable { @param fifo_size - size (in bytes) of the session FIFOs @param cache_size_limit - size (in bytes) of the in-memory file data cache @param max_age - how long a response is considered fresh (in seconds) + @param keepalive_timeout - timeout during which client connection will stay open (in seconds) @param prealloc_fifos - number of preallocated fifos (usually 0) @param private_segment_size - fifo segment size (usually 0) @param www_root - html root path @param uri - bind URI, defaults to "tcp://0.0.0.0/80" */ -autoreply define http_static_enable_v2 { +autoreply define http_static_enable_v3 { + option deprecated; + /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -59,6 +64,7 @@ autoreply define http_static_enable_v2 { u32 fifo_size; u32 cache_size_limit; u32 max_age [default=600]; + u32 keepalive_timeout [default=60]; /* Unusual options */ u32 prealloc_fifos; u32 private_segment_size; diff --git a/src/plugins/http_static/http_static.c b/src/plugins/http_static/http_static.c index 967b8474af8..464fd27e90d 100644 --- a/src/plugins/http_static/http_static.c +++ b/src/plugins/http_static/http_static.c @@ -66,7 +66,8 @@ hss_register_url_handler (hss_url_handler_fn fp, const char *url, */ static int hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos, - u32 private_segment_size, u8 *www_root, u8 *uri, u32 max_age) + u32 private_segment_size, u8 *www_root, u8 *uri, u32 max_age, + u32 keepalive_timeout) { hss_main_t *hsm = &hss_main; int rv; @@ -78,6 +79,7 @@ hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos, hsm->www_root = format (0, "%s%c", www_root, 0); hsm->uri = format (0, "%s%c", uri, 0); hsm->max_age = max_age; + hsm->keepalive_timeout = keepalive_timeout; if (vec_len (hsm->www_root) < 2) return VNET_API_ERROR_INVALID_VALUE; @@ -104,10 +106,10 @@ hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos, } /* API message handler */ -static void vl_api_http_static_enable_t_handler - (vl_api_http_static_enable_t * mp) +static void +vl_api_http_static_enable_v2_t_handler (vl_api_http_static_enable_v2_t *mp) { - vl_api_http_static_enable_reply_t *rmp; + vl_api_http_static_enable_v2_reply_t *rmp; hss_main_t *hsm = &hss_main; int rv; @@ -117,16 +119,16 @@ static void vl_api_http_static_enable_t_handler rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit), ntohl (mp->prealloc_fifos), ntohl (mp->private_segment_size), mp->www_root, mp->uri, - HSS_DEFAULT_MAX_AGE); + ntohl (mp->max_age), HSS_DEFAULT_KEEPALIVE_TIMEOUT); - REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_REPLY); + REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V2_REPLY); } /* API message handler */ static void -vl_api_http_static_enable_v2_t_handler (vl_api_http_static_enable_v2_t *mp) +vl_api_http_static_enable_v3_t_handler (vl_api_http_static_enable_v3_t *mp) { - vl_api_http_static_enable_v2_reply_t *rmp; + vl_api_http_static_enable_v3_reply_t *rmp; hss_main_t *hsm = &hss_main; int rv; @@ -136,9 +138,9 @@ vl_api_http_static_enable_v2_t_handler 
(vl_api_http_static_enable_v2_t *mp) rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit), ntohl (mp->prealloc_fifos), ntohl (mp->private_segment_size), mp->www_root, mp->uri, - ntohl (mp->max_age)); + ntohl (mp->max_age), ntohl (mp->keepalive_timeout)); - REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V2_REPLY); + REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V3_REPLY); } #include <http_static/http_static.api.c> diff --git a/src/plugins/http_static/http_static.h b/src/plugins/http_static/http_static.h index bee79090d2b..5e0654fae24 100644 --- a/src/plugins/http_static/http_static.h +++ b/src/plugins/http_static/http_static.h @@ -24,6 +24,7 @@ #include <http_static/http_cache.h> #define HSS_DEFAULT_MAX_AGE 600 +#define HSS_DEFAULT_KEEPALIVE_TIMEOUT 60 /** @file http_static.h * Static http server definitions @@ -162,6 +163,8 @@ typedef struct u32 max_age; /** Formatted max_age: "max-age=xyz" */ u8 *max_age_formatted; + /** Timeout during which client connection will stay open */ + u32 keepalive_timeout; /** hash table of file extensions to mime types string indices */ uword *mime_type_indices_by_file_extensions; diff --git a/src/plugins/http_static/http_static_test.c b/src/plugins/http_static/http_static_test.c index f701c8b9ee7..edb016f9e05 100644 --- a/src/plugins/http_static/http_static_test.c +++ b/src/plugins/http_static/http_static_test.c @@ -39,10 +39,10 @@ http_static_test_main_t http_static_test_main; #include <vlibapi/vat_helper_macros.h> static int -api_http_static_enable (vat_main_t * vam) +api_http_static_enable_v2 (vat_main_t *vam) { unformat_input_t *line_input = vam->input; - vl_api_http_static_enable_t *mp; + vl_api_http_static_enable_v2_t *mp; u64 tmp; u8 *www_root = 0; u8 *uri = 0; @@ -50,6 +50,7 @@ api_http_static_enable (vat_main_t * vam) u32 private_segment_size = 0; u32 fifo_size = 8 << 10; u32 cache_size_limit = 1 << 20; + u32 max_age = HSS_DEFAULT_MAX_AGE; int ret; /* Parse args required to build the message */ @@ -89,7 +90,8 @@ api_http_static_enable (vat_main_t * vam) } cache_size_limit = (u32) tmp; } - + else if (unformat (line_input, "max-age %d", &max_age)) + ; else if (unformat (line_input, "uri %s", &uri)) ; else @@ -108,16 +110,15 @@ api_http_static_enable (vat_main_t * vam) if (uri == 0) uri = format (0, "tcp://0.0.0.0/80%c", 0); - - /* Construct the API message */ - M (HTTP_STATIC_ENABLE, mp); + M (HTTP_STATIC_ENABLE_V2, mp); strncpy_s ((char *) mp->www_root, 256, (const char *) www_root, 256); strncpy_s ((char *) mp->uri, 256, (const char *) uri, 256); mp->fifo_size = ntohl (fifo_size); mp->cache_size_limit = ntohl (cache_size_limit); mp->prealloc_fifos = ntohl (prealloc_fifos); mp->private_segment_size = ntohl (private_segment_size); + mp->max_age = ntohl (max_age); /* send it... 
*/ S (mp); @@ -128,10 +129,10 @@ api_http_static_enable (vat_main_t * vam) } static int -api_http_static_enable_v2 (vat_main_t *vam) +api_http_static_enable_v3 (vat_main_t *vam) { unformat_input_t *line_input = vam->input; - vl_api_http_static_enable_v2_t *mp; + vl_api_http_static_enable_v3_t *mp; u64 tmp; u8 *www_root = 0; u8 *uri = 0; @@ -140,6 +141,7 @@ api_http_static_enable_v2 (vat_main_t *vam) u32 fifo_size = 8 << 10; u32 cache_size_limit = 1 << 20; u32 max_age = HSS_DEFAULT_MAX_AGE; + u32 keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT; int ret; /* Parse args required to build the message */ @@ -181,6 +183,9 @@ api_http_static_enable_v2 (vat_main_t *vam) } else if (unformat (line_input, "max-age %d", &max_age)) ; + else if (unformat (line_input, "keepalive-timeout %d", + &keepalive_timeout)) + ; else if (unformat (line_input, "uri %s", &uri)) ; else @@ -200,7 +205,7 @@ api_http_static_enable_v2 (vat_main_t *vam) uri = format (0, "tcp://0.0.0.0/80%c", 0); /* Construct the API message */ - M (HTTP_STATIC_ENABLE_V2, mp); + M (HTTP_STATIC_ENABLE_V3, mp); strncpy_s ((char *) mp->www_root, 256, (const char *) www_root, 256); strncpy_s ((char *) mp->uri, 256, (const char *) uri, 256); mp->fifo_size = ntohl (fifo_size); @@ -208,6 +213,7 @@ api_http_static_enable_v2 (vat_main_t *vam) mp->prealloc_fifos = ntohl (prealloc_fifos); mp->private_segment_size = ntohl (private_segment_size); mp->max_age = ntohl (max_age); + mp->keepalive_timeout = ntohl (keepalive_timeout); /* send it... */ S (mp); diff --git a/src/plugins/http_static/static_server.c b/src/plugins/http_static/static_server.c index 48e71f51629..9cc3f5dd658 100644 --- a/src/plugins/http_static/static_server.c +++ b/src/plugins/http_static/static_server.c @@ -126,6 +126,9 @@ start_send_data (hss_session_t *hs, http_status_code_t status) ASSERT (rv == sizeof (headers)); } + if (!msg.data.body_len) + goto done; + uword data = pointer_to_uword (hs->data); rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (data), (u8 *) &data); ASSERT (rv == sizeof (data)); @@ -747,7 +750,7 @@ hss_attach () hss_main_t *hsm = &hss_main; u64 options[APP_OPTIONS_N_OPTIONS]; vnet_app_attach_args_t _a, *a = &_a; - u32 segment_size = 128 << 20; + u64 segment_size = 128 << 20; clib_memset (a, 0, sizeof (*a)); clib_memset (options, 0, sizeof (options)); @@ -804,6 +807,7 @@ hss_listen (void) vnet_listen_args_t _a, *a = &_a; char *uri = "tcp://0.0.0.0/80"; u8 need_crypto; + transport_endpt_ext_cfg_t *ext_cfg; int rv; clib_memset (a, 0, sizeof (*a)); @@ -820,17 +824,21 @@ hss_listen (void) sep.transport_proto = TRANSPORT_PROTO_HTTP; clib_memcpy (&a->sep_ext, &sep, sizeof (sep)); + ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque)); + ext_cfg->opaque = hsm->keepalive_timeout; + if (need_crypto) { - session_endpoint_alloc_ext_cfg (&a->sep_ext, - TRANSPORT_ENDPT_EXT_CFG_CRYPTO); - a->sep_ext.ext_cfg->crypto.ckpair_index = hsm->ckpair_index; + ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = hsm->ckpair_index; } rv = vnet_listen (a); - if (need_crypto) - clib_mem_free (a->sep_ext.ext_cfg); + session_endpoint_free_ext_cfgs (&a->sep_ext); return rv; } @@ -897,6 +905,7 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input, hsm->fifo_size = 0; hsm->cache_size = 10 << 20; hsm->max_age = HSS_DEFAULT_MAX_AGE; + hsm->keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -921,6 +930,9 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input, ; else if (unformat (line_input, "debug %d", &hsm->debug_level)) ; + else if (unformat (line_input, "keepalive-timeout %d", + &hsm->keepalive_timeout)) + ; else if (unformat (line_input, "debug")) hsm->debug_level = 1; else if (unformat (line_input, "ptr-thresh %U", unformat_memory_size, @@ -984,14 +996,16 @@ done: * http static server www-root /tmp/www uri tcp://0.0.0.0/80 cache-size 2m * @cliend * @cliexcmd{http static server www-root <path> [prealloc-fios <nn>] - * [private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>]} + * [private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>] + * [keepalive-timeout <nn>]} ?*/ VLIB_CLI_COMMAND (hss_create_command, static) = { .path = "http static server", .short_help = "http static server www-root <path> [prealloc-fifos <nn>]\n" "[private-segment-size <nnMG>] [fifo-size <nbytes>] [max-age <nseconds>]\n" - "[uri <uri>] [ptr-thresh <nn>] [url-handlers] [debug [nn]]\n", + "[uri <uri>] [ptr-thresh <nn>] [url-handlers] [debug [nn]]\n" + "[keepalive-timeout <nn>]\n", .function = hss_create_command_fn, }; diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c index 51853d619e6..f8d894a013a 100644 --- a/src/plugins/map/ip6_map_t.c +++ b/src/plugins/map/ip6_map_t.c @@ -151,9 +151,8 @@ ip6_map_t_icmp (vlib_main_t * vm, vnet_buffer (p0)->map_t.map_domain_index); ctx0.d = d0; ctx0.sender_port = 0; - if (!ip6_get_port - (vm, p0, ip60, p0->current_length, NULL, &ctx0.sender_port, - NULL, NULL, NULL, NULL)) + if (!ip6_get_port (vm, p0, ip60, p0->current_length, NULL, + &ctx0.sender_port, NULL, NULL, NULL, NULL, NULL)) { // In case of 1:1 mapping, we don't care about the port if (!(d0->ea_bits_len == 0 && d0->rules)) diff --git a/src/plugins/mss_clamp/mss_clamp.c b/src/plugins/mss_clamp/mss_clamp.c index cdac5456641..f1c435a347b 100644 --- a/src/plugins/mss_clamp/mss_clamp.c +++ b/src/plugins/mss_clamp/mss_clamp.c @@ -46,7 +46,7 @@ mssc_enable_disable_feat (u32 sw_if_index, u8 dir4, u8 dir6, int enable) sw_if_index, enable, 0, 0); } -int +__clib_export int mssc_enable_disable (u32 sw_if_index, u8 dir4, u8 dir6, u16 mss4, u16 mss6) { mssc_main_t *cm = &mssc_main; @@ -81,7 +81,7 @@ mssc_enable_disable (u32 sw_if_index, u8 dir4, u8 dir6, u16 mss4, u16 mss6) return rv; } -int +__clib_export int mssc_get_mss (u32 sw_if_index, u8 *dir4, u8 *dir6, u16 *mss4, u16 *mss6) { mssc_main_t *cm = &mssc_main; diff --git a/src/plugins/quic/quic.c b/src/plugins/quic/quic.c index 3f7a3426069..3797cd2b4ea 100644 --- a/src/plugins/quic/quic.c +++ b/src/plugins/quic/quic.c @@ -1332,14 +1332,16 @@ quic_connect_connection (session_endpoint_cfg_t * sep) quic_ctx_t *ctx; app_worker_t *app_wrk; application_t *app; + transport_endpt_ext_cfg_t *ext_cfg; int error; - if (!sep->ext_cfg) + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + if (!ext_cfg) return SESSION_E_NOEXTCFG; /* Use pool on thread 1 if we have workers because of UDP */ thread_index = transport_cl_thread (); - ccfg = &sep->ext_cfg->crypto; + ccfg = &ext_cfg->crypto; clib_memset (cargs, 0, sizeof (*cargs)); ctx_index = quic_ctx_alloc (thread_index); @@ -1475,13 +1477,15 @@ quic_start_listen (u32 quic_listen_session_index, quic_ctx_t *lctx; u32 lctx_index; app_listener_t *app_listener; + transport_endpt_ext_cfg_t *ext_cfg; int rv; sep = (session_endpoint_cfg_t *) tep; - if (!sep->ext_cfg) + ext_cfg = 
session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + if (!ext_cfg) return SESSION_E_NOEXTCFG; - ccfg = &sep->ext_cfg->crypto; + ccfg = &ext_cfg->crypto; app_wrk = app_worker_get (sep->app_wrk_index); app = application_get (app_wrk->app_index); QUIC_DBG (2, "Called quic_start_listen for app %d", app_wrk->app_index); diff --git a/src/plugins/snort/daq_vpp.c b/src/plugins/snort/daq_vpp.c index 386092a0382..6fc0bf5506a 100644 --- a/src/plugins/snort/daq_vpp.c +++ b/src/plugins/snort/daq_vpp.c @@ -10,6 +10,7 @@ #include <sys/socket.h> #include <sys/un.h> #include <sys/mman.h> +#include <sys/time.h> #include <errno.h> #include <sys/epoll.h> @@ -521,6 +522,7 @@ vpp_daq_msg_receive_one (VPP_Context_t *vc, VPPQueuePair *qp, { uint32_t n_recv, n_left; uint32_t head, next, mask = qp->queue_size - 1; + struct timeval tv; if (max_recv == 0) return 0; @@ -535,11 +537,14 @@ vpp_daq_msg_receive_one (VPP_Context_t *vc, VPPQueuePair *qp, n_left = n_recv = max_recv; } + gettimeofday (&tv, NULL); while (n_left--) { uint32_t desc_index = qp->enq_ring[next & mask]; daq_vpp_desc_t *d = qp->descs + desc_index; VPPDescData *dd = qp->desc_data + desc_index; + dd->pkthdr.ts.tv_sec = tv.tv_sec; + dd->pkthdr.ts.tv_usec = tv.tv_usec; dd->pkthdr.pktlen = d->length; dd->pkthdr.address_space_id = d->address_space_id; dd->msg.data = vc->bpools[d->buffer_pool].base + d->offset; diff --git a/src/plugins/snort/snort_api.c b/src/plugins/snort/snort_api.c index 334a84b4341..adad0d8763f 100644 --- a/src/plugins/snort/snort_api.c +++ b/src/plugins/snort/snort_api.c @@ -80,10 +80,16 @@ vl_api_snort_interface_attach_t_handler (vl_api_snort_interface_attach_t *mp) u8 snort_dir = mp->snort_dir; int rv = VNET_API_ERROR_NO_SUCH_ENTRY; - instance = snort_get_instance_by_index (instance_index); - if (instance) - rv = snort_interface_enable_disable ( - vm, (char *) instance->name, sw_if_index, 1 /* is_enable */, snort_dir); + if (sw_if_index == INDEX_INVALID) + rv = VNET_API_ERROR_NO_MATCHING_INTERFACE; + else + { + instance = snort_get_instance_by_index (instance_index); + if (instance) + rv = snort_interface_enable_disable (vm, (char *) instance->name, + sw_if_index, 1 /* is_enable */, + snort_dir); + } REPLY_MACRO (VL_API_SNORT_INTERFACE_ATTACH_REPLY); } @@ -346,10 +352,11 @@ vl_api_snort_interface_detach_t_handler (vl_api_snort_interface_detach_t *mp) vlib_main_t *vm = vlib_get_main (); vl_api_snort_interface_detach_reply_t *rmp; u32 sw_if_index = clib_net_to_host_u32 (mp->sw_if_index); - int rv; + int rv = VNET_API_ERROR_NO_MATCHING_INTERFACE; - rv = snort_interface_enable_disable (vm, NULL, sw_if_index, - 0 /* is_enable */, 0); + if (sw_if_index != INDEX_INVALID) + rv = snort_interface_enable_disable (vm, NULL, sw_if_index, + 0 /* is_enable */, SNORT_INOUT); REPLY_MACRO (VL_API_SNORT_INTERFACE_DETACH_REPLY); } diff --git a/src/plugins/srtp/srtp.c b/src/plugins/srtp/srtp.c index bb54e672918..6862301d2d2 100644 --- a/src/plugins/srtp/srtp.c +++ b/src/plugins/srtp/srtp.c @@ -641,10 +641,12 @@ srtp_connect (transport_endpoint_cfg_t *tep) application_t *app; srtp_tc_t *ctx; u32 ctx_index; + transport_endpt_ext_cfg_t *ext_cfg; int rv; sep = (session_endpoint_cfg_t *) tep; - if (!sep->ext_cfg) + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_NONE); + if (!ext_cfg) return SESSION_E_NOEXTCFG; app_wrk = app_worker_get (sep->app_wrk_index); @@ -658,7 +660,7 @@ srtp_connect (transport_endpoint_cfg_t *tep) ctx->srtp_ctx_handle = ctx_index; ctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; - srtp_init_policy 
(ctx, (transport_endpt_cfg_srtp_t *) sep->ext_cfg->data); + srtp_init_policy (ctx, (transport_endpt_cfg_srtp_t *) ext_cfg->data); clib_memcpy_fast (&cargs->sep, sep, sizeof (session_endpoint_t)); cargs->sep.transport_proto = TRANSPORT_PROTO_UDP; @@ -723,9 +725,11 @@ srtp_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) app_listener_t *al; srtp_tc_t *lctx; u32 lctx_index; + transport_endpt_ext_cfg_t *ext_cfg; sep = (session_endpoint_cfg_t *) tep; - if (!sep->ext_cfg) + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_NONE); + if (!ext_cfg) return SESSION_E_NOEXTCFG; app_wrk = app_worker_get (sep->app_wrk_index); @@ -756,7 +760,7 @@ srtp_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) lctx->c_s_index = app_listener_index; lctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; - srtp_init_policy (lctx, (transport_endpt_cfg_srtp_t *) sep->ext_cfg->data); + srtp_init_policy (lctx, (transport_endpt_cfg_srtp_t *) ext_cfg->data); SRTP_DBG (1, "Started listening %d", lctx_index); return lctx_index; diff --git a/src/plugins/tracenode/node.c b/src/plugins/tracenode/node.c index 444d93f1708..c56df589826 100644 --- a/src/plugins/tracenode/node.c +++ b/src/plugins/tracenode/node.c @@ -55,23 +55,19 @@ tracenode_inline (vlib_main_t *vm, vlib_node_runtime_t *node, /* enqueue b0 to the current next frame */ vnet_feature_next_u16 (next, b[0]); - /* buffer already traced */ - if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) - goto skip; - if (is_pcap && vnet_is_packet_pcaped (pp, b[0], ~0)) { pcap_add_buffer (&pp->pcap_main, vm, from0[0], pp->max_bytes_per_pkt); } - else if (!is_pcap && vlib_trace_buffer (vm, node, next[0], b[0], - 1 /* follow_chain */)) + else if (!is_pcap && !(b[0]->flags & VLIB_BUFFER_IS_TRACED) && + vlib_trace_buffer (vm, node, next[0], b[0], + 1 /* follow_chain */)) { tracenode_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof *tr); tr->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; } - skip: b++; from0++; next++; diff --git a/src/plugins/unittest/session_test.c b/src/plugins/unittest/session_test.c index f01e661157c..7702e817070 100644 --- a/src/plugins/unittest/session_test.c +++ b/src/plugins/unittest/session_test.c @@ -825,6 +825,8 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) session_test_enable_rule_table_engine (vm); session_table_init (st, FIB_PROTOCOL_MAX); + vec_add1 (st->appns_index, + app_namespace_index (app_namespace_get_default ())); session_rules_table_init (st, FIB_PROTOCOL_MAX); ip4_address_t lcl_ip = { @@ -2238,6 +2240,8 @@ session_test_sdl (vlib_main_t *vm, unformat_input_t *input) session_test_enable_sdl_engine (vm); session_table_init (st, FIB_PROTOCOL_MAX); + vec_add1 (st->appns_index, + app_namespace_index (app_namespace_get_default ())); session_rules_table_init (st, FIB_PROTOCOL_MAX); /* Add 1.2.0.0/16 */ @@ -2389,6 +2393,50 @@ session_test_sdl (vlib_main_t *vm, unformat_input_t *input) return 0; } +static int +session_test_ext_cfg (vlib_main_t *vm, unformat_input_t *input) +{ + session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; + transport_endpt_ext_cfg_t *ext_cfg; + + ext_cfg = session_endpoint_add_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_HTTP, + sizeof (ext_cfg->opaque)); + ext_cfg->opaque = 60; + + ext_cfg = + session_endpoint_add_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = 1; + + ext_cfg = session_endpoint_add_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_NONE, + sizeof 
(ext_cfg->opaque)); + ext_cfg->opaque = 345; + + ext_cfg = session_endpoint_get_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_HTTP); + SESSION_TEST ((ext_cfg != 0), + "TRANSPORT_ENDPT_EXT_CFG_HTTP should be present"); + SESSION_TEST ((ext_cfg->opaque == 60), + "TRANSPORT_ENDPT_EXT_CFG_HTTP opaque value should be 60: %u", + ext_cfg->opaque); + ext_cfg = + session_endpoint_get_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + SESSION_TEST ((ext_cfg != 0), + "TRANSPORT_ENDPT_EXT_CFG_CRYPTO should be present"); + SESSION_TEST ( + (ext_cfg->crypto.ckpair_index == 1), + "TRANSPORT_ENDPT_EXT_CFG_HTTP ckpair_index value should be 1: %u", + ext_cfg->crypto.ckpair_index); + ext_cfg = session_endpoint_get_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_NONE); + SESSION_TEST ((ext_cfg != 0), + "TRANSPORT_ENDPT_EXT_CFG_NONE should be present"); + SESSION_TEST ((ext_cfg->opaque == 345), + "TRANSPORT_ENDPT_EXT_CFG_HTTP opaque value should be 345: %u", + ext_cfg->opaque); + session_endpoint_free_ext_cfgs (&sep); + + return 0; +} + static clib_error_t * session_test (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd_arg) @@ -2419,6 +2467,8 @@ session_test (vlib_main_t * vm, res = session_test_enable_disable (vm, input); else if (unformat (input, "sdl")) res = session_test_sdl (vm, input); + else if (unformat (input, "ext-cfg")) + res = session_test_ext_cfg (vm, input); else if (unformat (input, "all")) { if ((res = session_test_basic (vm, input))) @@ -2439,6 +2489,8 @@ session_test (vlib_main_t * vm, goto done; if ((res = session_test_sdl (vm, input))) goto done; + if ((res = session_test_ext_cfg (vm, input))) + goto done; if ((res = session_test_enable_disable (vm, input))) goto done; } diff --git a/src/svm/message_queue.c b/src/svm/message_queue.c index ab0d230b1f0..f2856532454 100644 --- a/src/svm/message_queue.c +++ b/src/svm/message_queue.c @@ -18,7 +18,7 @@ #include <vppinfra/format.h> #include <vppinfra/time.h> #include <sys/eventfd.h> -#include <sys/socket.h> +#include <poll.h> static inline svm_msg_q_ring_t * svm_msg_q_ring_inline (svm_msg_q_t * mq, u32 ring_index) @@ -629,25 +629,29 @@ svm_msg_q_timedwait (svm_msg_q_t *mq, double timeout) } else { - struct timeval tv; + struct pollfd fds = { .fd = mq->q.evtfd, .events = POLLIN }; u64 buf; int rv; - tv.tv_sec = (u64) timeout; - tv.tv_usec = ((u64) timeout - (u64) timeout) * 1e9; - rv = setsockopt (mq->q.evtfd, SOL_SOCKET, SO_RCVTIMEO, - (const char *) &tv, sizeof tv); + rv = poll (&fds, 1, timeout * 1e3 /* ms */); if (rv < 0) { - clib_unix_warning ("setsockopt"); + clib_unix_warning ("poll"); return -1; } + else if (rv == 0) + { + /* timeout occured */ + return 0; + } rv = read (mq->q.evtfd, &buf, sizeof (buf)); - if (rv < 0) - clib_warning ("read %u", errno); - - return rv < 0 ? errno : 0; + if (rv < 0 && errno != EAGAIN) + { + clib_warning ("read %u", errno); + return -2; + } + return 0; } } diff --git a/src/vcl/ldp.c b/src/vcl/ldp.c index bd3457fa8fd..4510bf85e1b 100644 --- a/src/vcl/ldp.c +++ b/src/vcl/ldp.c @@ -71,6 +71,7 @@ /* from <linux/netfilter_ipv4.h> */ #define SO_ORIGINAL_DST 80 #endif + typedef struct ldp_worker_ctx_ { u8 *io_buffer; @@ -102,7 +103,6 @@ typedef struct ldp_worker_ctx_ u8 epoll_wait_vcl; u8 mq_epfd_added; int vcl_mq_epfd; - } ldp_worker_ctx_t; /* clib_bitmap_t, fd_mask and vcl_si_set are used interchangeably. 
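SO_RCVTIMEO is a socket option and cannot be set on an eventfd, which is why the message_queue.c hunk above waits with poll() instead and treats poll() returning 0 as a timeout before attempting the read. A self-contained sketch of the same wait on a bare eventfd; evtfd_timedwait() is a hypothetical helper, not the svm_msg_q API:

#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <unistd.h>

/* Wait up to timeout_ms for the eventfd to be signalled.
 * Returns 1 when an event was consumed, 0 on timeout, -1 on error. */
static int
evtfd_timedwait (int fd, int timeout_ms)
{
  struct pollfd pfd = { .fd = fd, .events = POLLIN };
  uint64_t counter;
  int rv;

  rv = poll (&pfd, 1, timeout_ms);
  if (rv < 0)
    return -1;		/* poll error */
  if (rv == 0)
    return 0;		/* timed out, nothing to read */
  if (read (fd, &counter, sizeof (counter)) < 0)
    return -1;
  return 1;		/* event counter consumed */
}

int
main (void)
{
  int fd = eventfd (0, 0);

  if (fd < 0)
    return 1;
  /* nobody writes to the eventfd, so this times out after 100 ms */
  printf ("wait returned %d\n", evtfd_timedwait (fd, 100));
  close (fd);
  return 0;
}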
Make sure @@ -674,6 +674,8 @@ ldp_select_init_maps (fd_set * __restrict original, vlsh = ldp_fd_to_vlsh (fd); if (vlsh == VLS_INVALID_HANDLE) clib_bitmap_set_no_check (*libcb, fd, 1); + else if (vlsh_to_worker_index (vlsh) != vppcom_worker_index ()) + clib_warning ("migration currently not supported"); else *vclb = clib_bitmap_set (*vclb, vlsh_to_session_index (vlsh), 1); } @@ -731,10 +733,10 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, const __sigset_t * __restrict sigmask) { u32 minbits = clib_max (nfds, BITS (uword)), n_bytes; - ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); struct timespec libc_tspec = { 0 }; f64 time_out, vcl_timeout = 0; uword si_bits, libc_bits; + ldp_worker_ctx_t *ldpw; int rv, bits_set = 0; if (nfds < 0) @@ -743,6 +745,11 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, return -1; } + if (PREDICT_FALSE (vppcom_worker_index () == ~0)) + vls_register_vcl_worker (); + + ldpw = ldp_worker_get_current (); + if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) clib_time_init (&ldpw->clib_time); diff --git a/src/vcl/vcl_locked.c b/src/vcl/vcl_locked.c index 93ece0027ff..24cc598694a 100644 --- a/src/vcl/vcl_locked.c +++ b/src/vcl/vcl_locked.c @@ -559,6 +559,22 @@ vlsh_to_session_index (vls_handle_t vlsh) return vppcom_session_index (sh); } +int +vlsh_to_worker_index (vls_handle_t vlsh) +{ + vcl_locked_session_t *vls; + u32 wrk_index; + + vls = vls_get_w_dlock (vlsh); + if (!vls) + wrk_index = INVALID_SESSION_ID; + else + wrk_index = vls->vcl_wrk_index; + vls_dunlock (vls); + + return wrk_index; +} + vls_handle_t vls_si_wi_to_vlsh (u32 session_index, u32 vcl_wrk_index) { @@ -1799,7 +1815,7 @@ vls_app_fork_child_handler (void) vls_worker_alloc (); /* Reset number of threads and set wrk index */ - vlsl->vls_mt_n_threads = 0; + vlsl->vls_mt_n_threads = 1; vlsl->vls_wrk_index = vcl_get_worker_index (); vlsl->select_mp_check = 0; clib_rwlock_init (&vlsl->vls_pool_lock); @@ -1983,9 +1999,11 @@ vls_app_create (char *app_name) atexit (vls_app_exit); vls_worker_alloc (); vlsl->vls_wrk_index = vcl_get_worker_index (); + vlsl->vls_mt_n_threads = 1; clib_rwlock_init (&vlsl->vls_pool_lock); vls_mt_locks_init (); vcm->wrk_rpc_fn = vls_rpc_handler; + return VPPCOM_OK; } diff --git a/src/vcl/vcl_locked.h b/src/vcl/vcl_locked.h index fa3a2735eb7..3d04a36d5c3 100644 --- a/src/vcl/vcl_locked.h +++ b/src/vcl/vcl_locked.h @@ -50,6 +50,7 @@ int vls_select (int n_bits, vcl_si_set * read_map, vcl_si_set * write_map, vcl_si_set * except_map, double wait_for_time); vcl_session_handle_t vlsh_to_sh (vls_handle_t vlsh); vcl_session_handle_t vlsh_to_session_index (vls_handle_t vlsh); +int vlsh_to_worker_index (vls_handle_t vlsh); vls_handle_t vls_session_index_to_vlsh (uint32_t session_index); int vls_app_create (char *app_name); unsigned char vls_use_eventfd (void); diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h index b89052f96af..0f1a6d24516 100644 --- a/src/vcl/vcl_private.h +++ b/src/vcl/vcl_private.h @@ -181,8 +181,7 @@ typedef struct vcl_session_ elog_track_t elog_track; #endif - u16 original_dst_port; /**< original dst port (network order) */ - u32 original_dst_ip4; /**< original dst ip4 (network order) */ + transport_endpt_attr_t *tep_attrs; /**< vector of attributes */ } vcl_session_t; typedef struct vppcom_cfg_t_ @@ -409,6 +408,7 @@ vcl_session_free (vcl_worker_t * wrk, vcl_session_t * s) vcl_session_detach_fifos (s); if (s->ext_config) clib_mem_free (s->ext_config); + vec_free (s->tep_attrs); pool_put (wrk->sessions, s); } @@ -664,6 +664,18 @@ 
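The ldp_pselect() hunk above registers the calling thread as a VCL worker on first use instead of assuming registration already happened, so a thread that enters through pselect() before any other LDP call still gets a worker. The lazy per-thread initialization it relies on can be sketched with a thread-local index; the names below are illustrative, not the VCL/VLS ones:

#include <stdio.h>

#define WORKER_INVALID (-1)

static __thread int thread_worker_index = WORKER_INVALID;
static int next_worker_index;	/* toy allocator, not thread safe */

static int
register_worker (void)
{
  return next_worker_index++;
}

/* Return this thread's worker index, registering lazily on first call. */
static int
ensure_worker (void)
{
  if (thread_worker_index == WORKER_INVALID)
    thread_worker_index = register_worker ();
  return thread_worker_index;
}

int
main (void)
{
  printf ("worker index %d\n", ensure_worker ());
  return 0;
}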
vcl_session_clear_attr (vcl_session_t * s, u8 attr) s->attributes &= ~(1 << attr); } +static inline transport_endpt_attr_t * +vcl_session_tep_attr_get (vcl_session_t *s, transport_endpt_attr_type_t at) +{ + transport_endpt_attr_t *tepa; + vec_foreach (tepa, s->tep_attrs) + { + if (tepa->type == at) + return tepa; + } + return 0; +} + static inline session_evt_type_t vcl_session_dgram_tx_evt (vcl_session_t *s, session_evt_type_t et) { diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index 2ebb1d8e5b6..4568ac618d5 100644 --- a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -351,10 +351,16 @@ vcl_session_accepted_handler (vcl_worker_t * wrk, session_accepted_msg_t * mp, session->vpp_handle = mp->handle; session->session_state = VCL_STATE_READY; - if (mp->rmt.is_ip4) + if (mp->rmt.is_ip4 && mp->original_dst_port) { - session->original_dst_ip4 = mp->original_dst_ip4; - session->original_dst_port = mp->original_dst_port; + transport_endpt_attr_t *tep_attr; + vec_add2 (session->tep_attrs, tep_attr, 1); + /* Expecting to receive this on accepted connections + * and the external transport endpoint received is + * the local one, prior to something like nat */ + tep_attr->type = TRANSPORT_ENDPT_ATTR_EXT_ENDPT; + tep_attr->ext_endpt.port = mp->original_dst_port; + tep_attr->ext_endpt.ip.ip4.as_u32 = mp->original_dst_ip4; } session->transport.rmt_port = mp->rmt.port; session->transport.is_ip4 = mp->rmt.is_ip4; @@ -989,6 +995,24 @@ vcl_worker_rpc_handler (vcl_worker_t * wrk, void *data) } static void +vcl_session_transport_attr_handler (vcl_worker_t *wrk, void *data) +{ + session_transport_attr_msg_t *mp = (session_transport_attr_msg_t *) data; + vcl_session_t *s; + + s = vcl_session_get_w_vpp_handle (wrk, mp->handle); + if (!s) + { + VDBG (0, "session transport attr with wrong handle %llx", mp->handle); + return; + } + + VDBG (0, "session %u [0x%llx]: transport attr %u", s->session_index, + s->vpp_handle, mp->attr.type); + vec_add1 (s->tep_attrs, mp->attr); +} + +static void vcl_session_transport_attr_reply_handler (vcl_worker_t *wrk, void *data) { session_transport_attr_reply_msg_t *mp; @@ -1129,6 +1153,9 @@ vcl_handle_mq_event (vcl_worker_t * wrk, session_event_t * e) case SESSION_CTRL_EVT_APP_WRK_RPC: vcl_worker_rpc_handler (wrk, e->data); break; + case SESSION_CTRL_EVT_TRANSPORT_ATTR: + vcl_session_transport_attr_handler (wrk, e->data); + break; case SESSION_CTRL_EVT_TRANSPORT_ATTR_REPLY: vcl_session_transport_attr_reply_handler (wrk, e->data); break; @@ -2607,6 +2634,9 @@ vcl_select_handle_mq_event (vcl_worker_t * wrk, session_event_t * e, case SESSION_CTRL_EVT_APP_WRK_RPC: vcl_worker_rpc_handler (wrk, e->data); break; + case SESSION_CTRL_EVT_TRANSPORT_ATTR: + vcl_session_transport_attr_handler (wrk, e->data); + break; default: clib_warning ("unhandled: %u", e->event_type); break; @@ -3382,6 +3412,9 @@ vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e, case SESSION_CTRL_EVT_APP_WRK_RPC: vcl_worker_rpc_handler (wrk, e->data); break; + case SESSION_CTRL_EVT_TRANSPORT_ATTR: + vcl_session_transport_attr_handler (wrk, e->data); + break; default: VDBG (0, "unhandled: %u", e->event_type); break; @@ -3672,7 +3705,7 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op, vcl_worker_t *wrk = vcl_worker_get_current (); u32 *flags = buffer; vppcom_endpt_t *ep = buffer; - transport_endpt_attr_t tea; + transport_endpt_attr_t tea, *tepap; vcl_session_t *session; int rv = VPPCOM_OK; @@ -3795,24 +3828,49 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op, rv = VPPCOM_EAFNOSUPPORT; 
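vcl_session_tep_attr_get() above is a first-match lookup over the session's vector of typed transport endpoint attributes. Stripped of the vppinfra vec machinery, the same idea over a plain array looks like this; attr_type_t and attr_t are stand-ins, not the real transport_endpt_attr_t:

#include <stddef.h>

typedef enum
{
  ATTR_NONE = 0,
  ATTR_EXT_ENDPT,
  ATTR_MSS,
} attr_type_t;

typedef struct
{
  attr_type_t type;
  unsigned int value;
} attr_t;

/* Return the first attribute of the requested type, or NULL if absent. */
static attr_t *
attr_get (attr_t *attrs, size_t n_attrs, attr_type_t type)
{
  for (size_t i = 0; i < n_attrs; i++)
    if (attrs[i].type == type)
      return &attrs[i];
  return NULL;
}

Storing attributes as an append-only typed list keeps rarely used data, such as the pre-NAT destination, out of the fixed part of the session structure.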
break; } - if (PREDICT_TRUE (buffer && buflen && (*buflen >= sizeof (*ep)) && - ep->ip)) + if (PREDICT_FALSE (!buffer || !buflen || (*buflen < sizeof (*ep)) || + !ep->ip)) { - ep->is_ip4 = session->transport.is_ip4; - ep->port = session->original_dst_port; - clib_memcpy_fast (ep->ip, &session->original_dst_ip4, - sizeof (ip4_address_t)); - *buflen = sizeof (*ep); - VDBG (1, - "VPPCOM_ATTR_GET_ORIGINAL_DST: sh %u, is_ip4 = %u, addr = %U" - " port %d", - session_handle, ep->is_ip4, vcl_format_ip4_address, - (ip4_address_t *) (&session->original_dst_ip4), - ep->is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6, - clib_net_to_host_u16 (ep->port)); + rv = VPPCOM_EINVAL; + break; } - else - rv = VPPCOM_EINVAL; + + tepap = + vcl_session_tep_attr_get (session, TRANSPORT_ENDPT_ATTR_EXT_ENDPT); + if (!tepap) + { + rv = VPPCOM_EINVAL; + break; + } + vcl_ip_copy_to_ep (&tepap->ext_endpt.ip, ep, tepap->ext_endpt.is_ip4); + ep->port = tepap->ext_endpt.port; + *buflen = sizeof (*ep); + + VDBG (1, + "VPPCOM_ATTR_GET_ORIGINAL_DST: sh %u, is_ip4 = %u, " + "addr = %U port %d", + session_handle, ep->is_ip4, vcl_format_ip4_address, + (ip4_address_t *) ep->ip, + ep->is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6, + clib_net_to_host_u16 (ep->port)); + break; + + case VPPCOM_ATTR_GET_EXT_ENDPT: + if (PREDICT_FALSE (!buffer || !buflen || (*buflen < sizeof (*ep)) || + !ep->ip)) + { + rv = VPPCOM_EINVAL; + break; + } + tepap = + vcl_session_tep_attr_get (session, TRANSPORT_ENDPT_ATTR_EXT_ENDPT); + if (!tepap) + { + rv = VPPCOM_EINVAL; + break; + } + vcl_ip_copy_to_ep (&tepap->ext_endpt.ip, ep, tepap->ext_endpt.is_ip4); + ep->port = tepap->ext_endpt.port; break; case VPPCOM_ATTR_SET_LCL_ADDR: @@ -3897,12 +3955,11 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op, case VPPCOM_ATTR_GET_TX_FIFO_LEN: if (buffer && buflen && (*buflen >= sizeof (u32))) { - /* VPP-TBD */ - *(size_t *) buffer = (session->sndbuf_size ? session->sndbuf_size : - session->tx_fifo ? - svm_fifo_size (session->tx_fifo) : - vcm->cfg.tx_fifo_size); + *(u32 *) buffer = + (session->sndbuf_size ? session->sndbuf_size : + session->tx_fifo ? 
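The reworked VPPCOM_ATTR_GET_ORIGINAL_DST and the new VPPCOM_ATTR_GET_EXT_ENDPT cases above validate the caller-supplied buffer and length first and return EINVAL before touching them. The defensive shape of such a getter, with illustrative names and an endpoint struct that is not the real vppcom_endpt_t:

#include <stdint.h>
#include <string.h>

#define APP_EINVAL (-1)

typedef struct
{
  uint8_t is_ip4;
  uint16_t port;	/* network order */
  uint8_t ip[16];
} endpt_t;

/* Copy the stored endpoint into the caller's buffer only after checking
 * that a buffer was supplied and is large enough to hold it. */
static int
get_ext_endpt (const endpt_t *stored, void *buffer, uint32_t *buflen)
{
  if (!buffer || !buflen || *buflen < sizeof (endpt_t))
    return APP_EINVAL;

  memcpy (buffer, stored, sizeof (endpt_t));
  *buflen = sizeof (endpt_t);
  return 0;
}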
svm_fifo_size (session->tx_fifo) : + vcm->cfg.tx_fifo_size); *buflen = sizeof (u32); VDBG (2, "VPPCOM_ATTR_GET_TX_FIFO_LEN: %u (0x%x), buflen %d," @@ -4337,7 +4394,8 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op, break; } vcl_session_alloc_ext_cfg (session, TRANSPORT_ENDPT_EXT_CFG_NONE, - *buflen + sizeof (u32)); + *buflen + + TRANSPORT_ENDPT_EXT_CFG_HEADER_SIZE); clib_memcpy (session->ext_config->data, buffer, *buflen); session->ext_config->len = *buflen; break; diff --git a/src/vcl/vppcom.h b/src/vcl/vppcom.h index 164dc376ad8..a11db951749 100644 --- a/src/vcl/vppcom.h +++ b/src/vcl/vppcom.h @@ -186,6 +186,7 @@ typedef enum VPPCOM_ATTR_GET_IP_PKTINFO, VPPCOM_ATTR_GET_ORIGINAL_DST, VPPCOM_ATTR_GET_NWRITEQ, + VPPCOM_ATTR_GET_EXT_ENDPT, } vppcom_attr_op_t; typedef struct _vcl_poll diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api index d3b3ed21a26..cf14455f391 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -359,6 +359,16 @@ autoreply define bfd_udp_auth_deactivate bool is_delayed; }; +/** \brief BFD UDP - enable multihop support + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +autoreply define bfd_udp_enable_multihop +{ + u32 client_index; + u32 context; +}; + /* must be compatible with bfd_error_t */ counters bfd_udp { none { diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c index 816e71081ff..bccf58ba4bb 100644 --- a/src/vnet/bfd/bfd_api.c +++ b/src/vnet/bfd/bfd_api.c @@ -46,8 +46,24 @@ pub_sub_handler (bfd_events, BFD_EVENTS); ip_address_decode(&mp->local_addr, &local_addr); \ ip_address_decode(&mp->peer_addr, &peer_addr); -#define BFD_UDP_API_PARAM_FROM_MP(mp) \ - clib_net_to_host_u32 (mp->sw_if_index), &local_addr, &peer_addr +#define BFD_UDP_API_PARAM_IS_MH(mp) \ + bfd_main.multihop_enabled && (mp->sw_if_index == ~0) + +#define BFD_UDP_API_PARAM_FROM_MP(mp) \ + BFD_UDP_API_PARAM_IS_MH (mp) ? true : false, \ + BFD_UDP_API_PARAM_IS_MH (mp) ? 
~0 : \ + clib_net_to_host_u32 (mp->sw_if_index), \ + &local_addr, &peer_addr + +#define COND_VALIDATE_SW_IF_INDEX(mp) \ + do \ + { \ + if (!(bfd_main.multihop_enabled && mp->sw_if_index == ~0)) \ + { \ + VALIDATE_SW_IF_INDEX (mp) \ + } \ + } \ + while (0); static void vl_api_bfd_udp_add_t_handler (vl_api_bfd_udp_add_t * mp) @@ -55,7 +71,7 @@ vl_api_bfd_udp_add_t_handler (vl_api_bfd_udp_add_t * mp) vl_api_bfd_udp_add_reply_t *rmp; int rv; - VALIDATE_SW_IF_INDEX (mp); + COND_VALIDATE_SW_IF_INDEX (mp); BFD_UDP_API_PARAM_COMMON_CODE; @@ -76,7 +92,7 @@ vl_api_bfd_udp_upd_t_handler (vl_api_bfd_udp_add_t *mp) vl_api_bfd_udp_upd_reply_t *rmp; int rv; - VALIDATE_SW_IF_INDEX (mp); + COND_VALIDATE_SW_IF_INDEX (mp); BFD_UDP_API_PARAM_COMMON_CODE; @@ -97,7 +113,7 @@ vl_api_bfd_udp_mod_t_handler (vl_api_bfd_udp_mod_t * mp) vl_api_bfd_udp_mod_reply_t *rmp; int rv; - VALIDATE_SW_IF_INDEX (mp); + COND_VALIDATE_SW_IF_INDEX (mp); BFD_UDP_API_PARAM_COMMON_CODE; @@ -116,7 +132,7 @@ vl_api_bfd_udp_del_t_handler (vl_api_bfd_udp_del_t * mp) vl_api_bfd_udp_del_reply_t *rmp; int rv; - VALIDATE_SW_IF_INDEX (mp); + COND_VALIDATE_SW_IF_INDEX (mp); BFD_UDP_API_PARAM_COMMON_CODE; @@ -143,7 +159,14 @@ send_bfd_udp_session_details (vl_api_registration_t * reg, u32 context, mp->state = clib_host_to_net_u32 (bs->local_state); bfd_udp_session_t *bus = &bs->udp; bfd_udp_key_t *key = &bus->key; - mp->sw_if_index = clib_host_to_net_u32 (key->sw_if_index); + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + mp->sw_if_index = ~0; + } + else + { + mp->sw_if_index = clib_host_to_net_u32 (key->sw_if_index); + } if ((!bs->auth.is_delayed && bs->auth.curr_key) || (bs->auth.is_delayed && bs->auth.next_key)) { @@ -186,7 +209,14 @@ send_bfd_udp_session_event (vl_api_registration_t *reg, u32 pid, mp->state = clib_host_to_net_u32 (bs->local_state); bfd_udp_session_t *bus = &bs->udp; bfd_udp_key_t *key = &bus->key; - mp->sw_if_index = clib_host_to_net_u32 (key->sw_if_index); + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + mp->sw_if_index = ~0; + } + else + { + mp->sw_if_index = clib_host_to_net_u32 (key->sw_if_index); + } if ((!bs->auth.is_delayed && bs->auth.curr_key) || (bs->auth.is_delayed && bs->auth.next_key)) { @@ -315,7 +345,7 @@ vl_api_bfd_udp_auth_activate_t_handler (vl_api_bfd_udp_auth_activate_t * mp) vl_api_bfd_udp_auth_activate_reply_t *rmp; int rv; - VALIDATE_SW_IF_INDEX (mp); + COND_VALIDATE_SW_IF_INDEX (mp); BFD_UDP_API_PARAM_COMMON_CODE; @@ -334,7 +364,7 @@ vl_api_bfd_udp_auth_deactivate_t_handler (vl_api_bfd_udp_auth_deactivate_t * vl_api_bfd_udp_auth_deactivate_reply_t *rmp; int rv; - VALIDATE_SW_IF_INDEX (mp); + COND_VALIDATE_SW_IF_INDEX (mp); BFD_UDP_API_PARAM_COMMON_CODE; @@ -423,6 +453,17 @@ vl_api_bfd_udp_get_echo_source_t_handler (vl_api_bfd_udp_get_echo_source_t * })) } +static void +vl_api_bfd_udp_enable_multihop_t_handler (vl_api_bfd_udp_enable_multihop_t *mp) +{ + vl_api_bfd_udp_enable_multihop_reply_t *rmp; + int rv = 0; + + bfd_main.multihop_enabled = true; + + REPLY_MACRO (VL_API_BFD_UDP_ENABLE_MULTIHOP_REPLY); +} + #include <vnet/bfd/bfd.api.c> static clib_error_t * bfd_api_hookup (vlib_main_t * vm) diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h index f051e6b679c..16501fcd272 100644 --- a/src/vnet/bfd/bfd_api.h +++ b/src/vnet/bfd/bfd_api.h @@ -37,44 +37,49 @@ typedef enum /** * @brief create a new bfd session */ -vnet_api_error_t -bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - u32 desired_min_tx_usec, u32 required_min_rx_usec, - u8 
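COND_VALIDATE_SW_IF_INDEX above only runs the interface validation when the request is not a multihop one; with multihop enabled, a sw_if_index of ~0 is the agreed marker for "no interface". The same decision as a plain function, with a stand-in validator instead of VPP's interface lookup:

#include <stdbool.h>
#include <stdint.h>

#define SW_IF_INDEX_NONE (~0u)

/* Stand-in for the real interface lookup. */
static bool
sw_if_index_is_valid (uint32_t sw_if_index)
{
  return sw_if_index != SW_IF_INDEX_NONE;
}

/* A request targets a multihop session when multihop support is enabled and
 * no interface is given; only single-hop requests need a valid interface. */
static bool
bfd_request_target_ok (bool multihop_enabled, uint32_t sw_if_index)
{
  if (multihop_enabled && sw_if_index == SW_IF_INDEX_NONE)
    return true;
  return sw_if_index_is_valid (sw_if_index);
}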
detect_mult, u8 is_authenticated, u32 conf_key_id, - u8 bfd_key_id); +vnet_api_error_t bfd_udp_add_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, + u32 desired_min_tx_usec, + u32 required_min_rx_usec, u8 detect_mult, + u8 is_authenticated, u32 conf_key_id, + u8 bfd_key_id); /** - * @brief create a new or modify and existing bfd session + * @brief create a new or modify an existing bfd session */ -vnet_api_error_t -bfd_udp_upd_session (u32 sw_if_index, const ip46_address_t *local_addr, - const ip46_address_t *peer_addr, u32 desired_min_tx_usec, - u32 required_min_rx_usec, u8 detect_mult, - u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id); +vnet_api_error_t bfd_udp_upd_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, + u32 desired_min_tx_usec, + u32 required_min_rx_usec, u8 detect_mult, + u8 is_authenticated, u32 conf_key_id, + u8 bfd_key_id); /** * @brief modify existing session */ -vnet_api_error_t -bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - u32 desired_min_tx_usec, u32 required_min_rx_usec, - u8 detect_mult); +vnet_api_error_t bfd_udp_mod_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, + u32 desired_min_tx_usec, + u32 required_min_rx_usec, + u8 detect_mult); /** * @brief delete existing session */ -vnet_api_error_t bfd_udp_del_session (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr); +vnet_api_error_t bfd_udp_del_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr); /** * @brief set session admin down/up */ -vnet_api_error_t bfd_udp_session_set_flags (vlib_main_t * vm, u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, +vnet_api_error_t bfd_udp_session_set_flags (vlib_main_t *vm, bool multihop, + u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u8 admin_up_down); /** @@ -91,18 +96,18 @@ vnet_api_error_t bfd_auth_del_key (u32 conf_key_id); /** * @brief activate authentication for existing session */ -vnet_api_error_t bfd_udp_auth_activate (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, +vnet_api_error_t bfd_udp_auth_activate (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u32 conf_key_id, u8 bfd_key_id, u8 is_delayed); /** * @brief deactivate authentication for existing session */ -vnet_api_error_t bfd_udp_auth_deactivate (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, +vnet_api_error_t bfd_udp_auth_deactivate (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u8 is_delayed); /** diff --git a/src/vnet/bfd/bfd_cli.c b/src/vnet/bfd/bfd_cli.c index 33942bb89e6..194c62b507c 100644 --- a/src/vnet/bfd/bfd_cli.c +++ b/src/vnet/bfd/bfd_cli.c @@ -26,11 +26,39 @@ #include <vnet/bfd/bfd_api.h> #include <vnet/bfd/bfd_main.h> +#define BFD_MULTIHOP_CLI_CHECK \ + do \ + { \ + multihop = have_multihop; \ + if (multihop) \ + { \ + sw_if_index = ~0; \ + } \ + if (multihop && have_sw_if_index) \ + { \ + ret = clib_error_return ( \ + 0, "Incompatible parameter combination, " \ + "interface cannot be specified when multihop is enabled"); \ + goto out; \ + } \ + if (!multihop && 
!have_sw_if_index) \ + { \ + ret = \ + clib_error_return (0, "Incompatible parameter combination, " \ + "interface must be set if not multihop"); \ + goto out; \ + } \ + } \ + while (0); + static u8 * format_bfd_session_cli (u8 * s, va_list * args) { vlib_main_t *vm = va_arg (*args, vlib_main_t *); bfd_session_t *bs = va_arg (*args, bfd_session_t *); + s = format (s, "%10s %-32s %20s\n", "", "Hop Type", + bfd_hop_type_string (bs->hop_type)); + switch (bs->transport) { case BFD_TRANSPORT_UDP4: @@ -52,6 +80,8 @@ format_bfd_session_cli (u8 * s, va_list * args) bfd_diag_code_string (bs->remote_diag)); s = format (s, "%10s %-32s %20u %20u\n", "", "Detect multiplier", bs->local_detect_mult, bs->remote_detect_mult); + s = format (s, "%10s %-32s %20llu\n", "", "Detection Time (usec)", + bfd_nsec_to_usec (bs->detection_time_nsec)); s = format (s, "%10s %-32s %20u %20llu\n", "", "Required Min Rx Interval (usec)", bs->config_required_min_rx_usec, bs->remote_min_rx_usec); @@ -363,6 +393,7 @@ VLIB_CLI_COMMAND (bfd_cli_key_del_command, static) = { #define DETECT_MULT_STR "detect-mult" #define ADMIN_STR "admin" #define DELAYED_STR "delayed" +#define MULTIHOP_STR "multihop" static const unsigned mandatory = 1; static const unsigned optional = 0; @@ -401,7 +432,8 @@ bfd_cli_udp_session_add (vlib_main_t * vm, unformat_input_t * input, clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; #define foreach_bfd_cli_udp_session_add_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + F (bool, multihop, MULTIHOP_STR, optional, "%_") \ + F (u32, sw_if_index, INTERFACE_STR, optional, "%U", \ unformat_vnet_sw_interface, &vnet_main) \ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ bfd_cli_unformat_ip46_address) \ @@ -433,6 +465,7 @@ bfd_cli_udp_session_add (vlib_main_t * vm, unformat_input_t * input, } foreach_bfd_cli_udp_session_add_cli_param (CHECK_MANDATORY); + BFD_MULTIHOP_CLI_CHECK if (1 == have_conf_key_id + have_bfd_key_id) { @@ -456,11 +489,9 @@ bfd_cli_udp_session_add (vlib_main_t * vm, unformat_input_t * input, goto out; } - vnet_api_error_t rv = - bfd_udp_add_session (sw_if_index, &local_addr, &peer_addr, desired_min_tx, - required_min_rx, - detect_mult, have_conf_key_id, conf_key_id, - bfd_key_id); + vnet_api_error_t rv = bfd_udp_add_session ( + multihop, sw_if_index, &local_addr, &peer_addr, desired_min_tx, + required_min_rx, detect_mult, have_conf_key_id, conf_key_id, bfd_key_id); if (rv) { ret = @@ -477,16 +508,16 @@ out: VLIB_CLI_COMMAND (bfd_cli_udp_session_add_command, static) = { .path = "bfd udp session add", .short_help = "bfd udp session add" - " interface <interface>" - " local-addr <local-address>" - " peer-addr <peer-address>" - " desired-min-tx <desired min tx interval>" - " required-min-rx <required min rx interval>" - " detect-mult <detect multiplier> " - "[" - " conf-key-id <config key ID>" - " bfd-key-id <BFD key ID>" - "]", + " <multihop | interface <interface>>" + " local-addr <local-address>" + " peer-addr <peer-address>" + " desired-min-tx <desired min tx interval>" + " required-min-rx <required min rx interval>" + " detect-mult <detect multiplier> " + "[" + " conf-key-id <config key ID>" + " bfd-key-id <BFD key ID>" + "]", .function = bfd_cli_udp_session_add, }; @@ -497,7 +528,8 @@ bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input, clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; #define foreach_bfd_cli_udp_session_mod_cli_param(F) \ - F (u32, sw_if_index, 
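BFD_MULTIHOP_CLI_CHECK above enforces that multihop and an interface are mutually exclusive and that exactly one of them is present. The rule reduces to a small helper; returning an error string here is only for illustration, the CLI itself builds clib_error_t values:

/* Exactly one of "multihop" and "interface <if>" must be given; return an
 * error message for an invalid combination, NULL when it is acceptable. */
static const char *
bfd_cli_check_target (int have_multihop, int have_sw_if_index)
{
  if (have_multihop && have_sw_if_index)
    return "interface cannot be specified when multihop is enabled";
  if (!have_multihop && !have_sw_if_index)
    return "interface must be set if not multihop";
  return 0;
}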
INTERFACE_STR, mandatory, "%U", \ + F (bool, multihop, MULTIHOP_STR, optional, "%_") \ + F (u32, sw_if_index, INTERFACE_STR, optional, "%U", \ unformat_vnet_sw_interface, &vnet_main) \ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ bfd_cli_unformat_ip46_address) \ @@ -527,6 +559,7 @@ bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input, } foreach_bfd_cli_udp_session_mod_cli_param (CHECK_MANDATORY); + BFD_MULTIHOP_CLI_CHECK if (detect_mult > 255) { @@ -536,7 +569,7 @@ bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input, } vnet_api_error_t rv = - bfd_udp_mod_session (sw_if_index, &local_addr, &peer_addr, + bfd_udp_mod_session (multihop, sw_if_index, &local_addr, &peer_addr, desired_min_tx, required_min_rx, detect_mult); if (rv) { @@ -553,13 +586,13 @@ out: VLIB_CLI_COMMAND (bfd_cli_udp_session_mod_command, static) = { .path = "bfd udp session mod", - .short_help = "bfd udp session mod interface" - " <interface> local-addr" - " <local-address> peer-addr" - " <peer-address> desired-min-tx" - " <desired min tx interval> required-min-rx" - " <required min rx interval> detect-mult" - " <detect multiplier> ", + .short_help = "bfd udp session mod " + " <multihop | interface <interface>>" + " <local-address> peer-addr" + " <peer-address> desired-min-tx" + " <desired min tx interval> required-min-rx" + " <required min rx interval> detect-mult" + " <detect multiplier> ", .function = bfd_cli_udp_session_mod, }; @@ -570,7 +603,8 @@ bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input, clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; #define foreach_bfd_cli_udp_session_del_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + F (bool, multihop, MULTIHOP_STR, optional, "%_") \ + F (u32, sw_if_index, INTERFACE_STR, optional, "%U", \ unformat_vnet_sw_interface, &vnet_main) \ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ bfd_cli_unformat_ip46_address) \ @@ -597,9 +631,10 @@ bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input, } foreach_bfd_cli_udp_session_del_cli_param (CHECK_MANDATORY); + BFD_MULTIHOP_CLI_CHECK vnet_api_error_t rv = - bfd_udp_del_session (sw_if_index, &local_addr, &peer_addr); + bfd_udp_del_session (multihop, sw_if_index, &local_addr, &peer_addr); if (rv) { ret = @@ -615,10 +650,10 @@ out: VLIB_CLI_COMMAND (bfd_cli_udp_session_del_command, static) = { .path = "bfd udp session del", - .short_help = "bfd udp session del interface" - " <interface> local-addr" - " <local-address> peer-addr" - "<peer-address> ", + .short_help = "bfd udp session del <multihop |" + " interface <interface>> local-addr" + " <local-address> peer-addr" + "<peer-address> ", .function = bfd_cli_udp_session_del, }; @@ -629,7 +664,8 @@ bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input, clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; #define foreach_bfd_cli_udp_session_set_flags_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + F (bool, multihop, MULTIHOP_STR, optional, "%_") \ + F (u32, sw_if_index, INTERFACE_STR, optional, "%U", \ unformat_vnet_sw_interface, &vnet_main) \ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ bfd_cli_unformat_ip46_address) \ @@ -658,6 +694,7 @@ bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input, } foreach_bfd_cli_udp_session_set_flags_cli_param (CHECK_MANDATORY); + BFD_MULTIHOP_CLI_CHECK u8 admin_up_down; static const 
char up[] = "up"; @@ -677,9 +714,8 @@ bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input, ADMIN_STR, admin_up_down_token); goto out; } - vnet_api_error_t rv = - bfd_udp_session_set_flags (vm, sw_if_index, &local_addr, - &peer_addr, admin_up_down); + vnet_api_error_t rv = bfd_udp_session_set_flags ( + vm, multihop, sw_if_index, &local_addr, &peer_addr, admin_up_down); if (rv) { ret = @@ -696,10 +732,10 @@ out: VLIB_CLI_COMMAND (bfd_cli_udp_session_set_flags_command, static) = { .path = "bfd udp session set-flags", .short_help = "bfd udp session set-flags" - " interface <interface>" - " local-addr <local-address>" - " peer-addr <peer-address>" - " admin <up|down>", + " <multihop | interface <interface>>" + " local-addr <local-address>" + " peer-addr <peer-address>" + " admin <up|down>", .function = bfd_cli_udp_session_set_flags, }; @@ -711,7 +747,8 @@ bfd_cli_udp_session_auth_activate (vlib_main_t * vm, clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; #define foreach_bfd_cli_udp_session_auth_activate_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + F (bool, multihop, MULTIHOP_STR, optional, "%_") \ + F (u32, sw_if_index, INTERFACE_STR, optional, "%U", \ unformat_vnet_sw_interface, &vnet_main) \ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ bfd_cli_unformat_ip46_address) \ @@ -741,6 +778,7 @@ bfd_cli_udp_session_auth_activate (vlib_main_t * vm, } foreach_bfd_cli_udp_session_auth_activate_cli_param (CHECK_MANDATORY); + BFD_MULTIHOP_CLI_CHECK u8 is_delayed = 0; if (have_delayed_token) @@ -773,8 +811,8 @@ bfd_cli_udp_session_auth_activate (vlib_main_t * vm, } vnet_api_error_t rv = - bfd_udp_auth_activate (sw_if_index, &local_addr, &peer_addr, conf_key_id, - bfd_key_id, is_delayed); + bfd_udp_auth_activate (multihop, sw_if_index, &local_addr, &peer_addr, + conf_key_id, bfd_key_id, is_delayed); if (rv) { ret = @@ -791,12 +829,12 @@ out: VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_activate_command, static) = { .path = "bfd udp session auth activate", .short_help = "bfd udp session auth activate" - " interface <interface>" - " local-addr <local-address>" - " peer-addr <peer-address>" - " conf-key-id <config key ID>" - " bfd-key-id <BFD key ID>" - " [ delayed <yes|no> ]", + " <multihop | interface <interface>>" + " local-addr <local-address>" + " peer-addr <peer-address>" + " conf-key-id <config key ID>" + " bfd-key-id <BFD key ID>" + " [ delayed <yes|no> ]", .function = bfd_cli_udp_session_auth_activate, }; @@ -807,7 +845,8 @@ bfd_cli_udp_session_auth_deactivate (vlib_main_t *vm, unformat_input_t *input, clib_error_t *ret = NULL; unformat_input_t _line_input, *line_input = &_line_input; #define foreach_bfd_cli_udp_session_auth_deactivate_cli_param(F) \ - F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + F (bool, multihop, MULTIHOP_STR, optional, "%_") \ + F (u32, sw_if_index, INTERFACE_STR, optional, "%U", \ unformat_vnet_sw_interface, &vnet_main) \ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ bfd_cli_unformat_ip46_address) \ @@ -835,6 +874,7 @@ bfd_cli_udp_session_auth_deactivate (vlib_main_t *vm, unformat_input_t *input, } foreach_bfd_cli_udp_session_auth_deactivate_cli_param (CHECK_MANDATORY); + BFD_MULTIHOP_CLI_CHECK u8 is_delayed = 0; if (have_delayed_token) @@ -858,8 +898,8 @@ bfd_cli_udp_session_auth_deactivate (vlib_main_t *vm, unformat_input_t *input, } } - vnet_api_error_t rv = bfd_udp_auth_deactivate (sw_if_index, &local_addr, - &peer_addr, is_delayed); + 
vnet_api_error_t rv = bfd_udp_auth_deactivate ( + multihop, sw_if_index, &local_addr, &peer_addr, is_delayed); if (rv) { ret = clib_error_return ( @@ -875,10 +915,10 @@ out: VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, static) = { .path = "bfd udp session auth deactivate", .short_help = "bfd udp session auth deactivate" - " interface <interface>" - " local-addr <local-address>" - " peer-addr <peer-address>" - "[ delayed <yes|no> ]", + " <multihop | interface <interface>>" + " local-addr <local-address>" + " peer-addr <peer-address>" + "[ delayed <yes|no> ]", .function = bfd_cli_udp_session_auth_deactivate, }; diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 1423da91158..4ad0a16830f 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -30,6 +30,20 @@ #include <vlib/log.h> #include <vnet/crypto/crypto.h> +const char * +bfd_hop_type_string (bfd_hop_type_e hoptype) +{ + switch (hoptype) + { +#define F(x) \ + case BFD_HOP_TYPE_##x: \ + return "BFD_HOP_TYPE_" #x; + foreach_bfd_hop (F) +#undef F + } + return "UNKNOWN"; +} + static void bfd_validate_counters (bfd_main_t *bm) { @@ -1353,6 +1367,8 @@ VLIB_REGISTER_NODE (bfd_process_node, static) = [BFD_TX_IP6_REWRITE] = "ip6-rewrite", [BFD_TX_IP4_MIDCHAIN] = "ip4-midchain", [BFD_TX_IP6_MIDCHAIN] = "ip6-midchain", + [BFD_TX_IP4_LOOKUP] = "ip4-lookup", + [BFD_TX_IP6_LOOKUP] = "ip6-lookup", } }; // clang-format on @@ -2049,29 +2065,29 @@ u8 * format_bfd_session (u8 * s, va_list * args) { const bfd_session_t *bs = va_arg (*args, bfd_session_t *); - s = format (s, "bs_idx=%u local-state=%s remote-state=%s\n" - "local-discriminator=%u remote-discriminator=%u\n" - "local-diag=%s echo-active=%s\n" - "desired-min-tx=%u required-min-rx=%u\n" - "required-min-echo-rx=%u detect-mult=%u\n" - "remote-min-rx=%u remote-min-echo-rx=%u\n" - "remote-demand=%s poll-state=%s\n" - "auth: local-seq-num=%u remote-seq-num=%u\n" - " is-delayed=%s\n" - " curr-key=%U\n" - " next-key=%U", - bs->bs_idx, bfd_state_string (bs->local_state), - bfd_state_string (bs->remote_state), bs->local_discr, - bs->remote_discr, bfd_diag_code_string (bs->local_diag), - (bs->echo ? "yes" : "no"), bs->config_desired_min_tx_usec, - bs->config_required_min_rx_usec, 1, bs->local_detect_mult, - bs->remote_min_rx_usec, bs->remote_min_echo_rx_usec, - (bs->remote_demand ? "yes" : "no"), - bfd_poll_state_string (bs->poll_state), - bs->auth.local_seq_number, bs->auth.remote_seq_number, - (bs->auth.is_delayed ? "yes" : "no"), - format_bfd_auth_key, bs->auth.curr_key, format_bfd_auth_key, - bs->auth.next_key); + s = format ( + s, + "bs_idx=%u hop-type=%s local-state=%s remote-state=%s\n" + "local-discriminator=%u remote-discriminator=%u\n" + "local-diag=%s echo-active=%s\n" + "desired-min-tx=%u required-min-rx=%u\n" + "required-min-echo-rx=%u detect-mult=%u\n" + "remote-min-rx=%u remote-min-echo-rx=%u\n" + "remote-demand=%s poll-state=%s\n" + "auth: local-seq-num=%u remote-seq-num=%u\n" + " is-delayed=%s\n" + " curr-key=%U\n" + " next-key=%U", + bs->bs_idx, bfd_hop_type_string (bs->hop_type), + bfd_state_string (bs->local_state), bfd_state_string (bs->remote_state), + bs->local_discr, bs->remote_discr, bfd_diag_code_string (bs->local_diag), + (bs->echo ? "yes" : "no"), bs->config_desired_min_tx_usec, + bs->config_required_min_rx_usec, 1, bs->local_detect_mult, + bs->remote_min_rx_usec, bs->remote_min_echo_rx_usec, + (bs->remote_demand ? 
"yes" : "no"), bfd_poll_state_string (bs->poll_state), + bs->auth.local_seq_number, bs->auth.remote_seq_number, + (bs->auth.is_delayed ? "yes" : "no"), format_bfd_auth_key, + bs->auth.curr_key, format_bfd_auth_key, bs->auth.next_key); return s; } diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index 1d4617e1d7c..7d9253983ce 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -71,13 +71,13 @@ typedef enum /** * hop types */ -#define foreach_bfd_hop(F) \ - F (SINGLE, "single") \ - F (MULTI, "multi") \ +#define foreach_bfd_hop(F) \ + F (SINGLE) \ + F (MULTI) typedef enum { -#define F(sym, str) BFD_HOP_TYPE_##sym, +#define F(sym) BFD_HOP_TYPE_##sym, foreach_bfd_hop (F) #undef F } bfd_hop_type_e; @@ -318,6 +318,12 @@ typedef struct /** vector of callback notification functions */ bfd_notify_fn_t *listeners; + /** + * true if multihop support is enabled so sw_if_index of ~0 + * represents a multihop session + */ + bool multihop_enabled; + /** log class */ vlib_log_class_t log_class; @@ -449,6 +455,7 @@ vnet_api_error_t bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, u32 bfd_nsec_to_usec (u64 nsec); const char *bfd_poll_state_string (bfd_poll_state_e state); +const char *bfd_hop_type_string (bfd_hop_type_e state); #define USEC_PER_MS (1000LL) #define MSEC_PER_SEC (1000LL) @@ -482,6 +489,8 @@ typedef enum BFD_TX_IP6_REWRITE, BFD_TX_IP4_MIDCHAIN, BFD_TX_IP6_MIDCHAIN, + BFD_TX_IP4_LOOKUP, + BFD_TX_IP6_LOOKUP, BFD_TX_N_NEXT, } bfd_tx_next_t; diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index ec42cda1bc4..6d3202cc55c 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -64,12 +64,18 @@ typedef struct u32 echo_source_sw_if_index; /* log class */ vlib_log_class_t log_class; - /* number of active udp4 sessions */ - u32 udp4_sessions_count; - u32 udp4_sessions_count_stat_seg_entry; - /* number of active udp6 sessions */ - u32 udp6_sessions_count; - u32 udp6_sessions_count_stat_seg_entry; + /* number of active udp4 single-hop sessions */ + u32 udp4_sh_sessions_count; + u32 udp4_sh_sessions_count_stat_seg_entry; + /* number of active udp6 single-hop sessions */ + u32 udp6_sh_sessions_count; + u32 udp6_sh_sessions_count_stat_seg_entry; + /* number of active udp4 multi-hop sessions */ + u32 udp4_mh_sessions_count; + u32 udp4_mh_sessions_count_stat_seg_entry; + /* number of active udp6 multi-hop sessions */ + u32 udp6_mh_sessions_count; + u32 udp6_mh_sessions_count_stat_seg_entry; } bfd_udp_main_t; static vlib_node_registration_t bfd_udp4_input_node; @@ -258,8 +264,11 @@ bfd_add_udp4_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, vlib_buffer_t *b = vlib_get_buffer (vm, bi); b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; - vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; - vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; + if (bs->hop_type == BFD_HOP_TYPE_SINGLE) + { + vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; + vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; + } vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; typedef struct @@ -290,7 +299,14 @@ bfd_add_udp4_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, { headers->ip4.src_address.as_u32 = key->local_addr.ip4.as_u32; headers->ip4.dst_address.as_u32 = key->peer_addr.ip4.as_u32; - headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4_mh); + } + else + { + 
headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); + } } /* fix ip length, checksum and udp length */ @@ -313,8 +329,11 @@ bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, vlib_buffer_t *b = vlib_get_buffer (vm, bi); b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; - vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; - vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; + if (bs->hop_type == BFD_HOP_TYPE_SINGLE) + { + vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; + vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; + } vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; typedef struct @@ -350,7 +369,14 @@ bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, sizeof (headers->ip6.src_address)); clib_memcpy_fast (&headers->ip6.dst_address, &key->peer_addr.ip6, sizeof (headers->ip6.dst_address)); - headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6_mh); + } + else + { + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); + } } /* fix ip payload length and udp length */ @@ -398,9 +424,25 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node) { vnet_main_t *vnm = vnet_get_main (); const bfd_udp_session_t *bus = &bs->udp; - ip_adjacency_t *adj = adj_get (bus->adj_index); - /* don't try to send the buffer if the interface is not up */ + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + switch (bs->transport) + { + case BFD_TRANSPORT_UDP4: + *next_node = BFD_TX_IP4_LOOKUP; + return 1; + case BFD_TRANSPORT_UDP6: + *next_node = BFD_TX_IP6_LOOKUP; + return 1; + default: + /* drop */ + return 0; + } + } + + ip_adjacency_t *adj = adj_get (bus->adj_index); + /* For single-hop, don't try to send the buffer if the interface is not up */ if (!vnet_sw_interface_is_up (vnm, bus->key.sw_if_index)) return 0; @@ -495,7 +537,7 @@ bfd_udp_key_init (bfd_udp_key_t * key, u32 sw_if_index, const ip46_address_t * peer_addr) { clib_memset (key, 0, sizeof (*key)); - key->sw_if_index = sw_if_index; + key->sw_if_index = sw_if_index & 0xFFFF; key->local_addr.as_u64[0] = local_addr->as_u64[0]; key->local_addr.as_u64[1] = local_addr->as_u64[1]; key->peer_addr.as_u64[0] = peer_addr->as_u64[0]; @@ -503,12 +545,13 @@ bfd_udp_key_init (bfd_udp_key_t * key, u32 sw_if_index, } static vnet_api_error_t -bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum, - u32 sw_if_index, u32 desired_min_tx_usec, +bfd_udp_add_session_internal (vlib_main_t *vm, bfd_udp_main_t *bum, + bool multihop, u32 sw_if_index, + u32 desired_min_tx_usec, u32 required_min_rx_usec, u8 detect_mult, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - bfd_session_t ** bs_out) + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, + bfd_session_t **bs_out) { /* get a pool entry and if we end up not needing it, give it back */ bfd_transport_e t = BFD_TRANSPORT_UDP4; @@ -536,8 +579,9 @@ bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum, return VNET_API_ERROR_BFD_EEXIST; } mhash_set (&bum->bfd_session_idx_by_bfd_key, key, bs->bs_idx, NULL); - BFD_DBG ("session created, bs_idx=%u, sw_if_index=%d, local=%U, peer=%U", - bs->bs_idx, key->sw_if_index, format_ip46_address, + BFD_DBG ("session created, bs_idx=%u, multihop=%u, sw_if_index=%d, " + "local=%U, peer=%U", + bs->bs_idx, multihop, key->sw_if_index, format_ip46_address, 
&key->local_addr, IP46_TYPE_ANY, format_ip46_address, &key->peer_addr, IP46_TYPE_ANY); vlib_log_info (bum->log_class, "create BFD session: %U", @@ -548,41 +592,82 @@ bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum, &key->peer_addr); if (BFD_TRANSPORT_UDP4 == t) { - bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4, - peer, key->sw_if_index); - BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, VNET_LINK_IP4, %U, %d) " - "returns %d", - format_ip46_address, peer, IP46_TYPE_ANY, key->sw_if_index, - bus->adj_index); - ++bum->udp4_sessions_count; - bfd_udp_update_stat_segment_entry ( - bum->udp4_sessions_count_stat_seg_entry, bum->udp4_sessions_count); - if (1 == bum->udp4_sessions_count) + if (multihop) { - udp_register_dst_port (vm, UDP_DST_PORT_bfd4, - bfd_udp4_input_node.index, 1); - udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo4, - bfd_udp_echo4_input_node.index, 1); + ++bum->udp4_mh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp4_mh_sessions_count_stat_seg_entry, + bum->udp4_mh_sessions_count); + if (1 == bum->udp4_mh_sessions_count) + { + udp_register_dst_port (vm, UDP_DST_PORT_bfd4_mh, + bfd_udp4_input_node.index, 1); + } + } + else + { + bus->adj_index = adj_nbr_add_or_lock ( + FIB_PROTOCOL_IP4, VNET_LINK_IP4, peer, key->sw_if_index); + BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, VNET_LINK_IP4, " + " %U, %d) returns %d", + format_ip46_address, peer, IP46_TYPE_ANY, key->sw_if_index, + bus->adj_index); + ++bum->udp4_sh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp4_sh_sessions_count_stat_seg_entry, + bum->udp4_sh_sessions_count); + if (1 == bum->udp4_sh_sessions_count) + { + udp_register_dst_port (vm, UDP_DST_PORT_bfd4, + bfd_udp4_input_node.index, 1); + udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo4, + bfd_udp_echo4_input_node.index, 1); + } } } else { - bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, - peer, key->sw_if_index); - BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, VNET_LINK_IP6, %U, %d) " - "returns %d", - format_ip46_address, peer, IP46_TYPE_ANY, key->sw_if_index, - bus->adj_index); - ++bum->udp6_sessions_count; - bfd_udp_update_stat_segment_entry ( - bum->udp6_sessions_count_stat_seg_entry, bum->udp6_sessions_count); - if (1 == bum->udp6_sessions_count) + if (multihop) { - udp_register_dst_port (vm, UDP_DST_PORT_bfd6, - bfd_udp6_input_node.index, 0); - udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo6, - bfd_udp_echo6_input_node.index, 0); + ++bum->udp6_mh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp6_mh_sessions_count_stat_seg_entry, + bum->udp6_mh_sessions_count); + if (1 == bum->udp6_mh_sessions_count) + { + udp_register_dst_port (vm, UDP_DST_PORT_bfd6_mh, + bfd_udp6_input_node.index, 0); + } } + else + { + bus->adj_index = adj_nbr_add_or_lock ( + FIB_PROTOCOL_IP6, VNET_LINK_IP6, peer, key->sw_if_index); + BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, VNET_LINK_IP6, " + "%U, %d) returns %d", + format_ip46_address, peer, IP46_TYPE_ANY, key->sw_if_index, + bus->adj_index); + ++bum->udp6_sh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp6_sh_sessions_count_stat_seg_entry, + bum->udp6_sh_sessions_count); + if (1 == bum->udp6_sh_sessions_count) + { + udp_register_dst_port (vm, UDP_DST_PORT_bfd6, + bfd_udp6_input_node.index, 0); + udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo6, + bfd_udp_echo6_input_node.index, 0); + } + } + } + + if (multihop) + { + bs->hop_type = BFD_HOP_TYPE_MULTI; + } + else + { + bs->hop_type = 
BFD_HOP_TYPE_SINGLE; } *bs_out = bs; return bfd_session_set_params (bum->bfd_main, bs, desired_min_tx_usec, @@ -590,20 +675,24 @@ bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum, } static vnet_api_error_t -bfd_udp_validate_api_input (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr) +bfd_udp_validate_api_input (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr) { bfd_udp_main_t *bum = &bfd_udp_main; - vnet_sw_interface_t *sw_if = - vnet_get_sw_interface_or_null (bfd_udp_main.vnet_main, sw_if_index); - if (!sw_if) + if (!multihop) { - vlib_log_err (bum->log_class, - "got NULL sw_if when getting interface by index %u", - sw_if_index); - return VNET_API_ERROR_INVALID_SW_IF_INDEX; + vnet_sw_interface_t *sw_if = + vnet_get_sw_interface_or_null (bfd_udp_main.vnet_main, sw_if_index); + if (!sw_if) + { + vlib_log_err (bum->log_class, + "got NULL sw_if when getting interface by index %u", + sw_if_index); + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + } } + if (ip46_address_is_ip4 (local_addr)) { if (!ip46_address_is_ip4 (peer_addr)) @@ -627,13 +716,13 @@ bfd_udp_validate_api_input (u32 sw_if_index, } static vnet_api_error_t -bfd_udp_find_session_by_api_input (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - bfd_session_t ** bs_out) +bfd_udp_find_session_by_api_input (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, + bfd_session_t **bs_out) { vnet_api_error_t rv = - bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); + bfd_udp_validate_api_input (multihop, sw_if_index, local_addr, peer_addr); if (!rv) { bfd_udp_main_t *bum = &bfd_udp_main; @@ -647,8 +736,9 @@ bfd_udp_find_session_by_api_input (u32 sw_if_index, else { vlib_log_err (bum->log_class, - "BFD session not found, sw_if_index=%u, local=%U, peer=%U", - sw_if_index, format_ip46_address, local_addr, + "BFD session not found, multihop=%d, sw_if_index=%u, " + "local=%U, peer=%U", + multihop, sw_if_index, format_ip46_address, local_addr, IP46_TYPE_ANY, format_ip46_address, peer_addr, IP46_TYPE_ANY); return VNET_API_ERROR_BFD_ENOENT; @@ -658,13 +748,13 @@ bfd_udp_find_session_by_api_input (u32 sw_if_index, } static vnet_api_error_t -bfd_api_verify_common (u32 sw_if_index, u32 desired_min_tx_usec, +bfd_api_verify_common (bool multihop, u32 sw_if_index, u32 desired_min_tx_usec, u8 detect_mult, const ip46_address_t *local_addr, const ip46_address_t *peer_addr) { bfd_udp_main_t *bum = &bfd_udp_main; vnet_api_error_t rv = - bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); + bfd_udp_validate_api_input (multihop, sw_if_index, local_addr, peer_addr); if (rv) { return rv; @@ -693,31 +783,62 @@ bfd_udp_del_session_internal (vlib_main_t * vm, bfd_session_t * bs) switch (bs->transport) { case BFD_TRANSPORT_UDP4: - --bum->udp4_sessions_count; - bfd_udp_update_stat_segment_entry ( - bum->udp4_sessions_count_stat_seg_entry, bum->udp4_sessions_count); - if (!bum->udp4_sessions_count) + if (bs->hop_type == BFD_HOP_TYPE_MULTI) { - udp_unregister_dst_port (vm, UDP_DST_PORT_bfd4, 1); - udp_unregister_dst_port (vm, UDP_DST_PORT_bfd_echo4, 1); + --bum->udp4_mh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp4_mh_sessions_count_stat_seg_entry, + bum->udp4_mh_sessions_count); + if (!bum->udp4_mh_sessions_count) + { + udp_unregister_dst_port (vm, UDP_DST_PORT_bfd4_mh, 1); + } + } + else + { + 
--bum->udp4_sh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp4_sh_sessions_count_stat_seg_entry, + bum->udp4_sh_sessions_count); + if (!bum->udp4_sh_sessions_count) + { + udp_unregister_dst_port (vm, UDP_DST_PORT_bfd4, 1); + udp_unregister_dst_port (vm, UDP_DST_PORT_bfd_echo4, 1); + } } break; case BFD_TRANSPORT_UDP6: - --bum->udp6_sessions_count; - bfd_udp_update_stat_segment_entry ( - bum->udp6_sessions_count_stat_seg_entry, bum->udp6_sessions_count); - if (!bum->udp6_sessions_count) + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + --bum->udp6_mh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp6_mh_sessions_count_stat_seg_entry, + bum->udp6_mh_sessions_count); + if (!bum->udp6_mh_sessions_count) + { + udp_unregister_dst_port (vm, UDP_DST_PORT_bfd6_mh, 0); + } + } + else { - udp_unregister_dst_port (vm, UDP_DST_PORT_bfd6, 0); - udp_unregister_dst_port (vm, UDP_DST_PORT_bfd_echo6, 0); + --bum->udp6_sh_sessions_count; + bfd_udp_update_stat_segment_entry ( + bum->udp6_sh_sessions_count_stat_seg_entry, + bum->udp6_sh_sessions_count); + if (!bum->udp6_sh_sessions_count) + { + udp_unregister_dst_port (vm, UDP_DST_PORT_bfd6, 0); + udp_unregister_dst_port (vm, UDP_DST_PORT_bfd_echo6, 0); + } } + break; } bfd_put_session (bum->bfd_main, bs); } static vnet_api_error_t -bfd_udp_add_and_start_session (u32 sw_if_index, +bfd_udp_add_and_start_session (bool multihop, u32 sw_if_index, const ip46_address_t *local_addr, const ip46_address_t *peer_addr, u32 desired_min_tx_usec, @@ -728,9 +849,10 @@ bfd_udp_add_and_start_session (u32 sw_if_index, bfd_session_t *bs = NULL; vnet_api_error_t rv; - rv = bfd_udp_add_session_internal ( - vlib_get_main (), &bfd_udp_main, sw_if_index, desired_min_tx_usec, - required_min_rx_usec, detect_mult, local_addr, peer_addr, &bs); + rv = bfd_udp_add_session_internal (vlib_get_main (), &bfd_udp_main, multihop, + sw_if_index, desired_min_tx_usec, + required_min_rx_usec, detect_mult, + local_addr, peer_addr, &bs); if (!rv && is_authenticated) { @@ -750,21 +872,22 @@ bfd_udp_add_and_start_session (u32 sw_if_index, } vnet_api_error_t -bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - u32 desired_min_tx_usec, u32 required_min_rx_usec, - u8 detect_mult, u8 is_authenticated, u32 conf_key_id, - u8 bfd_key_id) +bfd_udp_add_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u32 desired_min_tx_usec, + u32 required_min_rx_usec, u8 detect_mult, + u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id) { bfd_main_t *bm = &bfd_main; bfd_lock (bm); - vnet_api_error_t rv = bfd_api_verify_common ( - sw_if_index, desired_min_tx_usec, detect_mult, local_addr, peer_addr); + vnet_api_error_t rv = + bfd_api_verify_common (multihop, sw_if_index, desired_min_tx_usec, + detect_mult, local_addr, peer_addr); if (!rv) rv = bfd_udp_add_and_start_session ( - sw_if_index, local_addr, peer_addr, desired_min_tx_usec, + multihop, sw_if_index, local_addr, peer_addr, desired_min_tx_usec, required_min_rx_usec, detect_mult, is_authenticated, conf_key_id, bfd_key_id); @@ -773,7 +896,8 @@ bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, } vnet_api_error_t -bfd_udp_upd_session (u32 sw_if_index, const ip46_address_t *local_addr, +bfd_udp_upd_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, const ip46_address_t *peer_addr, u32 desired_min_tx_usec, u32 required_min_rx_usec, u8 detect_mult, u8 is_authenticated, u32 
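The add and delete paths above keep separate single-hop and multihop session counters per address family and register the matching UDP listener when the first session appears, unregistering it when the last one goes away. The counter-guarded registration pattern, with stand-in register/unregister functions rather than VPP's udp_register_dst_port():

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for installing/removing the UDP listener. */
static void
register_udp_port (uint16_t port)
{
  printf ("listening on %u\n", port);
}

static void
unregister_udp_port (uint16_t port)
{
  printf ("stopped listening on %u\n", port);
}

/* The listener exists exactly while at least one session needs it. */
static uint32_t n_sessions;

static void
session_add (uint16_t port)
{
  if (++n_sessions == 1)
    register_udp_port (port);
}

static void
session_del (uint16_t port)
{
  if (n_sessions && --n_sessions == 0)
    unregister_udp_port (port);
}

int
main (void)
{
  session_add (4784);	/* first session installs the listener */
  session_add (4784);
  session_del (4784);
  session_del (4784);	/* last session removes it */
  return 0;
}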
conf_key_id, u8 bfd_key_id) @@ -781,17 +905,18 @@ bfd_udp_upd_session (u32 sw_if_index, const ip46_address_t *local_addr, bfd_main_t *bm = &bfd_main; bfd_lock (bm); - vnet_api_error_t rv = bfd_api_verify_common ( - sw_if_index, desired_min_tx_usec, detect_mult, local_addr, peer_addr); + vnet_api_error_t rv = + bfd_api_verify_common (multihop, sw_if_index, desired_min_tx_usec, + detect_mult, local_addr, peer_addr); if (!rv) { bfd_session_t *bs = NULL; - rv = bfd_udp_find_session_by_api_input (sw_if_index, local_addr, - peer_addr, &bs); + rv = bfd_udp_find_session_by_api_input (multihop, sw_if_index, + local_addr, peer_addr, &bs); if (VNET_API_ERROR_BFD_ENOENT == rv) rv = bfd_udp_add_and_start_session ( - sw_if_index, local_addr, peer_addr, desired_min_tx_usec, + multihop, sw_if_index, local_addr, peer_addr, desired_min_tx_usec, required_min_rx_usec, detect_mult, is_authenticated, conf_key_id, bfd_key_id); else @@ -805,7 +930,8 @@ bfd_udp_upd_session (u32 sw_if_index, const ip46_address_t *local_addr, } vnet_api_error_t -bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t *local_addr, +bfd_udp_mod_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, const ip46_address_t *peer_addr, u32 desired_min_tx_usec, u32 required_min_rx_usec, u8 detect_mult) { @@ -813,9 +939,8 @@ bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t *local_addr, bfd_main_t *bm = &bfd_main; vnet_api_error_t error; bfd_lock (bm); - vnet_api_error_t rv = - bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, - &bs); + vnet_api_error_t rv = bfd_udp_find_session_by_api_input ( + multihop, sw_if_index, local_addr, peer_addr, &bs); if (rv) { bfd_unlock (bm); @@ -830,16 +955,15 @@ bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t *local_addr, } vnet_api_error_t -bfd_udp_del_session (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr) +bfd_udp_del_session (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr) { bfd_session_t *bs = NULL; bfd_main_t *bm = &bfd_main; bfd_lock (bm); - vnet_api_error_t rv = - bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, - &bs); + vnet_api_error_t rv = bfd_udp_find_session_by_api_input ( + multihop, sw_if_index, local_addr, peer_addr, &bs); if (rv) { bfd_unlock (bm); @@ -851,16 +975,15 @@ bfd_udp_del_session (u32 sw_if_index, } vnet_api_error_t -bfd_udp_session_set_flags (vlib_main_t * vm, u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, u8 admin_up_down) +bfd_udp_session_set_flags (vlib_main_t *vm, bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u8 admin_up_down) { bfd_session_t *bs = NULL; bfd_main_t *bm = &bfd_main; bfd_lock (bm); - vnet_api_error_t rv = - bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, - &bs); + vnet_api_error_t rv = bfd_udp_find_session_by_api_input ( + multihop, sw_if_index, local_addr, peer_addr, &bs); if (rv) { bfd_unlock (bm); @@ -872,19 +995,18 @@ bfd_udp_session_set_flags (vlib_main_t * vm, u32 sw_if_index, } vnet_api_error_t -bfd_udp_auth_activate (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - u32 conf_key_id, u8 key_id, u8 is_delayed) +bfd_udp_auth_activate (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u32 conf_key_id, + u8 key_id, u8 is_delayed) { bfd_main_t *bm = &bfd_main; 
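/*
 * Illustrative caller of the reworked control-plane API: every entry point
 * above now takes `multihop` as its first argument.  A multihop session is
 * keyed on addresses only, so the interface argument is not used; ~0 is
 * passed here on the assumption that callers follow the same wildcard
 * convention the rx path uses later in this patch.  Addresses and timing
 * values are made up for the example.
 */
static vnet_api_error_t
bfd_add_multihop_session_example (void)
{
  ip46_address_t local = {}, peer = {};

  local.ip4.as_u32 = clib_host_to_net_u32 (0x0a000001); /* 10.0.0.1 */
  peer.ip4.as_u32 = clib_host_to_net_u32 (0x0a000002);  /* 10.0.0.2 */

  return bfd_udp_add_session (true /* multihop */, ~0 /* sw_if_index */,
                              &local, &peer, 100000 /* desired_min_tx_usec */,
                              100000 /* required_min_rx_usec */,
                              3 /* detect_mult */, 0 /* is_authenticated */,
                              0 /* conf_key_id */, 0 /* bfd_key_id */);
}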
bfd_lock (bm); vnet_api_error_t error; bfd_session_t *bs = NULL; - vnet_api_error_t rv = - bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, - &bs); + vnet_api_error_t rv = bfd_udp_find_session_by_api_input ( + multihop, sw_if_index, local_addr, peer_addr, &bs); if (rv) { bfd_unlock (bm); @@ -896,17 +1018,16 @@ bfd_udp_auth_activate (u32 sw_if_index, } vnet_api_error_t -bfd_udp_auth_deactivate (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, u8 is_delayed) +bfd_udp_auth_deactivate (bool multihop, u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr, u8 is_delayed) { bfd_main_t *bm = &bfd_main; vnet_api_error_t error; bfd_lock (bm); bfd_session_t *bs = NULL; - vnet_api_error_t rv = - bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, - &bs); + vnet_api_error_t rv = bfd_udp_find_session_by_api_input ( + multihop, sw_if_index, local_addr, peer_addr, &bs); if (rv) { bfd_unlock (bm); @@ -985,13 +1106,19 @@ bfd_udp4_verify_transport (const ip4_header_t *ip4, const udp_header_t *udp, key->local_addr.ip4.as_u8); return BFD_UDP_ERROR_DST_MISMATCH; } - const u8 expected_ttl = 255; - if (ip4->ttl != expected_ttl) + + // For single-hop, TTL must be 255 + if (bs->hop_type == BFD_HOP_TYPE_SINGLE) { - BFD_ERR ("IPv4 unexpected TTL value %u, expected %u", ip4->ttl, - expected_ttl); - return BFD_UDP_ERROR_TTL; + const u8 expected_ttl = 255; + if (ip4->ttl != expected_ttl) + { + BFD_ERR ("IPv4 unexpected TTL value %u, expected %u", ip4->ttl, + expected_ttl); + return BFD_UDP_ERROR_TTL; + } } + if (clib_net_to_host_u16 (udp->src_port) < 49152) { BFD_ERR ("Invalid UDP src port %u, out of range <49152,65535>", @@ -1062,7 +1189,14 @@ bfd_udp4_scan (vlib_main_t *vm, vlib_buffer_t *b, bfd_session_t **bs_out) { bfd_udp_key_t key; clib_memset (&key, 0, sizeof (key)); - key.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + if (udp->dst_port == clib_host_to_net_u16 (UDP_DST_PORT_bfd4_mh)) + { + key.sw_if_index = ~0; + } + else + { + key.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + } key.local_addr.ip4.as_u32 = ip4->dst_address.as_u32; key.peer_addr.ip4.as_u32 = ip4->src_address.as_u32; BFD_DBG ("Looking up BFD session using key (sw_if_index=%u, local=%U, " @@ -1145,13 +1279,19 @@ bfd_udp6_verify_transport (const ip6_header_t *ip6, const udp_header_t *udp, &key->local_addr.ip6); return BFD_UDP_ERROR_DST_MISMATCH; } - const u8 expected_hop_limit = 255; - if (ip6->hop_limit != expected_hop_limit) + + // For single-hop, hop-limit must be 255 + if (bs->hop_type == BFD_HOP_TYPE_SINGLE) { - BFD_ERR ("IPv6 unexpected hop-limit value %u, expected %u", - ip6->hop_limit, expected_hop_limit); - return BFD_UDP_ERROR_TTL; + const u8 expected_hop_limit = 255; + if (ip6->hop_limit != expected_hop_limit) + { + BFD_ERR ("IPv6 unexpected hop-limit value %u, expected %u", + ip6->hop_limit, expected_hop_limit); + return BFD_UDP_ERROR_TTL; + } } + if (clib_net_to_host_u16 (udp->src_port) < 49152) { BFD_ERR ("Invalid UDP src port %u, out of range <49152,65535>", @@ -1204,15 +1344,22 @@ bfd_udp6_scan (vlib_main_t *vm, vlib_buffer_t *b, bfd_session_t **bs_out) { bfd_udp_key_t key; clib_memset (&key, 0, sizeof (key)); - key.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + if (udp->dst_port == clib_host_to_net_u16 (UDP_DST_PORT_bfd6_mh)) + { + key.sw_if_index = ~0; + } + else + { + key.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + } key.local_addr.ip6.as_u64[0] = ip6->dst_address.as_u64[0]; 
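/*
 * The v4 and v6 scan paths above share the same keying rule: packets that
 * arrive on the dedicated multihop port are matched on addresses only, so
 * the interface slot of the lookup key is wildcarded to ~0.  A minimal
 * sketch of that rule (the helper name is illustrative, not patch code):
 */
static_always_inline u32
bfd_udp_key_sw_if_index_sketch (vlib_buffer_t *b, int is_multihop_port)
{
  return is_multihop_port ? ~0 : vnet_buffer (b)->sw_if_index[VLIB_RX];
}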
key.local_addr.ip6.as_u64[1] = ip6->dst_address.as_u64[1]; key.peer_addr.ip6.as_u64[0] = ip6->src_address.as_u64[0]; key.peer_addr.ip6.as_u64[1] = ip6->src_address.as_u64[1]; - BFD_DBG ("Looking up BFD session using key (sw_if_index=%u, local=%U, " - "peer=%U)", - key.sw_if_index, format_ip6_address, &key.local_addr, - format_ip6_address, &key.peer_addr); + BFD_DBG ("Looking up BFD session using discriminator %u", + pkt->your_disc); + bs = bfd_find_session_by_disc (bfd_udp_main.bfd_main, pkt->your_disc); + bs = bfd_lookup_session (&bfd_udp_main, &key); } if (!bs) @@ -1266,8 +1413,8 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt, { u64 len; t0 = vlib_add_trace (vm, rt, b0, sizeof (*t0)); - len = (b0->current_length < sizeof (t0->data)) ? b0->current_length - : sizeof (t0->data); + len = (b0->current_length < sizeof (t0->data)) ? b0->current_length : + sizeof (t0->data); t0->len = len; clib_memcpy_fast (t0->data, vlib_buffer_get_current (b0), len); } @@ -1311,25 +1458,35 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_node_increment_counter (vm, bfd_udp4_input_node.index, error0, 1); } + const bfd_udp_session_t *bus = &bs->udp; - ip_adjacency_t *adj = adj_get (bus->adj_index); - switch (adj->lookup_next_index) + + if (bs->hop_type == BFD_HOP_TYPE_MULTI) { - case IP_LOOKUP_NEXT_ARP: - next0 = BFD_UDP_INPUT_NEXT_REPLY_ARP; - break; - case IP_LOOKUP_NEXT_REWRITE: next0 = BFD_UDP_INPUT_NEXT_REPLY_REWRITE; - break; - case IP_LOOKUP_NEXT_MIDCHAIN: - next0 = BFD_UDP_INPUT_NEXT_REPLY_MIDCHAIN; - break; - default: - /* drop */ - break; + } + else + { + ip_adjacency_t *adj = adj_get (bus->adj_index); + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + next0 = BFD_UDP_INPUT_NEXT_REPLY_ARP; + break; + case IP_LOOKUP_NEXT_REWRITE: + next0 = BFD_UDP_INPUT_NEXT_REPLY_REWRITE; + break; + case IP_LOOKUP_NEXT_MIDCHAIN: + next0 = BFD_UDP_INPUT_NEXT_REPLY_MIDCHAIN; + break; + default: + /* drop */ + break; + } } } } + bfd_unlock (bm); vlib_set_next_frame_buffer (vm, rt, next0, bi0); @@ -1566,6 +1723,10 @@ bfd_udp_sw_if_add_del (CLIB_UNUSED (vnet_main_t *vnm), u32 sw_if_index, { continue; } + if (bs->hop_type == BFD_HOP_TYPE_MULTI) + { + continue; + } if (bs->udp.key.sw_if_index != sw_if_index) { continue; @@ -1593,24 +1754,47 @@ clib_error_t * bfd_udp_stats_init (bfd_udp_main_t *bum) { const char *name4 = "/bfd/udp4/sessions"; - bum->udp4_sessions_count_stat_seg_entry = vlib_stats_add_gauge ("%s", name4); + bum->udp4_sh_sessions_count_stat_seg_entry = + vlib_stats_add_gauge ("%s", name4); - vlib_stats_set_gauge (bum->udp4_sessions_count_stat_seg_entry, 0); - if (~0 == bum->udp4_sessions_count_stat_seg_entry) + vlib_stats_set_gauge (bum->udp4_sh_sessions_count_stat_seg_entry, 0); + if (~0 == bum->udp4_sh_sessions_count_stat_seg_entry) { return clib_error_return ( 0, "Could not create stat segment entry for %s", name4); } const char *name6 = "/bfd/udp6/sessions"; - bum->udp6_sessions_count_stat_seg_entry = vlib_stats_add_gauge ("%s", name6); + bum->udp6_sh_sessions_count_stat_seg_entry = + vlib_stats_add_gauge ("%s", name6); - vlib_stats_set_gauge (bum->udp6_sessions_count_stat_seg_entry, 0); - if (~0 == bum->udp6_sessions_count_stat_seg_entry) + vlib_stats_set_gauge (bum->udp6_sh_sessions_count_stat_seg_entry, 0); + if (~0 == bum->udp6_sh_sessions_count_stat_seg_entry) { return clib_error_return ( 0, "Could not create stat segment entry for %s", name6); } + const char *name4_mh = "/bfd/udp4/sessions_mh"; + bum->udp4_mh_sessions_count_stat_seg_entry = + 
vlib_stats_add_gauge ("%s", name4_mh); + + vlib_stats_set_gauge (bum->udp4_mh_sessions_count_stat_seg_entry, 0); + if (~0 == bum->udp4_mh_sessions_count_stat_seg_entry) + { + return clib_error_return ( + 0, "Could not create stat segment entry for %s", name4_mh); + } + const char *name6_mh = "/bfd/udp6/sessions_mh"; + bum->udp6_mh_sessions_count_stat_seg_entry = + vlib_stats_add_gauge ("%s", name6_mh); + + vlib_stats_set_gauge (bum->udp6_mh_sessions_count_stat_seg_entry, 0); + if (~0 == bum->udp6_mh_sessions_count_stat_seg_entry) + { + return clib_error_return ( + 0, "Could not create stat segment entry for %s", name6_mh); + } + return 0; } @@ -1620,8 +1804,10 @@ bfd_udp_stats_init (bfd_udp_main_t *bum) static clib_error_t * bfd_udp_init (vlib_main_t * vm) { - bfd_udp_main.udp4_sessions_count = 0; - bfd_udp_main.udp6_sessions_count = 0; + bfd_udp_main.udp4_sh_sessions_count = 0; + bfd_udp_main.udp6_sh_sessions_count = 0; + bfd_udp_main.udp4_mh_sessions_count = 0; + bfd_udp_main.udp6_mh_sessions_count = 0; mhash_init (&bfd_udp_main.bfd_session_idx_by_bfd_key, sizeof (uword), sizeof (bfd_udp_key_t)); bfd_udp_main.bfd_main = &bfd_main; diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h index 8f4bfee2bd7..362e9541dfe 100644 --- a/src/vnet/bfd/bfd_udp.h +++ b/src/vnet/bfd/bfd_udp.h @@ -26,12 +26,10 @@ /** identifier of BFD session based on UDP transport only */ typedef CLIB_PACKED (struct { - union { - /** interface to which the session is tied - single-hop */ - u32 sw_if_index; - /** the FIB index the peer is in - multi-hop*/ - u32 fib_index; - }; + /** interface to which the session is tied - single-hop */ + u16 sw_if_index; + /** the FIB index the peer is in - multi-hop*/ + u16 fib_index; /** local address */ ip46_address_t local_addr; /** peer address */ diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 2f34aa4b5fc..247af56f403 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -219,16 +219,12 @@ typedef struct struct { /* input variables */ - struct - { - u32 next_index; /* index of next node - used by custom apps */ - u32 error_next_index; /* index of next node if error - used by custom apps */ - }; + u32 next_index; /* index of next node - used by custom apps */ + u32 error_next_index; /* index of next node if error - used by + custom apps */ + u8 _save_rewrite_length; /* handoff variables */ - struct - { - u16 owner_thread_index; - }; + u16 owner_thread_index; }; /* output variables */ struct @@ -245,7 +241,8 @@ typedef struct u8 ip_proto; /* protocol in ip header */ u8 icmp_type_or_tcp_flags; u8 is_non_first_fragment : 1; - u8 l4_layer_truncated : 7; + u8 l4_hdr_truncated : 1; + u8 unused : 6; u32 tcp_seq_number; }; /* full reassembly output variables */ @@ -422,25 +419,26 @@ typedef struct STATIC_ASSERT (VNET_REWRITE_TOTAL_BYTES <= VLIB_BUFFER_PRE_DATA_SIZE, "VNET_REWRITE_TOTAL_BYTES too big"); -STATIC_ASSERT (STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) - == STRUCT_SIZE_OF (vnet_buffer_opaque_t, - ip.reass.save_rewrite_length) - && STRUCT_SIZE_OF (vnet_buffer_opaque_t, - ip.reass.save_rewrite_length) == - STRUCT_SIZE_OF (vnet_buffer_opaque_t, mpls.save_rewrite_length) - && STRUCT_SIZE_OF (vnet_buffer_opaque_t, - mpls.save_rewrite_length) == 1 - && VNET_REWRITE_TOTAL_BYTES < UINT8_MAX, - "save_rewrite_length member must be able to hold the max value of rewrite length"); - -STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) - == STRUCT_OFFSET_OF (vnet_buffer_opaque_t, - ip.reass.save_rewrite_length) - && 
STRUCT_OFFSET_OF (vnet_buffer_opaque_t, - mpls.save_rewrite_length) == - STRUCT_OFFSET_OF (vnet_buffer_opaque_t, - ip.reass.save_rewrite_length), - "save_rewrite_length must be aligned so that reass doesn't overwrite it"); +STATIC_ASSERT ( + STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) == + STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.reass.save_rewrite_length) && + STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) == + STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.reass._save_rewrite_length) && + STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.reass.save_rewrite_length) == + STRUCT_SIZE_OF (vnet_buffer_opaque_t, mpls.save_rewrite_length) && + STRUCT_SIZE_OF (vnet_buffer_opaque_t, mpls.save_rewrite_length) == 1 && + VNET_REWRITE_TOTAL_BYTES < UINT8_MAX, + "save_rewrite_length member must be able to hold the max value of rewrite " + "length"); + +STATIC_ASSERT ( + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) == + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.reass.save_rewrite_length) && + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) == + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.reass._save_rewrite_length) && + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, mpls.save_rewrite_length) == + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.reass.save_rewrite_length), + "save_rewrite_length must be aligned so that reass doesn't overwrite it"); /* * The opaque field of the vlib_buffer_t is interpreted as a @@ -495,7 +493,22 @@ typedef struct }; } nat; - u32 unused[8]; + struct + { + /* + * Shallow virtual reassembly output values. + * Only populated if extended reassembly enabled via + * ipX_sv_reass_enable_disable_extended(). + */ + struct + { + u32 thread_index; + u32 pool_index; + u32 id; + } reass; + } ip; + + u32 unused[5]; } vnet_buffer_opaque2_t; #define vnet_buffer2(b) ((vnet_buffer_opaque2_t *) (b)->opaque2) diff --git a/src/vnet/dev/api.c b/src/vnet/dev/api.c index 114b63d6662..d968f66c316 100644 --- a/src/vnet/dev/api.c +++ b/src/vnet/dev/api.c @@ -156,6 +156,7 @@ vnet_dev_api_create_port_if (vlib_main_t *vm, { vnet_dev_t *dev = vnet_dev_by_index (args->dev_index); vnet_dev_port_t *port = 0; + vnet_dev_port_if_create_args_t a = {}; u16 n_threads = vlib_get_n_threads (); int default_is_intr_mode; vnet_dev_rv_t rv; @@ -181,7 +182,7 @@ vnet_dev_api_create_port_if (vlib_main_t *vm, if (!port) return VNET_DEV_ERR_INVALID_DEVICE_ID; - if (port->interface_created) + if (port->interfaces) return VNET_DEV_ERR_ALREADY_EXISTS; if (args->args) @@ -202,45 +203,82 @@ vnet_dev_api_create_port_if (vlib_main_t *vm, { if (args->num_rx_queues > port->attr.max_rx_queues) return VNET_DEV_ERR_INVALID_NUM_RX_QUEUES; - port->intf.num_rx_queues = args->num_rx_queues; + a.num_rx_queues = args->num_rx_queues; } else - port->intf.num_rx_queues = clib_min (port->attr.max_tx_queues, 1); + a.num_rx_queues = clib_min (port->attr.max_tx_queues, 1); if (args->num_tx_queues) { if (args->num_tx_queues > port->attr.max_tx_queues) return VNET_DEV_ERR_INVALID_NUM_TX_QUEUES; - port->intf.num_tx_queues = args->num_tx_queues; + a.num_tx_queues = args->num_tx_queues; } else - port->intf.num_tx_queues = clib_min (port->attr.max_tx_queues, n_threads); + a.num_tx_queues = clib_min (port->attr.max_tx_queues, n_threads); if (args->rx_queue_size) { if (!_vnet_dev_queue_size_validate (args->rx_queue_size, port->rx_queue_config)) return VNET_DEV_ERR_INVALID_RX_QUEUE_SIZE; - port->intf.rxq_sz = args->rx_queue_size; + a.rxq_sz = args->rx_queue_size; } else - port->intf.rxq_sz = 
port->rx_queue_config.default_size; + a.rxq_sz = port->rx_queue_config.default_size; if (args->tx_queue_size) { if (!_vnet_dev_queue_size_validate (args->tx_queue_size, port->tx_queue_config)) return VNET_DEV_ERR_INVALID_TX_QUEUE_SIZE; - port->intf.txq_sz = args->tx_queue_size; + a.txq_sz = args->tx_queue_size; } else - port->intf.txq_sz = port->tx_queue_config.default_size; + a.txq_sz = port->tx_queue_config.default_size; - clib_memcpy (port->intf.name, args->intf_name, sizeof (port->intf.name)); - port->intf.default_is_intr_mode = default_is_intr_mode; + clib_memcpy (a.name, args->intf_name, sizeof (a.name)); + a.default_is_intr_mode = default_is_intr_mode; + a.consistent_qp = (args->flags.n & VNET_DEV_PORT_F_CONSISTENT_QP) != 0; - rv = vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_create); - args->sw_if_index = (rv == VNET_DEV_OK) ? port->intf.sw_if_index : ~0; + rv = vnet_dev_process_call_port_op_with_ptr (vm, port, + vnet_dev_port_if_create, &a); + args->sw_if_index = (rv == VNET_DEV_OK) ? a.sw_if_index : ~0; + + return rv; +} + +vnet_dev_rv_t +vnet_dev_api_port_add_sec_if (vlib_main_t *vm, + vnet_dev_api_port_add_sec_if_args_t *args) +{ + vnet_dev_port_t *port = 0; + vnet_dev_t *dev = 0; + vnet_dev_port_sec_if_create_args_t a = {}; + vnet_dev_rv_t rv = VNET_DEV_OK; + + port = vnet_dev_get_port_from_sw_if_index (args->primary_sw_if_index); + if (port == 0) + return VNET_DEV_ERR_NOT_FOUND; + + log_debug (dev, + "create_port_if: primary_sw_if_index %u intf_name '%s' " + "args '%v'", + args->primary_sw_if_index, args->intf_name, args->args); + + if (port->interfaces == 0) + return VNET_DEV_ERR_PRIMARY_INTERFACE_MISSING; + + clib_memcpy (a.name, args->intf_name, sizeof (a.name)); + a.args = args->args; + + rv = vnet_dev_process_call_port_op_with_ptr (vm, port, + vnet_dev_port_add_sec_if, &a); + + if (rv != VNET_DEV_OK) + args->sw_if_index = ~0; + else + args->sw_if_index = a.sw_if_index; return rv; } @@ -249,9 +287,23 @@ vnet_dev_rv_t vnet_dev_api_remove_port_if (vlib_main_t *vm, vnet_dev_api_remove_port_if_args_t *args) { + vnet_dev_port_t *port; + + port = vnet_dev_get_port_from_sw_if_index (args->sw_if_index); + + if (port == 0) + return VNET_DEV_ERR_UNKNOWN_INTERFACE; + + return vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_remove); +} + +vnet_dev_rv_t +vnet_dev_api_port_del_sec_if (vlib_main_t *vm, + vnet_dev_api_port_del_sec_if_args_t *args) +{ vnet_dev_main_t *dm = &vnet_dev_main; vnet_main_t *vnm = vnet_get_main (); - vnet_sw_interface_t *si; + vnet_sw_interface_t *si, *sup_si; vnet_hw_interface_t *hi; vnet_dev_port_t *port; @@ -259,17 +311,26 @@ vnet_dev_api_remove_port_if (vlib_main_t *vm, if (!si) return VNET_DEV_ERR_UNKNOWN_INTERFACE; - hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index); + if (si->sup_sw_if_index == si->sw_if_index) + return VNET_DEV_ERR_UNKNOWN_INTERFACE; + + sup_si = vnet_get_sw_interface_or_null (vnm, si->sup_sw_if_index); + if (!sup_si) + return VNET_DEV_ERR_UNKNOWN_INTERFACE; + + hi = vnet_get_hw_interface_or_null (vnm, sup_si->hw_if_index); if (!hi) return VNET_DEV_ERR_UNKNOWN_INTERFACE; - if (pool_is_free_index (dm->ports_by_dev_instance, hi->dev_instance)) + if (pool_is_free_index (dm->dev_instances, hi->dev_instance)) return VNET_DEV_ERR_UNKNOWN_INTERFACE; port = vnet_dev_get_port_from_dev_instance (hi->dev_instance); - if (port->intf.hw_if_index != si->hw_if_index) + if (port->interfaces->primary_interface.hw_if_index != si->hw_if_index) return VNET_DEV_ERR_UNKNOWN_INTERFACE; - return vnet_dev_process_call_port_op (vm, 
port, vnet_dev_port_if_remove); + return vnet_dev_process_call_port_op_with_ptr ( + vm, port, vnet_dev_port_del_sec_if, + &(vnet_dev_port_del_sec_if_args_t){ .sw_if_index = args->sw_if_index }); } diff --git a/src/vnet/dev/api.h b/src/vnet/dev/api.h index 1b7bf27d62a..3e552e4326e 100644 --- a/src/vnet/dev/api.h +++ b/src/vnet/dev/api.h @@ -65,4 +65,27 @@ vnet_dev_rv_t vnet_dev_api_remove_port_if (vlib_main_t *, vnet_dev_api_remove_port_if_args_t *); +typedef struct +{ + u32 primary_sw_if_index; + vnet_dev_if_name_t intf_name; + u8 *args; + + /* return */ + u32 sw_if_index; +} vnet_dev_api_port_add_sec_if_args_t; + +vnet_dev_rv_t +vnet_dev_api_port_add_sec_if (vlib_main_t *, + vnet_dev_api_port_add_sec_if_args_t *); + +typedef struct +{ + u32 sw_if_index; +} vnet_dev_api_port_del_sec_if_args_t; + +vnet_dev_rv_t +vnet_dev_api_port_del_sec_if (vlib_main_t *, + vnet_dev_api_port_del_sec_if_args_t *); + #endif /* _VNET_DEV_API_H_ */ diff --git a/src/vnet/dev/cli.c b/src/vnet/dev/cli.c index 53be4483183..6002a2f0dee 100644 --- a/src/vnet/dev/cli.c +++ b/src/vnet/dev/cli.c @@ -223,6 +223,94 @@ VLIB_CLI_COMMAND (device_remove_if_cmd, static) = { }; static clib_error_t * +device_create_sec_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_dev_api_port_add_sec_if_args_t a = {}; + vnet_main_t *vnm = vnet_get_main (); + vnet_dev_rv_t rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (!a.intf_name[0] && + unformat (input, "if-name %U", unformat_c_string_array, a.intf_name, + sizeof (a.intf_name))) + ; + else if (unformat (input, "primary-if-name %U", + unformat_vnet_sw_interface, vnm, + &a.primary_sw_if_index)) + ; + else if (unformat (input, "primary-sw-if-index %u", + &a.primary_sw_if_index)) + ; + else if (!a.args && unformat (input, "args %v", &a.args)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + rv = vnet_dev_api_port_add_sec_if (vm, &a); + + vec_free (a.args); + + if (rv != VNET_DEV_OK) + return clib_error_return (0, "unable to create secondary interface: %U", + format_vnet_dev_rv, rv); + + return 0; +} + +VLIB_CLI_COMMAND (device_create_sec_if_cmd, static) = { + .path = "device create-secondary-interface", + .short_help = "device create-secondary-interface [<interface-name> | " + "sw-if-index <n>] id <n> [args <sec-if-args>]", + .function = device_create_sec_if_cmd_fn, + .is_mp_safe = 1, +}; + +static clib_error_t * +device_remove_sec_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_dev_api_port_del_sec_if_args_t a = { .sw_if_index = ~0 }; + vnet_main_t *vnm = vnet_get_main (); + vnet_dev_rv_t rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, + &a.sw_if_index)) + ; + else if (unformat (input, "sw-if-index %u", &a.sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (a.sw_if_index == ~0) + return clib_error_return ( + 0, "please specify existing secondary interface name"); + + rv = vnet_dev_api_port_del_sec_if (vm, &a); + + if (rv != VNET_DEV_OK) + return clib_error_return (0, "unable to remove secondary interface: %U", + format_vnet_dev_rv, rv); + + return 0; +} + +VLIB_CLI_COMMAND (device_remove_sec_if_cmd, static) = { + .path = "device remove-secondary-interface", + .short_help = + "device remove-secondary-interface [<interface-name> | sw-if-index <n>]", + .function = 
device_remove_sec_if_cmd_fn, + .is_mp_safe = 1, +}; + +static clib_error_t * show_devices_cmd_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) { @@ -300,22 +388,23 @@ show_device_counters_cmd_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_output (vm, "device '%s':", dev->device_id); foreach_vnet_dev_port (p, dev) { + vlib_cli_output (vm, " Port %u:", p->port_id); vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa, p->counter_main); foreach_vnet_dev_port_rx_queue (q, p) if (q->counter_main) { - vlib_cli_output (vm, " RX queue %u:", q->queue_id); - vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa, + vlib_cli_output (vm, " RX queue %u:", q->queue_id); + vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa, q->counter_main); } foreach_vnet_dev_port_tx_queue (q, p) if (q->counter_main) { - vlib_cli_output (vm, " TX queue %u:", q->queue_id); - vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa, + vlib_cli_output (vm, " TX queue %u:", q->queue_id); + vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa, q->counter_main); } } diff --git a/src/vnet/dev/counters.c b/src/vnet/dev/counters.c index d02839d664f..05cfc0ad290 100644 --- a/src/vnet/dev/counters.c +++ b/src/vnet/dev/counters.c @@ -89,6 +89,8 @@ format_vnet_dev_counter_name (u8 *s, va_list *va) char *units[] = { [VNET_DEV_CTR_UNIT_BYTES] = "bytes", [VNET_DEV_CTR_UNIT_PACKETS] = "packets", + [VNET_DEV_CTR_UNIT_DESCRIPTORS] = "descriptors", + [VNET_DEV_CTR_UNIT_BUFFERS] = "buffers", }; if (c->type == VNET_DEV_CTR_TYPE_VENDOR) diff --git a/src/vnet/dev/counters.h b/src/vnet/dev/counters.h index 33d08ffbecd..411ccdfb785 100644 --- a/src/vnet/dev/counters.h +++ b/src/vnet/dev/counters.h @@ -30,6 +30,8 @@ typedef enum VNET_DEV_CTR_UNIT_NA, VNET_DEV_CTR_UNIT_BYTES, VNET_DEV_CTR_UNIT_PACKETS, + VNET_DEV_CTR_UNIT_DESCRIPTORS, + VNET_DEV_CTR_UNIT_BUFFERS, } __clib_packed vnet_dev_counter_unit_t; typedef struct vnet_dev_counter diff --git a/src/vnet/dev/dev.api b/src/vnet/dev/dev.api index 552b778949b..423d0ee8505 100644 --- a/src/vnet/dev/dev.api +++ b/src/vnet/dev/dev.api @@ -12,6 +12,7 @@ enumflag dev_flags : u32 enumflag dev_port_flags : u32 { VL_API_DEV_PORT_FLAG_INTERRUPT_MODE = 0x1, + VL_API_DEV_PORT_FLAG_CONSISTENT_QP = 0x2, }; autoendian define dev_attach diff --git a/src/vnet/dev/dev.c b/src/vnet/dev/dev.c index e04fa161ce2..7954707dd32 100644 --- a/src/vnet/dev/dev.c +++ b/src/vnet/dev/dev.c @@ -130,7 +130,7 @@ vnet_dev_deinit (vlib_main_t *vm, vnet_dev_t *dev) vnet_dev_validate (vm, dev); foreach_vnet_dev_port (p, dev) - ASSERT (p->interface_created == 0); + ASSERT (p->interfaces == 0); if (dev->ops.deinit) dev->ops.deinit (vm, dev); @@ -188,7 +188,7 @@ void vnet_dev_detach (vlib_main_t *vm, vnet_dev_t *dev) { foreach_vnet_dev_port (p, dev) - if (p->interface_created) + if (p->interfaces) vnet_dev_port_if_remove (vm, p); vnet_dev_deinit (vm, dev); vnet_dev_free (vm, dev); @@ -260,6 +260,8 @@ vnet_dev_feature_update_cb (u32 sw_if_index, u8 arc_index, u8 is_enable, vnet_feature_config_main_t *cm; vnet_dev_main_t *vdm = &vnet_dev_main; vnet_dev_port_t *port; + vnet_dev_port_interface_t *intf; + vnet_dev_instance_t *di; vnet_hw_interface_t *hw; u32 current_config_index = ~0; u32 next_index = ~0; @@ -269,9 +271,18 @@ vnet_dev_feature_update_cb (u32 sw_if_index, u8 arc_index, u8 is_enable, return; hw = vnet_get_sup_hw_interface (vnm, sw_if_index); - port = vnet_dev_get_port_from_dev_instance (hw->dev_instance); + di = vnet_dev_get_dev_instance (hw->dev_instance); - if (port == 0 || 
port->intf.sw_if_index != sw_if_index) + if (!di) + return; + + intf = di->is_primary_if ? + vnet_dev_port_get_primary_if (di->port) : + vnet_dev_port_get_sec_if_by_index (di->port, di->sec_if_index); + + port = di->port; + + if (port == 0 || intf->sw_if_index != sw_if_index) return; if (vnet_have_features (arc_index, sw_if_index)) @@ -281,28 +292,27 @@ vnet_dev_feature_update_cb (u32 sw_if_index, u8 arc_index, u8 is_enable, vec_elt (cm->config_index_by_sw_if_index, sw_if_index); vnet_get_config_data (&cm->config_main, ¤t_config_index, &next_index, 0); - if (port->intf.feature_arc == 0 || - port->intf.rx_next_index != next_index || - port->intf.current_config_index != current_config_index) + if (intf->feature_arc == 0 || intf->rx_next_index != next_index || + intf->current_config_index != current_config_index) { - port->intf.current_config_index = current_config_index; - port->intf.rx_next_index = next_index; - port->intf.feature_arc_index = arc_index; - port->intf.feature_arc = 1; + intf->current_config_index = current_config_index; + intf->rx_next_index = next_index; + intf->feature_arc_index = arc_index; + intf->feature_arc = 1; update_runtime = 1; } } else { - if (port->intf.feature_arc) + if (intf->feature_arc) { - port->intf.current_config_index = 0; - port->intf.rx_next_index = - port->intf.redirect_to_node ? - port->intf.redirect_to_node_next_index : - vnet_dev_default_next_index_by_port_type[port->attr.type]; - port->intf.feature_arc_index = 0; - port->intf.feature_arc = 0; + intf->current_config_index = 0; + intf->rx_next_index = + intf->redirect_to_node ? + intf->redirect_to_node_next_index : + vnet_dev_default_next_index_by_port_type[port->attr.type]; + intf->feature_arc_index = 0; + intf->feature_arc = 0; update_runtime = 1; } } diff --git a/src/vnet/dev/dev.h b/src/vnet/dev/dev.h index eb06eeba34e..f3f7563317e 100644 --- a/src/vnet/dev/dev.h +++ b/src/vnet/dev/dev.h @@ -29,7 +29,8 @@ typedef enum _ (interrupt_mode) \ _ (rss) \ _ (change_max_rx_frame_size) \ - _ (mac_filter) + _ (mac_filter) \ + _ (secondary_interfaces) #define foreach_vnet_dev_port_rx_offloads _ (ip4_cksum) @@ -104,6 +105,11 @@ typedef void (vnet_dev_rx_queue_op_no_rv_t) (vlib_main_t *, vnet_dev_rx_queue_t *); typedef void (vnet_dev_tx_queue_op_no_rv_t) (vlib_main_t *, vnet_dev_tx_queue_t *); +typedef vnet_dev_rv_t (vnet_dev_op_with_ptr_t) (vlib_main_t *, vnet_dev_t *, + void *); +typedef vnet_dev_rv_t (vnet_dev_port_op_with_ptr_t) (vlib_main_t *, + vnet_dev_port_t *, + void *); typedef u16 vnet_dev_queue_id_t; typedef u16 vnet_dev_bus_index_t; @@ -248,6 +254,8 @@ typedef struct vnet_dev_port_op_no_rv_t *deinit; vnet_dev_port_op_no_rv_t *free; vnet_dev_port_op_no_rv_t *clear_counters; + vnet_dev_port_op_with_ptr_t *add_sec_if; + vnet_dev_port_op_with_ptr_t *del_sec_if; format_function_t *format_status; format_function_t *format_flow; } vnet_dev_port_ops_t; @@ -264,30 +272,41 @@ typedef union u8 as_number; } vnet_dev_rx_queue_rt_req_t; +typedef struct +{ + vlib_buffer_template_t buffer_template; + u32 sw_if_index; + u16 next_index; + u16 sec_if_index; +} vnet_dev_rx_queue_if_rt_data_t; + typedef struct vnet_dev_rx_queue { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); vnet_dev_port_t *port; u16 rx_thread_index; u16 index; - vnet_dev_counter_main_t *counter_main; - CLIB_CACHE_LINE_ALIGN_MARK (runtime0); - vnet_dev_rx_queue_t *next_on_thread; + u16 size; u8 interrupt_mode : 1; u8 enabled : 1; u8 started : 1; u8 suspended : 1; - vnet_dev_queue_id_t queue_id; - u16 size; - u16 next_index; vnet_dev_rx_queue_rt_req_t 
runtime_request; + vnet_dev_counter_main_t *counter_main; + vnet_dev_rx_queue_t *next_on_thread; + vnet_dev_queue_id_t queue_id; + vnet_dev_rx_queue_if_rt_data_t **sec_if_rt_data; CLIB_CACHE_LINE_ALIGN_MARK (runtime1); - vlib_buffer_template_t buffer_template; + vnet_dev_rx_queue_if_rt_data_t if_rt_data; CLIB_CACHE_LINE_ALIGN_MARK (driver_data); u8 data[]; } vnet_dev_rx_queue_t; +#if CLIB_CACHE_LINE_BYTES > 64 +STATIC_ASSERT_SIZEOF (vnet_dev_rx_queue_t, 2 * CLIB_CACHE_LINE_BYTES); +#else STATIC_ASSERT_SIZEOF (vnet_dev_rx_queue_t, 3 * CLIB_CACHE_LINE_BYTES); +#endif typedef struct vnet_dev_tx_queue { @@ -309,6 +328,38 @@ typedef struct vnet_dev_tx_queue STATIC_ASSERT_SIZEOF (vnet_dev_tx_queue_t, 2 * CLIB_CACHE_LINE_BYTES); +typedef struct +{ + vnet_dev_if_name_t name; + u8 interface_created : 1; + u8 feature_arc : 1; + u8 redirect_to_node : 1; + u8 feature_arc_index; + u16 rx_next_index; + u32 index; + u32 sw_if_index; + u32 hw_if_index; + u32 dev_instance; + u32 tx_node_index; + u32 next_index; + u32 current_config_index; + u16 redirect_to_node_next_index; + u32 user_data; + vnet_dev_arg_t *args; +} vnet_dev_port_interface_t; + +typedef struct +{ + u32 rx_node_index; + u8 default_is_intr_mode : 1; + u16 num_rx_queues; + u16 num_tx_queues; + u16 txq_sz; + u16 rxq_sz; + vnet_dev_port_interface_t primary_interface; + vnet_dev_port_interface_t **secondary_interfaces; +} vnet_dev_port_interfaces_t; + typedef struct vnet_dev_port { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -319,7 +370,6 @@ typedef struct vnet_dev_port u8 started : 1; u8 link_up : 1; u8 promisc : 1; - u8 interface_created : 1; u8 rx_node_assigned : 1; vnet_dev_counter_main_t *counter_main; vnet_dev_queue_config_t rx_queue_config; @@ -334,31 +384,12 @@ typedef struct vnet_dev_port vnet_dev_tx_queue_t **tx_queues; vnet_dev_port_ops_t port_ops; vnet_dev_arg_t *args; + vnet_dev_arg_t *sec_if_args; vnet_dev_rx_queue_ops_t rx_queue_ops; vnet_dev_tx_queue_ops_t tx_queue_ops; vnet_dev_node_t rx_node; vnet_dev_node_t tx_node; - - struct - { - vnet_dev_if_name_t name; - u32 dev_instance; - u32 rx_node_index; - u32 current_config_index; - u16 rx_next_index; - u16 redirect_to_node_next_index; - u8 feature_arc_index; - u8 feature_arc : 1; - u8 redirect_to_node : 1; - u8 default_is_intr_mode : 1; - u32 tx_node_index; - u32 hw_if_index; - u32 sw_if_index; - u16 num_rx_queues; - u16 num_tx_queues; - u16 txq_sz; - u16 rxq_sz; - } intf; + vnet_dev_port_interfaces_t *interfaces; CLIB_CACHE_LINE_ALIGN_MARK (data0); u8 data[]; @@ -456,10 +487,17 @@ typedef struct typedef struct { + vnet_dev_port_t *port; + u32 sec_if_index; + u8 is_primary_if : 1; +} vnet_dev_instance_t; + +typedef struct +{ vnet_dev_bus_t *buses; vnet_dev_driver_t *drivers; vnet_dev_t **devices; - vnet_dev_port_t **ports_by_dev_instance; + vnet_dev_instance_t *dev_instances; vnet_dev_bus_registration_t *bus_registrations; vnet_dev_driver_registration_t *driver_registrations; void *runtime_temp_spaces; @@ -482,6 +520,7 @@ typedef struct vnet_dev_port_attr_t attr; vnet_dev_port_ops_t ops; vnet_dev_arg_t *args; + vnet_dev_arg_t *sec_if_args; u16 data_size; void *initial_data; } port; @@ -531,11 +570,11 @@ format_function_t format_vnet_dev_args; /* dev.c */ vnet_dev_t *vnet_dev_alloc (vlib_main_t *, vnet_dev_device_id_t, vnet_dev_driver_t *); -void vnet_dev_free (vlib_main_t *, vnet_dev_t *); -vnet_dev_rv_t vnet_dev_init (vlib_main_t *, vnet_dev_t *); -void vnet_dev_deinit (vlib_main_t *, vnet_dev_t *); -vnet_dev_rv_t vnet_dev_reset (vlib_main_t *, vnet_dev_t *); -void vnet_dev_detach 
(vlib_main_t *, vnet_dev_t *); +vnet_dev_op_no_rv_t vnet_dev_free; +vnet_dev_op_t vnet_dev_init; +vnet_dev_op_no_rv_t vnet_dev_deinit; +vnet_dev_op_t vnet_dev_reset; +vnet_dev_op_no_rv_t vnet_dev_detach; vnet_dev_rv_t vnet_dev_port_add (vlib_main_t *, vnet_dev_t *, vnet_dev_port_id_t, vnet_dev_port_add_args_t *); @@ -567,51 +606,84 @@ void vnet_dev_clear_hw_interface_counters (u32); void vnet_dev_set_interface_next_node (vnet_main_t *, u32, u32); /* port.c */ -vnet_dev_rv_t vnet_dev_port_start (vlib_main_t *, vnet_dev_port_t *); -vnet_dev_rv_t vnet_dev_port_start_all_rx_queues (vlib_main_t *, - vnet_dev_port_t *); -vnet_dev_rv_t vnet_dev_port_start_all_tx_queues (vlib_main_t *, - vnet_dev_port_t *); -void vnet_dev_port_stop (vlib_main_t *, vnet_dev_port_t *); -void vnet_dev_port_deinit (vlib_main_t *, vnet_dev_port_t *); -void vnet_dev_port_free (vlib_main_t *, vnet_dev_port_t *); + +typedef struct +{ + vnet_dev_if_name_t name; + u16 num_rx_queues; + u16 num_tx_queues; + u16 rxq_sz; + u16 txq_sz; + u8 default_is_intr_mode : 1; + u8 consistent_qp : 1; + + /* return */ + u32 sw_if_index; +} vnet_dev_port_if_create_args_t; + +typedef struct +{ + vnet_dev_if_name_t name; + u8 *args; + + /* return */ + u32 sw_if_index; +} vnet_dev_port_sec_if_create_args_t; + +typedef struct +{ + u32 sw_if_index; +} vnet_dev_port_del_sec_if_args_t; + +vnet_dev_port_op_t vnet_dev_port_start; +vnet_dev_port_op_t vnet_dev_port_start_all_rx_queues; +vnet_dev_port_op_t vnet_dev_port_start_all_tx_queues; +vnet_dev_port_op_no_rv_t vnet_dev_port_stop; +vnet_dev_port_op_no_rv_t vnet_dev_port_deinit; +vnet_dev_port_op_no_rv_t vnet_dev_port_free; +vnet_dev_port_op_with_ptr_t vnet_dev_port_add_sec_if; +vnet_dev_port_op_with_ptr_t vnet_dev_port_del_sec_if; + void vnet_dev_port_add_counters (vlib_main_t *, vnet_dev_port_t *, vnet_dev_counter_t *, u16); -void vnet_dev_port_free_counters (vlib_main_t *, vnet_dev_port_t *); -void vnet_dev_port_update_tx_node_runtime (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_port_op_no_rv_t vnet_dev_port_free_counters; +vnet_dev_port_op_no_rv_t vnet_dev_port_update_tx_node_runtime; void vnet_dev_port_state_change (vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_state_changes_t); -void vnet_dev_port_clear_counters (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_port_op_no_rv_t vnet_dev_port_clear_counters; vnet_dev_rv_t vnet_dev_port_cfg_change_req_validate (vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *); vnet_dev_rv_t vnet_dev_port_cfg_change (vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *); -vnet_dev_rv_t vnet_dev_port_if_create (vlib_main_t *, vnet_dev_port_t *); -vnet_dev_rv_t vnet_dev_port_if_remove (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_port_op_with_ptr_t vnet_dev_port_if_create; +vnet_dev_port_op_t vnet_dev_port_if_remove; /* queue.c */ vnet_dev_rv_t vnet_dev_rx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16); vnet_dev_rv_t vnet_dev_tx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16); -void vnet_dev_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *); -void vnet_dev_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *); +vnet_dev_rx_queue_op_no_rv_t vnet_dev_rx_queue_free; +vnet_dev_tx_queue_op_no_rv_t vnet_dev_tx_queue_free; void vnet_dev_rx_queue_add_counters (vlib_main_t *, vnet_dev_rx_queue_t *, vnet_dev_counter_t *, u16); -void vnet_dev_rx_queue_free_counters (vlib_main_t *, vnet_dev_rx_queue_t *); +vnet_dev_rx_queue_op_no_rv_t vnet_dev_rx_queue_free_counters; void vnet_dev_tx_queue_add_counters (vlib_main_t *, 
vnet_dev_tx_queue_t *, vnet_dev_counter_t *, u16); -void vnet_dev_tx_queue_free_counters (vlib_main_t *, vnet_dev_tx_queue_t *); -vnet_dev_rv_t vnet_dev_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *); -vnet_dev_rv_t vnet_dev_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *); -void vnet_dev_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *); -void vnet_dev_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *); +vnet_dev_tx_queue_op_no_rv_t vnet_dev_tx_queue_free_counters; +vnet_dev_rx_queue_op_t vnet_dev_rx_queue_start; +vnet_dev_tx_queue_op_t vnet_dev_tx_queue_start; +vnet_dev_rx_queue_op_no_rv_t vnet_dev_rx_queue_stop; +vnet_dev_tx_queue_op_no_rv_t vnet_dev_tx_queue_stop; /* process.c */ -vnet_dev_rv_t vnet_dev_process_create (vlib_main_t *, vnet_dev_t *); +vnet_dev_op_t vnet_dev_process_create; vnet_dev_rv_t vnet_dev_process_call_op (vlib_main_t *, vnet_dev_t *, vnet_dev_op_t *); vnet_dev_rv_t vnet_dev_process_call_op_no_rv (vlib_main_t *, vnet_dev_t *, vnet_dev_op_no_rv_t *); +vnet_dev_rv_t vnet_dev_process_call_op_with_ptr (vlib_main_t *, vnet_dev_t *, + vnet_dev_op_with_ptr_t *, + void *); void vnet_dev_process_call_op_no_wait (vlib_main_t *, vnet_dev_t *, vnet_dev_op_no_rv_t *); vnet_dev_rv_t vnet_dev_process_call_port_op (vlib_main_t *, vnet_dev_port_t *, @@ -619,12 +691,15 @@ vnet_dev_rv_t vnet_dev_process_call_port_op (vlib_main_t *, vnet_dev_port_t *, vnet_dev_rv_t vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm, vnet_dev_port_t *, vnet_dev_port_op_no_rv_t *); +vnet_dev_rv_t +vnet_dev_process_call_port_op_with_ptr (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_op_with_ptr_t *, void *); void vnet_dev_process_call_port_op_no_wait (vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_op_no_rv_t *); vnet_dev_rv_t vnet_dev_process_port_cfg_change_req (vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *); -void vnet_dev_process_quit (vlib_main_t *, vnet_dev_t *); +vnet_dev_op_no_rv_t vnet_dev_process_quit; void vnet_dev_poll_dev_add (vlib_main_t *, vnet_dev_t *, f64, vnet_dev_op_no_rv_t *); void vnet_dev_poll_dev_remove (vlib_main_t *, vnet_dev_t *, diff --git a/src/vnet/dev/dev_funcs.h b/src/vnet/dev/dev_funcs.h index 521157abbec..f47344b0cea 100644 --- a/src/vnet/dev/dev_funcs.h +++ b/src/vnet/dev/dev_funcs.h @@ -51,13 +51,33 @@ vnet_dev_get_port_by_index (vnet_dev_t *dev, u32 index) return pool_elt_at_index (dev->ports, index)[0]; } -static_always_inline vnet_dev_port_t * -vnet_dev_get_port_from_dev_instance (u32 dev_instance) +static_always_inline vnet_dev_instance_t * +vnet_dev_get_dev_instance (u32 dev_instance) { vnet_dev_main_t *dm = &vnet_dev_main; - if (pool_is_free_index (dm->ports_by_dev_instance, dev_instance)) + if (pool_is_free_index (dm->dev_instances, dev_instance)) return 0; - return pool_elt_at_index (dm->ports_by_dev_instance, dev_instance)[0]; + return pool_elt_at_index (dm->dev_instances, dev_instance); +} + +static_always_inline vnet_dev_port_interface_t * +vnet_dev_port_get_primary_if (vnet_dev_port_t *p) +{ + return &p->interfaces->primary_interface; +} + +static_always_inline vnet_dev_port_interface_t * +vnet_dev_port_get_sec_if_by_index (vnet_dev_port_t *p, u32 index) +{ + return *pool_elt_at_index (p->interfaces->secondary_interfaces, index); +} + +static_always_inline vnet_dev_port_t * +vnet_dev_get_port_from_dev_instance (u32 dev_instance) +{ + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (dev_instance); + + return di ? 
di->port : 0; } static_always_inline vnet_dev_port_t * @@ -68,12 +88,44 @@ vnet_dev_get_port_from_hw_if_index (u32 hw_if_index) hw = vnet_get_hw_interface (vnet_get_main (), hw_if_index); port = vnet_dev_get_port_from_dev_instance (hw->dev_instance); - if (!port || port->intf.hw_if_index != hw_if_index) + if (!port || !port->interfaces || + port->interfaces->primary_interface.hw_if_index != hw_if_index) return 0; return port; } +static_always_inline u32 +vnet_dev_get_rx_queue_if_sw_if_index (vnet_dev_rx_queue_t *rxq) +{ + return rxq->port->interfaces->primary_interface.sw_if_index; +} + +static_always_inline u32 +vnet_dev_get_rx_queue_if_hw_if_index (vnet_dev_rx_queue_t *rxq) +{ + return rxq->port->interfaces->primary_interface.hw_if_index; +} + +static_always_inline u32 +vnet_dev_get_port_rx_node_index (vnet_dev_port_t *port) +{ + return port->interfaces->rx_node_index; +} + +static_always_inline vnet_dev_port_t * +vnet_dev_get_port_from_sw_if_index (u32 sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *si; + + si = vnet_get_sw_interface_or_null (vnm, sw_if_index); + if (!si) + return 0; + + return vnet_dev_get_port_from_hw_if_index (si->hw_if_index); +} + static_always_inline vnet_dev_t * vnet_dev_by_index (u32 index) { @@ -128,12 +180,6 @@ vnet_dev_port_validate (vlib_main_t *vm, vnet_dev_port_t *port) ASSERT (vm->thread_index == 0); } -static_always_inline u32 -vnet_dev_port_get_sw_if_index (vnet_dev_port_t *port) -{ - return port->intf.sw_if_index; -} - static_always_inline vnet_dev_port_t * vnet_dev_get_port_by_id (vnet_dev_t *dev, vnet_dev_port_id_t port_id) { @@ -144,7 +190,7 @@ vnet_dev_get_port_by_id (vnet_dev_t *dev, vnet_dev_port_id_t port_id) } static_always_inline vnet_dev_rx_queue_t * -vnet_dev_port_get_rx_queue_by_id (vnet_dev_port_t *port, +vnet_dev_get_port_rx_queue_by_id (vnet_dev_port_t *port, vnet_dev_queue_id_t queue_id) { foreach_vnet_dev_port_rx_queue (q, port) @@ -154,7 +200,7 @@ vnet_dev_port_get_rx_queue_by_id (vnet_dev_port_t *port, } static_always_inline vnet_dev_tx_queue_t * -vnet_dev_port_get_tx_queue_by_id (vnet_dev_port_t *port, +vnet_dev_get_port_tx_queue_by_id (vnet_dev_port_t *port, vnet_dev_queue_id_t queue_id) { foreach_vnet_dev_port_tx_queue (q, port) @@ -199,10 +245,49 @@ vnet_dev_tx_queue_unlock_if_needed (vnet_dev_tx_queue_t *txq) __atomic_store_n (&txq->lock, 0, __ATOMIC_RELEASE); } +static_always_inline vnet_dev_rx_queue_if_rt_data_t * +vnet_dev_get_rx_queue_if_rt_data (vnet_dev_rx_queue_t *rxq) +{ + return &rxq->if_rt_data; +} + +static_always_inline vnet_dev_rx_queue_if_rt_data_t * +vnet_dev_get_rx_queue_sec_if_rt_data (vnet_dev_rx_queue_t *rxq, + u32 sec_if_index) +{ + return rxq->sec_if_rt_data[sec_if_index]; +} + +static_always_inline vlib_buffer_template_t +vnet_dev_get_rx_queue_if_buffer_template (vnet_dev_rx_queue_t *rxq) +{ + return rxq->if_rt_data.buffer_template; +} + +static_always_inline vlib_buffer_template_t +vnet_dev_get_rx_queue_sec_if_buffer_template (vnet_dev_rx_queue_t *rxq, + u32 sec_if_index) +{ + return rxq->sec_if_rt_data[sec_if_index]->buffer_template; +} + +static_always_inline u16 +vnet_dev_get_rx_queue_if_next_index (vnet_dev_rx_queue_t *rxq) +{ + return rxq->if_rt_data.next_index; +} + +static_always_inline u16 +vnet_dev_get_rx_queue_sec_if_next_index (vnet_dev_rx_queue_t *rxq, + u32 sec_if_index) +{ + return rxq->sec_if_rt_data[sec_if_index]->next_index; +} + static_always_inline u8 vnet_dev_get_rx_queue_buffer_pool_index (vnet_dev_rx_queue_t *rxq) { - return 
rxq->buffer_template.buffer_pool_index; + return rxq->if_rt_data.buffer_template.buffer_pool_index; } static_always_inline u32 @@ -237,8 +322,8 @@ static_always_inline vnet_dev_rx_queue_t * foreach_vnet_dev_rx_queue_runtime_helper (vlib_node_runtime_t *node, vnet_dev_rx_queue_t *rxq) { - vnet_dev_port_t *port; vnet_dev_rx_queue_rt_req_t req; + vnet_dev_port_interfaces_t *ifs; if (rxq == 0) rxq = vnet_dev_get_rx_node_runtime (node)->first_rx_queue; @@ -255,15 +340,34 @@ foreach_vnet_dev_rx_queue_runtime_helper (vlib_node_runtime_t *node, req.as_number = __atomic_exchange_n (&rxq->runtime_request.as_number, 0, __ATOMIC_ACQUIRE); - port = rxq->port; + ifs = rxq->port->interfaces; if (req.update_next_index) - rxq->next_index = port->intf.rx_next_index; + { + vnet_dev_port_interface_t **si = + rxq->port->interfaces->secondary_interfaces; + rxq->if_rt_data.next_index = ifs->primary_interface.rx_next_index; + vec_foreach_pointer (rtd, rxq->sec_if_rt_data) + if (rtd) + rtd->next_index = si[rtd->sec_if_index]->next_index; + } if (req.update_feature_arc) { - vlib_buffer_template_t *bt = &rxq->buffer_template; - bt->current_config_index = port->intf.current_config_index; - vnet_buffer (bt)->feature_arc_index = port->intf.feature_arc_index; + vnet_dev_port_interface_t **si = + rxq->port->interfaces->secondary_interfaces; + vlib_buffer_template_t *bt = &rxq->if_rt_data.buffer_template; + bt->current_config_index = ifs->primary_interface.current_config_index; + vnet_buffer (bt)->feature_arc_index = + ifs->primary_interface.feature_arc_index; + vec_foreach_pointer (rtd, rxq->sec_if_rt_data) + if (rtd) + { + vlib_buffer_template_t *bt = &rtd->buffer_template; + bt->current_config_index = + si[rtd->sec_if_index]->current_config_index; + vnet_buffer (bt)->feature_arc_index = + si[rtd->sec_if_index]->feature_arc_index; + } } if (req.suspend_on) diff --git a/src/vnet/dev/errors.h b/src/vnet/dev/errors.h index 6ececad12ec..243b10e698e 100644 --- a/src/vnet/dev/errors.h +++ b/src/vnet/dev/errors.h @@ -37,9 +37,12 @@ _ (TIMEOUT, "timeout") \ _ (UNKNOWN_DEVICE, "unknown device") \ _ (UNKNOWN_INTERFACE, "unknown interface") \ + _ (NOT_PRIMARY_INTERFACE, "not primary interface") \ + _ (PRIMARY_INTERFACE_MISSING, "primary interface missing") \ _ (UNSUPPORTED_CONFIG, "unsupported config") \ _ (UNSUPPORTED_DEVICE, "unsupported device") \ _ (UNSUPPORTED_DEVICE_VER, "unsupported device version") \ + _ (UNSUPPORTED_INTERFACE, "unsupported interface") \ _ (ALREADY_DONE, "already done") \ _ (NO_SUCH_INTERFACE, "no such interface") \ _ (INIT_FAILED, "init failed") diff --git a/src/vnet/dev/format.c b/src/vnet/dev/format.c index f599c0f8b85..ffc4a3a70b4 100644 --- a/src/vnet/dev/format.c +++ b/src/vnet/dev/format.c @@ -44,9 +44,15 @@ u8 * format_vnet_dev_interface_name (u8 *s, va_list *args) { u32 i = va_arg (*args, u32); - vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (i); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (i); + vnet_dev_port_interface_t *si; + vnet_dev_port_t *p = di->port; + + if (di->is_primary_if) + return format (s, "%s", p->interfaces->primary_interface.name); - return format (s, "%s", port->intf.name); + si = vnet_dev_port_get_sec_if_by_index (p, di->sec_if_index); + return format (s, "%s", si->name); } u8 * @@ -138,11 +144,22 @@ format_vnet_dev_port_info (u8 *s, va_list *args) format_vnet_dev_args, port->args); s = format (s, "\n%UInterface ", format_white_space, indent); - if (port->interface_created) + if (port->interfaces) { - s = format (s, "assigned, interface name is '%U', RX 
node is '%U'", - format_vnet_sw_if_index_name, vnm, port->intf.sw_if_index, - format_vlib_node_name, vm, port->intf.rx_node_index); + s = format ( + s, "assigned, primary interface name is '%U', RX node is '%U'", + format_vnet_sw_if_index_name, vnm, + port->interfaces->primary_interface.sw_if_index, format_vlib_node_name, + vm, vnet_dev_get_port_rx_node_index (port)); + pool_foreach_pointer (sif, port->interfaces->secondary_interfaces) + { + s = format (s, "\n%USecondary interface '%U'", format_white_space, + indent, format_vnet_sw_if_index_name, vnm, + sif->sw_if_index); + if (sif->args) + s = format (s, "\n%U args '%U", format_white_space, indent, + format_vnet_dev_args, sif->args); + } } else s = format (s, "not assigned"); @@ -318,7 +335,7 @@ unformat_vnet_dev_port_flags (unformat_input_t *input, va_list *args) #undef _ }; u64 flag_values[] = { -#define _(b, n, d) 1ull << (b) +#define _(b, n, d) 1ull << (b), foreach_vnet_dev_port_flag #undef _ }; @@ -394,7 +411,7 @@ format_vnet_dev_port_flags (u8 *s, va_list *args) #undef _ }; u64 flag_values[] = { -#define _(b, n, d) 1ull << (b) +#define _(b, n, d) 1ull << (b), foreach_vnet_dev_port_flag #undef _ }; diff --git a/src/vnet/dev/handlers.c b/src/vnet/dev/handlers.c index 2a55affe3e3..bfacbe27c99 100644 --- a/src/vnet/dev/handlers.c +++ b/src/vnet/dev/handlers.c @@ -19,7 +19,8 @@ vnet_dev_port_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw, u32 frame_size) { vlib_main_t *vm = vlib_get_main (); - vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (hw->dev_instance); + vnet_dev_port_t *p; vnet_dev_rv_t rv; vnet_dev_port_cfg_change_req_t req = { @@ -27,6 +28,11 @@ vnet_dev_port_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw, .max_rx_frame_size = frame_size, }; + p = di->port; + + if (!di->is_primary_if) + return vnet_dev_port_err (vm, p, VNET_DEV_ERR_NOT_PRIMARY_INTERFACE, ""); + log_debug (p->dev, "size %u", frame_size); rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req); @@ -49,13 +55,17 @@ vnet_dev_port_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hw, u32 flags) { vlib_main_t *vm = vlib_get_main (); - vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (hw->dev_instance); + vnet_dev_port_t *p = di->port; vnet_dev_rv_t rv; vnet_dev_port_cfg_change_req_t req = { .type = VNET_DEV_PORT_CFG_PROMISC_MODE, }; + if (!di->is_primary_if) + return ~0; + switch (flags) { case ETHERNET_INTERFACE_FLAG_DEFAULT_L3: @@ -87,13 +97,17 @@ vnet_dev_port_mac_change (vnet_hw_interface_t *hi, const u8 *old, const u8 *new) { vlib_main_t *vm = vlib_get_main (); - vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (hi->dev_instance); + vnet_dev_port_t *p = di->port; vnet_dev_rv_t rv; vnet_dev_port_cfg_change_req_t req = { .type = VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR, }; + if (!di->is_primary_if) + return vnet_dev_port_err (vm, p, VNET_DEV_ERR_NOT_PRIMARY_INTERFACE, ""); + vnet_dev_set_hw_addr_eth_mac (&req.addr, new); log_debug (p->dev, "new mac %U", format_vnet_dev_hw_addr, &req.addr); @@ -116,7 +130,8 @@ vnet_dev_add_del_mac_address (vnet_hw_interface_t *hi, const u8 *address, u8 is_add) { vlib_main_t *vm = vlib_get_main (); - vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (hi->dev_instance); + 
vnet_dev_port_t *p = di->port; vnet_dev_rv_t rv; vnet_dev_port_cfg_change_req_t req = { @@ -124,6 +139,9 @@ vnet_dev_add_del_mac_address (vnet_hw_interface_t *hi, const u8 *address, VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR, }; + if (!di->is_primary_if) + return vnet_dev_port_err (vm, p, VNET_DEV_ERR_NOT_PRIMARY_INTERFACE, ""); + vnet_dev_set_hw_addr_eth_mac (&req.addr, address); log_debug (p->dev, "received (addr %U is_add %u", format_vnet_dev_hw_addr, @@ -147,10 +165,19 @@ vnet_dev_flow_ops_fn (vnet_main_t *vnm, vnet_flow_dev_op_t op, u32 dev_instance, u32 flow_index, uword *private_data) { vlib_main_t *vm = vlib_get_main (); - vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (dev_instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (dev_instance); + vnet_dev_port_t *p; vnet_dev_port_cfg_change_req_t req; vnet_dev_rv_t rv; + if (!di) + return VNET_FLOW_ERROR_NO_SUCH_INTERFACE; + + if (di->is_primary_if) + return VNET_FLOW_ERROR_NOT_SUPPORTED; + + p = di->port; + switch (op) { case VNET_FLOW_DEV_OP_ADD_FLOW: @@ -201,10 +228,12 @@ vnet_dev_interface_set_rss_queues (vnet_main_t *vnm, vnet_hw_interface_t *hi, void vnet_dev_clear_hw_interface_counters (u32 instance) { - vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (instance); vlib_main_t *vm = vlib_get_main (); - vnet_dev_process_call_port_op_no_rv (vm, port, vnet_dev_port_clear_counters); + if (di->is_primary_if) + vnet_dev_process_call_port_op_no_rv (vm, di->port, + vnet_dev_port_clear_counters); } void @@ -213,44 +242,49 @@ vnet_dev_set_interface_next_node (vnet_main_t *vnm, u32 hw_if_index, { vlib_main_t *vm = vlib_get_main (); vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - vnet_dev_port_t *port = - vnet_dev_get_port_from_dev_instance (hw->dev_instance); + vnet_dev_instance_t *di = vnet_dev_get_dev_instance (hw->dev_instance); + vnet_dev_port_interface_t *intf; int runtime_update = 0; + if (di->is_primary_if) + intf = vnet_dev_port_get_primary_if (di->port); + else + intf = vnet_dev_port_get_sec_if_by_index (di->port, di->sec_if_index); + if (node_index == ~0) { - port->intf.redirect_to_node_next_index = 0; - if (port->intf.feature_arc == 0) + intf->redirect_to_node_next_index = 0; + if (intf->feature_arc == 0) { - port->intf.rx_next_index = - vnet_dev_default_next_index_by_port_type[port->attr.type]; + intf->rx_next_index = + vnet_dev_default_next_index_by_port_type[di->port->attr.type]; runtime_update = 1; } - port->intf.redirect_to_node = 0; + intf->redirect_to_node = 0; } else { u16 next_index = vlib_node_add_next (vlib_get_main (), port_rx_eth_node.index, node_index); - port->intf.redirect_to_node_next_index = next_index; - if (port->intf.feature_arc == 0) + intf->redirect_to_node_next_index = next_index; + if (intf->feature_arc == 0) { - port->intf.rx_next_index = next_index; + intf->rx_next_index = next_index; runtime_update = 1; } - port->intf.redirect_to_node = 1; + intf->redirect_to_node = 1; } - port->intf.rx_next_index = + intf->rx_next_index = node_index == ~0 ? 
- vnet_dev_default_next_index_by_port_type[port->attr.type] : - node_index; + vnet_dev_default_next_index_by_port_type[di->port->attr.type] : + node_index; if (runtime_update) { - foreach_vnet_dev_port_rx_queue (rxq, port) + foreach_vnet_dev_port_rx_queue (rxq, di->port) vnet_dev_rx_queue_rt_request ( vm, rxq, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 }); - log_debug (port->dev, "runtime update requested due to chgange in " - "reditect-to-next configuration"); + log_debug (di->port->dev, "runtime update requested due to chgange in " + "reditect-to-next configuration"); } } diff --git a/src/vnet/dev/port.c b/src/vnet/dev/port.c index df7805c1ff2..fccedebdcf4 100644 --- a/src/vnet/dev/port.c +++ b/src/vnet/dev/port.c @@ -94,6 +94,7 @@ vnet_dev_port_free (vlib_main_t *vm, vnet_dev_port_t *port) pool_free (port->rx_queues); pool_free (port->tx_queues); vnet_dev_arg_free (&port->args); + vnet_dev_arg_free (&port->sec_if_args); pool_put_index (dev->ports, port->index); clib_mem_free (port); } @@ -109,11 +110,23 @@ vnet_dev_port_update_tx_node_runtime (vlib_main_t *vm, vnet_dev_port_t *port) clib_bitmap_foreach (ti, q->assigned_threads) { vlib_main_t *tvm = vlib_get_main_by_index (ti); - vlib_node_runtime_t *nr = - vlib_node_get_runtime (tvm, port->intf.tx_node_index); - vnet_dev_tx_node_runtime_t *tnr = vnet_dev_get_tx_node_runtime (nr); - tnr->hw_if_index = port->intf.hw_if_index; + vlib_node_runtime_t *nr; + vnet_dev_tx_node_runtime_t *tnr; + vnet_dev_port_interfaces_t *ifs = port->interfaces; + + nr = + vlib_node_get_runtime (tvm, ifs->primary_interface.tx_node_index); + tnr = vnet_dev_get_tx_node_runtime (nr); + tnr->hw_if_index = ifs->primary_interface.hw_if_index; tnr->tx_queue = q; + + pool_foreach_pointer (sif, port->interfaces->secondary_interfaces) + { + nr = vlib_node_get_runtime (tvm, sif->tx_node_index); + tnr = vnet_dev_get_tx_node_runtime (nr); + tnr->hw_if_index = sif->hw_if_index; + tnr->tx_queue = q; + } } } } @@ -271,6 +284,11 @@ vnet_dev_port_add (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_port_id_t id, for (vnet_dev_arg_t *a = args->port.args; a->type != VNET_DEV_ARG_END; a++) vec_add1 (port->args, *a); + if (args->port.sec_if_args) + for (vnet_dev_arg_t *a = args->port.sec_if_args; + a->type != VNET_DEV_ARG_END; a++) + vec_add1 (port->sec_if_args, *a); + /* defaults out of port attributes */ port->max_rx_frame_size = args->port.attr.max_supported_rx_frame_size; port->primary_hw_addr = args->port.attr.hw_addr; @@ -371,7 +389,7 @@ vnet_dev_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, { if (req->all_queues == 0) { - rxq = vnet_dev_port_get_rx_queue_by_id (port, req->queue_id); + rxq = vnet_dev_get_port_rx_queue_by_id (port, req->queue_id); if (rxq == 0) return VNET_DEV_ERR_BUG; } @@ -466,25 +484,34 @@ vnet_dev_port_state_change (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_port_state_changes_t changes) { vnet_main_t *vnm = vnet_get_main (); + vnet_dev_port_interfaces_t *ifs = port->interfaces; vnet_dev_port_validate (vm, port); if (changes.change.link_speed) { port->speed = changes.link_speed; - if (port->interface_created) - vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index, - changes.link_speed); + if (port->interfaces) + vnet_hw_interface_set_link_speed ( + vnm, ifs->primary_interface.hw_if_index, changes.link_speed); log_debug (port->dev, "port speed changed to %u", changes.link_speed); } if (changes.change.link_state) { port->link_up = changes.link_state; - if (port->interface_created) - vnet_hw_interface_set_flags ( - vnm, 
port->intf.hw_if_index, - changes.link_state ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + if (ifs) + { + vnet_hw_interface_set_flags ( + vnm, ifs->primary_interface.hw_if_index, + changes.link_state ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + pool_foreach_pointer (sif, ifs->secondary_interfaces) + { + vnet_hw_interface_set_flags ( + vnm, sif->hw_if_index, + changes.link_state ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + } + } log_debug (port->dev, "port link state changed to %s", changes.link_state ? "up" : "down"); } @@ -510,18 +537,51 @@ vnet_dev_port_free_counters (vlib_main_t *vm, vnet_dev_port_t *port) vnet_dev_counters_free (vm, port->counter_main); } +static void +vnet_dev_port_init_if_rt_data (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_rx_queue_if_rt_data_t *rtd, + u32 sw_if_index) +{ + vnet_dev_t *dev = port->dev; + u8 buffer_pool_index = + vlib_buffer_pool_get_default_for_numa (vm, dev->numa_node); + vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index); + + rtd->buffer_template = bp->buffer_template; + vnet_buffer (&rtd->buffer_template)->sw_if_index[VLIB_RX] = sw_if_index; + vnet_buffer (&rtd->buffer_template)->sw_if_index[VLIB_TX] = ~0; + rtd->next_index = ~0; + rtd->sw_if_index = sw_if_index; +} + vnet_dev_rv_t -vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) +vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port, void *ptr) { vnet_main_t *vnm = vnet_get_main (); u16 n_threads = vlib_get_n_threads (); vnet_dev_main_t *dm = &vnet_dev_main; vnet_dev_t *dev = port->dev; - vnet_dev_port_t **pp; + vnet_dev_port_if_create_args_t *a = ptr; + vnet_dev_port_interfaces_t *ifs = port->interfaces; + vnet_dev_instance_t *di; vnet_dev_rv_t rv; u16 ti = 0; - if (port->intf.name[0] == 0) + if (ifs) + return VNET_DEV_ERR_ALREADY_EXISTS; + + port->interfaces = ifs = + clib_mem_alloc (sizeof (vnet_dev_port_interfaces_t)); + + *(ifs) = (vnet_dev_port_interfaces_t){ + .num_rx_queues = a->num_rx_queues, + .num_tx_queues = a->num_tx_queues, + .rxq_sz = a->rxq_sz, + .txq_sz = a->txq_sz, + .default_is_intr_mode = a->default_is_intr_mode, + }; + + if (a->name[0] == 0) { u8 *s; s = format (0, "%s%u/%u", @@ -529,44 +589,47 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) port->dev->index, port->index); u32 n = vec_len (s); - if (n >= sizeof (port->intf.name)) + if (n >= sizeof (a->name)) { vec_free (s); return VNET_DEV_ERR_BUG; } - clib_memcpy (port->intf.name, s, n); - port->intf.name[n] = 0; + clib_memcpy (ifs->primary_interface.name, s, n); + ifs->primary_interface.name[n] = 0; vec_free (s); } + else + clib_memcpy (ifs->primary_interface.name, a->name, + sizeof (ifs->primary_interface.name)); log_debug ( dev, "allocating %u rx queues with size %u and %u tx queues with size %u", - port->intf.num_rx_queues, port->intf.rxq_sz, port->intf.num_tx_queues, - port->intf.txq_sz); + a->num_rx_queues, a->rxq_sz, a->num_tx_queues, a->txq_sz); - for (int i = 0; i < port->intf.num_rx_queues; i++) - if ((rv = vnet_dev_rx_queue_alloc (vm, port, port->intf.rxq_sz)) != - VNET_DEV_OK) + for (int i = 0; i < ifs->num_rx_queues; i++) + if ((rv = vnet_dev_rx_queue_alloc (vm, port, ifs->rxq_sz)) != VNET_DEV_OK) goto error; - for (u32 i = 0; i < port->intf.num_tx_queues; i++) - if ((rv = vnet_dev_tx_queue_alloc (vm, port, port->intf.txq_sz)) != - VNET_DEV_OK) + for (u32 i = 0; i < ifs->num_tx_queues; i++) + if ((rv = vnet_dev_tx_queue_alloc (vm, port, ifs->txq_sz)) != VNET_DEV_OK) goto error; foreach_vnet_dev_port_tx_queue (q, port) { - q->assigned_threads = 
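vnet_dev_port_init_if_rt_data() above seeds each RX queue's per-interface runtime data from the NUMA-local default buffer pool and stamps the receive sw_if_index into the buffer template, so the hot path no longer has to write it per packet. A hedged sketch of the same idea with simplified types (vlib_buffer_pool_get_default_for_numa() and the vlib buffer template are the real pieces being modeled):

#include <stdint.h>

/* simplified stand-in for the subset of the vlib buffer template we touch */
typedef struct buffer_template {
  uint32_t sw_if_index_rx;
  uint32_t sw_if_index_tx;
  uint8_t buffer_pool_index;
} buffer_template_t;

typedef struct rxq_if_rt_data {
  buffer_template_t buffer_template;
  uint32_t next_index;
  uint32_t sw_if_index;
} rxq_if_rt_data_t;

/* initialize per-interface runtime data for one RX queue: buffers received
 * for this interface are pre-stamped with the RX sw_if_index and an invalid
 * (~0) TX sw_if_index, once, at interface creation time */
static void
rxq_if_rt_data_init (rxq_if_rt_data_t *rtd,
                     const buffer_template_t *pool_template,
                     uint32_t sw_if_index)
{
  rtd->buffer_template = *pool_template;
  rtd->buffer_template.sw_if_index_rx = sw_if_index;
  rtd->buffer_template.sw_if_index_tx = ~0u;
  rtd->next_index = ~0u; /* poison until the runtime update fills it in */
  rtd->sw_if_index = sw_if_index;
}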
clib_bitmap_set (q->assigned_threads, ti, 1); + /* if consistent_qp is enabled, we start by assigning queues to workers + * and we end with main */ + u16 real_ti = (ti + a->consistent_qp) % n_threads; + q->assigned_threads = clib_bitmap_set (q->assigned_threads, real_ti, 1); log_debug (dev, "port %u tx queue %u assigned to thread %u", - port->port_id, q->queue_id, ti); + port->port_id, q->queue_id, real_ti); if (++ti >= n_threads) break; } - /* pool of port pointers helps us to assign unique dev_instance */ - pool_get (dm->ports_by_dev_instance, pp); - port->intf.dev_instance = pp - dm->ports_by_dev_instance; - pp[0] = port; + pool_get (dm->dev_instances, di); + ifs->primary_interface.dev_instance = di - dm->dev_instances; + di->port = port; + di->is_primary_if = 1; if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET) { @@ -575,7 +638,7 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) vnet_sw_interface_t *sw; vnet_hw_interface_t *hw; vnet_hw_if_caps_t caps = 0; - u32 rx_node_index; + u32 rx_node_index, hw_if_index, sw_if_index; driver = pool_elt_at_index (dm->drivers, dev->driver_index); @@ -587,27 +650,28 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) dev_class->tx_function_n_errors = port->tx_node.n_error_counters; /* create new interface including tx and output nodes */ - port->intf.hw_if_index = vnet_eth_register_interface ( + hw_if_index = vnet_eth_register_interface ( vnm, &(vnet_eth_interface_registration_t){ .address = port->primary_hw_addr.eth_mac, .max_frame_size = port->max_rx_frame_size, .dev_class_index = driver->dev_class_index, - .dev_instance = port->intf.dev_instance, + .dev_instance = ifs->primary_interface.dev_instance, .cb.set_max_frame_size = vnet_dev_port_set_max_frame_size, .cb.flag_change = vnet_dev_port_eth_flag_change, }); + ifs->primary_interface.hw_if_index = hw_if_index; - sw = vnet_get_hw_sw_interface (vnm, port->intf.hw_if_index); - hw = vnet_get_hw_interface (vnm, port->intf.hw_if_index); - port->intf.sw_if_index = sw->sw_if_index; + sw = vnet_get_hw_sw_interface (vnm, hw_if_index); + hw = vnet_get_hw_interface (vnm, hw_if_index); + sw_if_index = ifs->primary_interface.sw_if_index = sw->sw_if_index; vnet_hw_interface_set_flags ( - vnm, port->intf.hw_if_index, + vnm, ifs->primary_interface.hw_if_index, port->link_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); if (port->speed) - vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index, - port->speed); + vnet_hw_interface_set_link_speed ( + vnm, ifs->primary_interface.hw_if_index, port->speed); - port->intf.tx_node_index = hw->tx_node_index; + ifs->primary_interface.tx_node_index = hw->tx_node_index; caps |= port->attr.caps.interrupt_mode ? VNET_HW_IF_CAP_INT_MODE : 0; caps |= port->attr.caps.mac_filter ? VNET_HW_IF_CAP_MAC_FILTER : 0; @@ -615,14 +679,15 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) caps |= port->attr.tx_offloads.ip4_cksum ? 
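The consistent_qp handling above rotates the TX-queue-to-thread mapping by one, so queue 0 lands on worker 0 rather than on the main thread and main ends up with the last queue (or none, when there are fewer queues than threads). A standalone illustration of that rotation, assuming thread index 0 is the main thread:

#include <stdint.h>
#include <stdio.h>

/* assign tx queue `qid` to a thread; with consistent_qp the mapping is
 * shifted by one so workers take the low-numbered queues first and the main
 * thread (index 0) only picks up the last one */
static uint16_t
tx_queue_thread (uint16_t qid, uint16_t n_threads, int consistent_qp)
{
  return (uint16_t) ((qid + (consistent_qp ? 1 : 0)) % n_threads);
}

int
main (void)
{
  /* e.g. 4 threads (main + 3 workers) and 4 tx queues */
  for (uint16_t q = 0; q < 4; q++)
    printf ("queue %u -> thread %u (plain) / thread %u (consistent-qp)\n",
            (unsigned) q, (unsigned) tx_queue_thread (q, 4, 0),
            (unsigned) tx_queue_thread (q, 4, 1));
  return 0;
}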
VNET_HW_IF_CAP_TX_CKSUM : 0; if (caps) - vnet_hw_if_set_caps (vnm, port->intf.hw_if_index, caps); + vnet_hw_if_set_caps (vnm, hw_if_index, caps); /* create / reuse rx node */ if (vec_len (dm->free_rx_node_indices)) { vlib_node_t *n; rx_node_index = vec_pop (dm->free_rx_node_indices); - vlib_node_rename (vm, rx_node_index, "%s-rx", port->intf.name); + vlib_node_rename (vm, rx_node_index, "%s-rx", + port->interfaces->primary_interface.name); n = vlib_get_node (vm, rx_node_index); n->function = vlib_node_get_preferred_node_fn_variant ( vm, port->rx_node.registrations); @@ -646,30 +711,28 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) .error_counters = port->rx_node.error_counters, .n_errors = port->rx_node.n_error_counters, }; - rx_node_index = - vlib_register_node (vm, &rx_node_reg, "%s-rx", port->intf.name); + rx_node_index = vlib_register_node (vm, &rx_node_reg, "%s-rx", + ifs->primary_interface.name); } port->rx_node_assigned = 1; - port->intf.rx_node_index = rx_node_index; - port->intf.rx_next_index = + ifs->rx_node_index = rx_node_index; + ifs->primary_interface.rx_next_index = vnet_dev_default_next_index_by_port_type[port->attr.type]; vlib_worker_thread_node_runtime_update (); log_debug (dev, "ethernet interface created, hw_if_index %u sw_if_index %u " "rx_node_index %u tx_node_index %u", - port->intf.hw_if_index, port->intf.sw_if_index, - port->intf.rx_node_index, port->intf.tx_node_index); + hw_if_index, sw_if_index, rx_node_index, + ifs->primary_interface.tx_node_index); } - port->interface_created = 1; foreach_vnet_dev_port_rx_queue (q, port) { - vnet_buffer (&q->buffer_template)->sw_if_index[VLIB_RX] = - port->intf.sw_if_index; + vnet_dev_port_init_if_rt_data (vm, port, &q->if_rt_data, + ifs->primary_interface.sw_if_index); /* poison to catch node not calling runtime update function */ - q->next_index = ~0; - q->interrupt_mode = port->intf.default_is_intr_mode; + q->interrupt_mode = ifs->default_is_intr_mode; vnet_dev_rx_queue_rt_request ( vm, q, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 }); } @@ -682,6 +745,8 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port) error: if (rv != VNET_DEV_OK) vnet_dev_port_if_remove (vm, port); + else + a->sw_if_index = ifs->primary_interface.sw_if_index; return rv; } @@ -690,6 +755,7 @@ vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port) { vnet_dev_main_t *dm = &vnet_dev_main; vnet_main_t *vnm = vnet_get_main (); + vnet_dev_port_interfaces_t *ifs = port->interfaces; vnet_dev_port_validate (vm, port); @@ -698,23 +764,22 @@ vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port) if (port->rx_node_assigned) { - vlib_node_rename (vm, port->intf.rx_node_index, "deleted-%u", - port->intf.rx_node_index); - vec_add1 (dm->free_rx_node_indices, port->intf.rx_node_index); + vlib_node_rename (vm, ifs->rx_node_index, "deleted-%u", + ifs->rx_node_index); + vec_add1 (dm->free_rx_node_indices, ifs->rx_node_index); port->rx_node_assigned = 0; } - if (port->interface_created) + if (ifs) { vlib_worker_thread_barrier_sync (vm); - vnet_delete_hw_interface (vnm, port->intf.hw_if_index); + vnet_delete_hw_interface (vnm, ifs->primary_interface.hw_if_index); vlib_worker_thread_barrier_release (vm); - pool_put_index (dm->ports_by_dev_instance, port->intf.dev_instance); - port->interface_created = 0; + pool_put_index (dm->dev_instances, ifs->primary_interface.dev_instance); + clib_mem_free (port->interfaces); + port->interfaces = 0; } - port->intf = (typeof (port->intf)){}; - if (port->port_ops.deinit) 
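RX nodes are recycled rather than deleted: on interface removal the node is renamed to "deleted-<index>" and parked on free_rx_node_indices, and if_create above prefers popping and renaming one of those over registering a fresh node. A rough sketch of that recycle-or-create pattern, with a plain array standing in for the VPP vector and printf standing in for the node rename/register calls:

#include <stdint.h>
#include <stdio.h>

#define MAX_FREE 32

/* trivial free list standing in for dm->free_rx_node_indices */
typedef struct {
  uint32_t idx[MAX_FREE];
  uint32_t n;
} node_free_list_t;

static uint32_t next_new_node_index = 100; /* pretend node registry */

/* reuse a parked node index if one is available, otherwise "register" a new
 * one; the real code also swaps in the port's preferred RX function variant */
static uint32_t
rx_node_get (node_free_list_t *fl, const char *if_name)
{
  uint32_t node_index;
  if (fl->n > 0)
    {
      node_index = fl->idx[--fl->n]; /* vec_pop () */
      printf ("renaming node %u to %s-rx\n", (unsigned) node_index, if_name);
    }
  else
    {
      node_index = next_new_node_index++;
      printf ("registering node %u as %s-rx\n", (unsigned) node_index, if_name);
    }
  return node_index;
}

/* on interface removal, park the node index for later reuse */
static void
rx_node_put (node_free_list_t *fl, uint32_t node_index)
{
  if (fl->n < MAX_FREE)
    {
      printf ("renaming node %u to deleted-%u\n", (unsigned) node_index,
              (unsigned) node_index);
      fl->idx[fl->n++] = node_index;
    }
}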
port->port_ops.deinit (vm, port); @@ -731,6 +796,171 @@ vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port) return VNET_DEV_OK; } + +vnet_dev_rv_t +vnet_dev_port_del_sec_if_internal (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_interface_t *sif) +{ + vnet_dev_rv_t rv = VNET_DEV_OK; + + if (sif && port->port_ops.add_sec_if) + rv = port->port_ops.add_sec_if (vm, port, sif); + + if (rv != VNET_DEV_OK) + return rv; + + foreach_vnet_dev_port_rx_queue (q, port) + { + vec_foreach_pointer (p, q->sec_if_rt_data) + if (p) + clib_mem_free (p); + vec_free (q->sec_if_rt_data); + } + + if (sif->interface_created) + ethernet_delete_interface (vnet_get_main (), sif->hw_if_index); + + pool_put_index (port->interfaces->secondary_interfaces, sif->index); + vnet_dev_arg_free (&sif->args); + clib_mem_free (sif); + return rv; +} + +vnet_dev_rv_t +vnet_dev_port_add_sec_if (vlib_main_t *vm, vnet_dev_port_t *port, void *ptr) +{ + vnet_dev_main_t *dm = &vnet_dev_main; + vnet_dev_port_sec_if_create_args_t *a = ptr; + vnet_main_t *vnm = vnet_get_main (); + vnet_dev_t *dev = port->dev; + vnet_dev_port_interface_t *sif = 0; + vnet_dev_port_interface_t **sip; + vnet_dev_rv_t rv = VNET_DEV_OK; + + sif = clib_mem_alloc (sizeof (vnet_dev_port_interface_t)); + pool_get (port->interfaces->secondary_interfaces, sip); + *sip = sif; + + *sif = (vnet_dev_port_interface_t){ + .index = sip - port->interfaces->secondary_interfaces, + .args = vec_dup (port->sec_if_args), + }; + + clib_memcpy (sif->name, a->name, sizeof (sif->name)); + + if (sif->args) + { + rv = vnet_dev_arg_parse (vm, dev, sif->args, a->args); + if (rv != VNET_DEV_OK) + return rv; + } + + if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET) + { + vnet_device_class_t *dev_class; + vnet_dev_driver_t *driver; + vnet_sw_interface_t *sw; + vnet_hw_interface_t *hw; + vnet_dev_instance_t *di; + vnet_hw_if_caps_t caps = 0; + + pool_get (dm->dev_instances, di); + sif->dev_instance = di - dm->dev_instances; + di->port = port; + di->sec_if_index = sip - port->interfaces->secondary_interfaces; + + driver = pool_elt_at_index (dm->drivers, dev->driver_index); + + /* hack to provide per-port tx node function */ + dev_class = vnet_get_device_class (vnm, driver->dev_class_index); + dev_class->tx_fn_registrations = port->tx_node.registrations; + dev_class->format_tx_trace = port->tx_node.format_trace; + dev_class->tx_function_error_counters = port->tx_node.error_counters; + dev_class->tx_function_n_errors = port->tx_node.n_error_counters; + + /* create new interface including tx and output nodes */ + sif->hw_if_index = vnet_eth_register_interface ( + vnm, &(vnet_eth_interface_registration_t){ + .address = port->primary_hw_addr.eth_mac, + .max_frame_size = port->max_rx_frame_size, + .dev_class_index = driver->dev_class_index, + .dev_instance = sif->dev_instance, + .cb.set_max_frame_size = vnet_dev_port_set_max_frame_size, + .cb.flag_change = vnet_dev_port_eth_flag_change, + }); + + sw = vnet_get_hw_sw_interface (vnm, sif->hw_if_index); + hw = vnet_get_hw_interface (vnm, sif->hw_if_index); + sif->sw_if_index = sw->sw_if_index; + sif->next_index = + vnet_dev_default_next_index_by_port_type[port->attr.type]; + sif->interface_created = 1; + vnet_dev_port_update_tx_node_runtime (vm, port); + vnet_hw_interface_set_flags ( + vnm, sif->hw_if_index, + port->link_up ? 
VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + if (port->speed) + vnet_hw_interface_set_link_speed (vnm, sif->hw_if_index, port->speed); + + sif->tx_node_index = hw->tx_node_index; + + caps |= port->attr.caps.interrupt_mode ? VNET_HW_IF_CAP_INT_MODE : 0; + caps |= port->attr.caps.mac_filter ? VNET_HW_IF_CAP_MAC_FILTER : 0; + caps |= port->attr.tx_offloads.tcp_gso ? VNET_HW_IF_CAP_TCP_GSO : 0; + caps |= port->attr.tx_offloads.ip4_cksum ? VNET_HW_IF_CAP_TX_CKSUM : 0; + + if (caps) + vnet_hw_if_set_caps (vnm, sif->hw_if_index, caps); + } + else + return VNET_DEV_ERR_NOT_SUPPORTED; + + foreach_vnet_dev_port_rx_queue (q, port) + { + vnet_dev_rx_queue_if_rt_data_t *rtd; + vec_validate (q->sec_if_rt_data, sif->index); + + rtd = clib_mem_alloc_aligned (sizeof (vnet_dev_rx_queue_if_rt_data_t), + CLIB_CACHE_LINE_BYTES); + + q->sec_if_rt_data[sif->index] = rtd; + + vnet_dev_port_init_if_rt_data (vm, port, rtd, sif->sw_if_index); + vnet_dev_rx_queue_rt_request ( + vm, q, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 }); + } + + if (sif && port->port_ops.add_sec_if) + rv = port->port_ops.add_sec_if (vm, port, sif); + + if (rv != VNET_DEV_OK) + vnet_dev_port_del_sec_if_internal (vm, port, sif); + + return rv; +} + +vnet_dev_rv_t +vnet_dev_port_del_sec_if (vlib_main_t *vm, vnet_dev_port_t *port, void *ptr) +{ + vnet_dev_port_del_sec_if_args_t *a = ptr; + vnet_sw_interface_t *si; + vnet_hw_interface_t *hi; + vnet_dev_instance_t *di; + vnet_main_t *vnm = vnet_get_main (); + + log_debug (port->dev, "%u", a->sw_if_index); + + si = vnet_get_sw_interface_or_null (vnm, a->sw_if_index); + if (!si) + return VNET_DEV_ERR_UNKNOWN_INTERFACE; + + hi = vnet_get_hw_interface (vnm, si->hw_if_index); + di = vnet_dev_get_dev_instance (hi->dev_instance); + + return vnet_dev_port_del_sec_if_internal ( + vm, port, vnet_dev_port_get_sec_if_by_index (port, di->sec_if_index)); +} + void vnet_dev_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port) { diff --git a/src/vnet/dev/process.c b/src/vnet/dev/process.c index 3d3b49c01b6..4df56296444 100644 --- a/src/vnet/dev/process.c +++ b/src/vnet/dev/process.c @@ -20,9 +20,11 @@ typedef enum VNET_DEV_EVENT_PROCESS_QUIT, VNET_DEV_EVENT_CALL_OP, VNET_DEV_EVENT_CALL_OP_NO_RV, + VNET_DEV_EVENT_CALL_OP_WITH_PTR, VNET_DEV_EVENT_CALL_OP_NO_WAIT, VNET_DEV_EVENT_CALL_PORT_OP, VNET_DEV_EVENT_CALL_PORT_OP_NO_RV, + VNET_DEV_EVENT_CALL_PORT_OP_WITH_PTR, VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT, VNET_DEV_EVENT_CLOCK = ~0 } __clib_packed vnet_dev_event_t; @@ -51,6 +53,11 @@ typedef struct } call_op_no_rv; struct { + vnet_dev_op_with_ptr_t *op; + void *ptr; + } call_op_with_ptr; + struct + { vnet_dev_op_no_rv_t *op; } call_op_no_wait; struct @@ -65,6 +72,12 @@ typedef struct } call_port_op_no_rv; struct { + vnet_dev_port_op_with_ptr_t *op; + vnet_dev_port_t *port; + void *ptr; + } call_port_op_with_ptr; + struct + { vnet_dev_port_op_no_rv_t *op; vnet_dev_port_t *port; } call_port_op_no_wait; @@ -132,6 +145,10 @@ vnet_dev_process_one_event (vlib_main_t *vm, vnet_dev_t *dev, ev_log_debug (vm, dev, ed, "call op no rv"); ed->call_op_no_rv.op (vm, dev); break; + case VNET_DEV_EVENT_CALL_OP_WITH_PTR: + ev_log_debug (vm, dev, ed, "call op woth ptr"); + rv = ed->call_op_with_ptr.op (vm, dev, ed->call_op_with_ptr.ptr); + break; case VNET_DEV_EVENT_CALL_OP_NO_WAIT: ev_log_debug (vm, dev, ed, "call op no wait"); ed->call_op_no_wait.op (vm, dev); @@ -144,6 +161,11 @@ vnet_dev_process_one_event (vlib_main_t *vm, vnet_dev_t *dev, ev_log_debug (vm, dev, ed, "call port op no rv"); ed->call_port_op_no_rv.op (vm, 
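vnet_dev_port_del_sec_if() above walks from the caller-supplied sw_if_index to the hardware interface, then uses the hw interface's dev_instance to recover which secondary interface of the port is meant. A compressed sketch of that lookup chain with hypothetical minimal types; the real calls are vnet_get_sw_interface_or_null(), vnet_get_hw_interface() and vnet_dev_get_dev_instance():

#include <stddef.h>
#include <stdint.h>

/* minimal stand-ins for the interface objects involved in the lookup */
typedef struct { uint32_t hw_if_index; } sw_if_t;
typedef struct { uint32_t dev_instance; } hw_if_t;
typedef struct { uint32_t sec_if_index; } dev_instance_ref_t;

typedef struct {
  sw_if_t *(*sw_by_index) (uint32_t sw_if_index); /* may return NULL */
  hw_if_t *(*hw_by_index) (uint32_t hw_if_index);
  dev_instance_ref_t *(*di_by_index) (uint32_t dev_instance);
} intf_db_t;

/* resolve sw_if_index -> secondary interface index; returns -1 when the
 * software interface does not exist (UNKNOWN_INTERFACE in the real code) */
static int
sec_if_index_from_sw_if_index (intf_db_t *db, uint32_t sw_if_index)
{
  sw_if_t *si = db->sw_by_index (sw_if_index);
  if (si == NULL)
    return -1;
  hw_if_t *hi = db->hw_by_index (si->hw_if_index);
  dev_instance_ref_t *di = db->di_by_index (hi->dev_instance);
  return (int) di->sec_if_index;
}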
ed->call_port_op_no_rv.port); break; + case VNET_DEV_EVENT_CALL_PORT_OP_WITH_PTR: + ev_log_debug (vm, dev, ed, "call port op woth ptr"); + rv = ed->call_port_op_with_ptr.op (vm, ed->call_port_op_with_ptr.port, + ed->call_port_op_with_ptr.ptr); + break; case VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT: ev_log_debug (vm, dev, ed, "call port op no wait"); ed->call_port_op_no_wait.op (vm, ed->call_port_op_no_wait.port); @@ -477,6 +499,19 @@ vnet_dev_process_call_op_no_rv (vlib_main_t *vm, vnet_dev_t *dev, return vnet_dev_process_event_send_and_wait (vm, dev, ed); } +vnet_dev_rv_t +vnet_dev_process_call_op_with_ptr (vlib_main_t *vm, vnet_dev_t *dev, + vnet_dev_op_with_ptr_t *op, void *p) +{ + vnet_dev_event_data_t *ed = vnet_dev_event_data_alloc (vm, dev); + *ed = (vnet_dev_event_data_t){ + .event = VNET_DEV_EVENT_CALL_OP_WITH_PTR, + .call_op_with_ptr = { .op = op, .ptr = p }, + }; + + return vnet_dev_process_event_send_and_wait (vm, dev, ed); +} + void vnet_dev_process_call_op_no_wait (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_op_no_rv_t *op) @@ -516,6 +551,20 @@ vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm, vnet_dev_port_t *port, return vnet_dev_process_event_send_and_wait (vm, port->dev, ed); } +vnet_dev_rv_t +vnet_dev_process_call_port_op_with_ptr (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_op_with_ptr_t *op, + void *p) +{ + vnet_dev_event_data_t *ed = vnet_dev_event_data_alloc (vm, port->dev); + *ed = (vnet_dev_event_data_t){ + .event = VNET_DEV_EVENT_CALL_PORT_OP_WITH_PTR, + .call_port_op_with_ptr = { .op = op, .port = port, .ptr = p }, + }; + + return vnet_dev_process_event_send_and_wait (vm, port->dev, ed); +} + void vnet_dev_process_call_port_op_no_wait (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_port_op_no_rv_t *op) diff --git a/src/vnet/dev/queue.c b/src/vnet/dev/queue.c index 9a016a626fb..57ed3dcae3b 100644 --- a/src/vnet/dev/queue.c +++ b/src/vnet/dev/queue.c @@ -36,7 +36,6 @@ vnet_dev_rx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_t *dev = port->dev; vnet_dev_rv_t rv = VNET_DEV_OK; u16 n_threads = vlib_get_n_threads (); - u8 buffer_pool_index; vnet_dev_port_validate (vm, port); @@ -65,15 +64,6 @@ vnet_dev_rx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port, dm->next_rx_queue_thread = 1; } - buffer_pool_index = - vlib_buffer_pool_get_default_for_numa (vm, dev->numa_node); - vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index); - - rxq->buffer_template = bp->buffer_template; - vnet_buffer (&rxq->buffer_template)->sw_if_index[VLIB_TX] = ~0; - - rxq->next_index = vnet_dev_default_next_index_by_port_type[port->attr.type]; - if (port->rx_queue_ops.alloc) rv = port->rx_queue_ops.alloc (vm, rxq); @@ -107,7 +97,7 @@ vnet_dev_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) { if (rxq->port->rx_queue_ops.stop) rxq->port->rx_queue_ops.stop (vm, rxq); - vlib_node_set_state (vm, rxq->port->intf.rx_node_index, + vlib_node_set_state (vm, vnet_dev_get_port_rx_node_index (rxq->port), VLIB_NODE_STATE_DISABLED); rxq->started = 0; } diff --git a/src/vnet/dev/runtime.c b/src/vnet/dev/runtime.c index 79c55cfbd53..944c3ef32fa 100644 --- a/src/vnet/dev/runtime.c +++ b/src/vnet/dev/runtime.c @@ -23,7 +23,7 @@ _vnet_dev_rt_exec_op (vlib_main_t *vm, vnet_dev_rt_op_t *op) vnet_dev_rx_queue_t *previous = 0, *first = 0; vnet_dev_rx_node_runtime_t *rtd; vlib_node_state_t state = VLIB_NODE_STATE_DISABLED; - u32 node_index = port->intf.rx_node_index; + u32 node_index = vnet_dev_get_port_rx_node_index (port); rtd = vlib_node_get_runtime_data (vm, 
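The two new *_WITH_PTR events let a caller run an operation on the device process with an extra pointer argument and block until it returns, which is how vnet_dev_port_if_create() now receives its args block and hands the allocated sw_if_index back through it. An illustrative caller, assuming a translation unit that already includes the vnet/dev headers; the types and signatures are taken from this change, but the surrounding call sequence is only a sketch:

static vnet_dev_rv_t
example_create_port_if (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_port_if_create_args_t args = {
    .num_rx_queues = 2,
    .num_tx_queues = 2,
    .rxq_sz = 512,
    .txq_sz = 512,
  };
  vnet_dev_rv_t rv;

  /* runs vnet_dev_port_if_create (vm, port, &args) on the device process
   * thread and waits for completion; on success the op fills args.sw_if_index
   * with the new primary interface */
  rv = vnet_dev_process_call_port_op_with_ptr (vm, port,
					       vnet_dev_port_if_create, &args);
  if (rv == VNET_DEV_OK)
    clib_warning ("created interface, sw_if_index %u", args.sw_if_index);
  return rv;
}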
node_index); @@ -47,7 +47,7 @@ _vnet_dev_rt_exec_op (vlib_main_t *vm, vnet_dev_rt_op_t *op) } rtd->first_rx_queue = first; - vlib_node_set_state (vm, port->intf.rx_node_index, state); + vlib_node_set_state (vm, node_index, state); __atomic_store_n (&op->completed, 1, __ATOMIC_RELEASE); } diff --git a/src/vnet/dev/types.h b/src/vnet/dev/types.h index 006d18e5bc5..24799ac8138 100644 --- a/src/vnet/dev/types.h +++ b/src/vnet/dev/types.h @@ -50,7 +50,8 @@ typedef union /* do not change bit assignments - API dependency */ #define foreach_vnet_dev_port_flag \ - _ (0, INTERRUPT_MODE, "enable interrupt mode") + _ (0, INTERRUPT_MODE, "enable interrupt mode") \ + _ (1, CONSISTENT_QP, "consistent queue pairs") typedef union { diff --git a/src/vnet/devices/tap/cli.c b/src/vnet/devices/tap/cli.c index 5c676d32d60..5738ef237b6 100644 --- a/src/vnet/devices/tap/cli.c +++ b/src/vnet/devices/tap/cli.c @@ -105,6 +105,8 @@ tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, args.tap_flags |= TAP_FLAG_PACKED; else if (unformat (line_input, "in-order")) args.tap_flags |= TAP_FLAG_IN_ORDER; + else if (unformat (line_input, "consistent-qp")) + args.tap_flags |= TAP_FLAG_CONSISTENT_QP; else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address, args.mac_addr.bytes)) args.mac_addr_set = 1; diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c index b0b0a3af13f..bb91200a525 100644 --- a/src/vnet/devices/tap/tap.c +++ b/src/vnet/devices/tap/tap.c @@ -212,6 +212,9 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) } } + if (args->tap_flags & TAP_FLAG_CONSISTENT_QP) + vif->consistent_qp = 1; + /* if namespace is specified, all further netlink messages should be executed * after we change our net namespace */ if (args->host_namespace) diff --git a/src/vnet/devices/tap/tap.h b/src/vnet/devices/tap/tap.h index 1df2fb7e1ad..66f5576c5be 100644 --- a/src/vnet/devices/tap/tap.h +++ b/src/vnet/devices/tap/tap.h @@ -22,15 +22,16 @@ #define MIN(x,y) (((x)<(y))?(x):(y)) #endif -#define foreach_tapv2_flags \ - _ (GSO, 0) \ - _ (CSUM_OFFLOAD, 1) \ - _ (PERSIST, 2) \ - _ (ATTACH, 3) \ - _ (TUN, 4) \ - _ (GRO_COALESCE, 5) \ - _ (PACKED, 6) \ - _ (IN_ORDER, 7) +#define foreach_tapv2_flags \ + _ (GSO, 0) \ + _ (CSUM_OFFLOAD, 1) \ + _ (PERSIST, 2) \ + _ (ATTACH, 3) \ + _ (TUN, 4) \ + _ (GRO_COALESCE, 5) \ + _ (PACKED, 6) \ + _ (IN_ORDER, 7) \ + _ (CONSISTENT_QP, 8) typedef enum { diff --git a/src/vnet/devices/virtio/cli.c b/src/vnet/devices/virtio/cli.c index c4364600722..34c74ac91ac 100644 --- a/src/vnet/devices/virtio/cli.c +++ b/src/vnet/devices/virtio/cli.c @@ -64,6 +64,8 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input, args.bind = VIRTIO_BIND_DEFAULT; else if (unformat (line_input, "rss-enabled")) args.rss_enabled = 1; + else if (unformat (line_input, "consistent-qp")) + args.virtio_flags |= VIRTIO_FLAG_CONSISTENT_QP; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c index 140cdb94153..9562bdc6369 100644 --- a/src/vnet/devices/virtio/pci.c +++ b/src/vnet/devices/virtio/pci.c @@ -1418,9 +1418,10 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) if (args->virtio_flags & VIRTIO_FLAG_PACKED) vif->is_packed = 1; - if ((error = - vlib_pci_device_open (vm, (vlib_pci_addr_t *) & vif->pci_addr, - virtio_pci_device_ids, &h))) + if (args->virtio_flags & VIRTIO_FLAG_CONSISTENT_QP) + vif->consistent_qp = 1; + if ((error = 
vlib_pci_device_open (vm, (vlib_pci_addr_t *) &vif->pci_addr, + virtio_pci_device_ids, &h))) { args->rv = VNET_API_ERROR_INVALID_INTERFACE; args->error = diff --git a/src/vnet/devices/virtio/pci.h b/src/vnet/devices/virtio/pci.h index 745ad6fce87..59778533316 100644 --- a/src/vnet/devices/virtio/pci.h +++ b/src/vnet/devices/virtio/pci.h @@ -283,7 +283,8 @@ typedef struct _virtio_pci_func _ (PACKED, 3) \ _ (IN_ORDER, 4) \ _ (BUFFERING, 5) \ - _ (RSS, 6) + _ (RSS, 6) \ + _ (CONSISTENT_QP, 7) typedef enum { diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c index 840936a43ff..682ec32ceff 100644 --- a/src/vnet/devices/virtio/virtio.c +++ b/src/vnet/devices/virtio/virtio.c @@ -294,6 +294,8 @@ virtio_vring_set_tx_queues (vlib_main_t *vm, virtio_if_t *vif) { vnet_main_t *vnm = vnet_get_main (); vnet_virtio_vring_t *vring; + uword n_threads = vlib_get_n_threads (); + u8 consistent = vif->consistent_qp; vec_foreach (vring, vif->txq_vrings) { @@ -308,10 +310,11 @@ virtio_vring_set_tx_queues (vlib_main_t *vm, virtio_if_t *vif) return; } - for (u32 j = 0; j < vlib_get_n_threads (); j++) + for (u32 j = 0; j < n_threads; j++) { u32 qi = vif->txq_vrings[j % vif->num_txqs].queue_index; - vnet_hw_if_tx_queue_assign_thread (vnm, qi, j); + vnet_hw_if_tx_queue_assign_thread (vnm, qi, + (j + consistent) % n_threads); } vnet_hw_if_update_runtime_data (vnm, vif->hw_if_index); diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h index a8e258884a4..fb72051ce45 100644 --- a/src/vnet/devices/virtio/virtio.h +++ b/src/vnet/devices/virtio/virtio.h @@ -213,6 +213,7 @@ typedef struct }; const virtio_pci_func_t *virtio_pci_func; int is_packed; + u8 consistent_qp : 1; } virtio_if_t; typedef struct diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h index 57c2b6ff78b..d356fd5411c 100644 --- a/src/vnet/ip/ip4_to_ip6.h +++ b/src/vnet/ip/ip4_to_ip6.h @@ -46,10 +46,9 @@ static u8 icmp_to_icmp6_updater_pointer_table[] = * @returns Port number on success, 0 otherwise. */ always_inline u16 -ip4_get_port (ip4_header_t * ip, u8 sender) +ip4_get_port (ip4_header_t *ip, u8 sender) { - if (ip->ip_version_and_header_length != 0x45 || - ip4_get_fragment_offset (ip)) + if (ip->ip_version_and_header_length != 0x45 || ip4_get_fragment_offset (ip)) return 0; if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || @@ -65,7 +64,15 @@ ip4_get_port (ip4_header_t * ip, u8 sender) { return *((u16 *) (icmp + 1)); } - else if (clib_net_to_host_u16 (ip->length) >= 64) + /* + * Minimum length here consists of: + * - outer IP header length + * - outer ICMP header length (2*sizeof (icmp46_header_t)) + * - inner IP header length + * - first 8 bytes of payload of original packet in case of ICMP error + */ + else if (clib_net_to_host_u16 (ip->length) >= + 2 * sizeof (ip4_header_t) + 2 * sizeof (icmp46_header_t) + 8) { ip = (ip4_header_t *) (icmp + 2); if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h index 29d5718d4da..ebabcd0b797 100644 --- a/src/vnet/ip/ip6_to_ip4.h +++ b/src/vnet/ip/ip6_to_ip4.h @@ -96,10 +96,10 @@ ip6_parse (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip6, u32 buff_len, * @returns 1 on success, 0 otherwise. 
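The new explicit minimum in ip4_get_port() replaces the old magic ">= 64": per the comment in the hunk above, an ICMP error has to carry the outer IPv4 header, the full 8-byte outer ICMP header (counted as 2 * sizeof (icmp46_header_t)), the inner IPv4 header and the first 8 payload bytes of the original datagram before the inner ports can be read. Assuming the standard option-less sizes (20-byte ip4_header_t, 4-byte icmp46_header_t covering type/code/checksum), the bound evaluates to 56 bytes, slightly laxer than the previous hard-coded 64. A tiny compile-time check of that arithmetic under those assumed sizes:

#include <assert.h>

enum {
  IP4_HDR_BYTES = 20,   /* assumed: IPv4 header without options */
  ICMP46_HDR_BYTES = 4, /* assumed: type + code + checksum */
  /* outer IP + 8-byte ICMP header + inner IP + first 8 payload bytes */
  MIN_ICMP_ERROR_LEN = 2 * IP4_HDR_BYTES + 2 * ICMP46_HDR_BYTES + 8,
};

static_assert (MIN_ICMP_ERROR_LEN == 56,
	       "inner L4 ports are reachable once the packet is >= 56 bytes");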
*/ always_inline u16 -ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6, - u16 buffer_len, u8 * ip_protocol, u16 * src_port, - u16 * dst_port, u8 * icmp_type_or_tcp_flags, - u32 * tcp_ack_number, u32 * tcp_seq_number) +ip6_get_port (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip6, + u16 buffer_len, u8 *ip_protocol, u16 *src_port, u16 *dst_port, + u8 *icmp_type_or_tcp_flags, u32 *tcp_ack_number, + u32 *tcp_seq_number, void **l4_hdr) { u8 l4_protocol; u16 l4_offset; @@ -120,8 +120,19 @@ ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6, *ip_protocol = l4_protocol; } l4 = u8_ptr_add (ip6, l4_offset); + if (l4_hdr) + *l4_hdr = l4; if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP) { + if ((IP_PROTOCOL_UDP == l4_protocol && + u8_ptr_add (l4, sizeof (udp_header_t)) > + u8_ptr_add (vlib_buffer_get_current (b), b->current_length)) || + (IP_PROTOCOL_TCP == l4_protocol && + u8_ptr_add (l4, sizeof (tcp_header_t)) > + u8_ptr_add (vlib_buffer_get_current (b), b->current_length))) + { + return 0; + } if (src_port) *src_port = ((udp_header_t *) (l4))->src_port; if (dst_port) @@ -135,6 +146,11 @@ ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6, } else if (l4_protocol == IP_PROTOCOL_ICMP6) { + if (u8_ptr_add (l4, sizeof (icmp46_header_t)) > + u8_ptr_add (vlib_buffer_get_current (b), b->current_length)) + { + return 0; + } icmp46_header_t *icmp = (icmp46_header_t *) (l4); if (icmp_type_or_tcp_flags) *icmp_type_or_tcp_flags = ((icmp46_header_t *) (l4))->type; diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index 5ced88fec2e..1f025fa1113 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -1924,7 +1924,7 @@ vl_api_ip_local_reass_get_t_handler (vl_api_ip_local_reass_get_t *mp) { vl_api_ip_local_reass_get_reply_t *rmp; int rv = 0; - REPLY_MACRO2 (VL_API_IP_LOCAL_REASS_GET, { + REPLY_MACRO2 (VL_API_IP_LOCAL_REASS_GET_REPLY, { rmp->ip4_is_enabled = ip4_local_full_reass_enabled (); rmp->ip6_is_enabled = ip6_local_full_reass_enabled (); }); diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c index 7c3c2fff217..50b4b22eb60 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.c +++ b/src/vnet/ip/reass/ip4_sv_reass.c @@ -28,12 +28,13 @@ #include <vppinfra/bihash_16_8.h> #include <vnet/ip/reass/ip4_sv_reass.h> -#define MSEC_PER_SEC 1000 +#define MSEC_PER_SEC 1000 #define IP4_SV_REASS_TIMEOUT_DEFAULT_MS 100 -#define IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default -#define IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024 +#define IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS \ + 10000 // 10 seconds default +#define IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024 #define IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3 -#define IP4_SV_REASS_HT_LOAD_FACTOR (0.75) +#define IP4_SV_REASS_HT_LOAD_FACTOR (0.75) typedef enum { @@ -94,17 +95,23 @@ typedef struct // buffer indexes of buffers in this reassembly in chronological order - // including overlaps and duplicate fragments u32 *cached_buffers; - // set to true when this reassembly is completed - bool is_complete; - // ip protocol + + bool first_fragment_seen; + bool last_fragment_seen; + + // vnet_buffer data u8 ip_proto; u8 icmp_type_or_tcp_flags; u32 tcp_ack_number; u32 tcp_seq_number; - // l4 src port u16 l4_src_port; - // l4 dst port u16 l4_dst_port; + + // vnet_buffer2 data + u32 total_ip_payload_length; + u32 first_fragment_total_ip_header_length; + u32 first_fragment_clone_bi; + u32 next_index; // lru indexes u32 lru_prev; @@ -114,13 
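The checks added to ip6_get_port() make sure the whole L4 header actually lies inside the current buffer before its fields are read; previously a truncated first buffer could be dereferenced past current_length. A generic standalone version of the same guard:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* return true when `hdr_len` bytes starting at `l4` fit inside the buffer
 * region [data, data + data_len); mirrors the pointer comparisons added to
 * ip6_get_port() for the UDP, TCP and ICMPv6 cases */
static bool
l4_header_in_buffer (const uint8_t *data, size_t data_len, const uint8_t *l4,
                     size_t hdr_len)
{
  const uint8_t *end = data + data_len;
  return l4 >= data && (size_t) (end - l4) >= hdr_len;
}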
+121,11 @@ typedef struct typedef struct { ip4_sv_reass_t *pool; - u32 reass_n; u32 id_counter; clib_spinlock_t lock; // lru indexes u32 lru_first; u32 lru_last; - } ip4_sv_reass_per_thread_t; typedef struct @@ -143,13 +148,12 @@ typedef struct vlib_main_t *vlib_main; vnet_main_t *vnet_main; - // node index of ip4-drop node - u32 ip4_drop_idx; u32 ip4_sv_reass_expire_node_idx; /** Worker handoff */ u32 fq_index; u32 fq_feature_index; + u32 fq_output_feature_index; u32 fq_custom_context_index; // reference count for enabling/disabling feature - per interface @@ -158,6 +162,8 @@ typedef struct // reference count for enabling/disabling feature - per interface u32 *output_feature_use_refcount_per_intf; + // extended reassembly refcount - see ip4_sv_reass_enable_disable_extended() + u32 extended_refcount; } ip4_sv_reass_main_t; extern ip4_sv_reass_main_t ip4_sv_reass_main; @@ -177,9 +183,15 @@ typedef enum typedef enum { REASS_FRAGMENT_CACHE, - REASS_FINISH, + REASS_FIRST_FRAG, + REASS_LAST_FRAG, REASS_FRAGMENT_FORWARD, REASS_PASSTHROUGH, + REASS_HANDOFF, + REASS_KEY, + REASS_FREE_TIMEOUT, + REASS_FREE_LRU, + REASS_FREE_ERROR, } ip4_sv_reass_trace_operation_e; typedef struct @@ -190,19 +202,23 @@ typedef struct u8 ip_proto; u16 l4_src_port; u16 l4_dst_port; - int l4_layer_truncated; + int l4_hdr_truncated; + u32 handoff_thread_index; + clib_bihash_kv_16_8_t kv; } ip4_sv_reass_trace_t; extern vlib_node_registration_t ip4_sv_reass_node; extern vlib_node_registration_t ip4_sv_reass_node_feature; static u8 * -format_ip4_sv_reass_trace (u8 * s, va_list * args) +format_ip4_sv_reass_trace (u8 *s, va_list *args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_sv_reass_trace_t *t = va_arg (*args, ip4_sv_reass_trace_t *); - if (REASS_PASSTHROUGH != t->action) + if (REASS_PASSTHROUGH != t->action && REASS_HANDOFF != t->action && + REASS_KEY != t->action && REASS_FREE_TIMEOUT != t->action && + REASS_FREE_LRU != t->action && REASS_FREE_ERROR != t->action) { s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id); } @@ -211,25 +227,42 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args) case REASS_FRAGMENT_CACHE: s = format (s, "[cached]"); break; - case REASS_FINISH: + case REASS_FIRST_FRAG: s = - format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]", + format (s, "[first-frag-seen, ip proto=%u, src_port=%u, dst_port=%u]", t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), clib_net_to_host_u16 (t->l4_dst_port)); break; + case REASS_LAST_FRAG: + s = format (s, "[last-frag-seen]"); + break; + case REASS_HANDOFF: + s = format (s, "[handoff, thread index: %u]", t->handoff_thread_index); + break; + case REASS_KEY: + s = format (s, "[lookup, key: %U]", format_bihash_kvp_16_8, &t->kv); + break; + case REASS_FREE_LRU: + s = format (s, "[free, LRU pressure]"); + break; + case REASS_FREE_TIMEOUT: + s = format (s, "[free, timed out]"); + break; + case REASS_FREE_ERROR: + s = format (s, "[free, error occurred]"); + break; case REASS_FRAGMENT_FORWARD: - s = - format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]", - t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), - clib_net_to_host_u16 (t->l4_dst_port)); + s = format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]", + t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), + clib_net_to_host_u16 (t->l4_dst_port)); break; case REASS_PASSTHROUGH: s = format (s, "[not-fragmented]"); break; } - if (t->l4_layer_truncated) + if (t->l4_hdr_truncated) { - s = 
format (s, " [l4-layer-truncated]"); + s = format (s, " [l4-hdr-truncated]"); } return s; } @@ -238,12 +271,12 @@ static void ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, ip4_sv_reass_t *reass, u32 bi, ip4_sv_reass_trace_operation_e action, u32 ip_proto, - u16 l4_src_port, u16 l4_dst_port, - int l4_layer_truncated) + u16 l4_src_port, u16 l4_dst_port, int l4_hdr_truncated, + u32 handoff_thread_index) { vlib_buffer_t *b = vlib_get_buffer (vm, bi); - if (pool_is_free_index - (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b))) + if (pool_is_free_index (vm->trace_main.trace_buffer_pool, + vlib_buffer_get_trace_index (b))) { // this buffer's trace is gone b->flags &= ~VLIB_BUFFER_IS_TRACED; @@ -260,7 +293,8 @@ ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, t->ip_proto = ip_proto; t->l4_src_port = l4_src_port; t->l4_dst_port = l4_dst_port; - t->l4_layer_truncated = l4_layer_truncated; + t->l4_hdr_truncated = l4_hdr_truncated; + t->handoff_thread_index = handoff_thread_index; #if 0 static u8 *s = NULL; s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t); @@ -270,29 +304,56 @@ ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, #endif } +static void +ip4_sv_reass_trace_timeout (vlib_main_t *vm, vlib_node_runtime_t *node, + ip4_sv_reass_t *reass, u32 bi) +{ + return ip4_sv_reass_add_trace (vm, node, reass, bi, REASS_FREE_TIMEOUT, ~0, + ~0, ~0, 0, ~0); +} + +static void +ip4_sv_reass_trace_lru_free (vlib_main_t *vm, vlib_node_runtime_t *node, + ip4_sv_reass_t *reass, u32 bi) +{ + return ip4_sv_reass_add_trace (vm, node, reass, bi, REASS_FREE_LRU, ~0, ~0, + ~0, 0, ~0); +} + +static void +ip4_sv_reass_trace_error_free (vlib_main_t *vm, vlib_node_runtime_t *node, + ip4_sv_reass_t *reass, u32 bi) +{ + return ip4_sv_reass_add_trace (vm, node, reass, bi, REASS_FREE_ERROR, ~0, ~0, + ~0, 0, ~0); +} always_inline void -ip4_sv_reass_free (vlib_main_t * vm, ip4_sv_reass_main_t * rm, - ip4_sv_reass_per_thread_t * rt, ip4_sv_reass_t * reass) +ip4_sv_reass_free (vlib_main_t *vm, ip4_sv_reass_main_t *rm, + ip4_sv_reass_per_thread_t *rt, ip4_sv_reass_t *reass, + bool del_bihash) { - clib_bihash_kv_16_8_t kv; - kv.key[0] = reass->key.as_u64[0]; - kv.key[1] = reass->key.as_u64[1]; - clib_bihash_add_del_16_8 (&rm->hash, &kv, 0); + if (del_bihash) + { + clib_bihash_kv_16_8_t kv; + kv.key[0] = reass->key.as_u64[0]; + kv.key[1] = reass->key.as_u64[1]; + clib_bihash_add_del_16_8 (&rm->hash, &kv, 0); + } vlib_buffer_free (vm, reass->cached_buffers, vec_len (reass->cached_buffers)); vec_free (reass->cached_buffers); reass->cached_buffers = NULL; + if (~0 != reass->first_fragment_clone_bi) + vlib_buffer_free_one (vm, reass->first_fragment_clone_bi); if (~0 != reass->lru_prev) { - ip4_sv_reass_t *lru_prev = - pool_elt_at_index (rt->pool, reass->lru_prev); + ip4_sv_reass_t *lru_prev = pool_elt_at_index (rt->pool, reass->lru_prev); lru_prev->lru_next = reass->lru_next; } if (~0 != reass->lru_next) { - ip4_sv_reass_t *lru_next = - pool_elt_at_index (rt->pool, reass->lru_next); + ip4_sv_reass_t *lru_next = pool_elt_at_index (rt->pool, reass->lru_next); lru_next->lru_prev = reass->lru_prev; } if (rt->lru_first == reass - rt->pool) @@ -304,20 +365,13 @@ ip4_sv_reass_free (vlib_main_t * vm, ip4_sv_reass_main_t * rm, rt->lru_last = reass->lru_prev; } pool_put (rt->pool, reass); - --rt->reass_n; -} - -always_inline void -ip4_sv_reass_init (ip4_sv_reass_t * reass) -{ - reass->cached_buffers = NULL; - reass->is_complete = false; } always_inline 
ip4_sv_reass_t * -ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm, - ip4_sv_reass_per_thread_t * rt, - ip4_sv_reass_kv_t * kv, u8 * do_handoff) +ip4_sv_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 bi, ip4_sv_reass_main_t *rm, + ip4_sv_reass_per_thread_t *rt, + ip4_sv_reass_kv_t *kv, u8 *do_handoff) { ip4_sv_reass_t *reass = NULL; f64 now = vlib_time_now (vm); @@ -335,7 +389,8 @@ again: if (now > reass->last_heard + rm->timeout) { - ip4_sv_reass_free (vm, rm, rt, reass); + ip4_sv_reass_trace_timeout (vm, node, reass, bi); + ip4_sv_reass_free (vm, rm, rt, reass, true); reass = NULL; } } @@ -346,18 +401,17 @@ again: return reass; } - if (rt->reass_n >= rm->max_reass_n && rm->max_reass_n) + if (pool_elts (rt->pool) >= rm->max_reass_n && rm->max_reass_n) { reass = pool_elt_at_index (rt->pool, rt->lru_first); - ip4_sv_reass_free (vm, rm, rt, reass); + ip4_sv_reass_trace_lru_free (vm, node, reass, bi); + ip4_sv_reass_free (vm, rm, rt, reass, true); } - pool_get (rt->pool, reass); - clib_memset (reass, 0, sizeof (*reass)); + pool_get_zero (rt->pool, reass); + reass->first_fragment_clone_bi = ~0; reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter; ++rt->id_counter; - ip4_sv_reass_init (reass); - ++rt->reass_n; reass->lru_prev = reass->lru_next = ~0; if (~0 != rt->lru_last) @@ -381,7 +435,7 @@ again: int rv = clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 2); if (rv) { - ip4_sv_reass_free (vm, rm, rt, reass); + ip4_sv_reass_free (vm, rm, rt, reass, false); reass = NULL; // if other worker created a context already work with the other copy if (-2 == rv) @@ -391,10 +445,23 @@ again: return reass; } +always_inline bool +ip4_sv_reass_is_complete (ip4_sv_reass_t *reass, bool extended) +{ + /* + * Both first and last fragments have to be seen for extended reassembly to + * be complete. Otherwise first fragment is enough. 
+ */ + if (extended) + return reass->first_fragment_seen && reass->last_fragment_seen; + + return reass->first_fragment_seen; +} + always_inline ip4_sv_reass_rc_t ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, ip4_sv_reass_main_t *rm, ip4_header_t *ip0, - ip4_sv_reass_t *reass, u32 bi0) + ip4_sv_reass_t *reass, u32 bi0, bool extended) { vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK; @@ -408,33 +475,59 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, return IP4_SV_REASS_RC_UNSUPP_IP_PROTO; if (IP_PROTOCOL_TCP == reass->ip_proto) { - reass->icmp_type_or_tcp_flags = ((tcp_header_t *) (ip0 + 1))->flags; - reass->tcp_ack_number = ((tcp_header_t *) (ip0 + 1))->ack_number; - reass->tcp_seq_number = ((tcp_header_t *) (ip0 + 1))->seq_number; + tcp_header_t *th = ip4_next_header (ip0); + reass->icmp_type_or_tcp_flags = th->flags; + reass->tcp_ack_number = th->ack_number; + reass->tcp_seq_number = th->seq_number; } else if (IP_PROTOCOL_ICMP == reass->ip_proto) { reass->icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip0 + 1))->type; + ((icmp46_header_t *) (ip4_next_header (ip0)))->type; + } + reass->first_fragment_seen = true; + if (extended) + { + reass->first_fragment_total_ip_header_length = + ip4_header_bytes (ip0); + vlib_buffer_t *clone = vlib_buffer_copy_no_chain ( + vm, b0, &reass->first_fragment_clone_bi); + if (!clone) + reass->first_fragment_clone_bi = ~0; } - reass->is_complete = true; vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip4_sv_reass_add_trace ( - vm, node, reass, bi0, REASS_FINISH, reass->ip_proto, + vm, node, reass, bi0, REASS_FIRST_FRAG, reass->ip_proto, reass->l4_src_port, reass->l4_dst_port, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); } } + if (!ip4_get_fragment_more (ip0)) + { + const u32 fragment_length = + clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0); + reass->last_fragment_seen = true; + reass->total_ip_payload_length = fragment_first + fragment_length; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip4_sv_reass_add_trace ( + vm, node, reass, bi0, REASS_LAST_FRAG, ~0, ~0, ~0, + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); + } + } + vec_add1 (reass->cached_buffers, bi0); - if (!reass->is_complete) + + if (!ip4_sv_reass_is_complete (reass, extended)) { if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip4_sv_reass_add_trace ( vm, node, reass, bi0, REASS_FRAGMENT_CACHE, ~0, ~0, ~0, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); } if (vec_len (reass->cached_buffers) > rm->max_reass_len) { @@ -445,30 +538,63 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, } always_inline int -l4_layer_truncated (ip4_header_t *ip) +l4_hdr_truncated (ip4_header_t *ip) { - static const int l4_layer_length[256] = { - [IP_PROTOCOL_TCP] = sizeof (tcp_header_t), - [IP_PROTOCOL_UDP] = sizeof (udp_header_t), - [IP_PROTOCOL_ICMP] = sizeof (icmp46_header_t), - }; + if (IP_PROTOCOL_UDP == ip->protocol) + return ((u8 *) ip + ip4_header_bytes (ip) + sizeof (udp_header_t) > + (u8 *) ip + clib_net_to_host_u16 (ip->length)); + if (IP_PROTOCOL_ICMP == ip->protocol) + return ((u8 *) ip + ip4_header_bytes (ip) + sizeof (icmp46_header_t) > + (u8 *) ip + clib_net_to_host_u16 (ip->length)); + + if (IP_PROTOCOL_TCP != ip->protocol) + return false; + + 
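When the fragment with more-fragments cleared arrives, the code above records the original datagram's total IP payload length as the fragment's byte offset plus its own payload length, which is what extended consumers read back through vnet_buffer2. A standalone version of that arithmetic (IPv4 fragment offsets are carried in 8-byte units):

#include <stdint.h>

/* for the fragment with MF == 0: original IP payload length equals the
 * fragment's byte offset plus the fragment's own payload length */
static uint32_t
total_ip_payload_length (uint16_t fragment_offset_units, uint16_t ip_total_len,
                         uint16_t ip_header_bytes)
{
  uint32_t fragment_first = (uint32_t) fragment_offset_units * 8;
  uint32_t fragment_length = (uint32_t) ip_total_len - ip_header_bytes;
  return fragment_first + fragment_length;
}

/* e.g. last fragment at offset 185 units (1480 bytes in), 540-byte packet
 * with a 20-byte header: 1480 + 520 = 2000 bytes of original payload */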
tcp_header_t *th = ip4_next_header (ip); + const u32 tcp_opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t); - return ((u8 *) ip + ip4_header_bytes (ip) + l4_layer_length[ip->protocol] > + return ((u8 *) ip + ip4_header_bytes (ip) + sizeof (tcp_header_t) + + tcp_opts_len > (u8 *) ip + clib_net_to_host_u16 (ip->length)); } +always_inline void +ip4_sv_reass_reset_vnet_buffer2 (vlib_buffer_t *b) +{ + vnet_buffer2 (b)->ip.reass.pool_index = ~0; + vnet_buffer2 (b)->ip.reass.thread_index = ~0; + vnet_buffer2 (b)->ip.reass.id = ~0; +} + +always_inline void +ip4_sv_reass_set_vnet_buffer2_from_reass (vlib_main_t *vm, vlib_buffer_t *b, + ip4_sv_reass_t *reass) +{ + vnet_buffer2 (b)->ip.reass.thread_index = vm->thread_index; + vnet_buffer2 (b)->ip.reass.id = reass->id; + vnet_buffer2 (b)->ip.reass.pool_index = + reass - ip4_sv_reass_main.per_thread_data[vm->thread_index].pool; +} + +struct ip4_sv_reass_args +{ + bool is_feature; + bool is_output_feature; + bool is_custom; + bool with_custom_context; + bool extended; +}; + always_inline uword ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *frame, bool is_feature, - bool is_output_feature, bool is_custom, - bool with_custom_context) + vlib_frame_t *frame, struct ip4_sv_reass_args a) { u32 *from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index; ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index]; u32 *context; - if (with_custom_context) + if (a.with_custom_context) context = vlib_frame_aux_args (frame); clib_spinlock_lock (&rt->lock); @@ -506,20 +632,18 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, clib_prefetch_load (p3->data); } - ip4_header_t *ip0 = - (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0), - (is_output_feature ? 1 : 0) * - vnet_buffer (b0)-> - ip.save_rewrite_length); - ip4_header_t *ip1 = - (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b1), - (is_output_feature ? 1 : 0) * - vnet_buffer (b1)-> - ip.save_rewrite_length); - - if (PREDICT_FALSE - (ip4_get_fragment_more (ip0) || ip4_get_fragment_offset (ip0)) - || (ip4_get_fragment_more (ip1) || ip4_get_fragment_offset (ip1))) + ip4_header_t *ip0 = (ip4_header_t *) u8_ptr_add ( + vlib_buffer_get_current (b0), + (ptrdiff_t) (a.is_output_feature ? 1 : 0) * + vnet_buffer (b0)->ip.save_rewrite_length); + ip4_header_t *ip1 = (ip4_header_t *) u8_ptr_add ( + vlib_buffer_get_current (b1), + (ptrdiff_t) (a.is_output_feature ? 1 : 0) * + vnet_buffer (b1)->ip.save_rewrite_length); + + if (PREDICT_FALSE (ip4_get_fragment_more (ip0) || + ip4_get_fragment_offset (ip0)) || + (ip4_get_fragment_more (ip1) || ip4_get_fragment_offset (ip1))) { // fragment found, go slow path b -= 2; @@ -530,39 +654,41 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } goto slow_path; } - if (is_feature) + if (a.is_feature) { vnet_feature_next (&next0, b0); } else { - next0 = is_custom ? vnet_buffer (b0)->ip.reass.next_index : - IP4_SV_REASSEMBLY_NEXT_INPUT; + next0 = a.is_custom ? 
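The rewritten l4_hdr_truncated() compares protocol-specific header sizes against the IP total length, and for TCP it now accounts for options via the data-offset field instead of only sizeof (tcp_header_t). A standalone sketch of the TCP length math, assuming the data offset is expressed in 32-bit words as usual:

#include <stdbool.h>
#include <stdint.h>

/* does the full TCP header, including options, fit inside the IP datagram?
 * ip_total_len is the IPv4 total length, ip_hdr_bytes the IPv4 header length
 * (IHL * 4), tcp_data_offset the TCP data-offset field in 32-bit words */
static bool
tcp_header_fits (uint16_t ip_total_len, uint8_t ip_hdr_bytes,
                 uint8_t tcp_data_offset)
{
  uint32_t tcp_hdr_bytes = (uint32_t) tcp_data_offset * 4;
  return (uint32_t) ip_hdr_bytes + tcp_hdr_bytes <= ip_total_len;
}

/* e.g. a 60-byte SYN with options: 20-byte IP header + data offset 10
 * (40-byte TCP header) fits exactly in ip_total_len = 60 */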
vnet_buffer (b0)->ip.reass.next_index : + IP4_SV_REASSEMBLY_NEXT_INPUT; } vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; - if (l4_layer_truncated (ip0)) + + if (a.extended) + ip4_sv_reass_reset_vnet_buffer2 (b0); + + if (l4_hdr_truncated (ip0)) { - vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1; - vnet_buffer (b0)->ip.reass.l4_src_port = 0; - vnet_buffer (b0)->ip.reass.l4_dst_port = 0; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 1; } else { - vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0; if (IP_PROTOCOL_TCP == ip0->protocol) { vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((tcp_header_t *) (ip0 + 1))->flags; + ((tcp_header_t *) (ip4_next_header (ip0)))->flags; vnet_buffer (b0)->ip.reass.tcp_ack_number = - ((tcp_header_t *) (ip0 + 1))->ack_number; + ((tcp_header_t *) (ip4_next_header (ip0)))->ack_number; vnet_buffer (b0)->ip.reass.tcp_seq_number = - ((tcp_header_t *) (ip0 + 1))->seq_number; + ((tcp_header_t *) (ip4_next_header (ip0)))->seq_number; } else if (IP_PROTOCOL_ICMP == ip0->protocol) { vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip0 + 1))->type; + ((icmp46_header_t *) (ip4_next_header (ip0)))->type; } vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0); @@ -574,41 +700,43 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_buffer (b0)->ip.reass.ip_proto, vnet_buffer (b0)->ip.reass.l4_src_port, vnet_buffer (b0)->ip.reass.l4_dst_port, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); } - if (is_feature) + if (a.is_feature) { vnet_feature_next (&next1, b1); } else { - next1 = is_custom ? vnet_buffer (b1)->ip.reass.next_index : - IP4_SV_REASSEMBLY_NEXT_INPUT; + next1 = a.is_custom ? 
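ip4_sv_reass_inline() now takes a struct of feature flags instead of a list of bools, and each VLIB_NODE_FN wrapper passes a compound literal of constants, so the always_inline body is specialized per node variant and the unused branches drop out at compile time. A minimal illustration of the pattern outside of VPP:

#include <stdbool.h>

struct variant_args
{
  bool is_feature;
  bool is_output_feature;
  bool is_custom;
  bool with_custom_context;
  bool extended;
};

/* the "inline worker": branches on compile-time-constant flags, so each
 * wrapper below compiles to a specialized body with dead paths removed */
static inline int
process_inline (int x, struct variant_args a)
{
  if (a.extended)
    x += 1000;
  if (a.is_feature)
    x *= 2;
  return x;
}

/* per-variant entry points, mirroring the VLIB_NODE_FN wrappers */
int
process_plain (int x)
{
  return process_inline (x, (struct variant_args){ 0 });
}

int
process_feature_extended (int x)
{
  return process_inline (
    x, (struct variant_args){ .is_feature = true, .extended = true });
}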
vnet_buffer (b1)->ip.reass.next_index : + IP4_SV_REASSEMBLY_NEXT_INPUT; } vnet_buffer (b1)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b1)->ip.reass.ip_proto = ip1->protocol; - if (l4_layer_truncated (ip1)) + + if (a.extended) + ip4_sv_reass_reset_vnet_buffer2 (b1); + + if (l4_hdr_truncated (ip1)) { - vnet_buffer (b1)->ip.reass.l4_layer_truncated = 1; - vnet_buffer (b1)->ip.reass.l4_src_port = 0; - vnet_buffer (b1)->ip.reass.l4_dst_port = 0; + vnet_buffer (b1)->ip.reass.l4_hdr_truncated = 1; } else { - vnet_buffer (b1)->ip.reass.l4_layer_truncated = 0; + vnet_buffer (b1)->ip.reass.l4_hdr_truncated = 0; if (IP_PROTOCOL_TCP == ip1->protocol) { vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags = - ((tcp_header_t *) (ip1 + 1))->flags; + ((tcp_header_t *) (ip4_next_header (ip1)))->flags; vnet_buffer (b1)->ip.reass.tcp_ack_number = - ((tcp_header_t *) (ip1 + 1))->ack_number; + ((tcp_header_t *) (ip4_next_header (ip1)))->ack_number; vnet_buffer (b1)->ip.reass.tcp_seq_number = - ((tcp_header_t *) (ip1 + 1))->seq_number; + ((tcp_header_t *) (ip4_next_header (ip1)))->seq_number; } else if (IP_PROTOCOL_ICMP == ip1->protocol) { vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip1 + 1))->type; + ((icmp46_header_t *) (ip4_next_header (ip1)))->type; } vnet_buffer (b1)->ip.reass.l4_src_port = ip4_get_port (ip1, 1); vnet_buffer (b1)->ip.reass.l4_dst_port = ip4_get_port (ip1, 0); @@ -620,14 +748,14 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_buffer (b1)->ip.reass.ip_proto, vnet_buffer (b1)->ip.reass.l4_src_port, vnet_buffer (b1)->ip.reass.l4_dst_port, - vnet_buffer (b1)->ip.reass.l4_layer_truncated); + vnet_buffer (b1)->ip.reass.l4_hdr_truncated, ~0); } n_left_from -= 2; next[0] = next0; next[1] = next1; next += 2; - if (with_custom_context) + if (a.with_custom_context) context += 2; } @@ -638,13 +766,12 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, b0 = *b; b++; - ip4_header_t *ip0 = - (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0), - (is_output_feature ? 1 : 0) * - vnet_buffer (b0)-> - ip.save_rewrite_length); - if (PREDICT_FALSE - (ip4_get_fragment_more (ip0) || ip4_get_fragment_offset (ip0))) + ip4_header_t *ip0 = (ip4_header_t *) u8_ptr_add ( + vlib_buffer_get_current (b0), + (ptrdiff_t) (a.is_output_feature ? 1 : 0) * + vnet_buffer (b0)->ip.save_rewrite_length); + if (PREDICT_FALSE (ip4_get_fragment_more (ip0) || + ip4_get_fragment_offset (ip0))) { // fragment found, go slow path b -= 1; @@ -655,38 +782,41 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } goto slow_path; } - if (is_feature) + if (a.is_feature) { vnet_feature_next (&next0, b0); } else { - next0 = - is_custom ? vnet_buffer (b0)->ip. - reass.next_index : IP4_SV_REASSEMBLY_NEXT_INPUT; + next0 = a.is_custom ? 
vnet_buffer (b0)->ip.reass.next_index : + IP4_SV_REASSEMBLY_NEXT_INPUT; } vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; - if (l4_layer_truncated (ip0)) + + if (a.extended) + ip4_sv_reass_reset_vnet_buffer2 (b0); + + if (l4_hdr_truncated (ip0)) { - vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 1; } else { - vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0; if (IP_PROTOCOL_TCP == ip0->protocol) { vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((tcp_header_t *) (ip0 + 1))->flags; + ((tcp_header_t *) (ip4_next_header (ip0)))->flags; vnet_buffer (b0)->ip.reass.tcp_ack_number = - ((tcp_header_t *) (ip0 + 1))->ack_number; + ((tcp_header_t *) (ip4_next_header (ip0)))->ack_number; vnet_buffer (b0)->ip.reass.tcp_seq_number = - ((tcp_header_t *) (ip0 + 1))->seq_number; + ((tcp_header_t *) (ip4_next_header (ip0)))->seq_number; } else if (IP_PROTOCOL_ICMP == ip0->protocol) { vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip0 + 1))->type; + ((icmp46_header_t *) (ip4_next_header (ip0)))->type; } vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0); @@ -698,13 +828,13 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_buffer (b0)->ip.reass.ip_proto, vnet_buffer (b0)->ip.reass.l4_src_port, vnet_buffer (b0)->ip.reass.l4_dst_port, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); } n_left_from -= 1; next[0] = next0; next += 1; - if (with_custom_context) + if (a.with_custom_context) context += 1; } @@ -719,7 +849,7 @@ slow_path: while (n_left_from > 0) { - if (with_custom_context) + if (a.with_custom_context) vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next, to_next_aux, n_left_to_next); else @@ -736,15 +866,14 @@ slow_path: bi0 = from[0]; b0 = vlib_get_buffer (vm, bi0); - ip4_header_t *ip0 = - (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0), - (is_output_feature ? 1 : 0) * - vnet_buffer (b0)-> - ip.save_rewrite_length); + ip4_header_t *ip0 = (ip4_header_t *) u8_ptr_add ( + vlib_buffer_get_current (b0), + (ptrdiff_t) (a.is_output_feature ? 
1 : 0) * + vnet_buffer (b0)->ip.save_rewrite_length); if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0)) { // this is a regular packet - no fragmentation - if (is_custom) + if (a.is_custom) { next0 = vnet_buffer (b0)->ip.reass.next_index; } @@ -754,28 +883,28 @@ slow_path: } vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; - if (l4_layer_truncated (ip0)) + if (l4_hdr_truncated (ip0)) { - vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 1; vnet_buffer (b0)->ip.reass.l4_src_port = 0; vnet_buffer (b0)->ip.reass.l4_dst_port = 0; } else { - vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0; if (IP_PROTOCOL_TCP == ip0->protocol) { vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((tcp_header_t *) (ip0 + 1))->flags; + ((tcp_header_t *) (ip4_next_header (ip0)))->flags; vnet_buffer (b0)->ip.reass.tcp_ack_number = - ((tcp_header_t *) (ip0 + 1))->ack_number; + ((tcp_header_t *) (ip4_next_header (ip0)))->ack_number; vnet_buffer (b0)->ip.reass.tcp_seq_number = - ((tcp_header_t *) (ip0 + 1))->seq_number; + ((tcp_header_t *) (ip4_next_header (ip0)))->seq_number; } else if (IP_PROTOCOL_ICMP == ip0->protocol) { vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip0 + 1))->type; + ((icmp46_header_t *) (ip4_next_header (ip0)))->type; } vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); @@ -789,7 +918,7 @@ slow_path: vnet_buffer (b0)->ip.reass.ip_proto, vnet_buffer (b0)->ip.reass.l4_src_port, vnet_buffer (b0)->ip.reass.l4_dst_port, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); } goto packet_enqueue; } @@ -797,7 +926,11 @@ slow_path: const u32 fragment_length = clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0); const u32 fragment_last = fragment_first + fragment_length - 1; - if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0))) // 8 is minimum frag length per RFC 791 + if (fragment_first > fragment_last || + fragment_first + fragment_length > UINT16_MAX - 20 || + (fragment_length < 8 && + ip4_get_fragment_more ( + ip0))) // 8 is minimum frag length per RFC 791 { next0 = IP4_SV_REASSEMBLY_NEXT_DROP; error0 = IP4_ERROR_REASS_MALFORMED_PACKET; @@ -807,7 +940,7 @@ slow_path: ip4_sv_reass_kv_t kv; u8 do_handoff = 0; - if (with_custom_context) + if (a.with_custom_context) kv.k.as_u64[0] = (u64) *context | (u64) ip0->src_address.as_u32 << 32; else @@ -819,15 +952,29 @@ slow_path: (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48; - ip4_sv_reass_t *reass = - ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff); + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip4_sv_reass_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (t[0])); + t->action = REASS_KEY; + STATIC_ASSERT_SIZEOF (t->kv, sizeof (kv)); + clib_memcpy (&t->kv, &kv, sizeof (kv)); + } + + ip4_sv_reass_t *reass = ip4_sv_reass_find_or_create ( + vm, node, bi0, rm, rt, &kv, &do_handoff); if (PREDICT_FALSE (do_handoff)) { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip4_sv_reass_add_trace (vm, node, reass, bi0, REASS_HANDOFF, + ~0, ~0, ~0, 0, kv.v.thread_index); + } next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF; vnet_buffer (b0)->ip.reass.owner_thread_index = kv.v.thread_index; - if (with_custom_context) + if (a.with_custom_context) forward_context = 1; 
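The slow path validates each fragment before touching any reassembly context: the computed byte range must not wrap, the reassembled datagram must stay inside the 16-bit IPv4 length space, and any fragment with more-fragments set must carry at least 8 bytes of payload (RFC 791's minimum, since offsets are expressed in 8-byte units). A standalone version of those checks, with an extra zero-length guard added only to keep the helper self-contained:

#include <stdbool.h>
#include <stdint.h>

/* returns true when an IPv4 fragment is plausibly well-formed */
static bool
ip4_fragment_sane (uint32_t fragment_first, uint32_t fragment_length,
                   bool more_fragments)
{
  if (fragment_length == 0) /* guard the subtraction below */
    return false;
  uint32_t fragment_last = fragment_first + fragment_length - 1;
  if (fragment_first > fragment_last)
    return false;
  /* keep headroom for a 20-byte header, mirroring UINT16_MAX - 20 above */
  if (fragment_first + fragment_length > UINT16_MAX - 20)
    return false;
  /* non-final fragments must carry at least one 8-byte unit of payload */
  if (more_fragments && fragment_length < 8)
    return false;
  return true;
}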
goto packet_enqueue; } @@ -840,9 +987,9 @@ slow_path: goto packet_enqueue; } - if (reass->is_complete) + if (ip4_sv_reass_is_complete (reass, a.extended)) { - if (is_custom) + if (a.is_custom) { next0 = vnet_buffer (b0)->ip.reass.next_index; } @@ -851,7 +998,7 @@ slow_path: next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; } vnet_buffer (b0)->ip.reass.is_non_first_fragment = - ! !fragment_first; + !!fragment_first; vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = reass->icmp_type_or_tcp_flags; @@ -861,18 +1008,20 @@ slow_path: reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; + if (a.extended) + ip4_sv_reass_set_vnet_buffer2_from_reass (vm, b0, reass); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip4_sv_reass_add_trace ( vm, node, reass, bi0, REASS_FRAGMENT_FORWARD, reass->ip_proto, reass->l4_src_port, reass->l4_dst_port, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); } goto packet_enqueue; } ip4_sv_reass_rc_t rc = - ip4_sv_reass_update (vm, node, rm, ip0, reass, bi0); + ip4_sv_reass_update (vm, node, rm, ip0, reass, bi0, a.extended); u32 counter = ~0; switch (rc) { @@ -889,62 +1038,64 @@ slow_path: if (~0 != counter) { vlib_node_increment_counter (vm, node->node_index, counter, 1); - ip4_sv_reass_free (vm, rm, rt, reass); + ip4_sv_reass_trace_error_free (vm, node, reass, bi0); + ip4_sv_reass_free (vm, rm, rt, reass, true); goto next_packet; } - if (reass->is_complete) + if (ip4_sv_reass_is_complete (reass, a.extended)) { u32 idx; vec_foreach_index (idx, reass->cached_buffers) - { - u32 bi0 = vec_elt (reass->cached_buffers, idx); - vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - ip0 = - (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0), - (is_output_feature ? 1 : 0) * - vnet_buffer (b0)-> - ip.save_rewrite_length); - u32 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; - if (is_feature) - { - vnet_feature_next (&next0, b0); - } - if (is_custom) - { - next0 = vnet_buffer (b0)->ip.reass.next_index; - } - if (0 == n_left_to_next) - { - vlib_put_next_frame (vm, node, next_index, - n_left_to_next); - vlib_get_next_frame (vm, node, next_index, to_next, - n_left_to_next); - } - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - vnet_buffer (b0)->ip.reass.is_non_first_fragment = - ! !ip4_get_fragment_offset (ip0); - vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; - vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - reass->icmp_type_or_tcp_flags; - vnet_buffer (b0)->ip.reass.tcp_ack_number = - reass->tcp_ack_number; - vnet_buffer (b0)->ip.reass.tcp_seq_number = - reass->tcp_seq_number; - vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; - vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - ip4_sv_reass_add_trace ( - vm, node, reass, bi0, REASS_FRAGMENT_FORWARD, - reass->ip_proto, reass->l4_src_port, reass->l4_dst_port, - vnet_buffer (b0)->ip.reass.l4_layer_truncated); - } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, bi0, - next0); - } + { + u32 bi0 = vec_elt (reass->cached_buffers, idx); + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + ip0 = (ip4_header_t *) u8_ptr_add ( + vlib_buffer_get_current (b0), + (ptrdiff_t) (a.is_output_feature ? 
1 : 0) * + vnet_buffer (b0)->ip.save_rewrite_length); + u32 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; + if (a.is_feature) + { + vnet_feature_next (&next0, b0); + } + if (a.is_custom) + { + next0 = vnet_buffer (b0)->ip.reass.next_index; + } + if (0 == n_left_to_next) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = + !!ip4_get_fragment_offset (ip0); + vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + reass->icmp_type_or_tcp_flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + reass->tcp_ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + reass->tcp_seq_number; + vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; + vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; + if (a.extended) + ip4_sv_reass_set_vnet_buffer2_from_reass (vm, b0, reass); + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip4_sv_reass_add_trace ( + vm, node, reass, bi0, REASS_FRAGMENT_FORWARD, + reass->ip_proto, reass->l4_src_port, + reass->l4_dst_port, + vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0); + } + vlib_validate_buffer_enqueue_x1 ( + vm, node, next_index, to_next, n_left_to_next, bi0, next0); + } vec_set_len (reass->cached_buffers, 0); // buffers are owned by frame now } @@ -954,12 +1105,13 @@ slow_path: to_next[0] = bi0; to_next += 1; n_left_to_next -= 1; - if (is_feature && IP4_ERROR_NONE == error0) + if (a.is_feature && IP4_ERROR_NONE == error0 && + IP4_SV_REASSEMBLY_NEXT_HANDOFF != next0) { b0 = vlib_get_buffer (vm, bi0); vnet_feature_next (&next0, b0); } - if (with_custom_context && forward_context) + if (a.with_custom_context && forward_context) { if (to_next_aux) { @@ -977,7 +1129,7 @@ slow_path: next_packet: from += 1; n_left_from -= 1; - if (with_custom_context) + if (a.with_custom_context) context += 1; } @@ -989,13 +1141,20 @@ done: return frame->n_vectors; } -VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_sv_reass_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip4_sv_reass_inline ( - vm, node, frame, false /* is_feature */, false /* is_output_feature */, - false /* is_custom */, false /* with_custom_context */); + /* + * Extended reassembly is not supported for non-feature nodes. 
+ */ + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = false, + .is_output_feature = false, + .is_custom = false, + .with_custom_context = false, + .extended = false, + }); } VLIB_REGISTER_NODE (ip4_sv_reass_node) = { @@ -1014,13 +1173,27 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node) = { }, }; -VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_sv_reass_node_feature) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip4_sv_reass_inline ( - vm, node, frame, true /* is_feature */, false /* is_output_feature */, - false /* is_custom */, false /* with_custom_context */); + if (ip4_sv_reass_main.extended_refcount > 0) + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = true, + .is_output_feature = false, + .is_custom = false, + .with_custom_context = false, + .extended = true, + }); + + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = true, + .is_output_feature = false, + .is_custom = false, + .with_custom_context = false, + .extended = false, + }); } VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = { @@ -1039,22 +1212,35 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = { }; VNET_FEATURE_INIT (ip4_sv_reass_feature) = { - .arc_name = "ip4-unicast", - .node_name = "ip4-sv-reassembly-feature", - .runs_before = VNET_FEATURES ("ip4-lookup"), - .runs_after = 0, + .arc_name = "ip4-unicast", + .node_name = "ip4-sv-reassembly-feature", + .runs_before = VNET_FEATURES ("ip4-lookup"), + .runs_after = 0, }; -VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_sv_reass_node_output_feature) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip4_sv_reass_inline ( - vm, node, frame, true /* is_feature */, true /* is_output_feature */, - false /* is_custom */, false /* with_custom_context */); + if (ip4_sv_reass_main.extended_refcount > 0) + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = true, + .is_output_feature = true, + .is_custom = false, + .with_custom_context = false, + .extended = true, + }); + + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = true, + .is_output_feature = true, + .is_custom = false, + .with_custom_context = false, + .extended = false, + }); } - VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = { .name = "ip4-sv-reassembly-output-feature", .vector_size = sizeof (u32), @@ -1066,15 +1252,15 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = { { [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input", [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop", - [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff", + [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-output-feature-hoff", }, }; VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = { - .arc_name = "ip4-output", - .node_name = "ip4-sv-reassembly-output-feature", - .runs_before = 0, - .runs_after = 0, + .arc_name = "ip4-output", + .node_name = "ip4-sv-reassembly-output-feature", + .runs_before = 0, + .runs_after = 0, }; VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = { @@ -1093,13 +1279,20 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = { }, }; -VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_sv_reass_custom_node) 
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip4_sv_reass_inline ( - vm, node, frame, false /* is_feature */, false /* is_output_feature */, - true /* is_custom */, false /* with_custom_context */); + /* + * Extended reassembly is not supported for non-feature nodes. + */ + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = false, + .is_output_feature = false, + .is_custom = true, + .with_custom_context = false, + .extended = false, + }); } VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = { @@ -1122,9 +1315,17 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = { VLIB_NODE_FN (ip4_sv_reass_custom_context_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip4_sv_reass_inline ( - vm, node, frame, false /* is_feature */, false /* is_output_feature */, - true /* is_custom */, true /* with_custom_context */); + /* + * Extended reassembly is not supported for non-feature nodes. + */ + return ip4_sv_reass_inline (vm, node, frame, + (struct ip4_sv_reass_args){ + .is_feature = false, + .is_output_feature = false, + .is_custom = true, + .with_custom_context = true, + .extended = false, + }); } #ifndef CLIB_MARCH_VARIANT @@ -1159,7 +1360,7 @@ typedef struct #ifndef CLIB_MARCH_VARIANT static int -ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx) +ip4_rehash_cb (clib_bihash_kv_16_8_t *kv, void *_ctx) { ip4_rehash_cb_ctx *ctx = _ctx; if (clib_bihash_add_del_16_8 (ctx->new_hash, kv, 1)) @@ -1186,8 +1387,8 @@ ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies, u32 max_reassembly_length, u32 expire_walk_interval_ms) { u32 old_nbuckets = ip4_sv_reass_get_nbuckets (); - ip4_sv_reass_set_params (timeout_ms, max_reassemblies, - max_reassembly_length, expire_walk_interval_ms); + ip4_sv_reass_set_params (timeout_ms, max_reassemblies, max_reassembly_length, + expire_walk_interval_ms); vlib_process_signal_event (ip4_sv_reass_main.vlib_main, ip4_sv_reass_main.ip4_sv_reass_expire_node_idx, IP4_EVENT_CONFIG_CHANGED, 0); @@ -1200,7 +1401,7 @@ ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies, ctx.failure = 0; ctx.new_hash = &new_hash; clib_bihash_init_16_8 (&new_hash, "ip4-dr", new_nbuckets, - new_nbuckets * 1024); + (uword) new_nbuckets * 1024); clib_bihash_foreach_key_value_pair_16_8 (&ip4_sv_reass_main.hash, ip4_rehash_cb, &ctx); if (ctx.failure) @@ -1220,8 +1421,8 @@ ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies, } vnet_api_error_t -ip4_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies, - u32 * max_reassembly_length, u32 * expire_walk_interval_ms) +ip4_sv_reass_get (u32 *timeout_ms, u32 *max_reassemblies, + u32 *max_reassembly_length, u32 *expire_walk_interval_ms) { *timeout_ms = ip4_sv_reass_main.timeout_ms; *max_reassemblies = ip4_sv_reass_main.max_reass_n; @@ -1231,7 +1432,7 @@ ip4_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies, } static clib_error_t * -ip4_sv_reass_init_function (vlib_main_t * vm) +ip4_sv_reass_init_function (vlib_main_t *vm) { ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; clib_error_t *error = 0; @@ -1244,11 +1445,11 @@ ip4_sv_reass_init_function (vlib_main_t * vm) vec_validate (rm->per_thread_data, vlib_num_workers ()); ip4_sv_reass_per_thread_t *rt; vec_foreach (rt, rm->per_thread_data) - { - clib_spinlock_init (&rt->lock); - pool_alloc (rt->pool, rm->max_reass_n); - rt->lru_first = rt->lru_last = ~0; - } + { + clib_spinlock_init (&rt->lock); + pool_alloc (rt->pool, rm->max_reass_n); + rt->lru_first = rt->lru_last = ~0; + } node = 
vlib_get_node_by_name (vm, (u8 *) "ip4-sv-reassembly-expire-walk"); ASSERT (node); @@ -1260,15 +1461,14 @@ ip4_sv_reass_init_function (vlib_main_t * vm) IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS); nbuckets = ip4_sv_reass_get_nbuckets (); - clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024); - - node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop"); - ASSERT (node); - rm->ip4_drop_idx = node->index; + clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, + (uword) nbuckets * 1024); rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0); rm->fq_feature_index = vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0); + rm->fq_output_feature_index = + vlib_frame_queue_main_init (ip4_sv_reass_node_output_feature.index, 0); rm->fq_custom_context_index = vlib_frame_queue_main_init (ip4_sv_reass_custom_context_node.index, 0); @@ -1291,10 +1491,8 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm, while (true) { - vlib_process_wait_for_event_or_clock (vm, - (f64) - rm->expire_walk_interval_ms / - (f64) MSEC_PER_SEC); + vlib_process_wait_for_event_or_clock ( + vm, (f64) rm->expire_walk_interval_ms / (f64) MSEC_PER_SEC); event_type = vlib_process_get_events (vm, &event_data); switch (event_type) @@ -1323,19 +1521,20 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm, clib_spinlock_lock (&rt->lock); vec_reset_length (pool_indexes_to_free); - pool_foreach_index (index, rt->pool) { - reass = pool_elt_at_index (rt->pool, index); - if (now > reass->last_heard + rm->timeout) - { - vec_add1 (pool_indexes_to_free, index); - } - } + pool_foreach_index (index, rt->pool) + { + reass = pool_elt_at_index (rt->pool, index); + if (now > reass->last_heard + rm->timeout) + { + vec_add1 (pool_indexes_to_free, index); + } + } int *i; - vec_foreach (i, pool_indexes_to_free) - { - ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); - ip4_sv_reass_free (vm, rm, rt, reass); - } + vec_foreach (i, pool_indexes_to_free) + { + ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); + ip4_sv_reass_free (vm, rm, rt, reass, true); + } clib_spinlock_unlock (&rt->lock); } @@ -1360,7 +1559,7 @@ VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = { }; static u8 * -format_ip4_sv_reass_key (u8 * s, va_list * args) +format_ip4_sv_reass_key (u8 *s, va_list *args) { ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *); s = @@ -1371,37 +1570,35 @@ format_ip4_sv_reass_key (u8 * s, va_list * args) } static u8 * -format_ip4_sv_reass (u8 * s, va_list * args) +format_ip4_sv_reass (u8 *s, va_list *args) { vlib_main_t *vm = va_arg (*args, vlib_main_t *); ip4_sv_reass_t *reass = va_arg (*args, ip4_sv_reass_t *); - s = format (s, "ID: %lu, key: %U trace_op_counter: %u\n", - reass->id, format_ip4_sv_reass_key, &reass->key, - reass->trace_op_counter); + s = format (s, "ID: %lu, key: %U trace_op_counter: %u\n", reass->id, + format_ip4_sv_reass_key, &reass->key, reass->trace_op_counter); vlib_buffer_t *b; u32 *bip; u32 counter = 0; vec_foreach (bip, reass->cached_buffers) - { - u32 bi = *bip; - do - { - b = vlib_get_buffer (vm, bi); - s = format (s, " #%03u: bi: %u, ", counter, bi); - ++counter; - bi = b->next_buffer; - } - while (b->flags & VLIB_BUFFER_NEXT_PRESENT); - } + { + u32 bi = *bip; + do + { + b = vlib_get_buffer (vm, bi); + s = format (s, " #%03u: bi: %u, ", counter, bi); + ++counter; + bi = b->next_buffer; + } + while (b->flags & VLIB_BUFFER_NEXT_PRESENT); + } return s; } static clib_error_t * -show_ip4_reass (vlib_main_t * vm, - unformat_input_t * input, - CLIB_UNUSED 
(vlib_cli_command_t * lmd)) +show_ip4_reass (vlib_main_t *vm, unformat_input_t *input, + CLIB_UNUSED (vlib_cli_command_t *lmd)) { ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; @@ -1424,100 +1621,105 @@ show_ip4_reass (vlib_main_t * vm, clib_spinlock_lock (&rt->lock); if (details) { - pool_foreach (reass, rt->pool) { - vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass); - } + pool_foreach (reass, rt->pool) + { + vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass); + } } - sum_reass_n += rt->reass_n; + sum_reass_n += pool_elts (rt->pool); clib_spinlock_unlock (&rt->lock); } vlib_cli_output (vm, "---------------------"); vlib_cli_output (vm, "Current IP4 reassemblies count: %lu\n", (long unsigned) sum_reass_n); vlib_cli_output (vm, - "Maximum configured concurrent shallow virtual IP4 reassemblies per worker-thread: %lu\n", + "Maximum configured concurrent shallow virtual IP4 " + "reassemblies per worker-thread: %lu\n", (long unsigned) rm->max_reass_n); vlib_cli_output (vm, "Maximum configured amount of fragments per shallow " "virtual IP4 reassembly: %lu\n", (long unsigned) rm->max_reass_len); + vlib_cli_output ( + vm, "Maximum configured shallow virtual IP4 reassembly timeout: %lums\n", + (long unsigned) rm->timeout_ms); vlib_cli_output (vm, - "Maximum configured shallow virtual IP4 reassembly timeout: %lums\n", - (long unsigned) rm->timeout_ms); - vlib_cli_output (vm, - "Maximum configured shallow virtual IP4 reassembly expire walk interval: %lums\n", + "Maximum configured shallow virtual IP4 reassembly expire " + "walk interval: %lums\n", (long unsigned) rm->expire_walk_interval_ms); + return 0; } VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = { - .path = "show ip4-sv-reassembly", - .short_help = "show ip4-sv-reassembly [details]", - .function = show_ip4_reass, + .path = "show ip4-sv-reassembly", + .short_help = "show ip4-sv-reassembly [details]", + .function = show_ip4_reass, }; #ifndef CLIB_MARCH_VARIANT vnet_api_error_t ip4_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable) { - return ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, - enable_disable); + return ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, enable_disable); } #endif /* CLIB_MARCH_VARIANT */ - -#define foreach_ip4_sv_reass_handoff_error \ -_(CONGESTION_DROP, "congestion drop") - +#define foreach_ip4_sv_reass_handoff_error \ + _ (CONGESTION_DROP, "congestion drop") typedef enum { -#define _(sym,str) IP4_SV_REASSEMBLY_HANDOFF_ERROR_##sym, +#define _(sym, str) IP4_SV_REASSEMBLY_HANDOFF_ERROR_##sym, foreach_ip4_sv_reass_handoff_error #undef _ IP4_SV_REASSEMBLY_HANDOFF_N_ERROR, } ip4_sv_reass_handoff_error_t; static char *ip4_sv_reass_handoff_error_strings[] = { -#define _(sym,string) string, +#define _(sym, string) string, foreach_ip4_sv_reass_handoff_error #undef _ }; typedef struct { - u32 next_worker_index; + u32 thread_index; } ip4_sv_reass_handoff_trace_t; static u8 * -format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args) +format_ip4_sv_reass_handoff_trace (u8 *s, va_list *args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_sv_reass_handoff_trace_t *t = va_arg (*args, ip4_sv_reass_handoff_trace_t *); - s = - format (s, "ip4-sv-reassembly-handoff: next-worker %d", - t->next_worker_index); + s = format (s, "to thread-index: %u", t->thread_index); return s; } +struct ip4_sv_reass_hoff_args +{ + bool is_feature; + bool is_output_feature; + bool is_custom_context; +}; + always_inline uword 
ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *frame, bool is_feature, - bool is_custom_context) + vlib_frame_t *frame, + struct ip4_sv_reass_hoff_args a) { ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u32 n_enq, n_left_from, *from, *context; u16 thread_indices[VLIB_FRAME_SIZE], *ti; - u32 fq_index; from = vlib_frame_vector_args (frame); - if (is_custom_context) + if (a.is_custom_context) context = vlib_frame_aux_args (frame); n_left_from = frame->n_vectors; @@ -1526,28 +1728,28 @@ ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, b = bufs; ti = thread_indices; - fq_index = (is_feature) ? rm->fq_feature_index : - (is_custom_context ? rm->fq_custom_context_index : - rm->fq_index); + const u32 fq_index = a.is_output_feature ? rm->fq_output_feature_index : + a.is_feature ? rm->fq_feature_index : + a.is_custom_context ? rm->fq_custom_context_index : + rm->fq_index; while (n_left_from > 0) { ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index; - if (PREDICT_FALSE - ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b[0]->flags & VLIB_BUFFER_IS_TRACED))) + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b[0]->flags & VLIB_BUFFER_IS_TRACED))) { ip4_sv_reass_handoff_trace_t *t = vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->next_worker_index = ti[0]; + t->thread_index = ti[0]; } n_left_from -= 1; ti += 1; b += 1; } - if (is_custom_context) + if (a.is_custom_context) n_enq = vlib_buffer_enqueue_to_thread_with_aux ( vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1); else @@ -1555,21 +1757,22 @@ ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vm, node, fq_index, from, thread_indices, frame->n_vectors, 1); if (n_enq < frame->n_vectors) - vlib_node_increment_counter (vm, node->node_index, - IP4_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP, - frame->n_vectors - n_enq); + vlib_node_increment_counter ( + vm, node->node_index, IP4_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP, + frame->n_vectors - n_enq); return frame->n_vectors; } -VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_sv_reass_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { return ip4_sv_reass_handoff_node_inline ( - vm, node, frame, false /* is_feature */, false /* is_custom_context */); + vm, node, frame, + (struct ip4_sv_reass_hoff_args){ .is_feature = false, + .is_output_feature = false, + .is_custom_context = false }); } - VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = { .name = "ip4-sv-reassembly-handoff", .vector_size = sizeof (u32), @@ -1588,7 +1791,10 @@ VLIB_NODE_FN (ip4_sv_reass_custom_context_handoff_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { return ip4_sv_reass_handoff_node_inline ( - vm, node, frame, false /* is_feature */, true /* is_custom_context */); + vm, node, frame, + (struct ip4_sv_reass_hoff_args){ .is_feature = false, + .is_output_feature = false, + .is_custom_context = true }); } VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = { @@ -1606,16 +1812,16 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = { }, }; -VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm, - vlib_node_runtime_t * - node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { return 
ip4_sv_reass_handoff_node_inline ( - vm, node, frame, true /* is_feature */, false /* is_custom_context */); + vm, node, frame, + (struct ip4_sv_reass_hoff_args){ .is_feature = true, + .is_output_feature = false, + .is_custom_context = false }); } - VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = { .name = "ip4-sv-reass-feature-hoff", .vector_size = sizeof (u32), @@ -1630,6 +1836,30 @@ VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = { }, }; +VLIB_NODE_FN (ip4_sv_reass_output_feature_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip4_sv_reass_handoff_node_inline ( + vm, node, frame, + (struct ip4_sv_reass_hoff_args){ .is_feature = false, + .is_output_feature = true, + .is_custom_context = false }); +} + +VLIB_REGISTER_NODE (ip4_sv_reass_output_feature_handoff_node) = { + .name = "ip4-sv-reass-output-feature-hoff", + .vector_size = sizeof (u32), + .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings), + .error_strings = ip4_sv_reass_handoff_error_strings, + .format_trace = format_ip4_sv_reass_handoff_trace, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + #ifndef CLIB_MARCH_VARIANT int ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) @@ -1640,10 +1870,10 @@ ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) { if (!rm->feature_use_refcount_per_intf[sw_if_index]) { - ++rm->feature_use_refcount_per_intf[sw_if_index]; - return vnet_feature_enable_disable ("ip4-unicast", - "ip4-sv-reassembly-feature", - sw_if_index, 1, 0, 0); + int rv = vnet_feature_enable_disable ( + "ip4-unicast", "ip4-sv-reassembly-feature", sw_if_index, 1, 0, 0); + if (0 != rv) + return rv; } ++rm->feature_use_refcount_per_intf[sw_if_index]; } @@ -1652,9 +1882,10 @@ ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) if (rm->feature_use_refcount_per_intf[sw_if_index]) --rm->feature_use_refcount_per_intf[sw_if_index]; if (!rm->feature_use_refcount_per_intf[sw_if_index]) - return vnet_feature_enable_disable ("ip4-unicast", - "ip4-sv-reassembly-feature", - sw_if_index, 0, 0, 0); + { + return vnet_feature_enable_disable ( + "ip4-unicast", "ip4-sv-reassembly-feature", sw_if_index, 0, 0, 0); + } } return 0; } @@ -1674,8 +1905,7 @@ ip4_sv_reass_custom_context_register_next_node (uword node_index) } int -ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, - int is_enable) +ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) { ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; vec_validate (rm->output_feature_use_refcount_per_intf, sw_if_index); @@ -1683,10 +1913,11 @@ ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, { if (!rm->output_feature_use_refcount_per_intf[sw_if_index]) { - ++rm->output_feature_use_refcount_per_intf[sw_if_index]; - return vnet_feature_enable_disable ("ip4-output", - "ip4-sv-reassembly-output-feature", - sw_if_index, 1, 0, 0); + int rv = vnet_feature_enable_disable ( + "ip4-output", "ip4-sv-reassembly-output-feature", sw_if_index, 1, + 0, 0); + if (0 != rv) + return rv; } ++rm->output_feature_use_refcount_per_intf[sw_if_index]; } @@ -1695,12 +1926,66 @@ ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, if (rm->output_feature_use_refcount_per_intf[sw_if_index]) --rm->output_feature_use_refcount_per_intf[sw_if_index]; if (!rm->output_feature_use_refcount_per_intf[sw_if_index]) - return vnet_feature_enable_disable ("ip4-output", - "ip4-sv-reassembly-output-feature", - sw_if_index, 0, 0, 
0); + { + return vnet_feature_enable_disable ( + "ip4-output", "ip4-sv-reassembly-output-feature", sw_if_index, 0, + 0, 0); + } } return 0; } + +void +ip4_sv_reass_enable_disable_extended (bool is_enable) +{ + if (is_enable) + ++ip4_sv_reass_main.extended_refcount; + else + --ip4_sv_reass_main.extended_refcount; +} + +int +ip4_sv_reass_extended_lock (vlib_buffer_t *b, + struct ip4_sv_lock_unlock_args *a) +{ + ip4_sv_reass_per_thread_t *per_thread = + &ip4_sv_reass_main + .per_thread_data[vnet_buffer2 (b)->ip.reass.thread_index]; + + if (!vec_is_member (ip4_sv_reass_main.per_thread_data, per_thread)) + return -1; + + clib_spinlock_lock (&per_thread->lock); + if (pool_is_free_index (per_thread->pool, + vnet_buffer2 (b)->ip.reass.pool_index)) + goto fail; + + ip4_sv_reass_t *reass = pool_elt_at_index ( + per_thread->pool, vnet_buffer2 (b)->ip.reass.pool_index); + if (vnet_buffer2 (b)->ip.reass.id == reass->id) + { + *a->total_ip_payload_length = reass->total_ip_payload_length; + + *a->first_fragment_buffer_index = reass->first_fragment_clone_bi; + *a->first_fragment_total_ip_header_length = + reass->first_fragment_total_ip_header_length; + return 0; + } + +fail: + clib_spinlock_unlock (&per_thread->lock); + return -1; +} + +void +ip4_sv_reass_extended_unlock (vlib_buffer_t *b) +{ + ip4_sv_reass_per_thread_t *per_thread = + &ip4_sv_reass_main + .per_thread_data[vnet_buffer2 (b)->ip.reass.thread_index]; + clib_spinlock_unlock (&per_thread->lock); +} + #endif /* diff --git a/src/vnet/ip/reass/ip4_sv_reass.h b/src/vnet/ip/reass/ip4_sv_reass.h index 3a684eb9809..a1e5659a9f1 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.h +++ b/src/vnet/ip/reass/ip4_sv_reass.h @@ -23,6 +23,7 @@ #ifndef __included_ip4_sv_reass_h__ #define __included_ip4_sv_reass_h__ +#include <stdbool.h> #include <vnet/api_errno.h> #include <vnet/vnet.h> @@ -48,6 +49,33 @@ int ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); +/* + * Enable or disable extended reassembly. + * + * Extended reassembly means that fragments are cached until both first and + * last fragments are seen. Furthermore, first fragment buffer will be cloned + * and stored in reassembly context for later retrieval. + */ +void ip4_sv_reass_enable_disable_extended (bool is_enable); + +struct ip4_sv_lock_unlock_args +{ + u32 *total_ip_payload_length; + u32 *first_fragment_buffer_index; + u32 *first_fragment_total_ip_header_length; +}; + +/* + * Lock thread-level lock and fetch information from reassembly context. + * Uses vnet_buffer2 data filled by extended reassembly. + * + * Returns 0 on success, -1 otherwise. 
+ */ +int ip4_sv_reass_extended_lock (vlib_buffer_t *b, + struct ip4_sv_lock_unlock_args *a); + +void ip4_sv_reass_extended_unlock (vlib_buffer_t *b); + uword ip4_sv_reass_custom_register_next_node (uword node_index); uword ip4_sv_reass_custom_context_register_next_node (uword node_index); diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c index fe2ed05555c..69b27c5aa8e 100644 --- a/src/vnet/ip/reass/ip6_sv_reass.c +++ b/src/vnet/ip/reass/ip6_sv_reass.c @@ -28,12 +28,13 @@ #include <vnet/ip/reass/ip6_sv_reass.h> #include <vnet/ip/ip6_inlines.h> -#define MSEC_PER_SEC 1000 +#define MSEC_PER_SEC 1000 #define IP6_SV_REASS_TIMEOUT_DEFAULT_MS 100 -#define IP6_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default -#define IP6_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024 +#define IP6_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS \ + 10000 // 10 seconds default +#define IP6_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024 #define IP6_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3 -#define IP6_SV_REASS_HT_LOAD_FACTOR (0.75) +#define IP6_SV_REASS_HT_LOAD_FACTOR (0.75) typedef enum { @@ -94,17 +95,23 @@ typedef struct // buffer indexes of buffers in this reassembly in chronological order - // including overlaps and duplicate fragments u32 *cached_buffers; - // set to true when this reassembly is completed - bool is_complete; - // ip protocol + + bool first_fragment_seen; + bool last_fragment_seen; + + // vnet_buffer data u8 ip_proto; u8 icmp_type_or_tcp_flags; u32 tcp_ack_number; u32 tcp_seq_number; - // l4 src port u16 l4_src_port; - // l4 dst port u16 l4_dst_port; + + // vnet_buffer2 data + u32 total_ip_payload_length; + u32 first_fragment_total_ip_header_length; + u32 first_fragment_clone_bi; + // lru indexes u32 lru_prev; u32 lru_next; @@ -142,18 +149,21 @@ typedef struct vlib_main_t *vlib_main; vnet_main_t *vnet_main; - // node index of ip6-drop node - u32 ip6_drop_idx; - u32 ip6_icmp_error_idx; u32 ip6_sv_reass_expire_node_idx; /** Worker handoff */ u32 fq_index; u32 fq_feature_index; + u32 fq_output_feature_index; u32 fq_custom_context_index; // reference count for enabling/disabling feature - per interface u32 *feature_use_refcount_per_intf; + // reference count for enabling/disabling output feature - per interface + u32 *output_feature_use_refcount_per_intf; + + // extended reassembly refcount - see ip6_sv_reass_enable_disable_extended() + u32 extended_refcount; } ip6_sv_reass_main_t; extern ip6_sv_reass_main_t ip6_sv_reass_main; @@ -174,7 +184,8 @@ typedef enum typedef enum { REASS_FRAGMENT_CACHE, - REASS_FINISH, + REASS_FIRST_FRAG, + REASS_LAST_FRAG, REASS_FRAGMENT_FORWARD, REASS_PASSTHROUGH, } ip6_sv_reass_trace_operation_e; @@ -190,7 +201,7 @@ typedef struct } ip6_sv_reass_trace_t; static u8 * -format_ip6_sv_reass_trace (u8 * s, va_list * args) +format_ip6_sv_reass_trace (u8 *s, va_list *args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); @@ -204,17 +215,19 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args) case REASS_FRAGMENT_CACHE: s = format (s, "[cached]"); break; - case REASS_FINISH: + case REASS_FIRST_FRAG: s = - format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]", + format (s, "[first-frag-seen, ip proto=%u, src_port=%u, dst_port=%u]", t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), clib_net_to_host_u16 (t->l4_dst_port)); break; + case REASS_LAST_FRAG: + s = format (s, "[last-frag-seen]"); + break; case REASS_FRAGMENT_FORWARD: - s = - format (s, "[forward, ip 
proto=%u, src_port=%u, dst_port=%u]", - t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), - clib_net_to_host_u16 (t->l4_dst_port)); + s = format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]", + t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), + clib_net_to_host_u16 (t->l4_dst_port)); break; case REASS_PASSTHROUGH: s = format (s, "[not fragmented or atomic fragment]"); @@ -224,14 +237,14 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args) } static void -ip6_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node, - ip6_sv_reass_t * reass, u32 bi, - ip6_sv_reass_trace_operation_e action, - u32 ip_proto, u16 l4_src_port, u16 l4_dst_port) +ip6_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, + ip6_sv_reass_t *reass, u32 bi, + ip6_sv_reass_trace_operation_e action, u32 ip_proto, + u16 l4_src_port, u16 l4_dst_port) { vlib_buffer_t *b = vlib_get_buffer (vm, bi); - if (pool_is_free_index - (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b))) + if (pool_is_free_index (vm->trace_main.trace_buffer_pool, + vlib_buffer_get_trace_index (b))) { // this buffer's trace is gone b->flags &= ~VLIB_BUFFER_IS_TRACED; @@ -258,31 +271,35 @@ ip6_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node, } always_inline void -ip6_sv_reass_free (vlib_main_t * vm, ip6_sv_reass_main_t * rm, - ip6_sv_reass_per_thread_t * rt, ip6_sv_reass_t * reass) +ip6_sv_reass_free (vlib_main_t *vm, ip6_sv_reass_main_t *rm, + ip6_sv_reass_per_thread_t *rt, ip6_sv_reass_t *reass, + bool del_bihash) { - clib_bihash_kv_48_8_t kv; - kv.key[0] = reass->key.as_u64[0]; - kv.key[1] = reass->key.as_u64[1]; - kv.key[2] = reass->key.as_u64[2]; - kv.key[3] = reass->key.as_u64[3]; - kv.key[4] = reass->key.as_u64[4]; - kv.key[5] = reass->key.as_u64[5]; - clib_bihash_add_del_48_8 (&rm->hash, &kv, 0); + if (del_bihash) + { + clib_bihash_kv_48_8_t kv; + kv.key[0] = reass->key.as_u64[0]; + kv.key[1] = reass->key.as_u64[1]; + kv.key[2] = reass->key.as_u64[2]; + kv.key[3] = reass->key.as_u64[3]; + kv.key[4] = reass->key.as_u64[4]; + kv.key[5] = reass->key.as_u64[5]; + clib_bihash_add_del_48_8 (&rm->hash, &kv, 0); + } vlib_buffer_free (vm, reass->cached_buffers, vec_len (reass->cached_buffers)); vec_free (reass->cached_buffers); reass->cached_buffers = NULL; + if (~0 != reass->first_fragment_clone_bi) + vlib_buffer_free_one (vm, reass->first_fragment_clone_bi); if (~0 != reass->lru_prev) { - ip6_sv_reass_t *lru_prev = - pool_elt_at_index (rt->pool, reass->lru_prev); + ip6_sv_reass_t *lru_prev = pool_elt_at_index (rt->pool, reass->lru_prev); lru_prev->lru_next = reass->lru_next; } if (~0 != reass->lru_next) { - ip6_sv_reass_t *lru_next = - pool_elt_at_index (rt->pool, reass->lru_next); + ip6_sv_reass_t *lru_next = pool_elt_at_index (rt->pool, reass->lru_next); lru_next->lru_prev = reass->lru_prev; } if (rt->lru_first == reass - rt->pool) @@ -297,13 +314,6 @@ ip6_sv_reass_free (vlib_main_t * vm, ip6_sv_reass_main_t * rm, --rt->reass_n; } -always_inline void -ip6_sv_reass_init (ip6_sv_reass_t * reass) -{ - reass->cached_buffers = NULL; - reass->is_complete = false; -} - always_inline ip6_sv_reass_t * ip6_sv_reass_find_or_create (vlib_main_t *vm, ip6_sv_reass_main_t *rm, ip6_sv_reass_per_thread_t *rt, @@ -325,7 +335,7 @@ again: if (now > reass->last_heard + rm->timeout) { - ip6_sv_reass_free (vm, rm, rt, reass); + ip6_sv_reass_free (vm, rm, rt, reass, true); reass = NULL; } } @@ -336,19 +346,17 @@ again: return reass; } - if (rt->reass_n >= rm->max_reass_n) + if (rt->reass_n >= rm->max_reass_n && 
rm->max_reass_n) { reass = pool_elt_at_index (rt->pool, rt->lru_first); - ip6_sv_reass_free (vm, rm, rt, reass); + ip6_sv_reass_free (vm, rm, rt, reass, true); } - pool_get (rt->pool, reass); - clib_memset (reass, 0, sizeof (*reass)); + pool_get_zero (rt->pool, reass); + reass->first_fragment_clone_bi = ~0; reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter; ++rt->id_counter; - ip6_sv_reass_init (reass); ++rt->reass_n; - reass->lru_prev = reass->lru_next = ~0; if (~0 != rt->lru_last) @@ -376,7 +384,7 @@ again: int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2); if (rv) { - ip6_sv_reass_free (vm, rm, rt, reass); + ip6_sv_reass_free (vm, rm, rt, reass, false); reass = NULL; // if other worker created a context already work with the other copy if (-2 == rv) @@ -386,10 +394,23 @@ again: return reass; } +always_inline bool +ip6_sv_reass_is_complete (ip6_sv_reass_t *reass, bool extended) +{ + /* + * Both first and last fragments have to be seen for extended reassembly to + * be complete. Otherwise first fragment is enough. + */ + if (extended) + return reass->first_fragment_seen && reass->last_fragment_seen; + + return reass->first_fragment_seen; +} + always_inline ip6_sv_reass_rc_t ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, ip6_sv_reass_main_t *rm, ip6_sv_reass_t *reass, u32 bi0, - ip6_frag_hdr_t *frag_hdr) + ip6_frag_hdr_t *frag_hdr, bool extended) { vlib_buffer_t *fb = vlib_get_buffer (vm, bi0); vnet_buffer_opaque_t *fvnb = vnet_buffer (fb); @@ -417,26 +438,51 @@ ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, fvnb->ip.reass.range_first = fragment_first; fvnb->ip.reass.range_last = fragment_last; fvnb->ip.reass.next_range_bi = ~0; + void *l4_hdr = NULL; if (0 == fragment_first) { - if (!ip6_get_port - (vm, fb, fip, fb->current_length, &reass->ip_proto, - &reass->l4_src_port, &reass->l4_dst_port, - &reass->icmp_type_or_tcp_flags, &reass->tcp_ack_number, - &reass->tcp_seq_number)) + if (!ip6_get_port (vm, fb, fip, fb->current_length, &reass->ip_proto, + &reass->l4_src_port, &reass->l4_dst_port, + &reass->icmp_type_or_tcp_flags, + &reass->tcp_ack_number, &reass->tcp_seq_number, + &l4_hdr)) return IP6_SV_REASS_RC_UNSUPP_IP_PROTO; - reass->is_complete = true; + reass->first_fragment_seen = true; + if (extended) + { + reass->first_fragment_total_ip_header_length = + (u8 *) l4_hdr - (u8 *) fip; + vlib_buffer_t *clone = vlib_buffer_copy_no_chain ( + vm, fb, &reass->first_fragment_clone_bi); + if (!clone) + reass->first_fragment_clone_bi = ~0; + } + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { - ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_FINISH, + ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_FIRST_FRAG, reass->ip_proto, reass->l4_src_port, reass->l4_dst_port); } } + + if (!ip6_frag_hdr_more (frag_hdr)) + { + reass->last_fragment_seen = true; + reass->total_ip_payload_length = fragment_last - 1; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_LAST_FRAG, ~0, + ~0, ~0); + } + } + vec_add1 (reass->cached_buffers, bi0); - if (!reass->is_complete) + + if (!ip6_sv_reass_is_complete (reass, extended)) { if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED)) { @@ -453,8 +499,7 @@ ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, } always_inline bool -ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t *node, - vlib_buffer_t *b, 
+ip6_sv_reass_verify_upper_layer_present (vlib_buffer_t *b, ip6_ext_hdr_chain_t *hc) { int nh = hc->eh[hc->length - 1].protocol; @@ -464,16 +509,14 @@ ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t *node, icmp6_error_set_vnet_buffer ( b, ICMP6_parameter_problem, ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0); - b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER]; return false; } return true; } always_inline bool -ip6_sv_reass_verify_fragment_multiple_8 (vlib_main_t * vm, - vlib_buffer_t * b, - ip6_frag_hdr_t * frag_hdr) +ip6_sv_reass_verify_fragment_multiple_8 (vlib_main_t *vm, vlib_buffer_t *b, + ip6_frag_hdr_t *frag_hdr) { vnet_buffer_opaque_t *vnb = vnet_buffer (b); ip6_header_t *ip = vlib_buffer_get_current (b); @@ -483,18 +526,18 @@ ip6_sv_reass_verify_fragment_multiple_8 (vlib_main_t * vm, (vnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr)); if (more_fragments && 0 != fragment_length % 8) { - icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem, - ICMP6_parameter_problem_erroneous_header_field, - (u8 *) & ip->payload_length - (u8 *) ip); + icmp6_error_set_vnet_buffer ( + b, ICMP6_parameter_problem, + ICMP6_parameter_problem_erroneous_header_field, + (u8 *) &ip->payload_length - (u8 *) ip); return false; } return true; } always_inline bool -ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm, - vlib_buffer_t * b, - ip6_frag_hdr_t * frag_hdr) +ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t *vm, vlib_buffer_t *b, + ip6_frag_hdr_t *frag_hdr) { vnet_buffer_opaque_t *vnb = vnet_buffer (b); u32 fragment_first = ip6_frag_hdr_offset_bytes (frag_hdr); @@ -504,26 +547,52 @@ ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm, if (fragment_first + fragment_length > 65535) { ip6_header_t *ip0 = vlib_buffer_get_current (b); - icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem, - ICMP6_parameter_problem_erroneous_header_field, - (u8 *) & frag_hdr->fragment_offset_and_more - - (u8 *) ip0); + icmp6_error_set_vnet_buffer ( + b, ICMP6_parameter_problem, + ICMP6_parameter_problem_erroneous_header_field, + (u8 *) &frag_hdr->fragment_offset_and_more - (u8 *) ip0); return false; } return true; } +always_inline void +ip6_sv_reass_reset_vnet_buffer2 (vlib_buffer_t *b) +{ + vnet_buffer2 (b)->ip.reass.pool_index = ~0; + vnet_buffer2 (b)->ip.reass.thread_index = ~0; + vnet_buffer2 (b)->ip.reass.id = ~0; +} + +always_inline void +ip6_sv_reass_set_vnet_buffer2_from_reass (vlib_main_t *vm, vlib_buffer_t *b, + ip6_sv_reass_t *reass) +{ + vnet_buffer2 (b)->ip.reass.thread_index = vm->thread_index; + vnet_buffer2 (b)->ip.reass.id = reass->id; + vnet_buffer2 (b)->ip.reass.pool_index = + reass - ip6_sv_reass_main.per_thread_data[vm->thread_index].pool; +} + +struct ip6_sv_reass_args +{ + bool is_feature; + bool is_output_feature; + bool custom_next; + bool custom_context; + bool extended; +}; + always_inline uword ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *frame, bool is_feature, - bool custom_next, bool custom_context) + vlib_frame_t *frame, struct ip6_sv_reass_args a) { u32 *from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index; ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; ip6_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index]; u32 *context; - if (custom_context) + if (a.custom_context) context = vlib_frame_aux_args (frame); clib_spinlock_lock (&rt->lock); @@ -533,7 +602,7 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t 
*node, while (n_left_from > 0) { - if (custom_context) + if (a.custom_context) vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next, to_next_aux, n_left_to_next); else @@ -549,7 +618,11 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, bi0 = from[0]; b0 = vlib_get_buffer (vm, bi0); - ip6_header_t *ip0 = vlib_buffer_get_current (b0); + ip6_header_t *ip0 = (ip6_header_t *) u8_ptr_add ( + vlib_buffer_get_current (b0), + (ptrdiff_t) (a.is_output_feature ? 1 : 0) * + vnet_buffer (b0)->ip.save_rewrite_length); + ip6_frag_hdr_t *frag_hdr; ip6_ext_hdr_chain_t hdr_chain; bool is_atomic_fragment = false; @@ -569,24 +642,29 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION || is_atomic_fragment) { - // this is a regular unfragmented packet or an atomic fragment - if (!ip6_get_port - (vm, b0, ip0, b0->current_length, - &(vnet_buffer (b0)->ip.reass.ip_proto), - &(vnet_buffer (b0)->ip.reass.l4_src_port), - &(vnet_buffer (b0)->ip.reass.l4_dst_port), - &(vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags), - &(vnet_buffer (b0)->ip.reass.tcp_ack_number), - &(vnet_buffer (b0)->ip.reass.tcp_seq_number))) + void *l4_hdr; + // this is a regular unfragmented packet or an atomic + // fragment + if (!ip6_get_port ( + vm, b0, ip0, b0->current_length, + &(vnet_buffer (b0)->ip.reass.ip_proto), + &(vnet_buffer (b0)->ip.reass.l4_src_port), + &(vnet_buffer (b0)->ip.reass.l4_dst_port), + &(vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags), + &(vnet_buffer (b0)->ip.reass.tcp_ack_number), + &(vnet_buffer (b0)->ip.reass.tcp_seq_number), &l4_hdr)) { error0 = IP6_ERROR_REASS_UNSUPP_IP_PROTO; b0->error = node->errors[error0]; next0 = IP6_SV_REASSEMBLY_NEXT_DROP; goto packet_enqueue; } + if (a.extended) + ip6_sv_reass_reset_vnet_buffer2 (b0); + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0; vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; - next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index : - IP6_SV_REASSEMBLY_NEXT_INPUT; + next0 = a.custom_next ? 
vnet_buffer (b0)->ip.reass.next_index : + IP6_SV_REASSEMBLY_NEXT_INPUT; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip6_sv_reass_add_trace ( @@ -604,9 +682,10 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (0 == ip6_frag_hdr_offset (frag_hdr)) { // first fragment - verify upper-layer is present - if (!ip6_sv_reass_verify_upper_layer_present (node, b0, - &hdr_chain)) + if (!ip6_sv_reass_verify_upper_layer_present (b0, &hdr_chain)) { + error0 = IP6_ERROR_REASS_MISSING_UPPER; + b0->error = node->errors[error0]; next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR; goto packet_enqueue; } @@ -614,6 +693,8 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (!ip6_sv_reass_verify_fragment_multiple_8 (vm, b0, frag_hdr) || !ip6_sv_reass_verify_packet_size_lt_64k (vm, b0, frag_hdr)) { + error0 = IP6_ERROR_REASS_INVALID_FRAG_LEN; + b0->error = node->errors[error0]; next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR; goto packet_enqueue; } @@ -625,7 +706,7 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, kv.k.as_u64[1] = ip0->src_address.as_u64[1]; kv.k.as_u64[2] = ip0->dst_address.as_u64[0]; kv.k.as_u64[3] = ip0->dst_address.as_u64[1]; - if (custom_context) + if (a.custom_context) kv.k.as_u64[4] = (u64) *context << 32 | (u64) frag_hdr->identification; else @@ -644,7 +725,7 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, next0 = IP6_SV_REASSEMBLY_NEXT_HANDOFF; vnet_buffer (b0)->ip.reass.owner_thread_index = kv.v.thread_index; - if (custom_context) + if (a.custom_context) forward_context = 1; goto packet_enqueue; } @@ -657,10 +738,11 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, goto packet_enqueue; } - if (reass->is_complete) + if (ip6_sv_reass_is_complete (reass, a.extended)) { + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0; vnet_buffer (b0)->ip.reass.is_non_first_fragment = - ! !ip6_frag_hdr_offset (frag_hdr); + !!ip6_frag_hdr_offset (frag_hdr); vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = reass->icmp_type_or_tcp_flags; @@ -670,8 +752,12 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; - next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index : - IP6_SV_REASSEMBLY_NEXT_INPUT; + + if (a.extended) + ip6_sv_reass_set_vnet_buffer2_from_reass (vm, b0, reass); + + next0 = a.custom_next ? 
vnet_buffer (b0)->ip.reass.next_index : + IP6_SV_REASSEMBLY_NEXT_INPUT; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip6_sv_reass_add_trace ( @@ -682,7 +768,8 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } u32 counter = ~0; - switch (ip6_sv_reass_update (vm, node, rm, reass, bi0, frag_hdr)) + switch (ip6_sv_reass_update (vm, node, rm, reass, bi0, frag_hdr, + a.extended)) { case IP6_SV_REASS_RC_OK: /* nothing to do here */ @@ -703,55 +790,57 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (~0 != counter) { vlib_node_increment_counter (vm, node->node_index, counter, 1); - ip6_sv_reass_free (vm, rm, rt, reass); + ip6_sv_reass_free (vm, rm, rt, reass, true); goto next_packet; } - if (reass->is_complete) + if (ip6_sv_reass_is_complete (reass, a.extended)) { u32 idx; vec_foreach_index (idx, reass->cached_buffers) - { - u32 bi0 = vec_elt (reass->cached_buffers, idx); - if (0 == n_left_to_next) - { - vlib_put_next_frame (vm, node, next_index, - n_left_to_next); - vlib_get_next_frame (vm, node, next_index, to_next, - n_left_to_next); - } - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - b0 = vlib_get_buffer (vm, bi0); - if (is_feature) - { - vnet_feature_next (&next0, b0); - } - frag_hdr = - vlib_buffer_get_current (b0) + - vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset; - vnet_buffer (b0)->ip.reass.is_non_first_fragment = - ! !ip6_frag_hdr_offset (frag_hdr); - vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; - vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - reass->icmp_type_or_tcp_flags; - vnet_buffer (b0)->ip.reass.tcp_ack_number = - reass->tcp_ack_number; - vnet_buffer (b0)->ip.reass.tcp_seq_number = - reass->tcp_seq_number; - vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; - vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - ip6_sv_reass_add_trace ( - vm, node, reass, bi0, REASS_FRAGMENT_FORWARD, - reass->ip_proto, reass->l4_src_port, reass->l4_dst_port); - } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, bi0, - next0); - } + { + u32 bi0 = vec_elt (reass->cached_buffers, idx); + if (0 == n_left_to_next) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + if (a.is_feature || a.is_output_feature) + { + vnet_feature_next (&next0, b0); + } + frag_hdr = vlib_buffer_get_current (b0) + + vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset; + vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = + !!ip6_frag_hdr_offset (frag_hdr); + vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + reass->icmp_type_or_tcp_flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + reass->tcp_ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + reass->tcp_seq_number; + vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; + vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; + if (a.extended) + ip6_sv_reass_set_vnet_buffer2_from_reass (vm, b0, reass); + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip6_sv_reass_add_trace ( + vm, node, reass, bi0, REASS_FRAGMENT_FORWARD, + reass->ip_proto, reass->l4_src_port, + reass->l4_dst_port); + } + vlib_validate_buffer_enqueue_x1 ( + vm, node, 
next_index, to_next, n_left_to_next, bi0, next0); + } vec_set_len (reass->cached_buffers, 0); // buffers are owned by frame now } @@ -761,12 +850,14 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, to_next[0] = bi0; to_next += 1; n_left_to_next -= 1; - if (is_feature && IP6_ERROR_NONE == error0) + if ((a.is_feature || a.is_output_feature) && + IP6_ERROR_NONE == error0 && + IP6_SV_REASSEMBLY_NEXT_HANDOFF != next0) { b0 = vlib_get_buffer (vm, bi0); vnet_feature_next (&next0, b0); } - if (custom_context && forward_context) + if (a.custom_context && forward_context) { if (to_next_aux) { @@ -783,7 +874,7 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, next_packet: from += 1; - if (custom_context) + if (a.custom_context) context += 1; n_left_from -= 1; } @@ -795,13 +886,20 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, return frame->n_vectors; } -VLIB_NODE_FN (ip6_sv_reass_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip6_sv_reass_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */, - false /* custom next */, - false /* custom context */); + /* + * Extended reassembly not supported for non-feature nodes. + */ + return ip6_sv_reassembly_inline (vm, node, frame, + (struct ip6_sv_reass_args){ + .is_feature = false, + .is_output_feature = false, + .custom_context = false, + .custom_next = false, + .extended = false, + }); } VLIB_REGISTER_NODE (ip6_sv_reass_node) = { @@ -820,13 +918,26 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node) = { }, }; -VLIB_NODE_FN (ip6_sv_reass_node_feature) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip6_sv_reass_node_feature) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */, - false /* custom next */, - false /* custom context */); + if (ip6_sv_reass_main.extended_refcount > 0) + return ip6_sv_reassembly_inline (vm, node, frame, + (struct ip6_sv_reass_args){ + .is_feature = true, + .is_output_feature = false, + .custom_context = false, + .custom_next = false, + .extended = true, + }); + return ip6_sv_reassembly_inline (vm, node, frame, + (struct ip6_sv_reass_args){ + .is_feature = true, + .is_output_feature = false, + .custom_context = false, + .custom_next = false, + .extended = false, + }); } VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = { @@ -846,18 +957,70 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = { }; VNET_FEATURE_INIT (ip6_sv_reassembly_feature) = { - .arc_name = "ip6-unicast", - .node_name = "ip6-sv-reassembly-feature", - .runs_before = VNET_FEATURES ("ip6-lookup"), - .runs_after = 0, + .arc_name = "ip6-unicast", + .node_name = "ip6-sv-reassembly-feature", + .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_after = 0, +}; + +VLIB_NODE_FN (ip6_sv_reass_node_output_feature) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + if (ip6_sv_reass_main.extended_refcount > 0) + return ip6_sv_reassembly_inline (vm, node, frame, + (struct ip6_sv_reass_args){ + .is_feature = false, + .is_output_feature = true, + .custom_context = false, + .custom_next = false, + .extended = true, + }); + return ip6_sv_reassembly_inline (vm, node, frame, + (struct ip6_sv_reass_args){ + .is_feature = false, + .is_output_feature = true, + .custom_context = false, + .custom_next = false, + .extended = false, + }); +} 
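
The hunks above introduce an "extended" shallow-virtual reassembly mode: when a consumer bumps the extended refcount, the feature nodes cache fragments until both the first and last fragments are seen, keep a clone of the first fragment, and record the reassembly context (thread index, pool index, id) in vnet_buffer2 so it can later be dereferenced through ip4_sv_reass_extended_lock()/ip4_sv_reass_extended_unlock(). The following minimal sketch, not part of this patch, shows how a hypothetical consumer might use the new IPv4 API; only the ip4_sv_reass_* calls, struct ip4_sv_lock_unlock_args and vlib_get_buffer() come from the tree as changed above, while my_feature_enable() and my_node_use_extended_data() are illustrative names.

#include <vnet/ip/reass/ip4_sv_reass.h>

/* Hypothetical plugin hook: enable SV reassembly on an interface and switch
 * the feature nodes to the extended code path. */
static int
my_feature_enable (u32 sw_if_index)
{
  int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
  if (rv)
    return rv;
  ip4_sv_reass_enable_disable_extended (true /* is_enable */);
  return 0;
}

/* Hypothetical node helper: fetch the data filled in by extended reassembly
 * for a buffer that traversed ip4-sv-reassembly-feature. */
static void
my_node_use_extended_data (vlib_main_t *vm, vlib_buffer_t *b)
{
  u32 total_ip_payload_length, first_bi, first_ip_hdr_len;
  struct ip4_sv_lock_unlock_args args = {
    .total_ip_payload_length = &total_ip_payload_length,
    .first_fragment_buffer_index = &first_bi,
    .first_fragment_total_ip_header_length = &first_ip_hdr_len,
  };

  /* Returns 0 with the per-thread spinlock held; -1 if the buffer does not
   * reference a live reassembly context (e.g. an unfragmented packet). */
  if (0 != ip4_sv_reass_extended_lock (b, &args))
    return;

  if (~0 != first_bi) /* the clone may be absent if buffer copy failed */
    {
      vlib_buffer_t *first_frag = vlib_get_buffer (vm, first_bi);
      /* ... inspect first_frag, total_ip_payload_length and
       * first_ip_hdr_len while the lock is held ... */
      (void) first_frag;
    }

  ip4_sv_reass_extended_unlock (b);
}

Note that the lock is taken only on a zero return, so every successful ip4_sv_reass_extended_lock() must be paired with ip4_sv_reass_extended_unlock(); the cloned first-fragment buffer stays owned by the reassembly context and is released when the reassembly is freed, so the consumer should not free it.
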
+ +VLIB_REGISTER_NODE (ip6_sv_reass_node_output_feature) = { + .name = "ip6-sv-reassembly-output-feature", + .vector_size = sizeof (u32), + .format_trace = format_ip6_sv_reass_trace, + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, + .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT, + .next_nodes = + { + [IP6_SV_REASSEMBLY_NEXT_INPUT] = "ip6-input", + [IP6_SV_REASSEMBLY_NEXT_DROP] = "ip6-drop", + [IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error", + [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reass-output-feature-hoff", + }, +}; + +VNET_FEATURE_INIT (ip6_sv_reassembly_output_feature) = { + .arc_name = "ip6-output", + .node_name = "ip6-sv-reassembly-output-feature", + .runs_after = 0, }; VLIB_NODE_FN (ip6_sv_reass_custom_context_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */, - true /* custom next */, - true /* custom context */); + /* + * Extended reassembly not supported for non-feature nodes. + */ + return ip6_sv_reassembly_inline (vm, node, frame, + (struct ip6_sv_reass_args){ + .is_feature = false, + .is_output_feature = false, + .custom_context = true, + .custom_next = true, + .extended = false, + }); } VLIB_REGISTER_NODE (ip6_sv_reass_custom_context_node) = { @@ -909,7 +1072,7 @@ typedef struct } ip6_rehash_cb_ctx; static int -ip6_rehash_cb (clib_bihash_kv_48_8_t * kv, void *_ctx) +ip6_rehash_cb (clib_bihash_kv_48_8_t *kv, void *_ctx) { ip6_rehash_cb_ctx *ctx = _ctx; if (clib_bihash_add_del_48_8 (ctx->new_hash, kv, 1)) @@ -936,8 +1099,8 @@ ip6_sv_reass_set (u32 timeout_ms, u32 max_reassemblies, u32 max_reassembly_length, u32 expire_walk_interval_ms) { u32 old_nbuckets = ip6_sv_reass_get_nbuckets (); - ip6_sv_reass_set_params (timeout_ms, max_reassemblies, - max_reassembly_length, expire_walk_interval_ms); + ip6_sv_reass_set_params (timeout_ms, max_reassemblies, max_reassembly_length, + expire_walk_interval_ms); vlib_process_signal_event (ip6_sv_reass_main.vlib_main, ip6_sv_reass_main.ip6_sv_reass_expire_node_idx, IP6_EVENT_CONFIG_CHANGED, 0); @@ -950,7 +1113,7 @@ ip6_sv_reass_set (u32 timeout_ms, u32 max_reassemblies, ctx.failure = 0; ctx.new_hash = &new_hash; clib_bihash_init_48_8 (&new_hash, "ip6-sv-reass", new_nbuckets, - new_nbuckets * 1024); + (uword) new_nbuckets * 1024); clib_bihash_foreach_key_value_pair_48_8 (&ip6_sv_reass_main.hash, ip6_rehash_cb, &ctx); if (ctx.failure) @@ -970,8 +1133,8 @@ ip6_sv_reass_set (u32 timeout_ms, u32 max_reassemblies, } vnet_api_error_t -ip6_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies, - u32 * max_reassembly_length, u32 * expire_walk_interval_ms) +ip6_sv_reass_get (u32 *timeout_ms, u32 *max_reassemblies, + u32 *max_reassembly_length, u32 *expire_walk_interval_ms) { *timeout_ms = ip6_sv_reass_main.timeout_ms; *max_reassemblies = ip6_sv_reass_main.max_reass_n; @@ -981,7 +1144,7 @@ ip6_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies, } static clib_error_t * -ip6_sv_reass_init_function (vlib_main_t * vm) +ip6_sv_reass_init_function (vlib_main_t *vm) { ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; clib_error_t *error = 0; @@ -994,11 +1157,11 @@ ip6_sv_reass_init_function (vlib_main_t * vm) vec_validate (rm->per_thread_data, vlib_num_workers ()); ip6_sv_reass_per_thread_t *rt; vec_foreach (rt, rm->per_thread_data) - { - clib_spinlock_init (&rt->lock); - pool_alloc (rt->pool, rm->max_reass_n); - rt->lru_first = rt->lru_last = ~0; - } + { + clib_spinlock_init (&rt->lock); + pool_alloc (rt->pool, rm->max_reass_n); + 
rt->lru_first = rt->lru_last = ~0; + } node = vlib_get_node_by_name (vm, (u8 *) "ip6-sv-reassembly-expire-walk"); ASSERT (node); @@ -1011,14 +1174,7 @@ ip6_sv_reass_init_function (vlib_main_t * vm) nbuckets = ip6_sv_reass_get_nbuckets (); clib_bihash_init_48_8 (&rm->hash, "ip6-sv-reass", nbuckets, - nbuckets * 1024); - - node = vlib_get_node_by_name (vm, (u8 *) "ip6-drop"); - ASSERT (node); - rm->ip6_drop_idx = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "ip6-icmp-error"); - ASSERT (node); - rm->ip6_icmp_error_idx = node->index; + (uword) nbuckets * 1024); if ((error = vlib_call_init_function (vm, ip_main_init))) return error; @@ -1026,6 +1182,8 @@ ip6_sv_reass_init_function (vlib_main_t * vm) rm->fq_index = vlib_frame_queue_main_init (ip6_sv_reass_node.index, 0); rm->fq_feature_index = vlib_frame_queue_main_init (ip6_sv_reass_node_feature.index, 0); + rm->fq_output_feature_index = + vlib_frame_queue_main_init (ip6_sv_reass_node_output_feature.index, 0); rm->fq_custom_context_index = vlib_frame_queue_main_init (ip6_sv_reass_custom_context_node.index, 0); @@ -1047,9 +1205,8 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm, while (true) { - vlib_process_wait_for_event_or_clock (vm, - (f64) rm->expire_walk_interval_ms - / (f64) MSEC_PER_SEC); + vlib_process_wait_for_event_or_clock ( + vm, (f64) rm->expire_walk_interval_ms / (f64) MSEC_PER_SEC); event_type = vlib_process_get_events (vm, &event_data); switch (event_type) @@ -1078,19 +1235,20 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm, clib_spinlock_lock (&rt->lock); vec_reset_length (pool_indexes_to_free); - pool_foreach_index (index, rt->pool) { - reass = pool_elt_at_index (rt->pool, index); - if (now > reass->last_heard + rm->timeout) - { - vec_add1 (pool_indexes_to_free, index); - } - } + pool_foreach_index (index, rt->pool) + { + reass = pool_elt_at_index (rt->pool, index); + if (now > reass->last_heard + rm->timeout) + { + vec_add1 (pool_indexes_to_free, index); + } + } int *i; - vec_foreach (i, pool_indexes_to_free) - { - ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); - ip6_sv_reass_free (vm, rm, rt, reass); - } + vec_foreach (i, pool_indexes_to_free) + { + ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); + ip6_sv_reass_free (vm, rm, rt, reass, true); + } clib_spinlock_unlock (&rt->lock); } @@ -1116,7 +1274,7 @@ VLIB_REGISTER_NODE (ip6_sv_reass_expire_node) = { }; static u8 * -format_ip6_sv_reass_key (u8 * s, va_list * args) +format_ip6_sv_reass_key (u8 *s, va_list *args) { ip6_sv_reass_key_t *key = va_arg (*args, ip6_sv_reass_key_t *); s = @@ -1127,35 +1285,34 @@ format_ip6_sv_reass_key (u8 * s, va_list * args) } static u8 * -format_ip6_sv_reass (u8 * s, va_list * args) +format_ip6_sv_reass (u8 *s, va_list *args) { vlib_main_t *vm = va_arg (*args, vlib_main_t *); ip6_sv_reass_t *reass = va_arg (*args, ip6_sv_reass_t *); - s = format (s, "ID: %lu, key: %U, trace_op_counter: %u\n", - reass->id, format_ip6_sv_reass_key, &reass->key, - reass->trace_op_counter); + s = format (s, "ID: %lu, key: %U, trace_op_counter: %u\n", reass->id, + format_ip6_sv_reass_key, &reass->key, reass->trace_op_counter); vlib_buffer_t *b; u32 *bip; u32 counter = 0; vec_foreach (bip, reass->cached_buffers) - { - u32 bi = *bip; - do - { - b = vlib_get_buffer (vm, bi); - s = format (s, " #%03u: bi: %u\n", counter, bi); - ++counter; - bi = b->next_buffer; - } - while (b->flags & VLIB_BUFFER_NEXT_PRESENT); - } + { + u32 bi = *bip; + do + { + b = vlib_get_buffer (vm, bi); + s = format (s, " #%03u: bi: %u\n", counter, bi); + ++counter; + 
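	  /* follow the buffer chain; next_buffer is only meaningful while
	     VLIB_BUFFER_NEXT_PRESENT is set, which the do/while below checks */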
bi = b->next_buffer; + } + while (b->flags & VLIB_BUFFER_NEXT_PRESENT); + } return s; } static clib_error_t * -show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input, - CLIB_UNUSED (vlib_cli_command_t * lmd)) +show_ip6_sv_reass (vlib_main_t *vm, unformat_input_t *input, + CLIB_UNUSED (vlib_cli_command_t *lmd)) { ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; @@ -1179,9 +1336,10 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input, clib_spinlock_lock (&rt->lock); if (details) { - pool_foreach (reass, rt->pool) { - vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass); - } + pool_foreach (reass, rt->pool) + { + vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass); + } } sum_reass_n += rt->reass_n; clib_spinlock_unlock (&rt->lock); @@ -1190,90 +1348,93 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "Current IP6 reassemblies count: %lu\n", (long unsigned) sum_reass_n); vlib_cli_output (vm, - "Maximum configured concurrent shallow virtual IP6 reassemblies per worker-thread: %lu\n", + "Maximum configured concurrent shallow virtual IP6 " + "reassemblies per worker-thread: %lu\n", (long unsigned) rm->max_reass_n); vlib_cli_output (vm, "Maximum configured amount of fragments per shallow " "virtual IP6 reassembly: %lu\n", (long unsigned) rm->max_reass_len); + vlib_cli_output ( + vm, "Maximum configured shallow virtual IP6 reassembly timeout: %lums\n", + (long unsigned) rm->timeout_ms); vlib_cli_output (vm, - "Maximum configured shallow virtual IP6 reassembly timeout: %lums\n", - (long unsigned) rm->timeout_ms); - vlib_cli_output (vm, - "Maximum configured shallow virtual IP6 reassembly expire walk interval: %lums\n", + "Maximum configured shallow virtual IP6 reassembly expire " + "walk interval: %lums\n", (long unsigned) rm->expire_walk_interval_ms); - vlib_cli_output (vm, "Buffers in use: %lu\n", - (long unsigned) sum_buffers_n); + vlib_cli_output (vm, "Buffers in use: %lu\n", (long unsigned) sum_buffers_n); return 0; } VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = { - .path = "show ip6-sv-reassembly", - .short_help = "show ip6-sv-reassembly [details]", - .function = show_ip6_sv_reass, + .path = "show ip6-sv-reassembly", + .short_help = "show ip6-sv-reassembly [details]", + .function = show_ip6_sv_reass, }; #ifndef CLIB_MARCH_VARIANT vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable) { - return ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, - enable_disable); + return ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, enable_disable); } #endif /* CLIB_MARCH_VARIANT */ -#define foreach_ip6_sv_reassembly_handoff_error \ -_(CONGESTION_DROP, "congestion drop") - +#define foreach_ip6_sv_reassembly_handoff_error \ + _ (CONGESTION_DROP, "congestion drop") typedef enum { -#define _(sym,str) IP6_SV_REASSEMBLY_HANDOFF_ERROR_##sym, +#define _(sym, str) IP6_SV_REASSEMBLY_HANDOFF_ERROR_##sym, foreach_ip6_sv_reassembly_handoff_error #undef _ IP6_SV_REASSEMBLY_HANDOFF_N_ERROR, } ip6_sv_reassembly_handoff_error_t; static char *ip6_sv_reassembly_handoff_error_strings[] = { -#define _(sym,string) string, +#define _(sym, string) string, foreach_ip6_sv_reassembly_handoff_error #undef _ }; typedef struct { - u32 next_worker_index; + u32 thread_index; } ip6_sv_reassembly_handoff_trace_t; static u8 * -format_ip6_sv_reassembly_handoff_trace (u8 * s, va_list * args) +format_ip6_sv_reassembly_handoff_trace (u8 *s, va_list *args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED 
(vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip6_sv_reassembly_handoff_trace_t *t = va_arg (*args, ip6_sv_reassembly_handoff_trace_t *); - s = - format (s, "ip6-sv-reassembly-handoff: next-worker %d", - t->next_worker_index); + s = format (s, "to thread-index: %u", t->thread_index); return s; } +struct ip6_sv_reass_hoff_args +{ + bool is_feature; + bool is_output_feature; + bool custom_context; +}; + always_inline uword ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *frame, bool is_feature, - bool custom_context) + vlib_frame_t *frame, + struct ip6_sv_reass_hoff_args a) { ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u32 n_enq, n_left_from, *from, *context; u16 thread_indices[VLIB_FRAME_SIZE], *ti; - u32 fq_index; from = vlib_frame_vector_args (frame); - if (custom_context) + if (a.custom_context) context = vlib_frame_aux_args (frame); n_left_from = frame->n_vectors; vlib_get_buffers (vm, from, bufs, n_left_from); @@ -1281,28 +1442,28 @@ ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node, b = bufs; ti = thread_indices; - fq_index = (is_feature) ? - rm->fq_feature_index : - (custom_context ? rm->fq_custom_context_index : rm->fq_index); + const u32 fq_index = a.is_output_feature ? rm->fq_output_feature_index : + a.is_feature ? rm->fq_feature_index : + a.custom_context ? rm->fq_custom_context_index : + rm->fq_index; while (n_left_from > 0) { ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index; - if (PREDICT_FALSE - ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b[0]->flags & VLIB_BUFFER_IS_TRACED))) + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b[0]->flags & VLIB_BUFFER_IS_TRACED))) { ip6_sv_reassembly_handoff_trace_t *t = vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->next_worker_index = ti[0]; + t->thread_index = ti[0]; } n_left_from -= 1; ti += 1; b += 1; } - if (custom_context) + if (a.custom_context) n_enq = vlib_buffer_enqueue_to_thread_with_aux ( vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1); else @@ -1310,18 +1471,20 @@ ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vm, node, fq_index, from, thread_indices, frame->n_vectors, 1); if (n_enq < frame->n_vectors) - vlib_node_increment_counter (vm, node->node_index, - IP6_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP, - frame->n_vectors - n_enq); + vlib_node_increment_counter ( + vm, node->node_index, IP6_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP, + frame->n_vectors - n_enq); return frame->n_vectors; } -VLIB_NODE_FN (ip6_sv_reassembly_handoff_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip6_sv_reassembly_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { return ip6_sv_reassembly_handoff_inline ( - vm, node, frame, false /* is_feature */, false /* custom_context */); + vm, node, frame, + (struct ip6_sv_reass_hoff_args){ .is_feature = false, + .is_output_feature = false, + .custom_context = false }); } VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = { @@ -1338,15 +1501,16 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = { }, }; - -VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { return ip6_sv_reassembly_handoff_inline ( - vm, node, frame, true /* 
is_feature */, false /* custom_context */); + vm, node, frame, + (struct ip6_sv_reass_hoff_args){ .is_feature = true, + .is_output_feature = false, + .custom_context = false }); } - VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = { .name = "ip6-sv-reass-feature-hoff", .vector_size = sizeof (u32), @@ -1361,11 +1525,38 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = { }, }; +VLIB_NODE_FN (ip6_sv_reassembly_output_feature_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip6_sv_reassembly_handoff_inline ( + vm, node, frame, + (struct ip6_sv_reass_hoff_args){ .is_feature = false, + .is_output_feature = true, + .custom_context = false }); +} + +VLIB_REGISTER_NODE (ip6_sv_reassembly_output_feature_handoff_node) = { + .name = "ip6-sv-reass-output-feature-hoff", + .vector_size = sizeof (u32), + .n_errors = ARRAY_LEN(ip6_sv_reassembly_handoff_error_strings), + .error_strings = ip6_sv_reassembly_handoff_error_strings, + .format_trace = format_ip6_sv_reassembly_handoff_trace, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + VLIB_NODE_FN (ip6_sv_reassembly_custom_context_handoff_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { return ip6_sv_reassembly_handoff_inline ( - vm, node, frame, false /* is_feature */, true /* custom_context */); + vm, node, frame, + (struct ip6_sv_reass_hoff_args){ .is_feature = false, + .is_output_feature = false, + .custom_context = true }); } VLIB_REGISTER_NODE (ip6_sv_reassembly_custom_context_handoff_node) = { @@ -1393,10 +1584,10 @@ ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) { if (!rm->feature_use_refcount_per_intf[sw_if_index]) { - ++rm->feature_use_refcount_per_intf[sw_if_index]; - return vnet_feature_enable_disable ("ip6-unicast", - "ip6-sv-reassembly-feature", - sw_if_index, 1, 0, 0); + int rv = vnet_feature_enable_disable ( + "ip6-unicast", "ip6-sv-reassembly-feature", sw_if_index, 1, 0, 0); + if (0 != rv) + return rv; } ++rm->feature_use_refcount_per_intf[sw_if_index]; } @@ -1404,8 +1595,35 @@ ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) { --rm->feature_use_refcount_per_intf[sw_if_index]; if (!rm->feature_use_refcount_per_intf[sw_if_index]) - return vnet_feature_enable_disable ("ip6-unicast", - "ip6-sv-reassembly-feature", + return vnet_feature_enable_disable ( + "ip6-unicast", "ip6-sv-reassembly-feature", sw_if_index, 0, 0, 0); + } + return 0; +} + +vnet_api_error_t +ip6_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) +{ + ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; + vec_validate (rm->output_feature_use_refcount_per_intf, sw_if_index); + if (is_enable) + { + if (!rm->output_feature_use_refcount_per_intf[sw_if_index]) + { + int rv = vnet_feature_enable_disable ( + "ip6-output", "ip6-sv-reassembly-output-feature", sw_if_index, 1, + 0, 0); + if (0 != rv) + return rv; + } + ++rm->output_feature_use_refcount_per_intf[sw_if_index]; + } + else + { + --rm->output_feature_use_refcount_per_intf[sw_if_index]; + if (!rm->output_feature_use_refcount_per_intf[sw_if_index]) + return vnet_feature_enable_disable ("ip6-output", + "ip6-sv-reassembly-output-feature", sw_if_index, 0, 0, 0); } return 0; @@ -1418,6 +1636,57 @@ ip6_sv_reass_custom_context_register_next_node (uword node_index) vlib_get_main (), ip6_sv_reassembly_custom_context_handoff_node.index, node_index); } + +void +ip6_sv_reass_enable_disable_extended (bool is_enable) +{ + if (is_enable) + 
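    /* reference counted: the reassembly nodes take the extended code path
       while extended_refcount is non-zero */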
++ip6_sv_reass_main.extended_refcount; + else + --ip6_sv_reass_main.extended_refcount; +} + +int +ip6_sv_reass_extended_lock (vlib_buffer_t *b, + struct ip6_sv_lock_unlock_args *a) +{ + ip6_sv_reass_per_thread_t *per_thread = + &ip6_sv_reass_main + .per_thread_data[vnet_buffer2 (b)->ip.reass.thread_index]; + + if (!vec_is_member (ip6_sv_reass_main.per_thread_data, per_thread)) + return -1; + + clib_spinlock_lock (&per_thread->lock); + if (pool_is_free_index (per_thread->pool, + vnet_buffer2 (b)->ip.reass.pool_index)) + goto fail; + + ip6_sv_reass_t *reass = pool_elt_at_index ( + per_thread->pool, vnet_buffer2 (b)->ip.reass.pool_index); + if (vnet_buffer2 (b)->ip.reass.id == reass->id) + { + *a->total_ip_payload_length = reass->total_ip_payload_length; + + *a->first_fragment_buffer_index = reass->first_fragment_clone_bi; + *a->first_fragment_total_ip_header_length = + reass->first_fragment_total_ip_header_length; + return 0; + } + +fail: + clib_spinlock_unlock (&per_thread->lock); + return -1; +} + +void +ip6_sv_reass_extended_unlock (vlib_buffer_t *b) +{ + ip6_sv_reass_per_thread_t *per_thread = + &ip6_sv_reass_main + .per_thread_data[vnet_buffer2 (b)->ip.reass.thread_index]; + clib_spinlock_unlock (&per_thread->lock); +} #endif /* diff --git a/src/vnet/ip/reass/ip6_sv_reass.h b/src/vnet/ip/reass/ip6_sv_reass.h index 7dc9df132dd..9220581ffd3 100644 --- a/src/vnet/ip/reass/ip6_sv_reass.h +++ b/src/vnet/ip/reass/ip6_sv_reass.h @@ -23,6 +23,7 @@ #ifndef __included_ip6_sv_reass_h__ #define __included_ip6_sv_reass_h__ +#include <stdbool.h> #include <vnet/api_errno.h> #include <vnet/vnet.h> @@ -42,6 +43,36 @@ vnet_api_error_t ip6_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies, vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable); +vnet_api_error_t +ip6_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, + int is_enable); + +/* + * Enable or disable extended reassembly. + * + * Extended reassembly means that fragments are cached until both first and + * last fragments are seen. Furthermore, first fragment buffer will be cloned + * and stored in reassembly context for later retrieval. + */ +void ip6_sv_reass_enable_disable_extended (bool is_enable); + +struct ip6_sv_lock_unlock_args +{ + u32 *total_ip_payload_length; + u32 *first_fragment_buffer_index; + u32 *first_fragment_total_ip_header_length; +}; + +/* + * Lock thread-level lock and fetch information from reassembly context. + * Uses vnet_buffer2 data filled by extended reassembly. + * + * Returns 0 on success, -1 otherwise. 
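 *
 * Minimal usage sketch (the local variable names below are illustrative,
 * not part of this change):
 *
 *   u32 ip_payload_len, first_bi, first_ip_hdr_len;
 *   struct ip6_sv_lock_unlock_args args = {
 *     .total_ip_payload_length = &ip_payload_len,
 *     .first_fragment_buffer_index = &first_bi,
 *     .first_fragment_total_ip_header_length = &first_ip_hdr_len,
 *   };
 *   if (0 == ip6_sv_reass_extended_lock (b, &args))
 *     {
 *       ... values are stable while the per-thread lock is held ...
 *       ip6_sv_reass_extended_unlock (b);
 *     }
 *
 * On failure (-1) the callee has already released the lock, so only a
 * successful lock needs a matching ip6_sv_reass_extended_unlock().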
+ */ +int ip6_sv_reass_extended_lock (vlib_buffer_t *b, + struct ip6_sv_lock_unlock_args *a); + +void ip6_sv_reass_extended_unlock (vlib_buffer_t *b); int ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); uword ip6_sv_reass_custom_context_register_next_node (uword node_index); diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c index 48f7deadda3..6a25f6c583c 100644 --- a/src/vnet/ipsec/ipsec_input.c +++ b/src/vnet/ipsec/ipsec_input.c @@ -428,11 +428,12 @@ ipsec_ah_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, always_inline void ipsec_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, - esp_header_t *esp0, u32 thread_index, - ipsec_spd_t *spd0, vlib_buffer_t **b, - vlib_node_runtime_t *node, u64 *ipsec_bypassed, - u64 *ipsec_dropped, u64 *ipsec_matched, - u64 *ipsec_unprocessed, u16 *next) + udp_header_t *udp0, esp_header_t *esp0, + u32 thread_index, ipsec_spd_t *spd0, + vlib_buffer_t **b, vlib_node_runtime_t *node, + u64 *ipsec_bypassed, u64 *ipsec_dropped, + u64 *ipsec_matched, u64 *ipsec_unprocessed, + u16 *next) { ipsec_policy_t *p0 = NULL; @@ -445,17 +446,40 @@ ipsec_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, /* if flow cache is enabled, first search through flow cache for a * policy match for either protect, bypass or discard rules, in that - * order. if no match is found search_flow_cache is set to false (1) + * order. if no match is found search_flow_cache is set to false (0) * and we revert back to linear search */ - search_flow_cache = im->input_flow_cache_flag; + udp_or_esp: - if (esp0->spi == 0) + /* RFC5996 Section 2.23: "To tunnel IKE packets over UDP port 4500, the IKE + * header has four octets of zero prepended and the result immediately + * follows the UDP header. To tunnel ESP packets over UDP port 4500, the ESP + * header immediately follows the UDP header. Since the first four octets of + * the ESP header contain the SPI, and the SPI cannot validly be zero, it is + * always possible to distinguish ESP and IKE messages." + */ + + /* RFC3948 Section 2.1 UDP-Encapsulated ESP Header Format: + * "The UDP header is a standard [RFC0768] header, where + * - the Source Port and Destination Port MUST be the same as that used + * by IKE traffic, + * - the IPv4 UDP Checksum SHOULD be transmitted as a zero value, and + * - receivers MUST NOT depend on the UDP checksum being a zero value. + * The SPI field in the ESP header MUST NOT be a zero value." + */ + + /* + * UDP-IKEv2: UDP protocol, checksum != 0, SPI == 0 and port 500/4500 + * UDP-ESP: UDP protocol, checksum == 0, SPI != 0 and port 4500 + */ + if ((((udp0 != NULL) && (udp0->checksum == 0)) || (udp0 == NULL)) && + (esp0->spi == 0)) { - /* RFC 4303, section 2.1: The SPI value of zero (0 is reserved for - * local, implementation-specific use and MUST NOT be sent on the wire. + /* RFC4303 Section 2.1: "The SPI value of zero (0 is reserved for + * local, implementation-specific use and MUST NOT be sent on the + * wire." */ *ipsec_unprocessed += 1; next[0] = IPSEC_INPUT_NEXT_DROP; @@ -703,27 +727,30 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, udp_header_t *udp0 = NULL; udp0 = (udp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); - /* RFC5996 Section 2.23 "Port 4500 is reserved for + /* RFC5996 Section 2.23: "Port 4500 is reserved for * UDP-encapsulated ESP and IKE." 
+ * RFC5996 Section 3.1: "IKE messages use UDP ports 500 and/or 4500" */ - if (clib_host_to_net_u16 (4500) == udp0->dst_port) - { - esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t)); - - ipsec_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0, - b, node, &ipsec_bypassed, - &ipsec_dropped, &ipsec_matched, - &ipsec_unprocessed, next); - if (ipsec_bypassed > 0) - goto ipsec_bypassed; - } + if ((clib_host_to_net_u16 (500) == udp0->dst_port) || + (clib_host_to_net_u16 (4500) == udp0->dst_port)) + { + esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t)); + + ipsec_esp_packet_process (vm, im, ip0, udp0, esp0, thread_index, + spd0, b, node, &ipsec_bypassed, + &ipsec_dropped, &ipsec_matched, + &ipsec_unprocessed, next); + if (ipsec_bypassed > 0) + goto ipsec_bypassed; + } } else if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP)) { esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); - ipsec_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0, b, - node, &ipsec_bypassed, &ipsec_dropped, - &ipsec_matched, &ipsec_unprocessed, next); + ipsec_esp_packet_process (vm, im, ip0, NULL, esp0, thread_index, + spd0, b, node, &ipsec_bypassed, + &ipsec_dropped, &ipsec_matched, + &ipsec_unprocessed, next); if (ipsec_bypassed > 0) goto ipsec_bypassed; } diff --git a/src/vnet/pg/cli.c b/src/vnet/pg/cli.c index 3f2de2604b2..6cd9cbd3be5 100644 --- a/src/vnet/pg/cli.c +++ b/src/vnet/pg/cli.c @@ -672,7 +672,7 @@ create_pg_if_cmd_fn (vlib_main_t * vm, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "interface pg%u", &if_id)) + if (unformat (line_input, "pg%u", &if_id)) ; else if (unformat (line_input, "coalesce-enabled")) coalesce_enabled = 1; @@ -709,13 +709,60 @@ done: } VLIB_CLI_COMMAND (create_pg_if_cmd, static) = { - .path = "create packet-generator", + .path = "create packet-generator interface", .short_help = "create packet-generator interface <interface name>" " [gso-enabled gso-size <size> [coalesce-enabled]]" " [mode <ethernet | ip4 | ip6>]", .function = create_pg_if_cmd_fn, }; +static clib_error_t * +delete_pg_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + unformat_input_t _line_input, *line_input = &_line_input; + u32 sw_if_index = ~0; + int rv = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "Missing <interface>"); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else + { + return clib_error_create ("unknown input `%U'", + format_unformat_error, input); + } + } + unformat_free (line_input); + + if (sw_if_index == ~0) + return clib_error_return (0, + "please specify interface name or sw_if_index"); + + rv = pg_interface_delete (sw_if_index); + if (rv == VNET_API_ERROR_INVALID_SW_IF_INDEX) + return clib_error_return (0, "not a pg interface"); + else if (rv != 0) + return clib_error_return (0, "error on deleting pg interface"); + + return 0; +} + +VLIB_CLI_COMMAND (delete_pg_if_cmd, static) = { + .path = "delete packet-generator interface", + .short_help = "delete packet-generator interface {<interface name> | " + "sw_if_index <sw_idx>}", + .function = delete_pg_if_cmd_fn, +}; + /* Dummy init function so that we can be linked in. 
*/ static clib_error_t * pg_cli_init (vlib_main_t * vm) diff --git a/src/vnet/pg/pg.api b/src/vnet/pg/pg.api index 4f531fb1f5e..7c6fdcc97cf 100644 --- a/src/vnet/pg/pg.api +++ b/src/vnet/pg/pg.api @@ -18,7 +18,7 @@ This file defines packet-generator interface APIs. */ -option version = "2.0.0"; +option version = "2.1.0"; import "vnet/interface_types.api"; @@ -75,6 +75,18 @@ define pg_create_interface_v2_reply vl_api_interface_index_t sw_if_index; }; +/** \brief PacketGenerator delete interface request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface index +*/ +autoreply define pg_delete_interface +{ + u32 client_index; + u32 context; + vl_api_interface_index_t sw_if_index; +}; + /** \brief PacketGenerator interface enable/disable packet coalesce @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/pg/pg.h b/src/vnet/pg/pg.h index bede747428c..5e99d9af9f6 100644 --- a/src/vnet/pg/pg.h +++ b/src/vnet/pg/pg.h @@ -184,7 +184,11 @@ typedef struct pg_stream_t always_inline void pg_buffer_index_free (pg_buffer_index_t * bi) { + vlib_main_t *vm = vlib_get_main (); + word n_alloc; vec_free (bi->edits); + n_alloc = clib_fifo_elts (bi->buffer_fifo); + vlib_buffer_free (vm, bi->buffer_fifo, n_alloc); clib_fifo_free (bi->buffer_fifo); } @@ -396,6 +400,8 @@ u32 pg_interface_add_or_get (pg_main_t *pg, u32 stream_index, u8 gso_enabled, u32 gso_size, u8 coalesce_enabled, pg_interface_mode_t mode); +int pg_interface_delete (u32 sw_if_index); + always_inline pg_node_t * pg_get_node (uword node_index) { diff --git a/src/vnet/pg/pg_api.c b/src/vnet/pg/pg_api.c index e5d0a08a527..57fb40cdda4 100644 --- a/src/vnet/pg/pg_api.c +++ b/src/vnet/pg/pg_api.c @@ -63,18 +63,36 @@ vl_api_pg_create_interface_v2_t_handler (vl_api_pg_create_interface_v2_t *mp) } static void +vl_api_pg_delete_interface_t_handler (vl_api_pg_delete_interface_t *mp) +{ + vl_api_pg_delete_interface_reply_t *rmp; + pg_main_t *pg = &pg_main; + u32 sw_if_index = ~0; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + sw_if_index = ntohl (mp->sw_if_index); + + rv = pg_interface_delete (sw_if_index); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_PG_DELETE_INTERFACE_REPLY); +} + +static void vl_api_pg_interface_enable_disable_coalesce_t_handler (vl_api_pg_interface_enable_disable_coalesce_t * mp) { vl_api_pg_interface_enable_disable_coalesce_reply_t *rmp; + pg_main_t *pg = &pg_main; + vnet_main_t *vnm = vnet_get_main (); int rv = 0; VALIDATE_SW_IF_INDEX (mp); u32 sw_if_index = ntohl (mp->sw_if_index); - pg_main_t *pg = &pg_main; - vnet_main_t *vnm = vnet_get_main (); vnet_hw_interface_t *hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index); diff --git a/src/vnet/pg/stream.c b/src/vnet/pg/stream.c index cf3d37d5e9e..440e285031a 100644 --- a/src/vnet/pg/stream.c +++ b/src/vnet/pg/stream.c @@ -325,6 +325,50 @@ pg_interface_add_or_get (pg_main_t *pg, u32 if_id, u8 gso_enabled, return i; } +int +pg_interface_delete (u32 sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + pg_main_t *pm = &pg_main; + pg_interface_t *pi; + vnet_hw_interface_t *hw; + uword *p; + + hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index); + if (hw == NULL || pg_dev_class.index != hw->dev_class_index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + pi = pool_elt_at_index (pm->interfaces, hw->dev_instance); + + vnet_hw_interface_set_flags (vnm, 
pi->hw_if_index, 0); + vnet_sw_interface_set_flags (vnm, pi->sw_if_index, 0); + + if (pi->mode == PG_MODE_ETHERNET) + ethernet_delete_interface (vnm, pi->hw_if_index); + else + vnet_delete_hw_interface (vnm, pi->hw_if_index); + + pi->hw_if_index = ~0; + + if (pi->coalesce_enabled) + pg_interface_enable_disable_coalesce (pi, 0, ~0); + + if (vlib_num_workers ()) + { + clib_mem_free ((void *) pi->lockp); + pi->lockp = 0; + } + + vec_del1 (pm->if_index_by_sw_if_index, sw_if_index); + p = hash_get (pm->if_index_by_if_id, pi->id); + if (p) + hash_unset (pm->if_index_by_if_id, pi->id); + + clib_memset (pi, 0, sizeof (*pi)); + pool_put (pm->interfaces, pi); + return 0; +} + static void do_edit (pg_stream_t * stream, pg_edit_group_t * g, pg_edit_t * e, uword want_commit) @@ -571,18 +615,12 @@ void pg_stream_del (pg_main_t * pg, uword index) { pg_stream_t *s; - pg_buffer_index_t *bi; s = pool_elt_at_index (pg->streams, index); pg_stream_enable_disable (pg, s, /* want_enabled */ 0); hash_unset_mem (pg->stream_index_by_name, s->name); - vec_foreach (bi, s->buffer_indices) - { - clib_fifo_free (bi->buffer_fifo); - } - pg_stream_free (s); pool_put (pg->streams, s); } diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c index 7c63ada2774..5c52adb853f 100644 --- a/src/vnet/session/application.c +++ b/src/vnet/session/application.c @@ -175,8 +175,8 @@ app_listener_alloc_and_init (application_t * app, { session_type_t local_st; - local_st = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE, - sep->is_ip4); + local_st = + session_type_from_proto_and_ip (TRANSPORT_PROTO_CT, sep->is_ip4); ls = listen_session_alloc (0, local_st); ls->app_wrk_index = sep->app_wrk_index; lh = session_handle (ls); @@ -1430,7 +1430,7 @@ vnet_connect (vnet_connect_args_t *a) session_error_t rv; a->sep_ext.original_tp = a->sep_ext.transport_proto; - a->sep_ext.transport_proto = TRANSPORT_PROTO_NONE; + a->sep_ext.transport_proto = TRANSPORT_PROTO_CT; rv = app_worker_connect_session (client_wrk, &a->sep_ext, &a->sh); a->sep_ext.transport_proto = a->sep_ext.original_tp; if (!rv || rv != SESSION_E_LOCAL_CONNECT) diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index f175e4a58c6..d5656ff8341 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -396,6 +396,7 @@ typedef struct session_accepted_msg_ transport_endpoint_t lcl; transport_endpoint_t rmt; u8 flags; + /* TODO(fcoras) maybe refactor to pass as transport attr */ u32 original_dst_ip4; u16 original_dst_port; } __clib_packed session_accepted_msg_t; @@ -909,17 +910,63 @@ typedef struct app_sapi_msg_ } __clib_packed app_sapi_msg_t; static inline void -session_endpoint_alloc_ext_cfg (session_endpoint_cfg_t *sep_ext, - transport_endpt_ext_cfg_type_t type) +session_endpoint_init_ext_cfgs (session_endpoint_cfg_t *sep_ext, u32 len) { - transport_endpt_ext_cfg_t *cfg; - u32 cfg_size; + sep_ext->ext_cfgs.len = len; + sep_ext->ext_cfgs.data = clib_mem_alloc (len); + clib_memset (sep_ext->ext_cfgs.data, 0, len); +} + +static inline transport_endpt_ext_cfg_t * +session_endpoint_add_ext_cfg (session_endpoint_cfg_t *sep_ext, + transport_endpt_ext_cfg_type_t type, u16 len) +{ + transport_endpt_ext_cfg_t *ext_cfg; + + if (!sep_ext->ext_cfgs.len) + session_endpoint_init_ext_cfgs (sep_ext, + TRANSPORT_ENDPT_EXT_CFGS_CHUNK_SIZE); + + ASSERT (sep_ext->ext_cfgs.tail_offset + len + + TRANSPORT_ENDPT_EXT_CFG_HEADER_SIZE < + sep_ext->ext_cfgs.len); + ext_cfg = 
(transport_endpt_ext_cfg_t *) (sep_ext->ext_cfgs.data + + sep_ext->ext_cfgs.tail_offset); + ext_cfg->len = len; + ext_cfg->type = type; + sep_ext->ext_cfgs.tail_offset += len + TRANSPORT_ENDPT_EXT_CFG_HEADER_SIZE; + return ext_cfg; +} + +static inline transport_endpt_ext_cfg_t * +session_endpoint_get_ext_cfg (session_endpoint_cfg_t *sep_ext, + transport_endpt_ext_cfg_type_t type) +{ + transport_endpt_ext_cfg_t *ext_cfg; + + if (!sep_ext->ext_cfgs.len) + return 0; + + ext_cfg = (transport_endpt_ext_cfg_t *) sep_ext->ext_cfgs.data; + while ((u8 *) ext_cfg < + sep_ext->ext_cfgs.data + sep_ext->ext_cfgs.tail_offset) + { + if (ext_cfg->type == type) + return ext_cfg; + ext_cfg = (transport_endpt_ext_cfg_t *) (ext_cfg->data + ext_cfg->len); + } + return 0; +} - cfg_size = sizeof (transport_endpt_ext_cfg_t); - cfg = clib_mem_alloc (cfg_size); - clib_memset (cfg, 0, cfg_size); - cfg->type = type; - sep_ext->ext_cfg = cfg; +static inline void +session_endpoint_free_ext_cfgs (session_endpoint_cfg_t *sep_ext) +{ + if (!sep_ext->ext_cfgs.len) + return; + clib_mem_free (sep_ext->ext_cfgs.data); + sep_ext->ext_cfgs.len = 0; + sep_ext->ext_cfgs.tail_offset = 0; + sep_ext->ext_cfgs.data = 0; } #endif /* __included_uri_h__ */ diff --git a/src/vnet/session/application_local.c b/src/vnet/session/application_local.c index 3ac2ba4cfbc..afa39f6ded3 100644 --- a/src/vnet/session/application_local.c +++ b/src/vnet/session/application_local.c @@ -710,7 +710,7 @@ ct_accept_one (u32 thread_index, u32 ho_index) sct->c_is_ip4 = cct->c_is_ip4; clib_memcpy (&sct->c_lcl_ip, &cct->c_rmt_ip, sizeof (cct->c_rmt_ip)); sct->client_wrk = cct->client_wrk; - sct->c_proto = TRANSPORT_PROTO_NONE; + sct->c_proto = TRANSPORT_PROTO_CT; sct->client_opaque = cct->client_opaque; sct->actual_tp = cct->actual_tp; @@ -723,8 +723,8 @@ ct_accept_one (u32 thread_index, u32 ho_index) */ ss = session_alloc (thread_index); ll = listen_session_get (ll_index); - ss->session_type = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE, - sct->c_is_ip4); + ss->session_type = + session_type_from_proto_and_ip (TRANSPORT_PROTO_CT, sct->c_is_ip4); ss->connection_index = sct->c_c_index; ss->listener_handle = listen_session_get_handle (ll); session_set_state (ss, SESSION_STATE_CREATED); @@ -889,7 +889,7 @@ ct_connect (app_worker_t *client_wrk, session_t *ll, ho->client_opaque = sep->opaque; ho->client_wrk = client_wrk->wrk_index; ho->peer_index = ll->session_index; - ho->c_proto = TRANSPORT_PROTO_NONE; + ho->c_proto = TRANSPORT_PROTO_CT; ho->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; clib_memcpy (&ho->c_rmt_ip, &sep->ip, sizeof (sep->ip)); ho->flags |= CT_CONN_F_CLIENT; @@ -1425,9 +1425,9 @@ ct_session_tx (session_t * s) static clib_error_t * ct_transport_init (vlib_main_t * vm) { - transport_register_protocol (TRANSPORT_PROTO_NONE, &cut_thru_proto, + transport_register_protocol (TRANSPORT_PROTO_CT, &cut_thru_proto, FIB_PROTOCOL_IP4, ~0); - transport_register_protocol (TRANSPORT_PROTO_NONE, &cut_thru_proto, + transport_register_protocol (TRANSPORT_PROTO_CT, &cut_thru_proto, FIB_PROTOCOL_IP6, ~0); return 0; } diff --git a/src/vnet/session/application_namespace.c b/src/vnet/session/application_namespace.c index 8b06331d803..f5b70a9c4cf 100644 --- a/src/vnet/session/application_namespace.c +++ b/src/vnet/session/application_namespace.c @@ -52,6 +52,14 @@ app_namespace_get (u32 index) } app_namespace_t * +app_namespace_get_if_valid (u32 index) +{ + if (pool_is_free_index (app_namespace_pool, index)) + return 0; + return pool_elt_at_index (app_namespace_pool, 
index); +} + +app_namespace_t * app_namespace_get_from_id (const u8 *ns_id) { u32 index = app_namespace_index_from_id (ns_id); @@ -127,7 +135,7 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a) st = session_table_alloc (); session_table_init (st, FIB_PROTOCOL_MAX); st->is_local = 1; - st->appns_index = app_namespace_index (app_ns); + vec_add1 (st->appns_index, app_namespace_index (app_ns)); app_ns->local_table_index = session_table_index (st); if (a->sock_name) { @@ -173,8 +181,10 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a) if (app_ns->sock_name) vec_free (app_ns->sock_name); - session_lookup_table_cleanup (FIB_PROTOCOL_IP4, app_ns->ip4_fib_index); - session_lookup_table_cleanup (FIB_PROTOCOL_IP6, app_ns->ip6_fib_index); + session_lookup_table_cleanup (FIB_PROTOCOL_IP4, app_ns->ip4_fib_index, + ns_index); + session_lookup_table_cleanup (FIB_PROTOCOL_IP6, app_ns->ip6_fib_index, + ns_index); app_namespace_free (app_ns); } diff --git a/src/vnet/session/application_namespace.h b/src/vnet/session/application_namespace.h index b441e3c48f2..63ff7cc58a2 100644 --- a/src/vnet/session/application_namespace.h +++ b/src/vnet/session/application_namespace.h @@ -77,6 +77,7 @@ typedef struct _vnet_app_namespace_add_del_args app_namespace_t *app_namespace_alloc (const u8 *ns_id); app_namespace_t *app_namespace_get (u32 index); +app_namespace_t *app_namespace_get_if_valid (u32 index); app_namespace_t *app_namespace_get_from_id (const u8 *ns_id); u32 app_namespace_index (app_namespace_t * app_ns); const u8 *app_namespace_id (app_namespace_t * app_ns); diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api index f678f8bd80f..2805546db52 100644 --- a/src/vnet/session/session.api +++ b/src/vnet/session/session.api @@ -13,7 +13,7 @@ * limitations under the License. */ -option version = "4.0.2"; +option version = "4.0.3"; import "vnet/interface_types.api"; import "vnet/ip/ip_types.api"; @@ -411,6 +411,7 @@ autoreply define session_rule_add_del { */ define session_rules_dump { + option deprecated; u32 client_index; u32 context; }; @@ -434,6 +435,7 @@ define session_rules_dump */ define session_rules_details { + option deprecated; u32 context; vl_api_transport_proto_t transport_proto; vl_api_prefix_t lcl; @@ -446,6 +448,49 @@ define session_rules_details string tag[64]; }; +/** \brief Dump session rules + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + */ +define session_rules_v2_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief Session rules details + @param context - sender context, to match reply w/ request + @param transport_proto - transport protocol + @param is_ip4 - flag to indicate if ip addresses are ip4 or 6 + @param lcl_ip - local ip + @param lcl_plen - local prefix length + @param rmt_ip - remote ip + @param rmt_ple - remote prefix length + @param lcl_port - local port + @param rmt_port - remote port + @param action_index - the only action defined now is forward to + application with index action_index + @param scope - enum that indicates scope of the rule: global or local. 
+ If 0, default is global, 1 is global 2 is local, 3 is both + @param tag - tag + @param count - count of the number of appns_index + @param appns_index - application namespaces where rule is to be applied to + */ +define session_rules_v2_details +{ + u32 context; + vl_api_transport_proto_t transport_proto; + vl_api_prefix_t lcl; + vl_api_prefix_t rmt; + u16 lcl_port; + u16 rmt_port; + u32 action_index; + vl_api_session_rule_scope_t scope; + string tag[64]; + u32 count; + u32 appns_index[count]; +}; + autoreply define session_sdl_add_del { option deprecated; u32 client_index; @@ -500,6 +545,7 @@ define session_sdl_details */ define session_sdl_v2_dump { + option deprecated; u32 client_index; u32 context; }; @@ -514,6 +560,7 @@ define session_sdl_v2_dump */ define session_sdl_v2_details { + option deprecated; u32 context; vl_api_prefix_t rmt; u32 action_index; @@ -521,6 +568,35 @@ define session_sdl_v2_details string tag[64]; }; +/** \brief Dump session sdl v3 + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + */ +define session_sdl_v3_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief Session sdl details v3 + @param context - sender context, to match reply w/ request + @param rmt - remote prefix + @param action_index - the only action defined now is forward to + application with index action_index + @param tag - tag + @param count - count of the number of appns_index + @param appns_index - application namespaces where rule is to be applied to + */ +define session_sdl_v3_details +{ + u32 context; + vl_api_prefix_t rmt; + u32 action_index; + string tag[64]; + u32 count; + u32 appns_index[count]; +}; + /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index ac02281cf5c..8a9fc4b2371 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -1981,11 +1981,14 @@ session_stats_collector_fn (vlib_stats_collector_data_t *d) } vlib_stats_set_gauge (d->private_data, n_sessions); + vlib_stats_set_gauge (smm->stats_seg_idx.tp_port_alloc_max_tries, + transport_port_alloc_max_tries ()); } static void session_stats_collector_init (void) { + session_main_t *smm = &session_main; vlib_stats_collector_reg_t reg = {}; reg.entry_index = @@ -1994,6 +1997,10 @@ session_stats_collector_init (void) reg.collect_fn = session_stats_collector_fn; vlib_stats_register_collector_fn (®); vlib_stats_validate (reg.entry_index, 0, vlib_get_n_threads ()); + + smm->stats_seg_idx.tp_port_alloc_max_tries = + vlib_stats_add_gauge ("/sys/session/transport_port_alloc_max_tries"); + vlib_stats_set_gauge (smm->stats_seg_idx.tp_port_alloc_max_tries, 0); } static clib_error_t * diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 24150fbbcd1..823bdcb02af 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -197,6 +197,11 @@ typedef enum #undef _ } session_rt_engine_type_t; +typedef struct session_stats_seg_indices_ +{ + u32 tp_port_alloc_max_tries; +} session_stats_segs_indicies_t; + typedef struct session_main_ { /** Worker contexts */ @@ -294,6 +299,7 @@ typedef struct session_main_ /** Query nat44-ed session to get original dst ip4 & dst port. 
*/ nat44_original_dst_lookup_fn original_dst_lookup; + session_stats_segs_indicies_t stats_seg_idx; } session_main_t; extern session_main_t session_main; diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index f0043f90c9a..c6df47b412b 100644 --- a/src/vnet/session/session_api.c +++ b/src/vnet/session/session_api.c @@ -1181,8 +1181,8 @@ vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp) ip_prefix_decode (&mp->lcl, &table_args->lcl); ip_prefix_decode (&mp->rmt, &table_args->rmt); - table_args->lcl_port = mp->lcl_port; - table_args->rmt_port = mp->rmt_port; + table_args->lcl_port = clib_net_to_host_u16 (mp->lcl_port); + table_args->rmt_port = clib_net_to_host_u16 (mp->rmt_port); table_args->action_index = clib_net_to_host_u32 (mp->action_index); table_args->is_add = mp->is_add; mp->tag[sizeof (mp->tag) - 1] = 0; @@ -1232,8 +1232,8 @@ send_session_rule_details4 (mma_rule_16_t * rule, u8 is_local, ip_prefix_encode (&lcl, &rmp->lcl); ip_prefix_encode (&rmt, &rmp->rmt); - rmp->lcl_port = match->lcl_port; - rmp->rmt_port = match->rmt_port; + rmp->lcl_port = clib_host_to_net_u16 (match->lcl_port); + rmp->rmt_port = clib_host_to_net_u16 (match->rmt_port); rmp->action_index = clib_host_to_net_u32 (rule->action_index); rmp->scope = is_local ? SESSION_RULE_SCOPE_API_LOCAL : SESSION_RULE_SCOPE_API_GLOBAL; @@ -1276,8 +1276,8 @@ send_session_rule_details6 (mma_rule_40_t * rule, u8 is_local, ip_prefix_encode (&lcl, &rmp->lcl); ip_prefix_encode (&rmt, &rmp->rmt); - rmp->lcl_port = match->lcl_port; - rmp->rmt_port = match->rmt_port; + rmp->lcl_port = clib_host_to_net_u16 (match->lcl_port); + rmp->rmt_port = clib_host_to_net_u16 (match->rmt_port); rmp->action_index = clib_host_to_net_u32 (rule->action_index); rmp->scope = is_local ? 
SESSION_RULE_SCOPE_API_LOCAL : SESSION_RULE_SCOPE_API_GLOBAL; @@ -1333,6 +1333,7 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp) vl_api_registration_t *reg; session_table_t *st; u8 tp; + u32 appns_index; reg = vl_api_client_index_to_registration (mp->client_index); if (!reg) @@ -1344,8 +1345,183 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp) { session_rules_table_t *srt = srtg_handle_to_srt (st->srtg_handle, tp); + appns_index = *vec_elt_at_index ( + st->appns_index, + vec_len (st->appns_index) - 1); send_session_rules_table_details ( srt, st->active_fib_proto, tp, st->is_local, + appns_index, reg, mp->context); + } + })); +} + +/* + * session_rules_v2_dunp handler + */ +static void +send_session_rule_v2_details4 (mma_rule_16_t *rule, u8 is_local, + u8 transport_proto, u32 *appns_index, u8 *tag, + vl_api_registration_t *reg, u32 context) +{ + vl_api_session_rules_v2_details_t *rmp = 0; + session_mask_or_match_4_t *match = + (session_mask_or_match_4_t *) &rule->match; + session_mask_or_match_4_t *mask = (session_mask_or_match_4_t *) &rule->mask; + fib_prefix_t lcl, rmt; + u32 i, appns_index_count = vec_len (appns_index); + + rmp = vl_msg_api_alloc (sizeof (*rmp) + + appns_index_count * sizeof (*appns_index)); + if (!rmp) + return; + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (REPLY_MSG_ID_BASE + VL_API_SESSION_RULES_V2_DETAILS); + rmp->context = context; + + rmp->count = clib_host_to_net_u32 (appns_index_count); + vec_foreach_index (i, appns_index) + { + u32 index = *vec_elt_at_index (appns_index, i); + rmp->appns_index[i] = clib_host_to_net_u32 (index); + } + + clib_memset (&lcl, 0, sizeof (lcl)); + clib_memset (&rmt, 0, sizeof (rmt)); + ip_set (&lcl.fp_addr, &match->lcl_ip, 1); + ip_set (&rmt.fp_addr, &match->rmt_ip, 1); + lcl.fp_len = ip4_mask_to_preflen (&mask->lcl_ip); + rmt.fp_len = ip4_mask_to_preflen (&mask->rmt_ip); + lcl.fp_proto = FIB_PROTOCOL_IP4; + rmt.fp_proto = FIB_PROTOCOL_IP4; + + ip_prefix_encode (&lcl, &rmp->lcl); + ip_prefix_encode (&rmt, &rmp->rmt); + rmp->lcl_port = clib_host_to_net_u16 (match->lcl_port); + rmp->rmt_port = clib_host_to_net_u16 (match->rmt_port); + rmp->action_index = clib_host_to_net_u32 (rule->action_index); + rmp->scope = + is_local ? 
SESSION_RULE_SCOPE_API_LOCAL : SESSION_RULE_SCOPE_API_GLOBAL; + rmp->transport_proto = api_session_transport_proto_encode (transport_proto); + if (tag) + { + clib_memcpy_fast (rmp->tag, tag, vec_len (tag)); + rmp->tag[vec_len (tag)] = 0; + } + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +send_session_rule_v2_details6 (mma_rule_40_t *rule, u8 is_local, + u8 transport_proto, u32 *appns_index, u8 *tag, + vl_api_registration_t *reg, u32 context) +{ + vl_api_session_rules_v2_details_t *rmp = 0; + session_mask_or_match_6_t *match = + (session_mask_or_match_6_t *) &rule->match; + session_mask_or_match_6_t *mask = (session_mask_or_match_6_t *) &rule->mask; + fib_prefix_t lcl, rmt; + u32 i, appns_index_count = vec_len (appns_index); + + rmp = vl_msg_api_alloc (sizeof (*rmp) + + appns_index_count * sizeof (*appns_index)); + if (!rmp) + return; + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (REPLY_MSG_ID_BASE + VL_API_SESSION_RULES_V2_DETAILS); + rmp->context = context; + + rmp->count = clib_host_to_net_u32 (appns_index_count); + vec_foreach_index (i, appns_index) + { + u32 index = *vec_elt_at_index (appns_index, i); + rmp->appns_index[i] = clib_host_to_net_u32 (index); + } + + clib_memset (&lcl, 0, sizeof (lcl)); + clib_memset (&rmt, 0, sizeof (rmt)); + ip_set (&lcl.fp_addr, &match->lcl_ip, 0); + ip_set (&rmt.fp_addr, &match->rmt_ip, 0); + lcl.fp_len = ip6_mask_to_preflen (&mask->lcl_ip); + rmt.fp_len = ip6_mask_to_preflen (&mask->rmt_ip); + lcl.fp_proto = FIB_PROTOCOL_IP6; + rmt.fp_proto = FIB_PROTOCOL_IP6; + + ip_prefix_encode (&lcl, &rmp->lcl); + ip_prefix_encode (&rmt, &rmp->rmt); + rmp->lcl_port = clib_host_to_net_u16 (match->lcl_port); + rmp->rmt_port = clib_host_to_net_u16 (match->rmt_port); + rmp->action_index = clib_host_to_net_u32 (rule->action_index); + rmp->scope = + is_local ? 
SESSION_RULE_SCOPE_API_LOCAL : SESSION_RULE_SCOPE_API_GLOBAL; + rmp->transport_proto = api_session_transport_proto_encode (transport_proto); + if (tag) + { + clib_memcpy_fast (rmp->tag, tag, vec_len (tag)); + rmp->tag[vec_len (tag)] = 0; + } + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +send_session_rules_table_v2_details (session_rules_table_t *srt, u8 fib_proto, + u8 tp, u8 is_local, u32 *appns_index, + vl_api_registration_t *reg, u32 context) +{ + mma_rule_16_t *rule16; + mma_rule_40_t *rule40; + mma_rules_table_16_t *srt16; + mma_rules_table_40_t *srt40; + u32 ri; + + if (is_local || fib_proto == FIB_PROTOCOL_IP4) + { + u8 *tag = 0; + srt16 = &srt->session_rules_tables_16; + pool_foreach (rule16, srt16->rules) + { + ri = mma_rules_table_rule_index_16 (srt16, rule16); + tag = session_rules_table_rule_tag (srt, ri, 1); + send_session_rule_v2_details4 (rule16, is_local, tp, appns_index, tag, + reg, context); + } + } + if (is_local || fib_proto == FIB_PROTOCOL_IP6) + { + u8 *tag = 0; + srt40 = &srt->session_rules_tables_40; + pool_foreach (rule40, srt40->rules) + { + ri = mma_rules_table_rule_index_40 (srt40, rule40); + tag = session_rules_table_rule_tag (srt, ri, 1); + send_session_rule_v2_details6 (rule40, is_local, tp, appns_index, tag, + reg, context); + } + } +} + +static void +vl_api_session_rules_v2_dump_t_handler (vl_api_session_rules_dump_t *mp) +{ + vl_api_registration_t *reg; + session_table_t *st; + u8 tp; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + session_table_foreach (st, ({ + if (st->srtg_handle != SESSION_SRTG_HANDLE_INVALID) + for (tp = 0; tp < TRANSPORT_N_PROTOS; tp++) + { + session_rules_table_t *srt = + srtg_handle_to_srt (st->srtg_handle, tp); + send_session_rules_table_v2_details ( + srt, st->active_fib_proto, tp, st->is_local, st->appns_index, reg, mp->context); } })); @@ -1355,7 +1531,7 @@ typedef struct session_sdl_table_walk_ctx_ { vl_api_registration_t *reg; u32 mp_context; - u32 appns_index; + u32 *appns_index; } session_sdl_table_walk_ctx; static void @@ -1365,7 +1541,8 @@ send_session_sdl_v2_details (u32 fei, ip46_address_t *rmt_ip, u16 fp_len, { session_sdl_table_walk_ctx *ctx = args; vl_api_registration_t *reg = ctx->reg; - u32 appns_index = ctx->appns_index; + u32 appns_index = + *vec_elt_at_index (ctx->appns_index, vec_len (ctx->appns_index) - 1); u32 context = ctx->mp_context; vl_api_session_sdl_v2_details_t *rmp = 0; fib_prefix_t rmt; @@ -1414,7 +1591,6 @@ vl_api_session_sdl_v2_dump_t_handler (vl_api_session_sdl_v2_dump_t *mp) if (st->srtg_handle != SESSION_SRTG_HANDLE_INVALID) { ctx.appns_index = st->appns_index; - if (st->active_fib_proto == FIB_PROTOCOL_IP4) session_sdl_table_walk4 (st->srtg_handle, send_session_sdl_v2_details, &ctx); @@ -1426,12 +1602,90 @@ vl_api_session_sdl_v2_dump_t_handler (vl_api_session_sdl_v2_dump_t *mp) } static void +send_session_sdl_v3_details (u32 fei, ip46_address_t *rmt_ip, u16 fp_len, + u32 action_index, u32 fp_proto, u8 *tag, + void *args) +{ + session_sdl_table_walk_ctx *ctx = args; + vl_api_registration_t *reg = ctx->reg; + u32 context = ctx->mp_context; + vl_api_session_sdl_v3_details_t *rmp = 0; + fib_prefix_t rmt; + u32 appns_index_count, appns_index, i; + + appns_index_count = vec_len (ctx->appns_index); + rmp = vl_msg_api_alloc (sizeof (*rmp) + + appns_index_count * sizeof (appns_index)); + if (!rmp) + return; + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SESSION_SDL_V3_DETAILS); + rmp->context = context; 
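  /* variable-length reply: count is followed by that many appns_index
     entries, all converted to network byte order below */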
+ + rmp->count = clib_host_to_net_u32 (appns_index_count); + vec_foreach_index (i, ctx->appns_index) + { + appns_index = *vec_elt_at_index (ctx->appns_index, i); + rmp->appns_index[i] = clib_host_to_net_u32 (appns_index); + } + + clib_memset (&rmt, 0, sizeof (rmt)); + if (fp_proto == FIB_PROTOCOL_IP4) + ip_set (&rmt.fp_addr, &rmt_ip->ip4, 1); + else + ip_set (&rmt.fp_addr, &rmt_ip->ip6, 0); + rmt.fp_len = fp_len; + rmt.fp_proto = fp_proto, + + ip_prefix_encode (&rmt, &rmp->rmt); + rmp->action_index = clib_host_to_net_u32 (action_index); + + if (tag) + { + clib_memcpy_fast (rmp->tag, tag, vec_len (tag)); + rmp->tag[vec_len (tag)] = 0; + } + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +vl_api_session_sdl_v3_dump_t_handler (vl_api_session_sdl_v2_dump_t *mp) +{ + vl_api_registration_t *reg; + session_table_t *st; + session_sdl_table_walk_ctx ctx; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + ctx.reg = reg; + ctx.mp_context = mp->context; + + session_table_foreach ( + st, ({ + if (st->srtg_handle != SESSION_SRTG_HANDLE_INVALID) + { + ctx.appns_index = st->appns_index; + if (st->active_fib_proto == FIB_PROTOCOL_IP4) + session_sdl_table_walk4 (st->srtg_handle, + send_session_sdl_v3_details, &ctx); + else + session_sdl_table_walk6 (st->srtg_handle, + send_session_sdl_v3_details, &ctx); + } + })); +} + +static void send_session_sdl_details (u32 fei, ip46_address_t *lcl_ip, u16 fp_len, u32 action_index, u32 fp_proto, u8 *tag, void *args) { session_sdl_table_walk_ctx *ctx = args; vl_api_registration_t *reg = ctx->reg; - u32 appns_index = ctx->appns_index; + u32 appns_index = + *vec_elt_at_index (ctx->appns_index, vec_len (ctx->appns_index) - 1); u32 context = ctx->mp_context; vl_api_session_sdl_details_t *rmp = 0; fib_prefix_t lcl; @@ -1480,7 +1734,6 @@ vl_api_session_sdl_dump_t_handler (vl_api_session_sdl_dump_t *mp) if (st->srtg_handle != SESSION_SRTG_HANDLE_INVALID) { ctx.appns_index = st->appns_index; - if (st->active_fib_proto == FIB_PROTOCOL_IP4) session_sdl_table_walk4 (st->srtg_handle, send_session_sdl_details, &ctx); @@ -2209,6 +2462,10 @@ session_api_hookup (vlib_main_t *vm) am, REPLY_MSG_ID_BASE + VL_API_SESSION_SDL_V2_DUMP, 1); vl_api_set_msg_thread_safe ( am, REPLY_MSG_ID_BASE + VL_API_SESSION_SDL_V2_DETAILS, 1); + vl_api_set_msg_thread_safe ( + am, REPLY_MSG_ID_BASE + VL_API_SESSION_SDL_V3_DUMP, 1); + vl_api_set_msg_thread_safe ( + am, REPLY_MSG_ID_BASE + VL_API_SESSION_SDL_V3_DETAILS, 1); return 0; } diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c index c29a465d056..0ed2876469b 100644 --- a/src/vnet/session/session_cli.c +++ b/src/vnet/session/session_cli.c @@ -613,6 +613,11 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "%U", format_transport_protos); goto done; } + else if (unformat (input, "transport")) + { + vlib_cli_output (vm, "%U", format_transport_state); + goto done; + } else if (unformat (input, "rt-backend")) { vlib_cli_output (vm, "%U", format_rt_backend, smm->rt_engine_type); @@ -799,7 +804,7 @@ VLIB_CLI_COMMAND (vlib_cli_show_session_command) = { .path = "show session", .short_help = "show session [protos][states][rt-backend][verbose [n]] " - "[events][listeners <proto>] " + "[transport][events][listeners <proto>] " "[<session-id>][thread <n> [[proto <p>] index <n>]][elog] " "[thread <n>][proto <proto>][state <state>][range <min> [<max>]] " "[lcl|rmt|ep <ip>[:<port>]][force-print]", @@ -1061,6 +1066,7 @@ clear_session_stats_fn (vlib_main_t 
*vm, unformat_input_t *input, { clib_memset (&wrk->stats, 0, sizeof (wrk->stats)); } + transport_clear_stats (); return 0; } diff --git a/src/vnet/session/session_input.c b/src/vnet/session/session_input.c index 73b777127fd..01be281d4f7 100644 --- a/src/vnet/session/session_input.c +++ b/src/vnet/session/session_input.c @@ -217,9 +217,7 @@ app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index, s->connection_index, s->thread_index); session_free (s); /* Notify app that it has data on the new session */ - s = session_get_from_handle (evt->as_u64[1]); - session_send_io_evt_to_thread (s->rx_fifo, - SESSION_IO_EVT_BUILTIN_RX); + session_program_rx_io_evt (evt->as_u64[1]); break; case SESSION_CTRL_EVT_TRANSPORT_CLOSED: s = session_get (evt->session_index, thread_index); @@ -257,6 +255,9 @@ app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index, app->cb_fns.del_segment_callback (app_wrk->wrk_index, evt->as_u64[1]); break; + case SESSION_CTRL_EVT_RPC: + ((void (*) (session_t * s)) (evt->rpc_args.fp)) (evt->rpc_args.arg); + break; default: clib_warning ("unexpected event: %u", evt->event_type); ASSERT (0); diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c index 0d580ba35c6..3a99c0b5aaf 100644 --- a/src/vnet/session/session_lookup.c +++ b/src/vnet/session/session_lookup.c @@ -1383,7 +1383,7 @@ session_lookup_connection (u32 fib_index, ip46_address_t * lcl, session_error_t vnet_session_rule_add_del (session_rule_add_del_args_t *args) { - app_namespace_t *app_ns = app_namespace_get (args->appns_index); + app_namespace_t *app_ns = app_namespace_get_if_valid (args->appns_index); session_table_t *st; u32 fib_index; u8 fib_proto; @@ -1404,6 +1404,8 @@ vnet_session_rule_add_del (session_rule_add_del_args_t *args) fib_proto = args->table_args.rmt.fp_proto; fib_index = app_namespace_get_fib_index (app_ns, fib_proto); st = session_table_get_for_fib_index (fib_proto, fib_index); + if (!st) + return SESSION_E_INVALID; session_rules_table_init (st, fib_proto); if ((rv = session_rules_table_add_del ( st->srtg_handle, args->transport_proto, &args->table_args))) @@ -1455,7 +1457,7 @@ session_lookup_set_tables_appns (app_namespace_t * app_ns) st = session_table_get_or_alloc (fp, fib_index); if (st) { - st->appns_index = app_namespace_index (app_ns); + vec_add1 (st->appns_index, app_namespace_index (app_ns)); session_lookup_fib_table_lock (fib_index, fp); } } @@ -1540,7 +1542,6 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { u32 proto = ~0, lcl_port, rmt_port, action = 0, lcl_plen = 0, rmt_plen = 0; - clib_error_t *error = 0; u32 appns_index, scope = 0; ip46_address_t lcl_ip, rmt_ip; u8 is_ip4 = 1, conn_set = 0; @@ -1549,10 +1550,12 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, app_namespace_t *app_ns; int rv; - session_cli_return_if_not_enabled (); - if (session_rule_table_is_enabled () == 0) - return clib_error_return (0, "session rule table engine is not enabled"); + { + vlib_cli_output (vm, "session rule table engine is not enabled"); + unformat_skip_line (input); + goto done; + } clib_memset (&lcl_ip, 0, sizeof (lcl_ip)); clib_memset (&rmt_ip, 0, sizeof (rmt_ip)); @@ -1594,8 +1597,8 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, ; else { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + vlib_cli_output (vm, "unknown input `%U'", format_unformat_error, + input); goto done; } } @@ -1654,12 +1657,12 @@ 
session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, .scope = scope, }; if ((rv = vnet_session_rule_add_del (&args))) - error = clib_error_return (0, "rule add del returned %u", rv); + vlib_cli_output (vm, "rule add del returned %d", rv); done: vec_free (ns_id); vec_free (tag); - return error; + return 0; } VLIB_CLI_COMMAND (session_rule_command, static) = @@ -1944,23 +1947,30 @@ session_lookup_init (void) } void -session_lookup_table_cleanup (u32 fib_proto, u32 fib_index) +session_lookup_table_cleanup (u32 fib_proto, u32 fib_index, u32 ns_index) { session_table_t *st; - u32 table_index; + u32 table_index, appns_index; + int i; session_lookup_fib_table_unlock (fib_index, fib_proto); + table_index = session_lookup_get_index_for_fib (fib_proto, fib_index); + st = session_table_get (table_index); + if (st == 0) + return; if (fib_index_to_lock_count[fib_proto][fib_index] == 0) { - table_index = session_lookup_get_index_for_fib (fib_proto, fib_index); - st = session_table_get (table_index); - if (st) - { - session_table_free (st, fib_proto); - if (vec_len (fib_index_to_table_index[fib_proto]) > fib_index) - fib_index_to_table_index[fib_proto][fib_index] = ~0; - } + session_table_free (st, fib_proto); + if (vec_len (fib_index_to_table_index[fib_proto]) > fib_index) + fib_index_to_table_index[fib_proto][fib_index] = ~0; } + else + vec_foreach_index (i, st->appns_index) + { + appns_index = *vec_elt_at_index (st->appns_index, i); + if (ns_index == appns_index) + vec_del1 (st->appns_index, i); + } } /* diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index 14b8005d5d0..4d86d409e98 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -136,7 +136,14 @@ session_mq_listen_handler (session_worker_t *wrk, session_evt_elt_t *elt) a->sep_ext.transport_flags = mp->flags; if (mp->ext_config) - a->sep_ext.ext_cfg = session_mq_get_ext_config (app, mp->ext_config); + { + transport_endpt_ext_cfg_t *ext_cfg = + session_mq_get_ext_config (app, mp->ext_config); + a->sep_ext.ext_cfgs.data = (u8 *) ext_cfg; + a->sep_ext.ext_cfgs.len = + ext_cfg->len + TRANSPORT_ENDPT_EXT_CFG_HEADER_SIZE; + a->sep_ext.ext_cfgs.tail_offset = a->sep_ext.ext_cfgs.len; + } if ((rv = vnet_listen (a))) session_worker_stat_error_inc (wrk, rv, 1); @@ -213,7 +220,14 @@ session_mq_connect_one (session_connect_msg_t *mp) a->wrk_map_index = mp->wrk_index; if (mp->ext_config) - a->sep_ext.ext_cfg = session_mq_get_ext_config (app, mp->ext_config); + { + transport_endpt_ext_cfg_t *ext_cfg = + session_mq_get_ext_config (app, mp->ext_config); + a->sep_ext.ext_cfgs.data = (u8 *) ext_cfg; + a->sep_ext.ext_cfgs.len = + ext_cfg->len + TRANSPORT_ENDPT_EXT_CFG_HEADER_SIZE; + a->sep_ext.ext_cfgs.tail_offset = a->sep_ext.ext_cfgs.len; + } if ((rv = vnet_connect (a))) { diff --git a/src/vnet/session/session_sdl.c b/src/vnet/session/session_sdl.c index 9505ba1689f..f1dfac4e1ab 100644 --- a/src/vnet/session/session_sdl.c +++ b/src/vnet/session/session_sdl.c @@ -244,13 +244,17 @@ session_sdl_table_init (session_table_t *st, u8 fib_proto) session_sdl_block_t *sdlb; u8 all = fib_proto > FIB_PROTOCOL_IP6 ? 
1 : 0; char name[80]; - app_namespace_t *app_ns = app_namespace_get (st->appns_index); + u32 appns_index; + app_namespace_t *app_ns; session_rules_table_group_t *srtg; /* Don't support local table */ if (st->is_local == 1) return; + appns_index = + *vec_elt_at_index (st->appns_index, vec_len (st->appns_index) - 1); + app_ns = app_namespace_get (appns_index); srtg = srtg_instance_alloc (st, 0); srt = srtg->session_rules; sdlb = &srt->sdl_block; @@ -460,18 +464,19 @@ session_sdl_command_fn (vlib_main_t *vm, unformat_input_t *input, u32 appns_index; app_namespace_t *app_ns; u32 rmt_plen = 0, action = 0; - clib_error_t *error = 0; ip46_address_t rmt_ip; u8 conn_set = 0; u8 fib_proto = -1, is_add = 1, *ns_id = 0; - u8 *tag = 0, tag_only = 0; + u8 *tag = 0; int rv; session_rule_add_del_args_t args; - session_cli_return_if_not_enabled (); - if (session_sdl_is_enabled () == 0) - return clib_error_return (0, "session sdl engine is not enabled"); + { + vlib_cli_output (vm, "session sdl engine is not enabled"); + unformat_skip_line (input); + goto done; + } while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -499,8 +504,8 @@ session_sdl_command_fn (vlib_main_t *vm, unformat_input_t *input, ; else { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + vlib_cli_output (vm, "unknown input `%U'", format_unformat_error, + input); goto done; } } @@ -536,13 +541,6 @@ session_sdl_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } - /* Delete with only tag entered. Try v4 first and then v6 if failed */ - if ((is_add == 0) && (fib_proto == (u8) ~0)) - { - fib_proto = FIB_PROTOCOL_IP4; - tag_only = 1; - } - memset (&args, 0, sizeof (args)); args.transport_proto = TRANSPORT_PROTO_TCP; args.table_args.rmt.fp_addr = rmt_ip; @@ -555,27 +553,12 @@ session_sdl_command_fn (vlib_main_t *vm, unformat_input_t *input, args.scope = SESSION_RULE_SCOPE_GLOBAL; if ((rv = vnet_session_rule_add_del (&args))) - { - /* Try tag only delete on v6 */ - if (rv && tag_only) - { - args.table_args.rmt.fp_proto = FIB_PROTOCOL_IP6; - args.table_args.lcl.fp_proto = FIB_PROTOCOL_IP6; - if ((rv = vnet_session_rule_add_del (&args))) - { - error = clib_error_return (0, "sdl add del returned %u", rv); - } - } - else - { - error = clib_error_return (0, "sdl add del returned %u", rv); - } - } + vlib_cli_output (vm, "sdl add del returned %d", rv); done: vec_free (ns_id); vec_free (tag); - return error; + return 0; } VLIB_CLI_COMMAND (session_sdl_command, static) = { diff --git a/src/vnet/session/session_table.c b/src/vnet/session/session_table.c index 5dafe0e633c..f3ec1f90bf6 100644 --- a/src/vnet/session/session_table.c +++ b/src/vnet/session/session_table.c @@ -79,6 +79,7 @@ session_table_free (session_table_t *slt, u8 fib_proto) clib_bihash_free_48_8 (&slt->v6_half_open_hash); } + vec_free (slt->appns_index); pool_put (lookup_tables, slt); } @@ -222,7 +223,17 @@ u8 * format_session_table (u8 *s, va_list *args) { session_table_t *st = va_arg (*args, session_table_t *); + u32 appns_index, i; + s = format (s, "appns index: "); + vec_foreach_index (i, st->appns_index) + { + appns_index = *vec_elt_at_index (st->appns_index, i); + if (i > 0) + s = format (s, ", "); + s = format (s, "%d", appns_index); + } + s = format (s, "\n"); if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash)) { s = format (s, "%U", format_bihash_16_8, &st->v4_session_hash, 0); diff --git a/src/vnet/session/session_table.h b/src/vnet/session/session_table.h index aae4a1c2af5..126e849beae 100644 --- 
a/src/vnet/session/session_table.h +++ b/src/vnet/session/session_table.h @@ -42,7 +42,7 @@ typedef struct _session_lookup_table u8 is_local; /** Namespace this table belongs to */ - u32 appns_index; + u32 *appns_index; /** For global tables only one fib proto is active. This is a * byproduct of fib table ids not necessarily being the same for @@ -77,7 +77,7 @@ session_table_t *_get_session_tables (); #define session_table_foreach(VAR, BODY) \ pool_foreach (VAR, _get_session_tables ()) BODY -void session_lookup_table_cleanup (u32 fib_proto, u32 fib_index); +void session_lookup_table_cleanup (u32 fib_proto, u32 fib_index, u32 ns_index); #endif /* SRC_VNET_SESSION_SESSION_TABLE_H_ */ /* diff --git a/src/vnet/session/session_test.c b/src/vnet/session/session_test.c index e248dd7010c..14f17761e75 100644 --- a/src/vnet/session/session_test.c +++ b/src/vnet/session/session_test.c @@ -382,6 +382,11 @@ vl_api_session_sdl_v2_details_t_handler (vl_api_session_sdl_v2_details_t *mp) { } +static void +vl_api_session_sdl_v3_details_t_handler (vl_api_session_sdl_v3_details_t *mp) +{ +} + static int api_session_sdl_dump (vat_main_t *vam) { @@ -394,6 +399,24 @@ api_session_sdl_v2_dump (vat_main_t *vam) return -1; } +static int +api_session_sdl_v3_dump (vat_main_t *vam) +{ + return -1; +} + +static void +vl_api_session_rules_v2_details_t_handler ( + vl_api_session_rules_v2_details_t *mp) +{ +} + +static int +api_session_rules_v2_dump (vat_main_t *vam) +{ + return -1; +} + #include <vnet/session/session.api_test.c> /* diff --git a/src/vnet/session/session_types.h b/src/vnet/session/session_types.h index 5e650727d61..935f8f189ee 100644 --- a/src/vnet/session/session_types.h +++ b/src/vnet/session/session_types.h @@ -77,7 +77,7 @@ typedef struct _session_endpoint_cfg u8 original_tp; u64 parent_handle; session_endpoint_cfg_flags_t flags; - transport_endpt_ext_cfg_t *ext_cfg; + transport_endpt_ext_cfgs_t ext_cfgs; } session_endpoint_cfg_t; #define SESSION_IP46_ZERO \ @@ -112,7 +112,8 @@ typedef struct _session_endpoint_cfg .peer = TRANSPORT_ENDPOINT_NULL, .transport_proto = 0, \ .app_wrk_index = ENDPOINT_INVALID_INDEX, \ .opaque = ENDPOINT_INVALID_INDEX, \ - .parent_handle = SESSION_INVALID_HANDLE, .ext_cfg = 0, \ + .parent_handle = SESSION_INVALID_HANDLE, \ + .ext_cfgs = TRANSPORT_ENDPT_EXT_CFGS_NULL, \ } #define session_endpoint_to_transport(_sep) ((transport_endpoint_t *)_sep) @@ -288,7 +289,7 @@ session_get_fib_proto (session_t * s) always_inline u8 session_has_transport (session_t * s) { - return (session_get_transport_proto (s) != TRANSPORT_PROTO_NONE); + return (session_get_transport_proto (s) != TRANSPORT_PROTO_CT); } static inline transport_service_type_t diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c index 1c2a9261d3c..e8c9490decb 100644 --- a/src/vnet/session/transport.c +++ b/src/vnet/session/transport.c @@ -35,6 +35,7 @@ typedef struct transport_main_ local_endpoint_t *local_endpoints; u32 *lcl_endpts_freelist; u32 port_allocator_seed; + u16 port_alloc_max_tries; u16 port_allocator_min_src_port; u16 port_allocator_max_src_port; u8 lcl_endpts_cleanup_pending; @@ -212,14 +213,39 @@ unformat_transport_proto (unformat_input_t * input, va_list * args) u8 * format_transport_protos (u8 * s, va_list * args) { + u32 indent = format_get_indent (s) + 1; transport_proto_vft_t *tp_vft; vec_foreach (tp_vft, tp_vfts) - s = format (s, "%s\n", tp_vft->transport_options.name); + if (tp_vft->transport_options.name) + s = format (s, "%U%s\n", format_white_space, indent, + 
tp_vft->transport_options.name); return s; } +u8 * +format_transport_state (u8 *s, va_list *args) +{ + transport_main_t *tm = &tp_main; + + s = format (s, "registered protos:\n%U", format_transport_protos); + + s = format (s, "configs:\n"); + s = + format (s, " min_lcl_port: %u max_lcl_port: %u\n", + tm->port_allocator_min_src_port, tm->port_allocator_max_src_port); + + s = format (s, "state:\n"); + s = format (s, " lcl ports alloced: %u\n lcl ports freelist: %u \n", + pool_elts (tm->local_endpoints), + vec_len (tm->lcl_endpts_freelist)); + s = + format (s, " port_alloc_max_tries: %u\n lcl_endpts_cleanup_pending: %u\n", + tm->port_alloc_max_tries, tm->lcl_endpts_cleanup_pending); + return s; +} + u32 transport_endpoint_lookup (transport_endpoint_table_t * ht, u8 proto, ip46_address_t * ip, u16 port) @@ -606,7 +632,7 @@ transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr, transport_main_t *tm = &tp_main; u16 min = tm->port_allocator_min_src_port; u16 max = tm->port_allocator_max_src_port; - int tries, limit; + int tries, limit, port = -1; limit = max - min; @@ -616,8 +642,6 @@ transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr, /* Search for first free slot */ for (tries = 0; tries < limit; tries++) { - u16 port = 0; - /* Find a port in the specified range */ while (1) { @@ -630,7 +654,7 @@ transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr, } if (!transport_endpoint_mark_used (proto, lcl_addr, port)) - return port; + break; /* IP:port pair already in use, check if 6-tuple available */ if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, port, @@ -640,9 +664,26 @@ transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr, /* 6-tuple is available so increment lcl endpoint refcount */ transport_share_local_endpoint (proto, lcl_addr, port); - return port; + break; } - return -1; + + tm->port_alloc_max_tries = clib_max (tm->port_alloc_max_tries, tries); + + return port; +} + +u16 +transport_port_alloc_max_tries () +{ + transport_main_t *tm = &tp_main; + return tm->port_alloc_max_tries; +} + +void +transport_clear_stats () +{ + transport_main_t *tm = &tp_main; + tm->port_alloc_max_tries = 0; } static session_error_t diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h index e6ba1ecbc5f..289bf471af0 100644 --- a/src/vnet/session/transport.h +++ b/src/vnet/session/transport.h @@ -252,6 +252,8 @@ void transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port); int transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, u16 port); +u16 transport_port_alloc_max_tries (); +void transport_clear_stats (); void transport_enable_disable (vlib_main_t * vm, u8 is_en); void transport_init (void); diff --git a/src/vnet/session/transport_types.h b/src/vnet/session/transport_types.h index b3469fa9fdb..4a2f861814f 100644 --- a/src/vnet/session/transport_types.h +++ b/src/vnet/session/transport_types.h @@ -171,7 +171,7 @@ STATIC_ASSERT (sizeof (transport_connection_t) <= 128, #define foreach_transport_proto \ _ (TCP, "tcp", "T") \ _ (UDP, "udp", "U") \ - _ (NONE, "ct", "C") \ + _ (CT, "ct", "C") \ _ (TLS, "tls", "J") \ _ (QUIC, "quic", "Q") \ _ (DTLS, "dtls", "D") \ @@ -185,6 +185,8 @@ typedef enum _transport_proto #undef _ } transport_proto_t; +#define TRANSPORT_PROTO_NONE TRANSPORT_PROTO_CT + u8 *format_transport_proto (u8 * s, va_list * args); u8 *format_transport_proto_short (u8 * s, va_list * args); u8 *format_transport_flags (u8 *s, va_list *args); @@ -194,6 +196,7 @@ u8 
*format_transport_half_open_connection (u8 * s, va_list * args); uword unformat_transport_proto (unformat_input_t * input, va_list * args); u8 *format_transport_protos (u8 * s, va_list * args); +u8 *format_transport_state (u8 *s, va_list *args); #define foreach_transport_endpoint_fields \ _(ip46_address_t, ip) /**< ip address in net order */ \ @@ -257,7 +260,8 @@ typedef enum transport_endpt_attr_flag_ _ (u64, next_output_node, NEXT_OUTPUT_NODE) \ _ (u16, mss, MSS) \ _ (u8, flags, FLAGS) \ - _ (u8, cc_algo, CC_ALGO) + _ (u8, cc_algo, CC_ALGO) \ + _ (transport_endpoint_t, ext_endpt, EXT_ENDPT) typedef enum transport_endpt_attr_type_ { @@ -281,6 +285,7 @@ typedef enum transport_endpt_ext_cfg_type_ { TRANSPORT_ENDPT_EXT_CFG_NONE, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + TRANSPORT_ENDPT_EXT_CFG_HTTP, } transport_endpt_ext_cfg_type_t; typedef struct transport_endpt_crypto_cfg_ @@ -297,10 +302,27 @@ typedef struct transport_endpt_ext_cfg_ union { transport_endpt_crypto_cfg_t crypto; + u32 opaque; /**< For general use */ u8 data[0]; }; } transport_endpt_ext_cfg_t; +#define TRANSPORT_ENDPT_EXT_CFG_HEADER_SIZE 4 + +typedef struct transport_endpt_ext_cfgs_ +{ + u32 len; /**< length of config data chunk */ + u32 tail_offset; /**< current tail in config data chunk */ + u8 *data; /**< start of config data chunk */ +} transport_endpt_ext_cfgs_t; + +#define TRANSPORT_ENDPT_EXT_CFGS_CHUNK_SIZE 512 + +#define TRANSPORT_ENDPT_EXT_CFGS_NULL \ + { \ + .len = 0, .tail_offset = 0, .data = 0, \ + } + typedef clib_bihash_24_8_t transport_endpoint_table_t; #define ENDPOINT_INVALID_INDEX ((u32)~0) diff --git a/src/vnet/srv6/sr.h b/src/vnet/srv6/sr.h index c2867eb7508..40edbbaf437 100644 --- a/src/vnet/srv6/sr.h +++ b/src/vnet/srv6/sr.h @@ -43,9 +43,22 @@ #define SR_BEHAVIOR_DX4 7 #define SR_BEHAVIOR_DT6 8 #define SR_BEHAVIOR_DT4 9 -#define SR_BEHAVIOR_END_UN_PERF 10 -#define SR_BEHAVIOR_END_UN 11 -#define SR_BEHAVIOR_LAST 12 /* Must always be the last one */ +/** + * SR_BEHAVIOR_LAST + * Not used anymore. Kept not to break the API. + * We use SR_BEHAVIOR_CURRENT_LAST going forward + * */ +#define SR_BEHAVIOR_LAST 10 +#define SR_BEHAVIOR_END_UN_PERF 11 +#define SR_BEHAVIOR_END_UN 12 +#define SR_BEHAVIOR_UA 13 + +/** + * SR_BEHAVIOR_CURRENT_LAST + * MUST be updated everytime we add new behaviors. + * MUST be set to value of last added behavior + 1. + * */ +#define SR_BEHAVIOR_CURRENT_LAST 14 #define SR_STEER_L2 2 #define SR_STEER_IPV4 4 @@ -164,7 +177,8 @@ typedef int (sr_plugin_callback_t) (ip6_sr_localsid_t * localsid); */ typedef struct { - u16 sr_localsid_function_number; /**< SR LocalSID plugin function (>SR_BEHAVIOR_LAST) */ + u16 sr_localsid_function_number; /**< SR LocalSID plugin function + (>SR_BEHAVIOR_CURRENT_LAST) */ u8 *function_name; /**< Function name. (key). 
*/ diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c index a44c3098112..e546e1db0e7 100644 --- a/src/vnet/srv6/sr_api.c +++ b/src/vnet/srv6/sr_api.c @@ -39,6 +39,8 @@ static void vl_api_sr_localsid_add_del_t_handler { vl_api_sr_localsid_add_del_reply_t *rmp; int rv = 0; + int usid_len = 0; + u16 localsid_prefix_len = 128; ip46_address_t prefix; ip6_address_t localsid; /* @@ -46,21 +48,31 @@ static void vl_api_sr_localsid_add_del_t_handler * char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, u32 fib_table, * ip46_address_t *nh_addr, void *ls_plugin_mem) */ - if (mp->behavior == SR_BEHAVIOR_X || - mp->behavior == SR_BEHAVIOR_DX6 || - mp->behavior == SR_BEHAVIOR_DX4 || mp->behavior == SR_BEHAVIOR_DX2) + if (mp->behavior == SR_BEHAVIOR_X || mp->behavior == SR_BEHAVIOR_UA || + mp->behavior == SR_BEHAVIOR_DX6 || mp->behavior == SR_BEHAVIOR_DX4 || + mp->behavior == SR_BEHAVIOR_DX2) VALIDATE_SW_IF_INDEX (mp); + if (mp->behavior == SR_BEHAVIOR_END_UN_PERF || + mp->behavior == SR_BEHAVIOR_END_UN) + { + usid_len = 16; + localsid_prefix_len = 48; + } + + if (mp->behavior == SR_BEHAVIOR_UA) + { + usid_len = 16; + localsid_prefix_len = 64; + } + ip6_address_decode (mp->localsid, &localsid); ip_address_decode (&mp->nh_addr, &prefix); - rv = sr_cli_localsid (mp->is_del, - &localsid, 128, - mp->end_psp, - mp->behavior, - ntohl (mp->sw_if_index), - ntohl (mp->vlan_index), - ntohl (mp->fib_table), &prefix, 0, NULL); + rv = sr_cli_localsid (mp->is_del, &localsid, localsid_prefix_len, + mp->end_psp, mp->behavior, ntohl (mp->sw_if_index), + ntohl (mp->vlan_index), ntohl (mp->fib_table), &prefix, + usid_len, NULL); BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_SR_LOCALSID_ADD_DEL_REPLY); diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c index 62b1a271576..2172fa10ef1 100644 --- a/src/vnet/srv6/sr_localsid.c +++ b/src/vnet/srv6/sr_localsid.c @@ -100,10 +100,10 @@ sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, { /* Retrieve localsid */ ls = pool_elt_at_index (sm->localsids, p[0]); - if (ls->behavior >= SR_BEHAVIOR_LAST) + if (ls->behavior >= SR_BEHAVIOR_CURRENT_LAST) { - plugin = pool_elt_at_index (sm->plugin_functions, - ls->behavior - SR_BEHAVIOR_LAST); + plugin = pool_elt_at_index ( + sm->plugin_functions, ls->behavior - SR_BEHAVIOR_CURRENT_LAST); pref_length = plugin->prefix_length; } @@ -130,7 +130,7 @@ sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, || ls->behavior == SR_BEHAVIOR_DX4) adj_unlock (ls->nh_adj); - if (ls->behavior >= SR_BEHAVIOR_LAST) + if (ls->behavior >= SR_BEHAVIOR_CURRENT_LAST) { /* Callback plugin removal function */ rv = plugin->removal (ls); @@ -149,13 +149,13 @@ sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, if (is_del) return -2; - if (behavior >= SR_BEHAVIOR_LAST) - { - sr_localsid_fn_registration_t *plugin = 0; - plugin = - pool_elt_at_index (sm->plugin_functions, behavior - SR_BEHAVIOR_LAST); - pref_length = plugin->prefix_length; - } + if (behavior >= SR_BEHAVIOR_CURRENT_LAST) + { + sr_localsid_fn_registration_t *plugin = 0; + plugin = pool_elt_at_index (sm->plugin_functions, + behavior - SR_BEHAVIOR_CURRENT_LAST); + pref_length = plugin->prefix_length; + } if (localsid_prefix_len != 0) { @@ -213,6 +213,23 @@ sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, ls->usid_next_len = 16 - ls->usid_next_index; } break; + case SR_BEHAVIOR_UA: + if (usid_len) + { + int usid_width; + clib_memcpy (&ls->usid_block, localsid_addr, sizeof (ip6_address_t)); + + usid_width = pref_length - usid_len; + 
ip6_address_mask_from_width (&ls->usid_block_mask, usid_width); + + ls->usid_index = usid_width / 8; + ls->usid_len = usid_len / 8; + ls->usid_next_index = ls->usid_index + ls->usid_len; + ls->usid_next_len = 16 - ls->usid_next_index; + } + ls->sw_if_index = sw_if_index; + clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t)); + break; case SR_BEHAVIOR_X: ls->sw_if_index = sw_if_index; clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t)); @@ -241,13 +258,14 @@ sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, } /* Figure out the adjacency magic for Xconnect variants */ - if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX4 - || ls->behavior == SR_BEHAVIOR_DX6) + if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_UA || + ls->behavior == SR_BEHAVIOR_DX4 || ls->behavior == SR_BEHAVIOR_DX6) { adj_index_t nh_adj_index = ADJ_INDEX_INVALID; /* Retrieve the adjacency corresponding to the (OIF, next_hop) */ - if (ls->behavior == SR_BEHAVIOR_DX6 || ls->behavior == SR_BEHAVIOR_X) + if (ls->behavior == SR_BEHAVIOR_DX6 || ls->behavior == SR_BEHAVIOR_UA || + ls->behavior == SR_BEHAVIOR_X) nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, nh_addr, sw_if_index); @@ -272,17 +290,18 @@ sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, else if (ls->behavior == SR_BEHAVIOR_END_UN) dpo_set (&dpo, sr_localsid_un_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); - else if (ls->behavior == SR_BEHAVIOR_END_UN_PERF) + else if (ls->behavior == SR_BEHAVIOR_END_UN_PERF || + ls->behavior == SR_BEHAVIOR_UA) dpo_set (&dpo, sr_localsid_un_perf_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); - else if (ls->behavior > SR_BEHAVIOR_D_FIRST - && ls->behavior < SR_BEHAVIOR_LAST) + else if (ls->behavior > SR_BEHAVIOR_D_FIRST && + ls->behavior < SR_BEHAVIOR_CURRENT_LAST) dpo_set (&dpo, sr_localsid_d_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); - else if (ls->behavior >= SR_BEHAVIOR_LAST) + else if (ls->behavior >= SR_BEHAVIOR_CURRENT_LAST) { sr_localsid_fn_registration_t *plugin = 0; plugin = pool_elt_at_index (sm->plugin_functions, - ls->behavior - SR_BEHAVIOR_LAST); + ls->behavior - SR_BEHAVIOR_CURRENT_LAST); /* Copy the unformat memory result */ ls->plugin_mem = ls_plugin_mem; /* Callback plugin creation function */ @@ -389,6 +408,11 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, behavior = SR_BEHAVIOR_END_UN_PERF; else if (unformat (input, "un.flex %u", &usid_size)) behavior = SR_BEHAVIOR_END_UN; + else if (unformat (input, "ua %u %U %U", &usid_size, + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip6_address, &next_hop.ip6)) + behavior = SR_BEHAVIOR_UA; + else { /* Loop over all the plugin behavior format functions */ @@ -463,7 +487,7 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, behavior, sw_if_index, vlan_index, fib_index, &next_hop, usid_size, ls_plugin_mem); - if (behavior == SR_BEHAVIOR_END_UN_PERF) + if (behavior == SR_BEHAVIOR_END_UN_PERF || behavior == SR_BEHAVIOR_UA) { if (rv == 0) { @@ -507,17 +531,19 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, VLIB_CLI_COMMAND (sr_localsid_command, static) = { .path = "sr localsid", .short_help = "sr localsid (del) address XX:XX::YY:YY" - "(fib-table 8) behavior STRING", + "(fib-table 8) behavior STRING", .long_help = "Create SR LocalSID and binds it to a particular behavior\n" "Arguments:\n" "\tlocalSID IPv6_addr(128b) LocalSID IPv6 address\n" - "\t(fib-table X) Optional. 
VRF where to install SRv6 localsid\n" + "\t(fib-table X) Optional. VRF where to install SRv6 " + "localsid\n" "\tbehavior STRING Specifies the behavior\n" "\n\tBehaviors:\n" "\tEnd\t-> Endpoint.\n" "\tEnd.uN\t-> Endpoint with uSID.\n" - "\tEnd.X\t-> Endpoint with decapsulation and Layer-3 cross-connect.\n" + "\tuA\t-> Endpoint with uSID and Layer-3 cross-connect.\n" + "\tEnd.X\t-> Endpoint with Layer-3 cross-connect.\n" "\t\tParameters: '<iface> <ip6_next_hop>'\n" "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n" "\t\tParameters: '<iface>'\n" @@ -525,9 +551,11 @@ VLIB_CLI_COMMAND (sr_localsid_command, static) = { "\t\tParameters: '<iface> <ip6_next_hop>'\n" "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n" "\t\tParameters: '<iface> <ip4_next_hop>'\n" - "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n" + "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table " + "lookup.\n" "\t\tParameters: '<ip6_fib_table>'\n" - "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n" + "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table " + "lookup.\n" "\t\tParameters: '<ip4_fib_table>'\n", .function = sr_cli_localsid_command_fn, }; @@ -554,22 +582,30 @@ show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, switch (ls->behavior) { case SR_BEHAVIOR_END: - vlib_cli_output (vm, "\tAddress: \t%U\n\tBehavior: \tEnd", - format_ip6_address, &ls->localsid); + vlib_cli_output (vm, "\tAddress: \t%U/%u\n\tBehavior: \tEnd", + format_ip6_address, &ls->localsid, + ls->localsid_prefix_len); break; case SR_BEHAVIOR_END_UN: - vlib_cli_output (vm, - "\tAddress: \t%U\n\tBehavior: \tEnd (flex) [uSID:\t%U/%d, length: %d]", + vlib_cli_output (vm, "\tAddress: \t%U/%u\n\tBehavior: \tuN (flex)", format_ip6_address, &ls->localsid, - format_ip6_address, &ls->usid_block, - ls->usid_index * 8, ls->usid_len * 8); + ls->localsid_prefix_len); break; case SR_BEHAVIOR_END_UN_PERF: + vlib_cli_output ( + vm, "\tAddress: \t%U/%u\n\tBehavior: \tuN [End with uSID]", + format_ip6_address, &ls->localsid, ls->localsid_prefix_len, + ls->usid_len * 8); + break; + case SR_BEHAVIOR_UA: vlib_cli_output (vm, - "\tAddress: \t%U\n\tBehavior: \tEnd [uSID:\t%U/%d, length: %d]", + "\tAddress: \t%U/%u\n\tBehavior: \tuA [End with " + "uSID and Layer-3 cross-connect]" + "\n\tIface: \t%U\n\tNext hop: \t%U", format_ip6_address, &ls->localsid, - format_ip6_address, &ls->usid_block, - ls->usid_index * 8, ls->usid_len * 8); + ls->localsid_prefix_len, + format_vnet_sw_if_index_name, vnm, ls->sw_if_index, + format_ip6_address, &ls->next_hop.ip6); break; case SR_BEHAVIOR_X: vlib_cli_output (vm, @@ -636,11 +672,10 @@ show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, FIB_PROTOCOL_IP4)); break; default: - if (ls->behavior >= SR_BEHAVIOR_LAST) + if (ls->behavior >= SR_BEHAVIOR_CURRENT_LAST) { - sr_localsid_fn_registration_t *plugin = - pool_elt_at_index (sm->plugin_functions, - ls->behavior - SR_BEHAVIOR_LAST); + sr_localsid_fn_registration_t *plugin = pool_elt_at_index ( + sm->plugin_functions, ls->behavior - SR_BEHAVIOR_CURRENT_LAST); vlib_cli_output (vm, "\tAddress: \t%U/%u\n" "\tBehavior: \t%s (%s)\n\t%U", @@ -781,6 +816,9 @@ format_sr_localsid_trace (u8 * s, va_list * args) case SR_BEHAVIOR_DX4: s = format (s, "\tBehavior: Decapsulation with IPv4 L3 xconnect\n"); break; + case SR_BEHAVIOR_UA: + s = format (s, "\tBehavior: uSID and IPv6 L3 xconnect\n"); + break; case SR_BEHAVIOR_X: s = format (s, "\tBehavior: 
IPv6 L3 xconnect\n"); break; @@ -1031,7 +1069,8 @@ end_un_srh_processing (vlib_node_runtime_t * node, } static_always_inline void -end_un_processing (ip6_header_t * ip0, ip6_sr_localsid_t * ls0) +end_un_processing (vlib_node_runtime_t *node, vlib_buffer_t *b0, + ip6_header_t *ip0, ip6_sr_localsid_t *ls0, u32 *next0) { u8 next_usid_index; u8 index; @@ -1052,6 +1091,11 @@ end_un_processing (ip6_header_t * ip0, ip6_sr_localsid_t * ls0) { ip0->dst_address.as_u8[index] = 0; } + if (ls0->behavior == SR_BEHAVIOR_UA) + { + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; + } return; } @@ -2141,10 +2185,10 @@ sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node, pool_elt_at_index (sm->localsids, vnet_buffer (b3)->ip.adj_index[VLIB_TX]); - end_un_processing (ip0, ls0); - end_un_processing (ip1, ls1); - end_un_processing (ip2, ls2); - end_un_processing (ip3, ls3); + end_un_processing (node, b0, ip0, ls0, &next0); + end_un_processing (node, b1, ip1, ls1, &next1); + end_un_processing (node, b2, ip2, ls2, &next2); + end_un_processing (node, b3, ip3, ls3, &next3); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -2232,7 +2276,7 @@ sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b0)->ip.adj_index[VLIB_TX]); /* SRH processing */ - end_un_processing (ip0, ls0); + end_un_processing (node, b0, ip0, ls0, &next0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -2359,7 +2403,7 @@ sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name, clib_memset (plugin, 0, sizeof (*plugin)); plugin->sr_localsid_function_number = (plugin - sm->plugin_functions); - plugin->sr_localsid_function_number += SR_BEHAVIOR_LAST; + plugin->sr_localsid_function_number += SR_BEHAVIOR_CURRENT_LAST; plugin->prefix_length = prefix_length; plugin->ls_format = ls_format; plugin->ls_unformat = ls_unformat; @@ -2394,22 +2438,28 @@ show_sr_localsid_behaviors_command_fn (vlib_main_t * vm, { vec_add1 (plugins_vec, plugin); } /* Print static behaviors */ - vlib_cli_output (vm, "Default behaviors:\n" - "\tEnd\t-> Endpoint.\n" - "\tEnd.X\t-> Endpoint with Layer-3 cross-connect.\n" - "\t\tParameters: '<iface> <ip6_next_hop>'\n" - "\tEnd.T\t-> Endpoint with specific IPv6 table lookup.\n" - "\t\tParameters: '<fib_table>'\n" - "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n" - "\t\tParameters: '<iface>'\n" - "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n" - "\t\tParameters: '<iface> <ip6_next_hop>'\n" - "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n" - "\t\tParameters: '<iface> <ip4_next_hop>'\n" - "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n" - "\t\tParameters: '<ip6_fib_table>'\n" - "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n" - "\t\tParameters: '<ip4_fib_table>'\n"); + vlib_cli_output ( + vm, + "Default behaviors:\n" + "\tEnd\t-> Endpoint.\n" + "\tEnd.X\t-> Endpoint with Layer-3 cross-connect.\n" + "\tuN\t-> Endpoint with uSID.\n" + "\tuA\t-> Endpoint with uSID and Layer-3 cross-connect.\n" + "\t\tParameters: '<iface> <ip6_next_hop>'\n" + "\tEnd.T\t-> Endpoint with specific IPv6 table lookup.\n" + "\t\tParameters: '<fib_table>'\n" + "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n" + "\t\tParameters: '<iface>'\n" + "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n" + "\t\tParameters: '<iface> <ip6_next_hop>'\n" + "\tEnd.DX4\t-> Endpoint with decapsulation 
and IPv4 cross-connect.\n" + "\t\tParameters: '<iface> <ip4_next_hop>'\n" + "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table " + "lookup.\n" + "\t\tParameters: '<ip6_fib_table>'\n" + "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table " + "lookup.\n" + "\t\tParameters: '<ip4_fib_table>'\n"); vlib_cli_output (vm, "Plugin behaviors:\n"); for (i = 0; i < vec_len (plugins_vec); i++) { diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c index 0aa88cc273e..a9114628f95 100644 --- a/src/vnet/srv6/sr_policy_rewrite.c +++ b/src/vnet/srv6/sr_policy_rewrite.c @@ -418,7 +418,7 @@ create_sl (ip6_sr_policy_t *sr_policy, ip6_address_t *sl, { plugin = pool_elt_at_index (sm->policy_plugin_functions, - sr_policy->plugin - SR_BEHAVIOR_LAST); + sr_policy->plugin - SR_BEHAVIOR_CURRENT_LAST); segment_list->plugin = sr_policy->plugin; segment_list->plugin_mem = sr_policy->plugin_mem; @@ -828,7 +828,7 @@ sr_policy_del (ip6_address_t * bsid, u32 index) plugin = pool_elt_at_index (sm->policy_plugin_functions, - sr_policy->plugin - SR_BEHAVIOR_LAST); + sr_policy->plugin - SR_BEHAVIOR_CURRENT_LAST); plugin->removal (sr_policy); sr_policy->plugin = 0; @@ -3499,7 +3499,7 @@ sr_policy_register_function (vlib_main_t * vm, u8 * fn_name, clib_memset (plugin, 0, sizeof (*plugin)); plugin->sr_policy_function_number = (plugin - sm->policy_plugin_functions); - plugin->sr_policy_function_number += SR_BEHAVIOR_LAST; + plugin->sr_policy_function_number += SR_BEHAVIOR_CURRENT_LAST; plugin->prefix_length = prefix_length; plugin->ls_format = ls_format; plugin->ls_unformat = ls_unformat; diff --git a/src/vnet/srv6/sr_types.api b/src/vnet/srv6/sr_types.api index 967eab0bd5a..7bc22c1a0f4 100644 --- a/src/vnet/srv6/sr_types.api +++ b/src/vnet/srv6/sr_types.api @@ -35,7 +35,10 @@ enum sr_behavior : u8 SR_BEHAVIOR_API_DX4 = 7, SR_BEHAVIOR_API_DT6 = 8, SR_BEHAVIOR_API_DT4 = 9, - SR_BEHAVIOR_API_LAST = 10, /* Must always be the last one */ + SR_BEHAVIOR_API_LAST = 10, /* Not used. 
Kept not to break the API */ + SR_BEHAVIOR_API_END_UN_PERF = 11 [backwards_compatible], + SR_BEHAVIOR_API_END_UN = 12 [backwards_compatible], + SR_BEHAVIOR_API_UA = 13 [backwards_compatible], }; enum sr_steer : u8 diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 70b5d28e0cc..cd3e4b7700c 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -2551,7 +2551,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node, tcp_connection_t *tc; tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index, thread_index); - if (tc->state != TCP_STATE_TIME_WAIT) + if (!tc || tc->state != TCP_STATE_TIME_WAIT) { tcp_inc_counter (listen, TCP_ERROR_CREATE_EXISTS, 1); goto done; diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c index 8754b67ca31..12dcbb449e8 100644 --- a/src/vnet/tls/tls.c +++ b/src/vnet/tls/tls.c @@ -628,16 +628,18 @@ tls_connect (transport_endpoint_cfg_t * tep) application_t *app; tls_ctx_t *ctx; u32 ctx_index; + transport_endpt_ext_cfg_t *ext_cfg; int rv; sep = (session_endpoint_cfg_t *) tep; - if (!sep->ext_cfg) + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + if (!ext_cfg) return SESSION_E_NOEXTCFG; app_wrk = app_worker_get (sep->app_wrk_index); app = application_get (app_wrk->app_index); - ccfg = &sep->ext_cfg->crypto; + ccfg = &ext_cfg->crypto; engine_type = tls_get_engine_type (ccfg->crypto_engine, app->tls_engine); if (engine_type == CRYPTO_ENGINE_NONE) { @@ -709,16 +711,18 @@ tls_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) app_listener_t *al; tls_ctx_t *lctx; u32 lctx_index; + transport_endpt_ext_cfg_t *ext_cfg; int rv; sep = (session_endpoint_cfg_t *) tep; - if (!sep->ext_cfg) + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + if (!ext_cfg) return SESSION_E_NOEXTCFG; app_wrk = app_worker_get (sep->app_wrk_index); app = application_get (app_wrk->app_index); - ccfg = &sep->ext_cfg->crypto; + ccfg = &ext_cfg->crypto; engine_type = tls_get_engine_type (ccfg->crypto_engine, app->tls_engine); if (engine_type == CRYPTO_ENGINE_NONE) { @@ -1115,16 +1119,18 @@ dtls_connect (transport_endpoint_cfg_t *tep) application_t *app; tls_ctx_t *ctx; u32 ctx_handle; + transport_endpt_ext_cfg_t *ext_cfg; int rv; sep = (session_endpoint_cfg_t *) tep; - if (!sep->ext_cfg) + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + if (!ext_cfg) return -1; app_wrk = app_worker_get (sep->app_wrk_index); app = application_get (app_wrk->app_index); - ccfg = &sep->ext_cfg->crypto; + ccfg = &ext_cfg->crypto; engine_type = tls_get_engine_type (ccfg->crypto_engine, app->tls_engine); if (engine_type == CRYPTO_ENGINE_NONE) { diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c index 8deeb9e41ee..1fc055f8d50 100644 --- a/src/vnet/udp/udp.c +++ b/src/vnet/udp/udp.c @@ -467,16 +467,7 @@ udp_open_connection (transport_endpoint_cfg_t * rmt) uc->mss = rmt->mss ? 
rmt->mss : udp_default_mtu (um, uc->c_is_ip4); if (rmt->peer.sw_if_index != ENDPOINT_INVALID_INDEX) uc->sw_if_index = rmt->peer.sw_if_index; - uc->flags |= UDP_CONN_F_OWNS_PORT; - if (rmt->transport_flags & TRANSPORT_CFG_F_CONNECTED) - { - uc->flags |= UDP_CONN_F_CONNECTED; - } - else - { - clib_spinlock_init (&uc->rx_lock); - uc->c_flags |= TRANSPORT_CONNECTION_F_CLESS; - } + uc->flags |= UDP_CONN_F_OWNS_PORT | UDP_CONN_F_CONNECTED; if (!um->csum_offload) uc->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD; uc->next_node_index = rmt->next_node_index; diff --git a/src/vnet/udp/udp_local.h b/src/vnet/udp/udp_local.h index 16286824ef2..06c7b3f1758 100644 --- a/src/vnet/udp/udp_local.h +++ b/src/vnet/udp/udp_local.h @@ -18,42 +18,43 @@ #include <vnet/vnet.h> -#define foreach_udp4_dst_port \ -_ (53, dns) \ -_ (67, dhcp_to_server) \ -_ (68, dhcp_to_client) \ -_ (500, ikev2) \ -_ (2152, GTPU) \ -_ (3784, bfd4) \ -_ (3785, bfd_echo4) \ -_ (4341, lisp_gpe) \ -_ (4342, lisp_cp) \ -_ (4500, ipsec) \ -_ (4739, ipfix) \ -_ (4789, vxlan) \ -_ (4789, vxlan6) \ -_ (48879, vxlan_gbp) \ -_ (4790, VXLAN_GPE) \ -_ (6633, vpath_3) \ -_ (6081, geneve) \ -_ (53053, dns_reply) +#define foreach_udp4_dst_port \ + _ (53, dns) \ + _ (67, dhcp_to_server) \ + _ (68, dhcp_to_client) \ + _ (500, ikev2) \ + _ (2152, GTPU) \ + _ (3784, bfd4) \ + _ (3785, bfd_echo4) \ + _ (4341, lisp_gpe) \ + _ (4342, lisp_cp) \ + _ (4500, ipsec) \ + _ (4739, ipfix) \ + _ (4784, bfd4_mh) \ + _ (4789, vxlan) \ + _ (4789, vxlan6) \ + _ (48879, vxlan_gbp) \ + _ (4790, VXLAN_GPE) \ + _ (6633, vpath_3) \ + _ (6081, geneve) \ + _ (53053, dns_reply) - -#define foreach_udp6_dst_port \ -_ (53, dns6) \ -_ (547, dhcpv6_to_server) \ -_ (546, dhcpv6_to_client) \ -_ (2152, GTPU6) \ -_ (3784, bfd6) \ -_ (3785, bfd_echo6) \ -_ (4341, lisp_gpe6) \ -_ (4342, lisp_cp6) \ -_ (48879, vxlan6_gbp) \ -_ (4790, VXLAN6_GPE) \ -_ (6633, vpath6_3) \ -_ (6081, geneve6) \ -_ (8138, BIER) \ -_ (53053, dns_reply6) +#define foreach_udp6_dst_port \ + _ (53, dns6) \ + _ (547, dhcpv6_to_server) \ + _ (546, dhcpv6_to_client) \ + _ (2152, GTPU6) \ + _ (3784, bfd6) \ + _ (3785, bfd_echo6) \ + _ (4341, lisp_gpe6) \ + _ (4342, lisp_cp6) \ + _ (48879, vxlan6_gbp) \ + _ (4784, bfd6_mh) \ + _ (4790, VXLAN6_GPE) \ + _ (6633, vpath6_3) \ + _ (6081, geneve6) \ + _ (8138, BIER) \ + _ (53053, dns_reply6) typedef enum { diff --git a/src/vpp-api/vapi/vapi.c b/src/vpp-api/vapi/vapi.c index 26c5708342f..61801fc3726 100644 --- a/src/vpp-api/vapi/vapi.c +++ b/src/vpp-api/vapi/vapi.c @@ -927,6 +927,7 @@ vapi_sock_client_disconnect (vapi_ctx_t ctx) } clib_socket_close (&ctx->client_socket); + clib_socket_free (&ctx->client_socket); vapi_api_name_and_crc_free (ctx); return VAPI_OK; } @@ -987,7 +988,7 @@ vapi_connect_ex (vapi_ctx_t ctx, const char *name, const char *path, if (use_uds) { - if (vapi_sock_client_connect (ctx, (char *) path, name) < 0) + if (vapi_sock_client_connect (ctx, (char *) path, name) != VAPI_OK) { return VAPI_ECON_FAIL; } @@ -1358,6 +1359,7 @@ vapi_sock_disconnect (vapi_ctx_t ctx) } fail: clib_socket_close (&ctx->client_socket); + clib_socket_free (&ctx->client_socket); vapi_api_name_and_crc_free (ctx); ctx->connected = false; diff --git a/src/vpp-api/vapi/vapi_json_parser.py b/src/vpp-api/vapi/vapi_json_parser.py index c06cb8cf77b..0fd0663dcab 100644 --- a/src/vpp-api/vapi/vapi_json_parser.py +++ b/src/vpp-api/vapi/vapi_json_parser.py @@ -26,25 +26,11 @@ class Field(object): def __str__(self): if self.len is None: - return "Field(name: %s, type: %s)" % (self.name, self.type) - elif 
type(self.len) == dict: - return "Field(name: %s, type: %s, length: %s)" % ( - self.name, - self.type, - self.len, - ) + return f"Field(name: {self.name}, type: {self.type})" elif self.len > 0: - return "Field(name: %s, type: %s, length: %s)" % ( - self.name, - self.type, - self.len, - ) + return "Field(name: {self.name}, type: {self.type}, length: {self.len})" else: - return "Field(name: %s, type: %s, variable length stored in: %s)" % ( - self.name, - self.type, - self.nelem_field, - ) + return "Field(name: {self.name}, type: {self.type}, VLA length in: {self.nelem_field})" def is_vla(self): return self.nelem_field is not None @@ -282,6 +268,22 @@ class StructType(Type, Struct): "While parsing type `%s': array `%s' has " "variable length" % (name, field[1]) ) + elif type(field[2]) is dict: + # the concept of default values is broken beyond repair: + # + # if following is allowed: + # typedef feature1 { u32 table_id[default=0xffffffff]; } + # typedef feature2 { u32 hash_buckets[default=1024]; } + # union here_we_go { vl_api_feature1_t this; vl_api_feature2_t that; }; + # + # what does it mean to set the defaults for instance of here_we_go? + # + # because of that, we parse it here, but don't do anything about it... + if len(field[2]) != 1 or "default" not in field[2]: + raise ParseError( + f"Don't know how to parse field `{field}' of type definition for type `{t}'" + ) + p = field_class(field_name=field[1], field_type=field_type) else: p = field_class( field_name=field[1], field_type=field_type, array_len=field[2] diff --git a/src/vppinfra/devicetree.c b/src/vppinfra/devicetree.c index b725d5a0aed..df5a24f198e 100644 --- a/src/vppinfra/devicetree.c +++ b/src/vppinfra/devicetree.c @@ -192,9 +192,16 @@ done: } #endif -clib_dt_node_t * -clib_dt_get_child_node (clib_dt_node_t *n, char *name) +__clib_export clib_dt_node_t * +clib_dt_get_child_node (clib_dt_node_t *n, char *fmt, ...) 
{ + u8 *s; + va_list va; + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + vec_add1 (s, 0); + vec_foreach_pointer (cn, n->child_nodes) { u8 *p = cn->path + vec_len (cn->path) - 1; @@ -206,15 +213,17 @@ clib_dt_get_child_node (clib_dt_node_t *n, char *name) if (p[-1] != '/') continue; - while (p[i] == name[i] && name[i] != 0) + while (p[i] == s[i] && s[i] != 0) i++; - if (name[i] != 0) + if (s[i] != 0) continue; + vec_free (s); return cn; } + vec_free (s); return 0; } diff --git a/src/vppinfra/devicetree.h b/src/vppinfra/devicetree.h index 21c2e0f7006..db7d8411a11 100644 --- a/src/vppinfra/devicetree.h +++ b/src/vppinfra/devicetree.h @@ -42,6 +42,8 @@ typedef struct clib_dt_main uword *node_by_phandle; } clib_dt_main_t; +__clib_export clib_dt_node_t *clib_dt_get_child_node (clib_dt_node_t *n, + char *fmt, ...); clib_dt_node_t *clib_dt_get_node_with_path (clib_dt_main_t *dm, char *fmt, ...); clib_dt_property_t *clib_dt_get_node_property_by_name (clib_dt_node_t *, @@ -56,7 +58,7 @@ format_function_t format_clib_dt_desc; format_function_t format_clib_dt_property_data; static_always_inline int -clib_dt_proprerty_is_u32 (clib_dt_property_t *p) +clib_dt_property_is_u32 (clib_dt_property_t *p) { if (p == 0 || p->size != 4) return 0; @@ -64,9 +66,53 @@ clib_dt_proprerty_is_u32 (clib_dt_property_t *p) } static_always_inline u32 -clib_dt_proprerty_get_u32 (clib_dt_property_t *p) +clib_dt_property_get_u32 (clib_dt_property_t *p) { return clib_net_to_host_u32 (*(u32u *) p->data); } +static_always_inline char * +clib_dt_property_get_string (clib_dt_property_t *p) +{ + return (char *) p->data; +} + +static_always_inline clib_dt_node_t * +clib_dt_get_root_node (clib_dt_node_t *n) +{ + return n->dt_main->root; +} + +static_always_inline clib_dt_node_t * +foreach_clib_dt_tree_node_helper (clib_dt_node_t *first, clib_dt_node_t **prev, + clib_dt_node_t *n) +{ + clib_dt_node_t *next; + +again: + if ((!*prev || (*prev)->parent != n) && vec_len (n->child_nodes) > 0) + next = n->child_nodes[0]; + else if (n->next) + next = n->next; + else + { + next = n->parent; + *prev = n; + n = next; + if (n == first) + return 0; + goto again; + } + + *prev = n; + return next == first ? 0 : next; +} + +#define foreach_clib_dt_child_node(_cn, _n) \ + vec_foreach_pointer (_cn, (_n)->child_nodes) + +#define foreach_clib_dt_tree_node(_n, _first) \ + for (clib_dt_node_t *__last = 0, *(_n) = _first; _n; \ + _n = foreach_clib_dt_tree_node_helper (_first, &__last, _n)) + #endif /* CLIB_DEVICETREE_H_ */ diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c index 17b4412e6c9..651ea107b4d 100644 --- a/src/vppinfra/linux/mem.c +++ b/src/vppinfra/linux/mem.c @@ -515,7 +515,8 @@ __clib_export void clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size, uword n_pages, clib_mem_page_stats_t * stats) { - int i, *status = 0; + int *status = 0; + uword i; void **ptr = 0; unsigned char incore; diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h index ab9c5da30ec..6211bb51f0a 100644 --- a/src/vppinfra/mem.h +++ b/src/vppinfra/mem.h @@ -302,7 +302,7 @@ void clib_mem_exit (void); typedef struct { /* Address of callers: outer first, inner last. */ - uword callers[12]; + void *callers[12]; /* Count of allocations with this traceback. 
*/ u32 n_allocations; diff --git a/src/vppinfra/mem_dlmalloc.c b/src/vppinfra/mem_dlmalloc.c index d5ff21e58c0..7944240390b 100644 --- a/src/vppinfra/mem_dlmalloc.c +++ b/src/vppinfra/mem_dlmalloc.c @@ -53,7 +53,7 @@ mheap_get_trace_internal (const clib_mem_heap_t *heap, uword offset, mheap_trace_t *t; uword i, trace_index, *p; mheap_trace_t trace = {}; - int index; + int n_callers; if (heap != tm->current_traced_mheap || mheap_trace_thread_disable) return; @@ -67,19 +67,10 @@ mheap_get_trace_internal (const clib_mem_heap_t *heap, uword offset, /* Turn off tracing for this thread to avoid embarrassment... */ mheap_trace_thread_disable = 1; - index = -2; /* skip first 2 stack frames */ - foreach_clib_stack_frame (sf) - { - if (index >= 0) - { - if (index == ARRAY_LEN (trace.callers)) - break; - trace.callers[index] = sf->ip; - } - index++; - } - - if (index < 1) + /* Skip our frame and mspace_get_aligned's frame */ + n_callers = + clib_stack_frame_get_raw (trace.callers, ARRAY_LEN (trace.callers), 2); + if (n_callers == 0) goto out; if (!tm->trace_by_callers) diff --git a/src/vppinfra/stack.c b/src/vppinfra/stack.c index 190e880c228..12b24e3189f 100644 --- a/src/vppinfra/stack.c +++ b/src/vppinfra/stack.c @@ -17,7 +17,30 @@ static __thread unw_cursor_t cursor; static __thread unw_context_t context; -#endif +#endif /* HAVE_LIBUNWIND */ + +__clib_export int +clib_stack_frame_get_raw (void **sf, int n, int skip) +{ +#if HAVE_LIBUNWIND == 1 + void *sf__[20]; + int n__; + + /* Also skip current frame. */ + skip++; + n__ = unw_backtrace (sf__, clib_min (ARRAY_LEN (sf__), n + skip)); + + if (n__ <= skip) + return 0; + else if (n__ - skip < n) + n = n__ - skip; + + clib_memcpy_fast (&sf[0], &sf__[skip], n * sizeof (sf[0])); + return n; +#else /* HAVE_LIBUNWIND */ + return 0; +#endif /* HAVE_LIBUNWIND */ +} __clib_export clib_stack_frame_t * clib_stack_frame_get (clib_stack_frame_t *sf) diff --git a/src/vppinfra/stack.h b/src/vppinfra/stack.h index 98a621d4176..5b833a3811e 100644 --- a/src/vppinfra/stack.h +++ b/src/vppinfra/stack.h @@ -17,6 +17,7 @@ typedef struct u8 is_signal_frame; } clib_stack_frame_t; +int clib_stack_frame_get_raw (void **sf, int n, int skip); clib_stack_frame_t *clib_stack_frame_get (clib_stack_frame_t *); #define foreach_clib_stack_frame(sf) \ |
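
The vppinfra/stack.{c,h} hunks above introduce clib_stack_frame_get_raw(), which copies up to n raw return addresses into a caller-supplied array, drops `skip` frames plus its own, and returns how many entries it filled (0 when libunwind support is not built in); the allocation tracer in mem_dlmalloc.c now calls it with skip = 2 instead of walking foreach_clib_stack_frame(). The following is a minimal usage sketch, not part of the patch: the function name print_backtrace_sketch and the use of stdio printf are illustrative assumptions, and only the clib_stack_frame_get_raw() call itself is taken from the diff.

/* Hypothetical usage sketch of the new helper added above. */
#include <stdio.h>
#include <vppinfra/clib.h>
#include <vppinfra/stack.h>

static void
print_backtrace_sketch (void)
{
  void *callers[12];
  int i, n;

  /* skip = 1 drops print_backtrace_sketch()'s own frame, mirroring how
   * mheap_get_trace_internal() skips its first two frames in the patch. */
  n = clib_stack_frame_get_raw (callers, ARRAY_LEN (callers), 1);

  /* n is 0 if libunwind is unavailable, otherwise the number of frames copied. */
  for (i = 0; i < n; i++)
    printf ("#%d %p\n", i, callers[i]);
}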