From 26bd3e939e42ad13fd5f569e23d904826254b0db Mon Sep 17 00:00:00 2001 From: Nicolas PLANEL Date: Wed, 5 Mar 2025 16:37:47 +0100 Subject: dpdk: rte_eth_tx_prepare needs to be called before transmitting packets According to the DPDK documentation, a call to rte_eth_tx_prepare() is required to prepare the NIC and to validate the mbufs. This fixes the bad hardware UDP checksums generated by the ena driver. As performance will be impacted depending on the driver's tx_prepare() callback, I defined a tx-prepare flag that is enabled by default on the ena driver but can be enabled in the configuration if needed for other drivers. Note: This option would normally be exclusive with intel_phdr_cksum, as the driver's tx_prepare would normally cover this usage. Type: fix Change-Id: Ic7c21682f7bd92b35bd9b1028129709baa2a64d4 Signed-off-by: Nicolas PLANEL Signed-off-by: Nicolas PLANEL --- src/plugins/dpdk/device/device.c | 66 ++++++++++++++++++++++++++-------------- src/plugins/dpdk/device/dpdk.h | 4 ++- src/plugins/dpdk/device/driver.c | 1 + src/plugins/dpdk/device/init.c | 2 ++ 4 files changed, 50 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c index c5abbd5f727..58ad4fda0d1 100644 --- a/src/plugins/dpdk/device/device.c +++ b/src/plugins/dpdk/device/device.c @@ -159,7 +159,7 @@ tx_burst_vector_internal (vlib_main_t *vm, dpdk_device_t *xd, { dpdk_tx_queue_t *txq; u32 n_retry; - int n_sent = 0; + u32 n_sent = 0; n_retry = 16; txq = vec_elt_at_index (xd->tx_queues, queue_id); @@ -279,9 +279,11 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (f); u32 n_packets = f->n_vectors; u32 n_left; + u32 n_prep; u32 thread_index = vm->thread_index; int queue_id = tf->queue_id; u8 is_shared = tf->shared_queue; + u8 offload_enabled = 0; u32 tx_pkts = 0; dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data, thread_index); @@ -333,6 +335,7 @@ 
VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) && (or_flags & VNET_BUFFER_F_OFFLOAD))) { + offload_enabled = 1; dpdk_buffer_tx_offload (xd, b[0], mb[0]); dpdk_buffer_tx_offload (xd, b[1], mb[1]); dpdk_buffer_tx_offload (xd, b[2], mb[2]); @@ -386,6 +389,7 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) && (or_flags & VNET_BUFFER_F_OFFLOAD))) { + offload_enabled = 1; dpdk_buffer_tx_offload (xd, b[0], mb[0]); dpdk_buffer_tx_offload (xd, b[1], mb[1]); } @@ -408,7 +412,13 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, b[0] = vlib_buffer_from_rte_mbuf (mb[0]); dpdk_validate_rte_mbuf (vm, b[0], 1); - dpdk_buffer_tx_offload (xd, b[0], mb[0]); + + if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) && + (b[0]->flags & VNET_BUFFER_F_OFFLOAD))) + { + offload_enabled = 1; + dpdk_buffer_tx_offload (xd, b[0], mb[0]); + } if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) if (b[0]->flags & VLIB_BUFFER_IS_TRACED) @@ -418,32 +428,44 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, n_left--; } - /* transmit as many packets as possible */ + /* prepare and transmit as many packets as possible */ tx_pkts = n_packets = mb - ptd->mbufs; - n_left = tx_burst_vector_internal (vm, xd, ptd->mbufs, n_packets, queue_id, - is_shared); + n_prep = n_packets; - { - /* If there is no callback then drop any non-transmitted packets */ - if (PREDICT_FALSE (n_left)) - { - tx_pkts -= n_left; - vlib_simple_counter_main_t *cm; - vnet_main_t *vnm = vnet_get_main (); + if (PREDICT_FALSE (offload_enabled && + (xd->flags & DPDK_DEVICE_FLAG_TX_PREPARE))) + { + n_prep = + rte_eth_tx_prepare (xd->port_id, queue_id, ptd->mbufs, n_packets); - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_TX_ERROR); + /* If mbufs are malformed then drop any non-prepared packets */ + if 
(PREDICT_FALSE (n_prep != n_packets)) + { + n_left = n_packets - n_prep; + } + } - vlib_increment_simple_counter (cm, thread_index, xd->sw_if_index, - n_left); + n_left += + tx_burst_vector_internal (vm, xd, ptd->mbufs, n_prep, queue_id, is_shared); - vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, - n_left); + /* If there is no callback then drop any non-transmitted packets */ + if (PREDICT_FALSE (n_left)) + { + tx_pkts -= n_left; + vlib_simple_counter_main_t *cm; + vnet_main_t *vnm = vnet_get_main (); - while (n_left--) - rte_pktmbuf_free (ptd->mbufs[n_packets - n_left - 1]); - } - } + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); + + vlib_increment_simple_counter (cm, thread_index, xd->sw_if_index, + n_left); + + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, + n_left); + + rte_pktmbuf_free_bulk (&ptd->mbufs[tx_pkts], n_left); + } return tx_pkts; } diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h index 2440439989f..70d9cc715dc 100644 --- a/src/plugins/dpdk/device/dpdk.h +++ b/src/plugins/dpdk/device/dpdk.h @@ -71,7 +71,8 @@ typedef uint16_t dpdk_portid_t; _ (11, RX_FLOW_OFFLOAD, "rx-flow-offload") \ _ (12, RX_IP4_CKSUM, "rx-ip4-cksum") \ _ (13, INT_SUPPORTED, "int-supported") \ - _ (14, INT_UNMASKABLE, "int-unmaskable") + _ (14, INT_UNMASKABLE, "int-unmaskable") \ + _ (15, TX_PREPARE, "tx-prepare") typedef enum { @@ -131,6 +132,7 @@ typedef struct u32 interface_number_from_port_id : 1; u32 use_intel_phdr_cksum : 1; u32 int_unmaskable : 1; + u32 need_tx_prepare : 1; } dpdk_driver_t; dpdk_driver_t *dpdk_driver_find (const char *name, const char **desc); diff --git a/src/plugins/dpdk/device/driver.c b/src/plugins/dpdk/device/driver.c index 2fde041684c..469a4b5de2b 100644 --- a/src/plugins/dpdk/device/driver.c +++ b/src/plugins/dpdk/device/driver.c @@ -113,6 +113,7 @@ static dpdk_driver_t dpdk_drivers[] = { .drivers = DPDK_DRIVERS ({ "net_ena", 
"AWS ENA VF" }), .interface_name_prefix = "VirtualFunctionEthernet", .enable_rxq_int = 1, + .need_tx_prepare = 1, }, { .drivers = DPDK_DRIVERS ({ "net_vmxnet3", "VMware VMXNET3" }), diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index aaa2c1f4a68..8aba4ec25e7 100644 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -390,6 +390,8 @@ dpdk_lib_init (dpdk_main_t * dm) dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM, 1); if (dr->int_unmaskable) dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_INT_UNMASKABLE, 1); + if (dr->need_tx_prepare) + dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_TX_PREPARE, 1); } else dpdk_log_warn ("[%u] unknown driver '%s'", port_id, di.driver_name); -- cgit