From dd3737284d5021e2e3bd0413b61aab14797e365c Mon Sep 17 00:00:00 2001 From: Steven Luong Date: Mon, 25 Feb 2019 12:10:53 -0800 Subject: vmxnet3: lro/tso and rx checksum Add lro/tso and rx checksum support. lro/tso is configured via startup.conf vmxnet3 { lro } It is disabled by default because not all versions of ESXi support it. Change-Id: Icf224ff528884ecd9e655b4fcf4481194e8c5a63 Signed-off-by: Steven Luong --- src/plugins/vmxnet3/README.md | 1 - src/plugins/vmxnet3/cli.c | 22 ++++++ src/plugins/vmxnet3/input.c | 160 +++++++++++++++++++++++++++++++++++------- src/plugins/vmxnet3/output.c | 26 +++++-- src/plugins/vmxnet3/vmxnet3.c | 23 +++++- src/plugins/vmxnet3/vmxnet3.h | 96 +++++++++++++++++++++---- 6 files changed, 283 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/plugins/vmxnet3/README.md b/src/plugins/vmxnet3/README.md index 65a0bc89f4a..d1082aa11e4 100644 --- a/src/plugins/vmxnet3/README.md +++ b/src/plugins/vmxnet3/README.md @@ -15,7 +15,6 @@ vfio driver can still be used with recent kernels which support no-iommu mode. 
##Known issues -* TSO/LRO * RSS * VLAN filter diff --git a/src/plugins/vmxnet3/cli.c b/src/plugins/vmxnet3/cli.c index e110a479988..571c3dbd042 100644 --- a/src/plugins/vmxnet3/cli.c +++ b/src/plugins/vmxnet3/cli.c @@ -205,6 +205,7 @@ show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, if (!hw_if_indices) return; + vlib_cli_output (vm, "LRO/TSO configured: %u", vmxm->lro_configured); for (i = 0; i < vec_len (hw_if_indices); i++) { hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); @@ -213,6 +214,7 @@ show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, format_vnet_hw_if_index_name, vnm, hw_if_indices[i], hw_if_indices[i]); vlib_cli_output (vm, " Version: %u", vd->version); + vlib_cli_output (vm, " LRO/TSO enable: %u", vd->lro_enable); vlib_cli_output (vm, " PCI Address: %U", format_vlib_pci_addr, &vd->pci_addr); vlib_cli_output (vm, " Mac Address: %U", format_ethernet_address, @@ -580,6 +582,26 @@ vmxnet3_cli_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (vmxnet3_cli_init); +static clib_error_t * +vmxnet3_config (vlib_main_t * vm, unformat_input_t * input) +{ + vmxnet3_main_t *vmxm = &vmxnet3_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "lro")) + vmxm->lro_configured = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + return 0; +} + +/* vmxnet3 { ... } configuration. 
*/ +VLIB_CONFIG_FUNCTION (vmxnet3_config, "vmxnet3"); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/vmxnet3/input.c b/src/plugins/vmxnet3/input.c index dffad6c7d47..87aa29cb49a 100644 --- a/src/plugins/vmxnet3/input.c +++ b/src/plugins/vmxnet3/input.c @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include #include @@ -71,6 +74,126 @@ vmxnet3_rx_comp_ring_advance_next (vmxnet3_rxq_t * rxq) } } +static_always_inline void +vmxnet3_handle_offload (vmxnet3_rx_comp * rx_comp, vlib_buffer_t * hb, + u16 * next, u16 gso_size) +{ + u8 l4_hdr_sz = 0; + + if (gso_size) + { + if (rx_comp->flags & VMXNET3_RXCF_TCP) + { + tcp_header_t *tcp = + (tcp_header_t *) (hb->data + vnet_buffer (hb)->l4_hdr_offset); + l4_hdr_sz = tcp_header_bytes (tcp); + } + else if (rx_comp->flags & VMXNET3_RXCF_UDP) + { + udp_header_t *udp = + (udp_header_t *) (hb->data + vnet_buffer (hb)->l4_hdr_offset); + l4_hdr_sz = sizeof (*udp); + } + } + + if (rx_comp->flags & VMXNET3_RXCF_IP4) + { + ip4_header_t *ip4 = (ip4_header_t *) (hb->data + + sizeof (ethernet_header_t)); + + vnet_buffer (hb)->l2_hdr_offset = 0; + vnet_buffer (hb)->l3_hdr_offset = sizeof (ethernet_header_t); + vnet_buffer (hb)->l4_hdr_offset = sizeof (ethernet_header_t) + + ip4_header_bytes (ip4); + hb->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID | + VNET_BUFFER_F_L3_HDR_OFFSET_VALID | + VNET_BUFFER_F_L4_HDR_OFFSET_VALID | VNET_BUFFER_F_IS_IP4; + next[0] = VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; + + /* checksum offload */ + if (!(rx_comp->index & VMXNET3_RXCI_CNC)) + { + if (!(rx_comp->flags & VMXNET3_RXCF_IPC)) + { + hb->flags |= VNET_BUFFER_F_OFFLOAD_IP_CKSUM; + ip4->checksum = 0; + } + if (!(rx_comp->flags & VMXNET3_RXCF_TUC)) + { + if (rx_comp->flags & VMXNET3_RXCF_TCP) + { + tcp_header_t *tcp = + (tcp_header_t *) (hb->data + + vnet_buffer (hb)->l4_hdr_offset); + hb->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM; + tcp->checksum = 0; + } + else if (rx_comp->flags & VMXNET3_RXCF_UDP) + { + 
udp_header_t *udp = + (udp_header_t *) (hb->data + + vnet_buffer (hb)->l4_hdr_offset); + hb->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM; + udp->checksum = 0; + } + } + } + + if (gso_size) + { + vnet_buffer2 (hb)->gso_size = gso_size; + vnet_buffer2 (hb)->gso_l4_hdr_sz = l4_hdr_sz; + hb->flags |= VNET_BUFFER_F_GSO; + } + vlib_buffer_advance (hb, device_input_next_node_advance[next[0]]); + } + else if (rx_comp->flags & VMXNET3_RXCF_IP6) + { + vnet_buffer (hb)->l2_hdr_offset = 0; + vnet_buffer (hb)->l3_hdr_offset = sizeof (ethernet_header_t); + vnet_buffer (hb)->l4_hdr_offset = sizeof (ethernet_header_t) + + sizeof (ip6_header_t); + hb->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID | + VNET_BUFFER_F_L3_HDR_OFFSET_VALID | + VNET_BUFFER_F_L4_HDR_OFFSET_VALID | VNET_BUFFER_F_IS_IP6; + next[0] = VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + + /* checksum offload */ + if (!(rx_comp->index & VMXNET3_RXCI_CNC)) + { + if (!(rx_comp->flags & VMXNET3_RXCF_TUC)) + { + if (rx_comp->flags & VMXNET3_RXCF_TCP) + { + tcp_header_t *tcp = + (tcp_header_t *) (hb->data + + vnet_buffer (hb)->l4_hdr_offset); + hb->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM; + tcp->checksum = 0; + } + else if (rx_comp->flags & VMXNET3_RXCF_UDP) + { + udp_header_t *udp = + (udp_header_t *) (hb->data + + vnet_buffer (hb)->l4_hdr_offset); + hb->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM; + udp->checksum = 0; + } + } + } + + if (gso_size) + { + vnet_buffer2 (hb)->gso_size = gso_size; + vnet_buffer2 (hb)->gso_l4_hdr_sz = l4_hdr_sz; + hb->flags |= VNET_BUFFER_F_GSO; + } + vlib_buffer_advance (hb, device_input_next_node_advance[next[0]]); + } + else + next[0] = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; +} + static_always_inline uword vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, vmxnet3_device_t * vd, @@ -93,6 +216,7 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u8 known_next = 0, got_packet = 0; vmxnet3_rx_desc *rxd; clib_error_t *error; + u16 gso_size 
= 0; rxq = vec_elt_at_index (vd->rxqs, qid); comp_ring = &rxq->rx_comp_ring; @@ -164,6 +288,14 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { ASSERT (!(rxd->flags & VMXNET3_RXF_BTYPE)); /* start segment */ + if ((vd->lro_enable) && + (rx_comp->flags & VMXNET3_RXCF_CT) == VMXNET3_RXCOMP_TYPE_LRO) + { + vmxnet3_rx_comp_ext *lro = (vmxnet3_rx_comp_ext *) rx_comp; + + gso_size = lro->flags & VMXNET3_RXECF_MSS_MASK; + } + hb = b0; bi[0] = bi0; if (!(rx_comp->index & VMXNET3_RXCI_EOP)) @@ -232,8 +364,6 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (got_packet) { - ethernet_header_t *e = (ethernet_header_t *) hb->data; - if (PREDICT_FALSE (vd->per_interface_next_index != ~0)) { next_index = vd->per_interface_next_index; @@ -254,31 +384,12 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } else { + ethernet_header_t *e = (ethernet_header_t *) hb->data; + if (ethernet_frame_is_tagged (e->type)) next[0] = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; else - { - if (rx_comp->flags & VMXNET3_RXCF_IP4) - { - next[0] = VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; - hb->flags |= VNET_BUFFER_F_IS_IP4; - vlib_buffer_advance (hb, - device_input_next_node_advance - [next[0]]); - } - else if (rx_comp->flags & VMXNET3_RXCF_IP6) - { - next[0] = VNET_DEVICE_INPUT_NEXT_IP6_INPUT; - hb->flags |= VNET_BUFFER_F_IS_IP6; - vlib_buffer_advance (hb, - device_input_next_node_advance - [next[0]]); - } - else - { - next[0] = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - } - } + vmxnet3_handle_offload (rx_comp, hb, next, gso_size); } n_rx_packets++; @@ -286,6 +397,7 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, bi++; hb = 0; got_packet = 0; + gso_size = 0; } next: diff --git a/src/plugins/vmxnet3/output.c b/src/plugins/vmxnet3/output.c index 5c48549e60d..5de41cdba07 100644 --- a/src/plugins/vmxnet3/output.c +++ b/src/plugins/vmxnet3/output.c @@ -127,7 +127,9 @@ VNET_DEVICE_CLASS_TX_FN 
(vmxnet3_device_class) (vlib_main_t * vm, while (PREDICT_TRUE (n_left)) { u16 space_needed = 1, i; + u32 gso_size = 0; vlib_buffer_t *b; + u32 hdr_len = 0; bi0 = buffers[0]; b0 = vlib_get_buffer (vm, bi0); @@ -178,17 +180,33 @@ VNET_DEVICE_CLASS_TX_FN (vmxnet3_device_class) (vlib_main_t * vm, txq->tx_ring.bufs[desc_idx] = bi0; txd = &txq->tx_desc[desc_idx]; + txd->address = vlib_buffer_get_current_pa (vm, b0); txd->flags[0] = generation | b0->current_length; + txd->flags[1] = 0; + if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_GSO)) + { + /* + * We should not be getting GSO outbound traffic unless it is + * lro is enable + */ + ASSERT (vd->lro_enable == 1); + gso_size = vnet_buffer2 (b0)->gso_size; + hdr_len = vnet_buffer (b0)->l4_hdr_offset + + sizeof (ethernet_header_t); + } generation = txq->tx_ring.gen; - - txd->flags[1] = 0; bi0 = b0->next_buffer; } - - txd->flags[1] = VMXNET3_TXF_CQ | VMXNET3_TXF_EOP; + if (PREDICT_FALSE (gso_size != 0)) + { + txd->flags[1] = hdr_len; + txd->flags[1] |= VMXNET3_TXF_OM (VMXNET3_OM_TSO); + txd->flags[0] |= VMXNET3_TXF_MSSCOF (gso_size); + } + txd->flags[1] |= VMXNET3_TXF_CQ | VMXNET3_TXF_EOP; asm volatile ("":::"memory"); /* * Now toggle back the generation bit for the first segment. 
diff --git a/src/plugins/vmxnet3/vmxnet3.c b/src/plugins/vmxnet3/vmxnet3.c index a3aae99f211..6995905a181 100644 --- a/src/plugins/vmxnet3/vmxnet3.c +++ b/src/plugins/vmxnet3/vmxnet3.c @@ -218,6 +218,10 @@ vmxnet3_provision_driver_shared (vlib_main_t * vm, vmxnet3_device_t * vd) shared->misc.guest_info = VMXNET3_GOS_BITS_64; shared->misc.guest_info |= VMXNET3_GOS_TYPE_LINUX; shared->misc.version_support = VMXNET3_VERSION_SELECT; + shared->misc.upt_features = VMXNET3_F_RXCSUM; + if (vd->lro_enable) + shared->misc.upt_features |= VMXNET3_F_LRO; + shared->misc.max_num_rx_sg = 0; shared->misc.upt_version_support = VMXNET3_UPT_VERSION_SELECT; shared->misc.queue_desc_address = vmxnet3_dma_addr (vm, vd, vd->queues); shared->misc.queue_desc_len = sizeof (*tx) * vd->num_tx_queues + @@ -359,6 +363,8 @@ static clib_error_t * vmxnet3_device_init (vlib_main_t * vm, vmxnet3_device_t * vd, vmxnet3_create_if_args_t * args) { + vnet_main_t *vnm = vnet_get_main (); + vmxnet3_main_t *vmxm = &vmxnet3_main; clib_error_t *error = 0; u32 ret, i, size; vlib_thread_main_t *tm = vlib_get_thread_main (); @@ -405,6 +411,13 @@ vmxnet3_device_init (vlib_main_t * vm, vmxnet3_device_t * vd, return error; } + /* LRO is only supported for version >= 3 */ + if ((vmxm->lro_configured) && (vd->version >= 3)) + { + vd->lro_enable = 1; + vnm->interface_main.gso_interface_count++; + } + vmxnet3_reg_write (vd, 1, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK); ret = vmxnet3_reg_read (vd, 1, VMXNET3_REG_CMD); if (ret & 1) @@ -413,9 +426,7 @@ vmxnet3_device_init (vlib_main_t * vm, vmxnet3_device_t * vd, vd->link_speed = ret >> 16; } else - { - vd->flags &= ~VMXNET3_DEVICE_F_LINK_UP; - } + vd->flags &= ~VMXNET3_DEVICE_F_LINK_UP; /* Get the mac address */ ret = vmxnet3_reg_read (vd, 1, VMXNET3_REG_MACL); @@ -698,6 +709,9 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vd->hw_if_index); hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; 
+ if (vd->lro_enable) + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO; + vnet_hw_interface_set_input_node (vnm, vd->hw_if_index, vmxnet3_input_node.index); vnet_hw_interface_assign_rx_thread (vnm, vd->hw_if_index, 0, ~0); @@ -794,6 +808,9 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd) clib_error_free (vd->error); clib_memset (vd, 0, sizeof (*vd)); pool_put (vmxm->devices, vd); + + if (vd->lro_enable) + vnm->interface_main.gso_interface_count--; } /* diff --git a/src/plugins/vmxnet3/vmxnet3.h b/src/plugins/vmxnet3/vmxnet3.h index ffde9f0c6f8..084fbc5d3b0 100644 --- a/src/plugins/vmxnet3/vmxnet3.h +++ b/src/plugins/vmxnet3/vmxnet3.h @@ -57,6 +57,19 @@ enum #undef _ }; +#define foreach_vmxnet3_feature_flags \ + _(0, RXCSUM, "rx checksum") \ + _(1, RSS, "RSS") \ + _(2, RXVLAN, "rx VLAN") \ + _(3, LRO, "LRO") + +enum +{ +#define _(a, b, c) VMXNET3_F_##b = (1 << a), + foreach_vmxnet3_feature_flags +#undef _ +}; + #define VMXNET3_TXQ_MAX 8 #define VMXNET3_TX_START(vd) ((vd)->queues) #define VMXNET3_RX_START(vd) \ @@ -86,14 +99,41 @@ enum #define VMXNET3_RXF_BTYPE (1 << 14) /* rx body buffer type */ #define VMXNET3_RXF_GEN (1 << 31) /* rx generation */ + +#define VMXNET3_RXCF_CKSUM_MASK (0xFFFF) /* rx checksum mask */ +#define VMXNET3_RXCF_TUC (1 << 16) /* rx udp/tcp checksum correct */ +#define VMXNET3_RXCF_UDP (1 << 17) /* rx udp packet */ +#define VMXNET3_RXCF_TCP (1 << 18) /* rx tcp packet */ +#define VMXNET3_RXCF_IPC (1 << 19) /* rx ip checksum correct */ #define VMXNET3_RXCF_IP6 (1 << 20) /* rx ip6 packet */ #define VMXNET3_RXCF_IP4 (1 << 21) /* rx ip4 packet */ +#define VMXNET3_RXCF_CT (0x7F << 24) /* rx completion type 24-30, 7 bits */ #define VMXNET3_RXCF_GEN (1 << 31) /* rx completion generation */ + #define VMXNET3_RXC_INDEX (0xFFF) /* rx completion index mask */ +#define foreach_vmxnet3_offload \ + _(0, NONE, "none") \ + _(2, CSUM, "checksum") \ + _(3, TSO, "tso") + +enum +{ +#define _(a, b, c) VMXNET3_OM_##b = (a), + foreach_vmxnet3_offload 
+#undef _ +}; + +/* tx desc flag 0 */ #define VMXNET3_TXF_GEN (1 << 14) /* tx generation */ + +/* tx desc flag 1 */ +#define VMXNET3_TXF_OM(x) ((x) << 10) /* tx offload mode */ +#define VMXNET3_TXF_MSSCOF(x) ((x) << 18) /* tx MSS checksum offset, flags */ #define VMXNET3_TXF_EOP (1 << 12) /* tx end of packet */ #define VMXNET3_TXF_CQ (1 << 13) /* tx completion request */ + +/* tx completion flag */ #define VMXNET3_TXCF_GEN (1 << 31) /* tx completion generation */ #define VMXNET3_TXC_INDEX (0xFFF) /* tx completion index mask */ @@ -116,10 +156,17 @@ enum #define VMXNET3_GOS_TYPE_LINUX (1 << 2) #define VMXNET3_RXCL_LEN_MASK (0x3FFF) // 14 bits #define VMXNET3_RXCL_ERROR (1 << 14) -#define VMXNET3_RXCI_EOP (1 << 14) -#define VMXNET3_RXCI_SOP (1 << 15) -#define foreach_vmxnet3_device_flags \ +#define VMXNET3_RXCI_EOP (1 << 14) /* end of packet */ +#define VMXNET3_RXCI_SOP (1 << 15) /* start of packet */ +#define VMXNET3_RXCI_CNC (1 << 30) /* Checksum not calculated */ + +#define VMXNET3_RXCOMP_TYPE (3 << 24) /* RX completion descriptor */ +#define VMXNET3_RXCOMP_TYPE_LRO (4 << 24) /* RX completion descriptor for LRO */ + +#define VMXNET3_RXECF_MSS_MASK (0xFFFF) // 16 bits + +#define foreach_vmxnet3_device_flags \ _(0, INITIALIZED, "initialized") \ _(1, ERROR, "error") \ _(2, ADMIN_UP, "admin-up") \ @@ -203,9 +250,8 @@ typedef CLIB_PACKED (struct typedef CLIB_PACKED (struct { - u32 mode; - u16 multicast_len; - u16 pad; u64 multicast_address; u8 vlan_filter[512]; + u32 mode; u16 multicast_len; u16 pad; + u64 multicast_address; u8 vlan_filter[512]; }) vmxnet3_rx_filter_config; typedef CLIB_PACKED (struct @@ -246,9 +292,9 @@ typedef CLIB_PACKED (struct u64 data_address; u64 comp_address; u64 driver_data_address; u64 pad; u32 num_desc; - u32 num_data; - u32 num_comp; u32 driver_data_len; u8 intr_index; - u8 pad1[7]; + u32 num_data; u32 num_comp; u32 driver_data_len; + u8 intr_index; + u8 pad1; u16 data_address_size; u8 pad2[4]; }) vmxnet3_tx_queue_config; typedef 
CLIB_PACKED (struct @@ -278,10 +324,11 @@ typedef CLIB_PACKED (struct typedef CLIB_PACKED (struct { u64 desc_address[2]; - u64 comp_address; u64 driver_data_address; u64 pad; - u32 num_desc[2]; - u32 num_comp; u32 driver_data_len; u8 intr_index; - u8 pad1[7]; + u64 comp_address; u64 driver_data_address; + u64 data_address; u32 num_desc[2]; + u32 num_comp; + u32 driver_data_len; u8 intr_index; u8 pad1; + u16 data_address_size; u8 pad2[4]; }) vmxnet3_rx_queue_config; typedef CLIB_PACKED (struct @@ -356,6 +403,27 @@ typedef CLIB_PACKED (struct u32 flags; }) vmxnet3_rx_comp; +/* + * flags: + * mss -- bits 0 - 15 + * tcp/udp checksum correct-- bit 16 + * udp packet -- bit 17 + * tcp packet -- bit 18 + * ip checksum correct -- bit 19 + * ipv6 -- bit 20 + * ipv4 -- bit 21 + * ip fragment -- bit 22 + * frame crc correct -- bit 23 + * completion type -- bits 24-30 + * generation -- bit 31 + */ +typedef CLIB_PACKED (struct + { + u32 dword1; + u8 seg_cnt; u8 dup_ack_cnt; u16 ts_delta; u32 dword2; + u32 flags; + }) vmxnet3_rx_comp_ext; + /* * index: * TX desc index -- bits 0-11 @@ -486,6 +554,7 @@ typedef struct void *queues; u32 link_speed; + u8 lro_enable; vmxnet3_tx_stats *tx_stats; vmxnet3_rx_stats *rx_stats; } vmxnet3_device_t; @@ -495,6 +564,7 @@ typedef struct vmxnet3_device_t *devices; u16 msg_id_base; vlib_log_class_t log_default; + u8 lro_configured; } vmxnet3_main_t; extern vmxnet3_main_t vmxnet3_main; -- cgit 1.2.3-korg