diff options
author | Andrew Yourtchenko <ayourtch@gmail.com> | 2018-10-12 16:09:22 +0200 |
---|---|---|
committer | Damjan Marion <dmarion@me.com> | 2019-02-19 12:47:40 +0000 |
commit | 6a7cff7ec234af8529ff72a530076e191cc8d759 (patch) | |
tree | ea7a9bf447385172d0d3fda382aebf2d0203ecc8 /src/vnet/devices/virtio | |
parent | be30fea370ed7cfe6a4a1b154a944411ec3eabd0 (diff) |
tap gso: experimental support
This commit adds a "gso" parameter to the existing "create tap..." CLI,
and a "no-gso" parameter for forward compatibility,
in case the defaults change in the future.
It makes use of the lowest bit of the "tap_flags" field in the API call
in order to allow creation of GSO interfaces via API as well.
It makes the necessary syscalls to enable GSO
and checksum offload support on the kernel side, and sets two flags
on the interface: the virtio-specific virtio_if_t.gso_enabled,
and VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO in vnet_hw_interface_t.flags.
The first one, when enabled, causes GSO-encapsulated packets
to be marked on ingress with the VNET_BUFFER_F_GSO flag, and
vnet_buffer2(b)->gso_size to be set to the desired L4 payload size.
VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO determines the egress packet
processing in interface-output for such packets:
When the flag is set, they are sent out almost as usual (just taking
care to set the vnet header for virtio).
When the flag is not set (the case for most interfaces),
the egress path re-segments such packets so that
the L4 payload of the transmitted packets equals gso_size.
The operations in the datapath are enabled only when there is at least
one GSO-compatible interface in the system - this is done by tracking
the count in interface_main.gso_interface_count. This way the impact
of conditional checks for the setups that do not use GSO is minimized.
"show tap" CLI shows the state of the GSO flag on the interface, and
the total count of GSO-enabled interfaces (which is used to enable
the GSO-related processing in the packet path).
This commit lacks IPv6 extension header traversal support of any kind -
the L4 header is assumed to immediately follow the IPv6 header. Also, it
performs segmentation offload only for TCP (TSO - TCP segmentation offload).
UDP fragmentation offload (UFO) is not part of it.
For debug purposes it also adds the debug CLI:
"set tap gso {<interface> | sw_if_index <sw_idx>} <enable|disable>"
Change-Id: Ifd562db89adcc2208094b3d1032cee8c307aaef9
Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
Diffstat (limited to 'src/vnet/devices/virtio')
-rw-r--r-- | src/vnet/devices/virtio/device.c | 34 | ||||
-rw-r--r-- | src/vnet/devices/virtio/node.c | 92 | ||||
-rw-r--r-- | src/vnet/devices/virtio/virtio.c | 1 | ||||
-rw-r--r-- | src/vnet/devices/virtio/virtio.h | 1 |
4 files changed, 121 insertions, 7 deletions
diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c index aa6a342f90b..609ffb47de8 100644 --- a/src/vnet/devices/virtio/device.c +++ b/src/vnet/devices/virtio/device.c @@ -117,7 +117,7 @@ virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring) static_always_inline u16 add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif, virtio_vring_t * vring, u32 bi, u16 avail, u16 next, - u16 mask) + u16 mask, int do_gso) { u16 n_added = 0; int hdr_sz = vif->virtio_net_hdr_sz; @@ -127,6 +127,25 @@ add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif, struct virtio_net_hdr_v1 *hdr = vlib_buffer_get_current (b) - hdr_sz; clib_memset (hdr, 0, hdr_sz); + if (do_gso && (b->flags & VNET_BUFFER_F_GSO)) + { + if (b->flags & VNET_BUFFER_F_IS_IP4) + { + hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + hdr->gso_size = vnet_buffer2 (b)->gso_size; + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x22; + hdr->csum_offset = 0x10; + } + else + { + hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + hdr->gso_size = vnet_buffer2 (b)->gso_size; + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x36; + hdr->csum_offset = 0x10; + } + } if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)) { @@ -219,7 +238,8 @@ add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif, static_always_inline uword virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, virtio_if_t * vif) + vlib_frame_t * frame, virtio_if_t * vif, + int do_gso) { u8 qid = 0; u16 n_left = frame->n_vectors; @@ -246,7 +266,8 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { u16 n_added = 0; n_added = - add_buffer_to_slot (vm, vif, vring, buffers[0], avail, next, mask); + add_buffer_to_slot (vm, vif, vring, buffers[0], avail, next, mask, + do_gso); if (!n_added) break; avail += n_added; @@ -286,7 +307,12 @@ virtio_interface_tx 
(vlib_main_t * vm, vnet_interface_output_runtime_t *rund = (void *) node->runtime_data; virtio_if_t *vif = pool_elt_at_index (nm->interfaces, rund->dev_instance); - return virtio_interface_tx_inline (vm, node, frame, vif); + vnet_main_t *vnm = vnet_get_main (); + if (vnm->interface_main.gso_interface_count > 0) + return virtio_interface_tx_inline (vm, node, frame, vif, 1 /* do_gso */ ); + else + return virtio_interface_tx_inline (vm, node, frame, vif, + 0 /* no do_gso */ ); } static void diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c index 6b82c418ffb..fcc0f8a212a 100644 --- a/src/vnet/devices/virtio/node.c +++ b/src/vnet/devices/virtio/node.c @@ -30,6 +30,7 @@ #include <vnet/feature/feature.h> #include <vnet/ip/ip4_packet.h> #include <vnet/ip/ip6_packet.h> +#include <vnet/udp/udp_packet.h> #include <vnet/devices/virtio/virtio.h> @@ -140,9 +141,86 @@ more: goto more; } +static_always_inline void +fill_gso_buffer_flags (vlib_buffer_t * b0, struct virtio_net_hdr_v1 *hdr) +{ + u8 l4_proto = 0; + u8 l4_hdr_sz = 0; + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) + + { + ethernet_header_t *eh = (ethernet_header_t *) b0->data; + u16 ethertype = clib_net_to_host_u16 (eh->type); + u16 l2hdr_sz = sizeof (ethernet_header_t); + + vnet_buffer (b0)->l2_hdr_offset = 0; + vnet_buffer (b0)->l3_hdr_offset = l2hdr_sz; + if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4)) + { + ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l2hdr_sz); + vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4); + l4_proto = ip4->protocol; + b0->flags |= + (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID + | VNET_BUFFER_F_L3_HDR_OFFSET_VALID | + VNET_BUFFER_F_L4_HDR_OFFSET_VALID); + b0->flags |= VNET_BUFFER_F_OFFLOAD_IP_CKSUM; + } + else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6)) + { + ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l2hdr_sz); + /* FIXME IPv6 EH traversal */ + vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + sizeof (ip6_header_t); 
+ l4_proto = ip6->protocol; + b0->flags |= + (VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID + | VNET_BUFFER_F_L3_HDR_OFFSET_VALID | + VNET_BUFFER_F_L4_HDR_OFFSET_VALID); + b0->flags |= VNET_BUFFER_F_OFFLOAD_IP_CKSUM; + } + if (l4_proto == IP_PROTOCOL_TCP) + { + b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM; + tcp_header_t *tcp = (tcp_header_t *) (b0->data + + vnet_buffer + (b0)->l4_hdr_offset); + l4_hdr_sz = tcp_header_bytes (tcp); + tcp->checksum = 0; + } + else if (l4_proto == IP_PROTOCOL_UDP) + { + b0->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM; + udp_header_t *udp = (udp_header_t *) (b0->data + + vnet_buffer + (b0)->l4_hdr_offset); + l4_hdr_sz = sizeof (*udp); + udp->checksum = 0; + } + } + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4) + { + ASSERT (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM); + vnet_buffer2 (b0)->gso_size = hdr->gso_size; + vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz; + b0->flags |= VNET_BUFFER_F_GSO; + b0->flags |= VNET_BUFFER_F_IS_IP4; + } + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6) + { + ASSERT (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM); + vnet_buffer2 (b0)->gso_size = hdr->gso_size; + vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz; + b0->flags |= VNET_BUFFER_F_GSO; + b0->flags |= VNET_BUFFER_F_IS_IP6; + } +} + + static_always_inline uword virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, virtio_if_t * vif, u16 qid) + vlib_frame_t * frame, virtio_if_t * vif, u16 qid, + int gso_enabled) { vnet_main_t *vnm = vnet_get_main (); u32 thread_index = vm->thread_index; @@ -187,6 +265,10 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, b0->current_length = len; b0->total_length_not_including_first_buffer = 0; b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + + if (gso_enabled) + fill_gso_buffer_flags (b0, hdr); + vnet_buffer (b0)->sw_if_index[VLIB_RX] = vif->sw_if_index; vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; @@ -286,8 +368,12 @@ virtio_input_fn 
(vlib_main_t * vm, vlib_node_runtime_t * node, mif = vec_elt_at_index (nm->interfaces, dq->dev_instance); if (mif->flags & VIRTIO_IF_FLAG_ADMIN_UP) { - n_rx += virtio_device_input_inline (vm, node, frame, mif, - dq->queue_id); + if (mif->gso_enabled) + n_rx += virtio_device_input_inline (vm, node, frame, mif, + dq->queue_id, 1); + else + n_rx += virtio_device_input_inline (vm, node, frame, mif, + dq->queue_id, 0); } } diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c index cfeb30246f0..2648f29af84 100644 --- a/src/vnet/devices/virtio/virtio.c +++ b/src/vnet/devices/virtio/virtio.c @@ -277,6 +277,7 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type) vlib_cli_output (vm, " host-ns \"%s\"", vif->net_ns); vlib_cli_output (vm, " fd %d", vif->fd); vlib_cli_output (vm, " tap-fd %d", vif->tap_fd); + vlib_cli_output (vm, " gso-enabled %d", vif->gso_enabled); } vlib_cli_output (vm, " Mac Address: %U", format_ethernet_address, vif->mac_addr); diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h index af61ca5968f..f72819639d7 100644 --- a/src/vnet/devices/virtio/virtio.h +++ b/src/vnet/devices/virtio/virtio.h @@ -173,6 +173,7 @@ typedef struct u8 host_ip4_prefix_len; ip6_address_t host_ip6_addr; u8 host_ip6_prefix_len; + int gso_enabled; int ifindex; } virtio_if_t; |