aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/devices/virtio
diff options
context:
space:
mode:
authorAndrew Yourtchenko <ayourtch@gmail.com>2018-10-12 16:09:22 +0200
committerDamjan Marion <dmarion@me.com>2019-02-19 12:47:40 +0000
commit6a7cff7ec234af8529ff72a530076e191cc8d759 (patch)
treeea7a9bf447385172d0d3fda382aebf2d0203ecc8 /src/vnet/devices/virtio
parentbe30fea370ed7cfe6a4a1b154a944411ec3eabd0 (diff)
tap gso: experimental support
This commit adds a "gso" parameter to existing "create tap..." CLI, and a "no-gso" parameter for the compatibility with the future, when/if defaults change. It makes use of the lowest bit of the "tap_flags" field in the API call in order to allow creation of GSO interfaces via API as well. It does the necessary syscalls to enable the GSO and checksum offload support on the kernel side and sets two flags on the interface: virtio-specific virtio_if_t.gso_enabled, and vnet_hw_interface_t.flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO. The first one, if enabled, triggers the marking of the GSO-encapsulated packets on ingress with VNET_BUFFER_F_GSO flag, and setting vnet_buffer2(b)->gso_size to the desired L4 payload size. VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO determines the egress packet processing in interface-output for such packets: When the flag is set, they are sent out almost as usual (just taking care to set the vnet header for virtio). When the flag is not enabled (the case for most interfaces), the egress path performs the re-segmentation such that the L4 payload of the transmitted packets equals gso_size. The operations in the datapath are enabled only when there is at least one GSO-compatible interface in the system - this is done by tracking the count in interface_main.gso_interface_count. This way the impact of conditional checks for the setups that do not use GSO is minimized. "show tap" CLI shows the state of the GSO flag on the interface, and the total count of GSO-enabled interfaces (which is used to enable the GSO-related processing in the packet path). This commit lacks IPv6 extension header traversal support of any kind - the L4 payload is assumed to follow the IPv6 header. Also it performs the offloads only for TCP (TSO - TCP segmentation offload). The UDP fragmentation offload (UFO) is not part of it. For debug purposes it also adds the debug CLI: "set tap gso {<interface> | sw_if_index <sw_idx>} <enable|disable>" Change-Id: Ifd562db89adcc2208094b3d1032cee8c307aaef9 Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
Diffstat (limited to 'src/vnet/devices/virtio')
-rw-r--r--src/vnet/devices/virtio/device.c34
-rw-r--r--src/vnet/devices/virtio/node.c92
-rw-r--r--src/vnet/devices/virtio/virtio.c1
-rw-r--r--src/vnet/devices/virtio/virtio.h1
4 files changed, 121 insertions, 7 deletions
diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c
index aa6a342f90b..609ffb47de8 100644
--- a/src/vnet/devices/virtio/device.c
+++ b/src/vnet/devices/virtio/device.c
@@ -117,7 +117,7 @@ virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring)
static_always_inline u16
add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif,
virtio_vring_t * vring, u32 bi, u16 avail, u16 next,
- u16 mask)
+ u16 mask, int do_gso)
{
u16 n_added = 0;
int hdr_sz = vif->virtio_net_hdr_sz;
@@ -127,6 +127,25 @@ add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif,
struct virtio_net_hdr_v1 *hdr = vlib_buffer_get_current (b) - hdr_sz;
clib_memset (hdr, 0, hdr_sz);
+ if (do_gso && (b->flags & VNET_BUFFER_F_GSO))
+ {
+ if (b->flags & VNET_BUFFER_F_IS_IP4)
+ {
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+ hdr->gso_size = vnet_buffer2 (b)->gso_size;
+ hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x22;
+ hdr->csum_offset = 0x10;
+ }
+ else
+ {
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+ hdr->gso_size = vnet_buffer2 (b)->gso_size;
+ hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x36;
+ hdr->csum_offset = 0x10;
+ }
+ }
if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0))
{
@@ -219,7 +238,8 @@ add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif,
static_always_inline uword
virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, virtio_if_t * vif)
+ vlib_frame_t * frame, virtio_if_t * vif,
+ int do_gso)
{
u8 qid = 0;
u16 n_left = frame->n_vectors;
@@ -246,7 +266,8 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u16 n_added = 0;
n_added =
- add_buffer_to_slot (vm, vif, vring, buffers[0], avail, next, mask);
+ add_buffer_to_slot (vm, vif, vring, buffers[0], avail, next, mask,
+ do_gso);
if (!n_added)
break;
avail += n_added;
@@ -286,7 +307,12 @@ virtio_interface_tx (vlib_main_t * vm,
vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
virtio_if_t *vif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
- return virtio_interface_tx_inline (vm, node, frame, vif);
+ vnet_main_t *vnm = vnet_get_main ();
+ if (vnm->interface_main.gso_interface_count > 0)
+ return virtio_interface_tx_inline (vm, node, frame, vif, 1 /* do_gso */ );
+ else
+ return virtio_interface_tx_inline (vm, node, frame, vif,
+ 0 /* no do_gso */ );
}
static void
diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c
index 6b82c418ffb..fcc0f8a212a 100644
--- a/src/vnet/devices/virtio/node.c
+++ b/src/vnet/devices/virtio/node.c
@@ -30,6 +30,7 @@
#include <vnet/feature/feature.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp_packet.h>
#include <vnet/devices/virtio/virtio.h>
@@ -140,9 +141,86 @@ more:
goto more;
}
+static_always_inline void
+fill_gso_buffer_flags (vlib_buffer_t * b0, struct virtio_net_hdr_v1 *hdr)
+{
+ u8 l4_proto = 0;
+ u8 l4_hdr_sz = 0;
+ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+
+ {
+ ethernet_header_t *eh = (ethernet_header_t *) b0->data;
+ u16 ethertype = clib_net_to_host_u16 (eh->type);
+ u16 l2hdr_sz = sizeof (ethernet_header_t);
+
+ vnet_buffer (b0)->l2_hdr_offset = 0;
+ vnet_buffer (b0)->l3_hdr_offset = l2hdr_sz;
+ if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4))
+ {
+ ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l2hdr_sz);
+ vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
+ l4_proto = ip4->protocol;
+ b0->flags |=
+ (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID
+ | VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+ b0->flags |= VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
+ }
+ else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
+ {
+ ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l2hdr_sz);
+ /* FIXME IPv6 EH traversal */
+ vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + sizeof (ip6_header_t);
+ l4_proto = ip6->protocol;
+ b0->flags |=
+ (VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID
+ | VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+ b0->flags |= VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
+ }
+ if (l4_proto == IP_PROTOCOL_TCP)
+ {
+ b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+ tcp_header_t *tcp = (tcp_header_t *) (b0->data +
+ vnet_buffer
+ (b0)->l4_hdr_offset);
+ l4_hdr_sz = tcp_header_bytes (tcp);
+ tcp->checksum = 0;
+ }
+ else if (l4_proto == IP_PROTOCOL_UDP)
+ {
+ b0->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+ udp_header_t *udp = (udp_header_t *) (b0->data +
+ vnet_buffer
+ (b0)->l4_hdr_offset);
+ l4_hdr_sz = sizeof (*udp);
+ udp->checksum = 0;
+ }
+ }
+
+ if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4)
+ {
+ ASSERT (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM);
+ vnet_buffer2 (b0)->gso_size = hdr->gso_size;
+ vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
+ b0->flags |= VNET_BUFFER_F_GSO;
+ b0->flags |= VNET_BUFFER_F_IS_IP4;
+ }
+ if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6)
+ {
+ ASSERT (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM);
+ vnet_buffer2 (b0)->gso_size = hdr->gso_size;
+ vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
+ b0->flags |= VNET_BUFFER_F_GSO;
+ b0->flags |= VNET_BUFFER_F_IS_IP6;
+ }
+}
+
+
static_always_inline uword
virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, virtio_if_t * vif, u16 qid)
+ vlib_frame_t * frame, virtio_if_t * vif, u16 qid,
+ int gso_enabled)
{
vnet_main_t *vnm = vnet_get_main ();
u32 thread_index = vm->thread_index;
@@ -187,6 +265,10 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
b0->current_length = len;
b0->total_length_not_including_first_buffer = 0;
b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ if (gso_enabled)
+ fill_gso_buffer_flags (b0, hdr);
+
vnet_buffer (b0)->sw_if_index[VLIB_RX] = vif->sw_if_index;
vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
@@ -286,8 +368,12 @@ virtio_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
mif = vec_elt_at_index (nm->interfaces, dq->dev_instance);
if (mif->flags & VIRTIO_IF_FLAG_ADMIN_UP)
{
- n_rx += virtio_device_input_inline (vm, node, frame, mif,
- dq->queue_id);
+ if (mif->gso_enabled)
+ n_rx += virtio_device_input_inline (vm, node, frame, mif,
+ dq->queue_id, 1);
+ else
+ n_rx += virtio_device_input_inline (vm, node, frame, mif,
+ dq->queue_id, 0);
}
}
diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c
index cfeb30246f0..2648f29af84 100644
--- a/src/vnet/devices/virtio/virtio.c
+++ b/src/vnet/devices/virtio/virtio.c
@@ -277,6 +277,7 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
vlib_cli_output (vm, " host-ns \"%s\"", vif->net_ns);
vlib_cli_output (vm, " fd %d", vif->fd);
vlib_cli_output (vm, " tap-fd %d", vif->tap_fd);
+ vlib_cli_output (vm, " gso-enabled %d", vif->gso_enabled);
}
vlib_cli_output (vm, " Mac Address: %U", format_ethernet_address,
vif->mac_addr);
diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h
index af61ca5968f..f72819639d7 100644
--- a/src/vnet/devices/virtio/virtio.h
+++ b/src/vnet/devices/virtio/virtio.h
@@ -173,6 +173,7 @@ typedef struct
u8 host_ip4_prefix_len;
ip6_address_t host_ip6_addr;
u8 host_ip6_prefix_len;
+ int gso_enabled;
int ifindex;
} virtio_if_t;