aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Yourtchenko <ayourtch@gmail.com>2018-10-12 16:09:22 +0200
committerDamjan Marion <dmarion@me.com>2019-02-19 12:47:40 +0000
commit6a7cff7ec234af8529ff72a530076e191cc8d759 (patch)
treeea7a9bf447385172d0d3fda382aebf2d0203ecc8
parentbe30fea370ed7cfe6a4a1b154a944411ec3eabd0 (diff)
tap gso: experimental support
This commit adds a "gso" parameter to existing "create tap..." CLI, and a "no-gso" parameter for the compatibility with the future, when/if defaults change. It makes use of the lowest bit of the "tap_flags" field in the API call in order to allow creation of GSO interfaces via API as well. It does the necessary syscalls to enable the GSO and checksum offload support on the kernel side and sets two flags on the interface: virtio-specific virtio_if_t.gso_enabled, and vnet_hw_interface_t.flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO. The first one, if enabled, triggers the marking of the GSO-encapsulated packets on ingress with VNET_BUFFER_F_GSO flag, and setting vnet_buffer2(b)->gso_size to the desired L4 payload size. VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO determines the egress packet processing in interface-output for such packets: When the flag is set, they are sent out almost as usual (just taking care to set the vnet header for virtio). When the flag is not enabled (the case for most interfaces), the egress path performs the re-segmentation such that the L4 payload of the transmitted packets equals gso_size. The operations in the datapath are enabled only when there is at least one GSO-compatible interface in the system - this is done by tracking the count in interface_main.gso_interface_count. This way the impact of conditional checks for the setups that do not use GSO is minimized. "show tap" CLI shows the state of the GSO flag on the interface, and the total count of GSO-enabled interfaces (which is used to enable the GSO-related processing in the packet path). This commit lacks IPv6 extension header traversal support of any kind - the L4 payload is assumed to follow the IPv6 header. Also it performs the offloads only for TCP (TSO - TCP segmentation offload). The UDP fragmentation offload (UFO) is not part of it. 
For debug purposes it also adds the debug CLI: "set tap gso {<interface> | sw_if_index <sw_idx>} <enable|disable>" Change-Id: Ifd562db89adcc2208094b3d1032cee8c307aaef9 Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
-rw-r--r--src/vnet/buffer.h35
-rw-r--r--src/vnet/devices/tap/cli.c60
-rw-r--r--src/vnet/devices/tap/tap.c65
-rw-r--r--src/vnet/devices/tap/tap.h3
-rw-r--r--src/vnet/devices/virtio/device.c34
-rw-r--r--src/vnet/devices/virtio/node.c92
-rw-r--r--src/vnet/devices/virtio/virtio.c1
-rw-r--r--src/vnet/devices/virtio/virtio.h1
-rw-r--r--src/vnet/interface.c6
-rw-r--r--src/vnet/interface.h15
-rw-r--r--src/vnet/interface_funcs.h2
-rw-r--r--src/vnet/interface_output.c433
-rw-r--r--src/vnet/ip/ip4_forward.c44
-rw-r--r--src/vnet/ip/ip6_forward.c54
14 files changed, 786 insertions, 59 deletions
diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h
index 06696515ecd..ee04627fde3 100644
--- a/src/vnet/buffer.h
+++ b/src/vnet/buffer.h
@@ -66,14 +66,14 @@
_(17, FLOW_REPORT, "flow-report", 1) \
_(18, IS_DVR, "dvr", 1) \
_(19, QOS_DATA_VALID, "qos-data-valid", 0) \
- _(20, AVAIL1, "avail1", 1) \
- _(21, AVAIL2, "avail2", 1) \
- _(22, AVAIL3, "avail3", 1) \
- _(23, AVAIL4, "avail4", 1) \
- _(24, AVAIL5, "avail5", 1) \
- _(25, AVAIL6, "avail6", 1) \
- _(26, AVAIL7, "avail7", 1) \
- _(27, AVAIL8, "avail8", 1)
+ _(20, GSO, "gso", 0) \
+ _(21, AVAIL1, "avail1", 1) \
+ _(22, AVAIL2, "avail2", 1) \
+ _(23, AVAIL3, "avail3", 1) \
+ _(24, AVAIL4, "avail4", 1) \
+ _(25, AVAIL5, "avail5", 1) \
+ _(26, AVAIL6, "avail6", 1) \
+ _(27, AVAIL7, "avail7", 1)
/*
* Please allocate the FIRST available bit, redefine
@@ -396,6 +396,20 @@ typedef struct
};
} gbp;
+ /**
+ * The L4 payload size set on input on GSO enabled interfaces
+ * when we receive a GSO packet (a chain of buffers with the first one
+ * having GSO bit set), and needs to persist all the way to the interface-output,
+ * in case the egress interface is not GSO-enabled - then we need to perform
+ * the segmentation, and use this value to cut the payload appropriately.
+ */
+ u16 gso_size;
+ /* size of L4 protocol header */
+ u16 gso_l4_hdr_sz;
+
+ /* The union below has a u64 alignment, so this space is unused */
+ u32 __unused2[1];
+
union
{
struct
@@ -410,7 +424,7 @@ typedef struct
u64 pad[1];
u64 pg_replay_timestamp;
};
- u32 unused[10];
+ u32 unused[8];
};
} vnet_buffer_opaque2_t;
@@ -424,6 +438,9 @@ STATIC_ASSERT (sizeof (vnet_buffer_opaque2_t) <=
STRUCT_SIZE_OF (vlib_buffer_t, opaque2),
"VNET buffer opaque2 meta-data too large for vlib_buffer");
+#define gso_mtu_sz(b) (vnet_buffer2(b)->gso_size + vnet_buffer2(b)->gso_l4_hdr_sz + vnet_buffer(b)->l4_hdr_offset)
+
+
format_function_t format_vnet_buffer;
#endif /* included_vnet_buffer_h */
diff --git a/src/vnet/devices/tap/cli.c b/src/vnet/devices/tap/cli.c
index ee57a72268e..084fb908dc9 100644
--- a/src/vnet/devices/tap/cli.c
+++ b/src/vnet/devices/tap/cli.c
@@ -39,6 +39,7 @@ tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
int ip_addr_set = 0;
args.id = ~0;
+ args.tap_flags = 0;
/* Get a line of input. */
if (unformat_user (input, unformat_line_input, line_input))
@@ -75,6 +76,10 @@ tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
;
else if (unformat (line_input, "tx-ring-size %d", &args.tx_ring_sz))
;
+ else if (unformat (line_input, "no-gso"))
+ args.tap_flags &= ~TAP_FLAG_GSO;
+ else if (unformat (line_input, "gso"))
+ args.tap_flags |= TAP_FLAG_GSO;
else if (unformat (line_input, "hw-addr %U",
unformat_ethernet_address, args.mac_addr))
args.mac_addr_set = 1;
@@ -109,7 +114,7 @@ VLIB_CLI_COMMAND (tap_create_command, static) = {
"[rx-ring-size <size>] [tx-ring-size <size>] [host-ns <netns>] "
"[host-bridge <bridge-name>] [host-ip4-addr <ip4addr/mask>] "
"[host-ip6-addr <ip6-addr>] [host-ip4-gw <ip4-addr>] "
- "[host-ip6-gw <ip6-addr>] [host-if-name <name>]",
+ "[host-ip6-gw <ip6-addr>] [host-if-name <name>] [no-gso|gso]",
.function = tap_create_command_fn,
};
/* *INDENT-ON* */
@@ -163,6 +168,59 @@ VLIB_CLI_COMMAND (tap_delete__command, static) =
/* *INDENT-ON* */
static clib_error_t *
+tap_gso_command_fn (vlib_main_t * vm, unformat_input_t * input,
+		    vlib_cli_command_t * cmd)
+{
+  /*
+   * Debug CLI handler for "set tap gso".
+   *
+   * Parses an interface (by name or by "sw_if_index <n>") and an optional
+   * "enable"/"disable" keyword (enable is the default), then calls
+   * tap_gso_enable_disable() for that interface.
+   *
+   * Returns 0 on success, or a clib error describing the parse or
+   * configuration failure.
+   */
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = 0;
+  u32 sw_if_index = ~0;
+  vnet_main_t *vnm = vnet_get_main ();
+  int enable = 1;
+  int rv;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "Missing <interface>");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+	;
+      else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
+			 vnm, &sw_if_index))
+	;
+      else if (unformat (line_input, "enable"))
+	enable = 1;
+      else if (unformat (line_input, "disable"))
+	enable = 0;
+      else
+	{
+	  /*
+	   * Build the message from line_input (the buffer actually being
+	   * parsed) and leave the loop instead of returning, so that
+	   * line_input is released below on this path as well.
+	   */
+	  error = clib_error_return (0, "unknown input `%U'",
+				     format_unformat_error, line_input);
+	  break;
+	}
+    }
+  unformat_free (line_input);
+
+  if (error)
+    return error;
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0,
+			      "please specify interface name or sw_if_index");
+
+  rv = tap_gso_enable_disable (vm, sw_if_index, enable);
+  if (rv == VNET_API_ERROR_INVALID_SW_IF_INDEX)
+    return clib_error_return (0, "not a tap interface");
+  else if (rv != 0)
+    return clib_error_return (0, "error on configuring GSO on tap interface");
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (tap_gso__command, static) =
+{
+  /* Debug CLI: toggle GSO on an existing tap interface. */
+  .function = tap_gso_command_fn,
+  .path = "set tap gso",
+  .short_help = "set tap gso {<interface> | sw_if_index <sw_idx>} <enable|disable>",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c
index 101576c274b..3739561cc59 100644
--- a/src/vnet/devices/tap/tap.c
+++ b/src/vnet/devices/tap/tap.c
@@ -176,6 +176,16 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
unsigned int offload = 0;
hdrsz = sizeof (struct virtio_net_hdr_v1);
+ if (args->tap_flags & TAP_FLAG_GSO)
+ {
+ offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
+ vif->gso_enabled = 1;
+ }
+ else
+ {
+ vif->gso_enabled = 0;
+ }
+
_IOCTL (vif->tap_fd, TUNSETOFFLOAD, offload);
_IOCTL (vif->tap_fd, TUNSETVNETHDRSZ, &hdrsz);
_IOCTL (vif->fd, VHOST_SET_OWNER, 0);
@@ -386,6 +396,11 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
args->sw_if_index = vif->sw_if_index;
hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
+ if (args->tap_flags & TAP_FLAG_GSO)
+ {
+ hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO;
+ vnm->interface_main.gso_interface_count++;
+ }
vnet_hw_interface_set_input_node (vnm, vif->hw_if_index,
virtio_input_node.index);
vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, 0, ~0);
@@ -442,6 +457,10 @@ tap_delete_if (vlib_main_t * vm, u32 sw_if_index)
if (vif->type != VIRTIO_IF_TYPE_TAP)
return VNET_API_ERROR_INVALID_INTERFACE;
+ /* decrement if this was a GSO interface */
+ if (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO)
+ vnm->interface_main.gso_interface_count--;
+
/* bring down the interface */
vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
vnet_sw_interface_set_flags (vnm, vif->sw_if_index, 0);
@@ -467,6 +486,52 @@ tap_delete_if (vlib_main_t * vm, u32 sw_if_index)
}
int
+tap_gso_enable_disable (vlib_main_t * vm, u32 sw_if_index, int enable_disable)
+{
+  /*
+   * Enable (enable_disable != 0) or disable (enable_disable == 0) GSO on
+   * the tap interface identified by sw_if_index.
+   *
+   * Issues TUNSETOFFLOAD on the interface's tap fd, updates
+   * vif->gso_enabled, and keeps VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO and
+   * interface_main.gso_interface_count in step with each other.
+   *
+   * Returns 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX when
+   * sw_if_index does not name a tap interface, or
+   * VNET_API_ERROR_SYSCALL_ERROR_3 when the ioctl fails.
+   */
+  vnet_main_t *vnm = vnet_get_main ();
+  virtio_main_t *mm = &virtio_main;
+  virtio_if_t *vif;
+  vnet_hw_interface_t *hw;
+  clib_error_t *err = 0;
+
+  /* The debug CLI accepts an arbitrary number; reject unknown indices. */
+  if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  if (hw == NULL || virtio_device_class.index != hw->dev_class_index)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
+
+  /*
+   * Same type check as tap_delete_if(): only act on interfaces whose
+   * type is VIRTIO_IF_TYPE_TAP, since the ioctl below targets tap_fd.
+   */
+  if (vif->type != VIRTIO_IF_TYPE_TAP)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  const unsigned int gso_on = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
+  const unsigned int gso_off = 0;
+  unsigned int offload = enable_disable ? gso_on : gso_off;
+  /* NOTE(review): _IOCTL is assumed to set err and jump to error on failure */
+  _IOCTL (vif->tap_fd, TUNSETOFFLOAD, offload);
+  vif->gso_enabled = enable_disable ? 1 : 0;
+  if (enable_disable)
+    {
+      /* count each interface only once */
+      if ((hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0)
+	{
+	  vnm->interface_main.gso_interface_count++;
+	  hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO;
+	}
+    }
+  else
+    {
+      if ((hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) != 0)
+	{
+	  vnm->interface_main.gso_interface_count--;
+	  hw->flags &= ~VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO;
+	}
+    }
+
+error:
+  if (err)
+    {
+      clib_warning ("Error %s gso on sw_if_index %d",
+		    enable_disable ? "enabling" : "disabling", sw_if_index);
+      /* the error object is not handed to the caller, so release it here */
+      clib_error_free (err);
+      return VNET_API_ERROR_SYSCALL_ERROR_3;
+    }
+  return 0;
+}
+
+int
tap_dump_ifs (tap_interface_details_t ** out_tapids)
{
vnet_main_t *vnm = vnet_get_main ();
diff --git a/src/vnet/devices/tap/tap.h b/src/vnet/devices/tap/tap.h
index 19dc88dd7c6..745f9fca304 100644
--- a/src/vnet/devices/tap/tap.h
+++ b/src/vnet/devices/tap/tap.h
@@ -30,6 +30,7 @@ typedef struct
u16 rx_ring_sz;
u16 tx_ring_sz;
u32 tap_flags;
+#define TAP_FLAG_GSO (1 << 0)
u8 *host_namespace;
u8 *host_if_name;
u8 host_mac_addr[6];
@@ -78,6 +79,8 @@ typedef struct
void tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args);
int tap_delete_if (vlib_main_t * vm, u32 sw_if_index);
+int tap_gso_enable_disable (vlib_main_t * vm, u32 sw_if_index,
+ int enable_disable);
int tap_dump_ifs (tap_interface_details_t ** out_tapids);
#endif /* _VNET_DEVICES_VIRTIO_TAP_H_ */
diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c
index aa6a342f90b..609ffb47de8 100644
--- a/src/vnet/devices/virtio/device.c
+++ b/src/vnet/devices/virtio/device.c
@@ -117,7 +117,7 @@ virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring)
static_always_inline u16
add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif,
virtio_vring_t * vring, u32 bi, u16 avail, u16 next,
- u16 mask)
+ u16 mask, int do_gso)
{
u16 n_added = 0;
int hdr_sz = vif->virtio_net_hdr_sz;
@@ -127,6 +127,25 @@ add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif,
struct virtio_net_hdr_v1 *hdr = vlib_buffer_get_current (b) - hdr_sz;
clib_memset (hdr, 0, hdr_sz);
+ if (do_gso && (b->flags & VNET_BUFFER_F_GSO))
+ {
+ if (b->flags & VNET_BUFFER_F_IS_IP4)
+ {
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+ hdr->gso_size = vnet_buffer2 (b)->gso_size;
+ hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x22;
+ hdr->csum_offset = 0x10;
+ }
+ else
+ {
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+ hdr->gso_size = vnet_buffer2 (b)->gso_size;
+ hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x36;
+ hdr->csum_offset = 0x10;
+ }
+ }
if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0))
{
@@ -219,7 +238,8 @@ add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif,
static_always_inline uword
virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, virtio_if_t * vif)
+ vlib_frame_t * frame, virtio_if_t * vif,
+ int do_gso)
{
u8 qid = 0;
u16 n_left = frame->n_vectors;
@@ -246,7 +266,8 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u16 n_added = 0;
n_added =
- add_buffer_to_slot (vm, vif, vring, buffers[0], avail, next, mask);
+ add_buffer_to_slot (vm, vif, vring, buffers[0], avail, next, mask,
+ do_gso);
if (!n_added)
break;
avail += n_added;
@@ -286,7 +307,12 @@ virtio_interface_tx (vlib_main_t * vm,
vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
virtio_if_t *vif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
- return virtio_interface_tx_inline (vm, node, frame, vif);
+ vnet_main_t *vnm = vnet_get_main ();
+ if (vnm->interface_main.gso_interface_count > 0)
+ return virtio_interface_tx_inline (vm, node, frame, vif, 1 /* do_gso */ );
+ else
+ return virtio_interface_tx_inline (vm, node, frame, vif,
+ 0 /* no do_gso */ );
}
static void
diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c
index 6b82c418ffb..fcc0f8a212a 100644
--- a/src/vnet/devices/virtio/node.c
+++ b/src/vnet/devices/virtio/node.c
@@ -30,6 +30,7 @@
#include <vnet/feature/feature.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp_packet.h>
#include <vnet/devices/virtio/virtio.h>
@@ -140,9 +141,86 @@ more:
goto more;
}
+static_always_inline void
+fill_gso_buffer_flags (vlib_buffer_t * b0, struct virtio_net_hdr_v1 *hdr)
+{
+  /*
+   * Translate the virtio net header of a received packet into vnet buffer
+   * metadata: header offsets, checksum-offload flags and, for TCPv4/TCPv6
+   * GSO packets, the GSO flag plus gso_size / gso_l4_hdr_sz.
+   * Headers are parsed starting at b0->data, assuming an untagged
+   * ethernet header followed directly by IPv4 or IPv6.
+   */
+  u8 l4_hdr_sz = 0;
+
+  if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+    {
+      const u32 offsets_valid = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+	VNET_BUFFER_F_L3_HDR_OFFSET_VALID | VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
+      ethernet_header_t *eth = (ethernet_header_t *) b0->data;
+      u16 l3_off = sizeof (ethernet_header_t);
+      u8 proto = 0;
+
+      vnet_buffer (b0)->l2_hdr_offset = 0;
+      vnet_buffer (b0)->l3_hdr_offset = l3_off;
+
+      switch (clib_net_to_host_u16 (eth->type))
+	{
+	case ETHERNET_TYPE_IP4:
+	  {
+	    ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l3_off);
+	    vnet_buffer (b0)->l4_hdr_offset = l3_off + ip4_header_bytes (ip4);
+	    proto = ip4->protocol;
+	    b0->flags |= VNET_BUFFER_F_IS_IP4 | offsets_valid |
+	      VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
+	  }
+	  break;
+
+	case ETHERNET_TYPE_IP6:
+	  {
+	    ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l3_off);
+	    /* FIXME IPv6 EH traversal */
+	    vnet_buffer (b0)->l4_hdr_offset = l3_off + sizeof (ip6_header_t);
+	    proto = ip6->protocol;
+	    b0->flags |= VNET_BUFFER_F_IS_IP6 | offsets_valid |
+	      VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
+	  }
+	  break;
+
+	default:
+	  /* other ethertypes: only the L2/L3 offsets above are recorded */
+	  break;
+	}
+
+      if (IP_PROTOCOL_TCP == proto)
+	{
+	  tcp_header_t *th;
+
+	  b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+	  th = (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+	  l4_hdr_sz = tcp_header_bytes (th);
+	  th->checksum = 0;
+	}
+      else if (IP_PROTOCOL_UDP == proto)
+	{
+	  udp_header_t *uh;
+
+	  b0->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+	  uh = (udp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+	  l4_hdr_sz = sizeof (*uh);
+	  uh->checksum = 0;
+	}
+    }
+
+  /* Only the two TCP GSO types mark the buffer as GSO. */
+  if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
+      hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6)
+    {
+      ASSERT (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM);
+      vnet_buffer2 (b0)->gso_size = hdr->gso_size;
+      vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
+      b0->flags |= VNET_BUFFER_F_GSO;
+      if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4)
+	b0->flags |= VNET_BUFFER_F_IS_IP4;
+      else
+	b0->flags |= VNET_BUFFER_F_IS_IP6;
+    }
+}
+
+
static_always_inline uword
virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, virtio_if_t * vif, u16 qid)
+ vlib_frame_t * frame, virtio_if_t * vif, u16 qid,
+ int gso_enabled)
{
vnet_main_t *vnm = vnet_get_main ();
u32 thread_index = vm->thread_index;
@@ -187,6 +265,10 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
b0->current_length = len;
b0->total_length_not_including_first_buffer = 0;
b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ if (gso_enabled)
+ fill_gso_buffer_flags (b0, hdr);
+
vnet_buffer (b0)->sw_if_index[VLIB_RX] = vif->sw_if_index;
vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
@@ -286,8 +368,12 @@ virtio_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
mif = vec_elt_at_index (nm->interfaces, dq->dev_instance);
if (mif->flags & VIRTIO_IF_FLAG_ADMIN_UP)
{
- n_rx += virtio_device_input_inline (vm, node, frame, mif,
- dq->queue_id);
+ if (mif->gso_enabled)
+ n_rx += virtio_device_input_inline (vm, node, frame, mif,
+ dq->queue_id, 1);
+ else
+ n_rx += virtio_device_input_inline (vm, node, frame, mif,
+ dq->queue_id, 0);
}
}
diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c
index cfeb30246f0..2648f29af84 100644
--- a/src/vnet/devices/virtio/virtio.c
+++ b/src/vnet/devices/virtio/virtio.c
@@ -277,6 +277,7 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
vlib_cli_output (vm, " host-ns \"%s\"", vif->net_ns);
vlib_cli_output (vm, " fd %d", vif->fd);
vlib_cli_output (vm, " tap-fd %d", vif->tap_fd);
+ vlib_cli_output (vm, " gso-enabled %d", vif->gso_enabled);
}
vlib_cli_output (vm, " Mac Address: %U", format_ethernet_address,
vif->mac_addr);
diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h
index af61ca5968f..f72819639d7 100644
--- a/src/vnet/devices/virtio/virtio.h
+++ b/src/vnet/devices/virtio/virtio.h
@@ -173,6 +173,7 @@ typedef struct
u8 host_ip4_prefix_len;
ip6_address_t host_ip6_addr;
u8 host_ip6_prefix_len;
+ int gso_enabled;
int ifindex;
} virtio_if_t;
diff --git a/src/vnet/interface.c b/src/vnet/interface.c
index 12204bd5718..dbfe49694f5 100644
--- a/src/vnet/interface.c
+++ b/src/vnet/interface.c
@@ -894,6 +894,7 @@ vnet_register_interface (vnet_main_t * vnm,
static char *e[] = {
+ /* one string per vnet_interface_output_error_t value, in enum order */
"interface is down",
"interface is deleted",
+ "no buffers to segment GSO",
+ "unhandled GSO type",
};
r.n_errors = ARRAY_LEN (e);
@@ -1328,6 +1329,11 @@ vnet_interface_init (vlib_main_t * vm)
}
}
+ im->gso_interface_count = 0;
+ /* init per-thread data */
+ vec_validate_aligned (im->per_thread_data, vlib_num_workers (),
+ CLIB_CACHE_LINE_BYTES);
+
if ((error = vlib_call_init_function (vm, vnet_interface_cli_init)))
return error;
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index 174e5347ad2..5c418593a42 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -475,6 +475,9 @@ typedef enum vnet_hw_interface_flags_t_
/* tx checksum offload */
VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD = (1 << 17),
+
+ /* gso */
+ VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO = (1 << 18),
} vnet_hw_interface_flags_t;
#define VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT 1
@@ -791,6 +794,12 @@ typedef struct
typedef struct
{
+ u32 *split_buffers;
+ u32 padding[14];
+} vnet_interface_per_thread_data_t;
+
+typedef struct
+{
/* Hardware interfaces. */
vnet_hw_interface_t *hw_interfaces;
@@ -827,6 +836,12 @@ typedef struct
u32 pcap_pkts_to_capture;
uword *pcap_drop_filter_hash;
+ /* per-thread data */
+ vnet_interface_per_thread_data_t *per_thread_data;
+
+ /* enable GSO processing in packet path if this count is > 0 */
+ u32 gso_interface_count;
+
/* feature_arc_index */
u8 output_feature_arc_index;
} vnet_interface_main_t;
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h
index 9a674b180b9..ef1fc16eded 100644
--- a/src/vnet/interface_funcs.h
+++ b/src/vnet/interface_funcs.h
@@ -442,6 +442,8 @@ typedef enum
{
VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN,
VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DELETED,
+ VNET_INTERFACE_OUTPUT_ERROR_NO_BUFFERS_FOR_GSO,
+ VNET_INTERFACE_OUTPUT_ERROR_UNHANDLED_GSO_TYPE,
} vnet_interface_output_error_t;
/* Format for interface output traces. */
diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c
index beeb62a2db4..251ff34ac9a 100644
--- a/src/vnet/interface_output.c
+++ b/src/vnet/interface_output.c
@@ -47,7 +47,10 @@
typedef struct
{
u32 sw_if_index;
- u8 data[128 - sizeof (u32)];
+ u32 flags;
+ u16 gso_size;
+ u8 gso_l4_hdr_sz;
+ u8 data[128 - 3 * sizeof (u32)];
}
interface_output_trace_t;
@@ -69,24 +72,30 @@ format_vnet_interface_output_trace (u8 * s, va_list * va)
(vnm->interface_main.sw_interfaces, t->sw_if_index))
{
/* the interface may have been deleted by the time the trace is printed */
- s = format (s, "sw_if_index: %d\n%U%U",
- t->sw_if_index,
- format_white_space, indent,
- node->format_buffer ? node->
- format_buffer : format_hex_bytes, t->data,
- sizeof (t->data));
+ s = format (s, "sw_if_index: %d ", t->sw_if_index);
}
else
{
si = vnet_get_sw_interface (vnm, t->sw_if_index);
-
- s = format (s, "%U\n%U%U",
- format_vnet_sw_interface_name, vnm, si,
- format_white_space, indent,
- node->format_buffer ? node->
- format_buffer : format_hex_bytes, t->data,
- sizeof (t->data));
+ s =
+ format (s, "%U ", format_vnet_sw_interface_name, vnm, si,
+ t->flags);
+ }
+#define _(bit, name, v, x) \
+ if (v && (t->flags & VNET_BUFFER_F_##name)) \
+ s = format (s, "%s ", v);
+ foreach_vnet_buffer_flag
+#undef _
+ if (t->flags & VNET_BUFFER_F_GSO)
+ {
+ s = format (s, "\n%Ugso_sz %d gso_l4_hdr_sz %d",
+ format_white_space, indent + 2, t->gso_size,
+ t->gso_l4_hdr_sz);
}
+ s =
+ format (s, "\n%U%U", format_white_space, indent,
+ node->format_buffer ? node->format_buffer : format_hex_bytes,
+ t->data, sizeof (t->data));
}
return s;
}
@@ -121,6 +130,9 @@ vnet_interface_output_trace (vlib_main_t * vm,
{
t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ t0->flags = b0->flags;
+ t0->gso_size = vnet_buffer2 (b0)->gso_size;
+ t0->gso_l4_hdr_sz = vnet_buffer2 (b0)->gso_l4_hdr_sz;
clib_memcpy_fast (t0->data, vlib_buffer_get_current (b0),
sizeof (t0->data));
}
@@ -128,6 +140,9 @@ vnet_interface_output_trace (vlib_main_t * vm,
{
t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
t1->sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ t1->flags = b1->flags;
+ t1->gso_size = vnet_buffer2 (b1)->gso_size;
+ t1->gso_l4_hdr_sz = vnet_buffer2 (b1)->gso_l4_hdr_sz;
clib_memcpy_fast (t1->data, vlib_buffer_get_current (b1),
sizeof (t1->data));
}
@@ -149,6 +164,9 @@ vnet_interface_output_trace (vlib_main_t * vm,
{
t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ t0->flags = b0->flags;
+ t0->gso_size = vnet_buffer2 (b0)->gso_size;
+ t0->gso_l4_hdr_sz = vnet_buffer2 (b0)->gso_l4_hdr_sz;
clib_memcpy_fast (t0->data, vlib_buffer_get_current (b0),
sizeof (t0->data));
}
@@ -192,9 +210,17 @@ calc_checksums (vlib_main_t * vm, vlib_buffer_t * b)
{
int bogus;
if (b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)
- th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ {
+ th->checksum = 0;
+ th->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ }
if (b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)
- uh->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ {
+ uh->checksum = 0;
+ uh->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ }
}
b->flags &= ~VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
@@ -202,12 +228,245 @@ calc_checksums (vlib_main_t * vm, vlib_buffer_t * b)
b->flags &= ~VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
}
+static_always_inline u16
+tso_alloc_tx_bufs (vlib_main_t * vm,
+		   vnet_interface_per_thread_data_t * ptd,
+		   vlib_buffer_t * b0, u16 l4_hdr_sz)
+{
+  /*
+   * Allocate into ptd->split_buffers exactly as many buffers as are needed
+   * to segment the (possibly chained) GSO packet b0.
+   *
+   * l4_hdr_sz is the size of the L4 header; together with
+   * vnet_buffer (b0)->l4_hdr_offset it gives the size of the L2+L3+L4
+   * headers that are replicated into every segment.
+   *
+   * Returns 1 on success, in which case vec_len (ptd->split_buffers) is the
+   * number of allocated buffers. Returns 0 if the buffers cannot be
+   * allocated or the packet cannot be segmented (zero gso_size, no payload,
+   * headers not contained in the first buffer, or headers that do not
+   * leave any room in a buffer); the vector is left empty in that case.
+   */
+  u32 n_bytes_b0 = vlib_buffer_length_in_chain (vm, b0);
+  u32 gso_size = vnet_buffer2 (b0)->gso_size;
+  u16 l234_sz = vnet_buffer (b0)->l4_hdr_offset + l4_hdr_sz;
+  u32 buf_data_sz = vlib_buffer_get_default_data_size (vm);
+  u32 first_data_sz, seg_data_sz, rest, n_bufs, n_alloc;
+
+  /* drop any stale length left over from a previous, larger packet */
+  vec_reset_length (ptd->split_buffers);
+
+  /* gso_size comes from the packet metadata: never divide by zero */
+  if (PREDICT_FALSE (gso_size == 0 || n_bytes_b0 <= l234_sz
+		     || b0->current_length < l234_sz
+		     || buf_data_sz <= l234_sz))
+    return 0;
+
+  /*
+   * Mirror the way the payload is laid out by the segmentation code:
+   * the first segment takes what the first source buffer holds (at most
+   * gso_size), every following segment takes at most gso_size bytes but
+   * never more than fits in a buffer after the replicated headers
+   * (see tso_init_buf_from_template).
+   */
+  first_data_sz = clib_min (gso_size, (u32) b0->current_length - l234_sz);
+  seg_data_sz = clib_min (gso_size, buf_data_sz - l234_sz);
+  rest = n_bytes_b0 - l234_sz - first_data_sz;
+  /* one buffer for the first segment + rounded-up division for the rest */
+  n_bufs = 1 + (rest + (seg_data_sz - 1)) / seg_data_sz;
+
+  vec_validate (ptd->split_buffers, n_bufs - 1);
+
+  n_alloc = vlib_buffer_alloc (vm, ptd->split_buffers, n_bufs);
+  if (n_alloc < n_bufs)
+    {
+      vlib_buffer_free (vm, ptd->split_buffers, n_alloc);
+      vec_reset_length (ptd->split_buffers);
+      return 0;
+    }
+  return 1;
+}
+
+static_always_inline void
+tso_init_buf_from_template_base (vlib_buffer_t * nb0, vlib_buffer_t * b0,
+				 u32 flags, u16 length)
+{
+  /*
+   * Initialize the fresh buffer nb0 from the template b0: copy the opaque
+   * metadata and the first `length` bytes of data, and set up nb0 as a
+   * single, unchained buffer of that length carrying `flags`.
+   */
+  clib_memcpy_fast (nb0->data, b0->data, length);
+  clib_memcpy_fast (&nb0->opaque, &b0->opaque, sizeof (nb0->opaque));
+
+  nb0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID | flags;
+  nb0->current_data = 0;
+  nb0->current_length = length;
+  nb0->total_length_not_including_first_buffer = 0;
+}
+
+static_always_inline void
+tso_init_buf_from_template (vlib_main_t * vm, vlib_buffer_t * nb0,
+			    vlib_buffer_t * b0, u16 template_data_sz,
+			    u16 gso_size, u8 ** p_dst_ptr, u16 * p_dst_left,
+			    u32 next_tcp_seq, u32 flags)
+{
+  /*
+   * Prepare nb0 as the next output segment: replicate the first
+   * template_data_sz bytes (the headers) from b0, stamp the TCP sequence
+   * number, and report through p_dst_ptr / p_dst_left where the payload
+   * goes and how many payload bytes this segment may take.
+   */
+  tcp_header_t *th;
+
+  tso_init_buf_from_template_base (nb0, b0, flags, template_data_sz);
+
+  /* the opaque area was just copied, so nb0's l4_hdr_offset is usable */
+  th = (tcp_header_t *) (nb0->data + vnet_buffer (nb0)->l4_hdr_offset);
+  th->seq_number = clib_host_to_net_u32 (next_tcp_seq);
+
+  /* payload starts right behind the replicated headers */
+  *p_dst_ptr = nb0->data + template_data_sz;
+  *p_dst_left =
+    clib_min (gso_size,
+	      vlib_buffer_get_default_data_size (vm) - template_data_sz);
+}
+
+static_always_inline void
+tso_fixup_segmented_buf (vlib_buffer_t * b0, u8 tcp_flags, int is_ip6)
+{
+  /*
+   * Finalize the headers of one segment: store tcp_flags in the TCP header
+   * and rewrite the IP length field from b0->current_length
+   * (payload_length for IPv6, total length for IPv4).
+   */
+  u8 *l3 = b0->data + vnet_buffer (b0)->l3_hdr_offset;
+  u8 *l4 = b0->data + vnet_buffer (b0)->l4_hdr_offset;
+
+  ((tcp_header_t *) l4)->flags = tcp_flags;
+
+  if (!is_ip6)
+    {
+      /* IPv4 length counts everything from the IP header on */
+      ((ip4_header_t *) l3)->length =
+	clib_host_to_net_u16 (b0->current_length -
+			      vnet_buffer (b0)->l3_hdr_offset);
+      return;
+    }
+
+  /* IPv6 payload length: everything from the L4 header on */
+  ((ip6_header_t *) l3)->payload_length =
+    clib_host_to_net_u16 (b0->current_length -
+			  vnet_buffer (b0)->l4_hdr_offset);
+}
+
+/**
+ * Allocate the necessary number of ptd->split_buffers,
+ * and segment the possibly chained buffer(s) from sb0 into
+ * there.
+ *
+ * @param vm             vlib main
+ * @param ptd            per-thread data holding the split_buffers vector
+ * @param do_tx_offloads if set, compute checksums of the segments in software
+ * @param sbi0           buffer index of sb0
+ * @param sb0            head of the source (GSO) buffer chain
+ * @param n_bytes_b0     total number of bytes in the sb0 chain, as supplied
+ *                       by the caller
+ *
+ * Return the cumulative number of bytes sent or zero
+ * if allocation failed.
+ */
+
+static_always_inline u32
+tso_segment_buffer (vlib_main_t * vm, vnet_interface_per_thread_data_t * ptd,
+		    int do_tx_offloads, u32 sbi0, vlib_buffer_t * sb0,
+		    u32 n_bytes_b0)
+{
+  u32 n_tx_bytes = 0;
+  int is_ip4 = sb0->flags & VNET_BUFFER_F_IS_IP4;
+  int is_ip6 = sb0->flags & VNET_BUFFER_F_IS_IP6;
+  ASSERT (is_ip4 || is_ip6);
+  ASSERT (sb0->flags & VNET_BUFFER_F_L2_HDR_OFFSET_VALID);
+  ASSERT (sb0->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID);
+  ASSERT (sb0->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+  u16 gso_size = vnet_buffer2 (sb0)->gso_size;
+
+  int l4_hdr_sz = vnet_buffer2 (sb0)->gso_l4_hdr_sz;
+  u8 save_tcp_flags = 0;
+  u8 tcp_flags_no_fin_psh = 0;
+  u32 next_tcp_seq = 0;
+
+  tcp_header_t *tcp =
+    (tcp_header_t *) (sb0->data + vnet_buffer (sb0)->l4_hdr_offset);
+  next_tcp_seq = clib_net_to_host_u32 (tcp->seq_number);
+  /* store original flags for last packet and reset FIN and PSH */
+  save_tcp_flags = tcp->flags;
+  tcp_flags_no_fin_psh = tcp->flags & ~(TCP_FLAG_FIN | TCP_FLAG_PSH);
+  tcp->checksum = 0;
+
+  /* segments are standalone, non-GSO buffers */
+  u32 default_bflags =
+    sb0->flags & ~(VNET_BUFFER_F_GSO | VLIB_BUFFER_NEXT_PRESENT);
+  /* size of the L2+L3+L4 headers replicated into every segment */
+  u16 l234_sz = vnet_buffer (sb0)->l4_hdr_offset + l4_hdr_sz;
+  int first_data_size = clib_min (gso_size, sb0->current_length - l234_sz);
+  next_tcp_seq += first_data_size;
+
+  if (PREDICT_FALSE (!tso_alloc_tx_bufs (vm, ptd, sb0, l4_hdr_sz)))
+    return 0;
+
+  vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[0]);
+  /*
+   * The first segment consists of all the headers (L2+L3+L4, not just L4)
+   * plus the first chunk of payload; copying fewer bytes would truncate
+   * the payload and leave a too short current_length.
+   */
+  tso_init_buf_from_template_base (b0, sb0, default_bflags,
+				   l234_sz + first_data_size);
+
+  u32 total_src_left = n_bytes_b0 - l234_sz - first_data_size;
+  if (total_src_left)
+    {
+      /* Need to copy more segments */
+      u8 *src_ptr, *dst_ptr;
+      u16 src_left, dst_left;
+      /* current source buffer */
+      vlib_buffer_t *csb0 = sb0;
+      u32 csbi0 = sbi0;
+      /* current dest buffer */
+      vlib_buffer_t *cdb0;
+      u16 dbi = 1;		/* the buffer [0] is b0 */
+
+      src_ptr = sb0->data + l234_sz + first_data_size;
+      src_left = sb0->current_length - l234_sz - first_data_size;
+
+      tso_fixup_segmented_buf (b0, tcp_flags_no_fin_psh, is_ip6);
+      if (do_tx_offloads)
+	calc_checksums (vm, b0);
+
+      /* grab a second buffer and prepare the loop */
+      ASSERT (dbi < vec_len (ptd->split_buffers));
+      cdb0 = vlib_get_buffer (vm, ptd->split_buffers[dbi++]);
+      tso_init_buf_from_template (vm, cdb0, b0, l234_sz, gso_size, &dst_ptr,
+				  &dst_left, next_tcp_seq, default_bflags);
+
+      /* an arbitrary large number to catch the runaway loops */
+      int nloops = 2000;
+      while (total_src_left)
+	{
+	  ASSERT (nloops-- > 0);
+	  u16 bytes_to_copy = clib_min (src_left, dst_left);
+
+	  clib_memcpy_fast (dst_ptr, src_ptr, bytes_to_copy);
+
+	  src_left -= bytes_to_copy;
+	  src_ptr += bytes_to_copy;
+	  total_src_left -= bytes_to_copy;
+	  dst_left -= bytes_to_copy;
+	  dst_ptr += bytes_to_copy;
+	  next_tcp_seq += bytes_to_copy;
+	  cdb0->current_length += bytes_to_copy;
+
+	  if (0 == src_left)
+	    {
+	      int has_next = (csb0->flags & VLIB_BUFFER_NEXT_PRESENT);
+	      u32 next_bi = csb0->next_buffer;
+
+	      /* init src to the next buffer in chain */
+	      if (has_next)
+		{
+		  csbi0 = next_bi;
+		  csb0 = vlib_get_buffer (vm, csbi0);
+		  src_left = csb0->current_length;
+		  src_ptr = csb0->data;
+		}
+	      else
+		{
+		  ASSERT (total_src_left == 0);
+		  break;
+		}
+	    }
+	  /* current segment is full and there is more to send: start a new one */
+	  if (0 == dst_left && total_src_left)
+	    {
+	      if (do_tx_offloads)
+		calc_checksums (vm, cdb0);
+	      n_tx_bytes += cdb0->current_length;
+	      ASSERT (dbi < vec_len (ptd->split_buffers));
+	      cdb0 = vlib_get_buffer (vm, ptd->split_buffers[dbi++]);
+	      tso_init_buf_from_template (vm, cdb0, b0, l234_sz,
+					  gso_size, &dst_ptr, &dst_left,
+					  next_tcp_seq, default_bflags);
+	    }
+	}
+
+      /* the last segment carries the original TCP flags (incl. FIN/PSH) */
+      tso_fixup_segmented_buf (cdb0, save_tcp_flags, is_ip6);
+      if (do_tx_offloads)
+	calc_checksums (vm, cdb0);
+
+      n_tx_bytes += cdb0->current_length;
+    }
+  n_tx_bytes += b0->current_length;
+  return n_tx_bytes;
+}
+
+static_always_inline void
+drop_one_buffer_and_count (vlib_main_t * vm, vnet_main_t * vnm,
+			   vlib_node_runtime_t * node, u32 * pbi0,
+			   u32 drop_error_code)
+{
+  /*
+   * Account one TX error on the node's interface and hand the single
+   * buffer at *pbi0 to the drop next-node with drop_error_code.
+   */
+  vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
+  vlib_simple_counter_main_t *tx_err_cm =
+    vec_elt_at_index (vnm->interface_main.sw_if_counters,
+		      VNET_INTERFACE_COUNTER_TX_ERROR);
+
+  vlib_increment_simple_counter (tx_err_cm, vm->thread_index,
+				 rt->sw_if_index, 1);
+
+  vlib_error_drop_buffers (vm, node, pbi0, 1 /* buffer stride */ ,
+			   1 /* n_buffers */ ,
+			   VNET_INTERFACE_OUTPUT_NEXT_DROP,
+			   node->node_index, drop_error_code);
+}
+
static_always_inline uword
-vnet_interface_output_node_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, vnet_main_t * vnm,
- vnet_hw_interface_t * hi,
- int do_tx_offloads)
+vnet_interface_output_node_inline_gso (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ vnet_main_t * vnm,
+ vnet_hw_interface_t * hi,
+ int do_tx_offloads,
+ int do_segmentation)
{
vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
vnet_sw_interface_t *si;
@@ -219,6 +478,8 @@ vnet_interface_output_node_inline (vlib_main_t * vm,
u32 next_index = VNET_INTERFACE_OUTPUT_NEXT_TX;
u32 current_config_index = ~0;
u8 arc = im->output_feature_arc_index;
+ vnet_interface_per_thread_data_t *ptd =
+ vec_elt_at_index (im->per_thread_data, thread_index);
n_buffers = frame->n_vectors;
@@ -300,15 +561,30 @@ vnet_interface_output_node_inline (vlib_main_t * vm,
to_tx[1] = bi1;
to_tx[2] = bi2;
to_tx[3] = bi3;
- from += 4;
- to_tx += 4;
- n_left_to_tx -= 4;
+ if (!do_segmentation)
+ {
+ from += 4;
+ to_tx += 4;
+ n_left_to_tx -= 4;
+ }
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
b2 = vlib_get_buffer (vm, bi2);
b3 = vlib_get_buffer (vm, bi3);
+ if (do_segmentation)
+ {
+ or_flags = b0->flags | b1->flags | b2->flags | b3->flags;
+
+ /* go to single loop if we need TSO segmentation */
+ if (PREDICT_FALSE (or_flags & VNET_BUFFER_F_GSO))
+ break;
+ from += 4;
+ to_tx += 4;
+ n_left_to_tx -= 4;
+ }
+
/* Be grumpy about zero length buffers for benefit of
driver tx function. */
ASSERT (b0->current_length > 0);
@@ -376,7 +652,8 @@ vnet_interface_output_node_inline (vlib_main_t * vm,
n_bytes_b3);
}
- or_flags = b0->flags | b1->flags | b2->flags | b3->flags;
+ if (!do_segmentation)
+ or_flags = b0->flags | b1->flags | b2->flags | b3->flags;
if (do_tx_offloads)
{
@@ -422,6 +699,85 @@ vnet_interface_output_node_inline (vlib_main_t * vm,
b0->current_config_index = current_config_index;
}
+ if (do_segmentation)
+ {
+ if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_GSO))
+ {
+ /*
+ * Undo the enqueue of the b0 - it is not going anywhere,
+ * and will be freed either after it's segmented or
+ * when dropped, if there are no buffers to segment into.
+ */
+ to_tx -= 1;
+ n_left_to_tx += 1;
+ /* undo the counting. */
+ n_bytes -= n_bytes_b0;
+ n_packets -= 1;
+
+ u32 n_tx_bytes = 0;
+
+ n_tx_bytes =
+ tso_segment_buffer (vm, ptd, do_tx_offloads, bi0, b0,
+ n_bytes_b0);
+
+ if (PREDICT_FALSE (n_tx_bytes == 0))
+ {
+ drop_one_buffer_and_count (vm, vnm, node, from - 1,
+ VNET_INTERFACE_OUTPUT_ERROR_NO_BUFFERS_FOR_GSO);
+ continue;
+ }
+
+ u16 n_tx_bufs = vec_len (ptd->split_buffers);
+ u32 *from_tx_seg = ptd->split_buffers;
+
+ while (n_tx_bufs > 0)
+ {
+ if (n_tx_bufs >= n_left_to_tx)
+ {
+ while (n_left_to_tx > 0)
+ {
+ to_tx[0] = from_tx_seg[0];
+ to_tx += 1;
+ from_tx_seg += 1;
+ n_left_to_tx -= 1;
+ n_tx_bufs -= 1;
+ n_packets += 1;
+ }
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_tx);
+ vlib_get_new_next_frame (vm, node, next_index,
+ to_tx, n_left_to_tx);
+ }
+ else
+ {
+ while (n_tx_bufs > 0)
+ {
+ to_tx[0] = from_tx_seg[0];
+ to_tx += 1;
+ from_tx_seg += 1;
+ n_left_to_tx -= 1;
+ n_tx_bufs -= 1;
+ n_packets += 1;
+ }
+ }
+ }
+ n_bytes += n_tx_bytes;
+ if (PREDICT_FALSE (tx_swif0 != rt->sw_if_index))
+ {
+
+ vlib_increment_combined_counter
+ (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX, thread_index, tx_swif0,
+ _vec_len (ptd->split_buffers), n_tx_bytes);
+ }
+ /* The buffers were enqueued. Reset the length */
+ _vec_len (ptd->split_buffers) = 0;
+ /* Free the now segmented buffer */
+ vlib_buffer_free_one (vm, bi0);
+ continue;
+ }
+ }
+
if (PREDICT_FALSE (tx_swif0 != rt->sw_if_index))
{
@@ -446,6 +802,33 @@ vnet_interface_output_node_inline (vlib_main_t * vm,
return n_buffers;
}
+/*
+ * Pick the variant of the interface output worker to run for this frame.
+ *
+ * do_segmentation is passed as a literal at every call site.  The intent
+ * is to err on the side of not impacting non-GSO performance: in the
+ * common case of no GSO interfaces the segmentation codepath should not
+ * be there at all.
+ *
+ * Returns whatever vnet_interface_output_node_inline_gso () returns.
+ */
+static_always_inline uword
+vnet_interface_output_node_inline (vlib_main_t * vm,
+                                   vlib_node_runtime_t * node,
+                                   vlib_frame_t * frame, vnet_main_t * vnm,
+                                   vnet_hw_interface_t * hi,
+                                   int do_tx_offloads)
+{
+  /* Common case: the GSO interface count is zero. */
+  if (PREDICT_TRUE (vnm->interface_main.gso_interface_count == 0))
+    return vnet_interface_output_node_inline_gso (vm, node, frame, vnm, hi,
+                                                  do_tx_offloads,
+                                                  0 /* do_segmentation */ );
+
+  /* The outgoing interface has the SUPPORTS_GSO flag: do not segment. */
+  if (hi->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO)
+    return vnet_interface_output_node_inline_gso (vm, node, frame, vnm, hi,
+                                                  do_tx_offloads,
+                                                  0 /* do_segmentation */ );
+
+  /* GSO interfaces exist, but this one lacks the flag: segment here. */
+  return vnet_interface_output_node_inline_gso (vm, node, frame, vnm, hi,
+                                                do_tx_offloads,
+                                                1 /* do_segmentation */ );
+}
+
+
uword
vnet_interface_output_node (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index b3ae29a2790..ec4eda4e96a 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -2186,10 +2186,11 @@ ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
always_inline uword
-ip4_rewrite_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- int do_counters, int is_midchain, int is_mcast)
+ip4_rewrite_inline_with_gso (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int do_counters, int is_midchain, int is_mcast,
+ int do_gso)
{
ip_lookup_main_t *lm = &ip4_main.lookup_main;
u32 *from = vlib_frame_vector_args (frame);
@@ -2267,12 +2268,20 @@ ip4_rewrite_inline (vlib_main_t * vm,
CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
/* Check MTU of outgoing interface. */
- ip4_mtu_check (b[0], clib_net_to_host_u16 (ip0->length),
+ u16 ip0_len = clib_net_to_host_u16 (ip0->length);
+ u16 ip1_len = clib_net_to_host_u16 (ip1->length);
+
+ if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
+ ip0_len = gso_mtu_sz (b[0]);
+ if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
+ ip1_len = gso_mtu_sz (b[1]);
+
+ ip4_mtu_check (b[0], ip0_len,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
next + 0, &error0);
- ip4_mtu_check (b[1], clib_net_to_host_u16 (ip1->length),
+ ip4_mtu_check (b[1], ip1_len,
adj1[0].rewrite_header.max_l3_packet_bytes,
ip1->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
@@ -2395,7 +2404,11 @@ ip4_rewrite_inline (vlib_main_t * vm,
vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
/* Check MTU of outgoing interface. */
- ip4_mtu_check (b[0], clib_net_to_host_u16 (ip0->length),
+ u16 ip0_len = clib_net_to_host_u16 (ip0->length);
+ if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
+ ip0_len = gso_mtu_sz (b[0]);
+
+ ip4_mtu_check (b[0], ip0_len,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
@@ -2465,6 +2478,23 @@ ip4_rewrite_inline (vlib_main_t * vm,
return frame->n_vectors;
}
+/*
+ * Entry point shared by the IPv4 rewrite nodes.
+ *
+ * Forwards to ip4_rewrite_inline_with_gso (), passing do_gso as a literal
+ * 1 when the interface_main GSO interface count is greater than zero
+ * and as a literal 0 otherwise.  All other arguments are passed through
+ * unchanged, and the callee's return value is returned as-is.
+ */
+always_inline uword
+ip4_rewrite_inline (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * frame,
+                    int do_counters, int is_midchain, int is_mcast)
+{
+  /* Hinted as the unlikely case: at least one GSO interface is counted. */
+  if (PREDICT_FALSE
+      (vnet_get_main ()->interface_main.gso_interface_count > 0))
+    return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
+                                        is_midchain, is_mcast,
+                                        1 /* do_gso */ );
+
+  return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
+                                      is_midchain, is_mcast,
+                                      0 /* no do_gso */ );
+}
+
+
/** @brief IPv4 rewrite node.
@node ip4-rewrite
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 8e96647f995..f599392742c 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -1622,10 +1622,11 @@ ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
}
always_inline uword
-ip6_rewrite_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- int do_counters, int is_midchain, int is_mcast)
+ip6_rewrite_inline_with_gso (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int do_counters, int is_midchain, int is_mcast,
+ int do_gso)
{
ip_lookup_main_t *lm = &ip6_main.lookup_main;
u32 *from = vlib_frame_vector_args (frame);
@@ -1771,12 +1772,23 @@ ip6_rewrite_inline (vlib_main_t * vm,
}
/* Check MTU of outgoing interface. */
- ip6_mtu_check (p0, clib_net_to_host_u16 (ip0->payload_length) +
- sizeof (ip6_header_t),
+ u16 ip0_len =
+ clib_net_to_host_u16 (ip0->payload_length) +
+ sizeof (ip6_header_t);
+ u16 ip1_len =
+ clib_net_to_host_u16 (ip1->payload_length) +
+ sizeof (ip6_header_t);
+ if (do_gso && (p0->flags & VNET_BUFFER_F_GSO))
+ ip0_len = gso_mtu_sz (p0);
+ if (do_gso && (p1->flags & VNET_BUFFER_F_GSO))
+ ip1_len = gso_mtu_sz (p1);
+
+
+
+ ip6_mtu_check (p0, ip0_len,
adj0[0].rewrite_header.max_l3_packet_bytes,
is_locally_originated0, &next0, &error0);
- ip6_mtu_check (p1, clib_net_to_host_u16 (ip1->payload_length) +
- sizeof (ip6_header_t),
+ ip6_mtu_check (p1, ip1_len,
adj1[0].rewrite_header.max_l3_packet_bytes,
is_locally_originated1, &next1, &error1);
@@ -1915,8 +1927,13 @@ ip6_rewrite_inline (vlib_main_t * vm,
}
/* Check MTU of outgoing interface. */
- ip6_mtu_check (p0, clib_net_to_host_u16 (ip0->payload_length) +
- sizeof (ip6_header_t),
+ u16 ip0_len =
+ clib_net_to_host_u16 (ip0->payload_length) +
+ sizeof (ip6_header_t);
+ if (do_gso && (p0->flags & VNET_BUFFER_F_GSO))
+ ip0_len = gso_mtu_sz (p0);
+
+ ip6_mtu_check (p0, ip0_len,
adj0[0].rewrite_header.max_l3_packet_bytes,
is_locally_originated0, &next0, &error0);
@@ -1974,6 +1991,23 @@ ip6_rewrite_inline (vlib_main_t * vm,
return frame->n_vectors;
}
+/*
+ * Entry point shared by the IPv6 rewrite nodes.
+ *
+ * Forwards to ip6_rewrite_inline_with_gso (), passing do_gso as a literal
+ * 1 when the interface_main GSO interface count is greater than zero
+ * and as a literal 0 otherwise.  All other arguments are passed through
+ * unchanged, and the callee's return value is returned as-is.
+ */
+always_inline uword
+ip6_rewrite_inline (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * frame,
+                    int do_counters, int is_midchain, int is_mcast)
+{
+  /* Hinted as the unlikely case: at least one GSO interface is counted. */
+  if (PREDICT_FALSE
+      (vnet_get_main ()->interface_main.gso_interface_count > 0))
+    return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
+                                        is_midchain, is_mcast,
+                                        1 /* do_gso */ );
+
+  return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
+                                      is_midchain, is_mcast,
+                                      0 /* no do_gso */ );
+}
+
+
VLIB_NODE_FN (ip6_rewrite_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)