author     Andrew Yourtchenko <ayourtch@gmail.com>   2018-10-12 16:09:22 +0200
committer  Damjan Marion <dmarion@me.com>            2019-02-19 12:47:40 +0000
commit     6a7cff7ec234af8529ff72a530076e191cc8d759
tree       ea7a9bf447385172d0d3fda382aebf2d0203ecc8  /src/vnet/ip
parent     be30fea370ed7cfe6a4a1b154a944411ec3eabd0
tap gso: experimental support
This commit adds a "gso" parameter to the existing "create tap..." CLI,
and a "no-gso" parameter for forward compatibility, should the
default ever change.
It uses the lowest bit of the "tap_flags" field in the API call,
so that GSO interfaces can be created via the API as well.
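As an illustration, usage would look roughly like this (the tap "id"
values are hypothetical; only the gso/no-gso keywords come from this
commit):

    vpp# create tap id 0 gso
    vpp# create tap id 1 no-gso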
It performs the necessary syscalls to enable GSO and checksum offload
support on the kernel side, and sets two flags on the interface:
the virtio-specific virtio_if_t.gso_enabled,
and vnet_hw_interface_t.flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO.
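On the kernel side this boils down to ioctls against the tap file
descriptor. A minimal sketch, assuming an already-open tap fd; the
exact offload flag set is an assumption and is not part of this diff:

    #include <sys/ioctl.h>
    #include <linux/if_tun.h>

    /* Sketch: offer checksum offload and TCPv4/v6 segmentation
       offload to the kernel side of the tap device. */
    static int
    enable_tap_gso (int tap_fd)
    {
      unsigned int offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
      return ioctl (tap_fd, TUNSETOFFLOAD, offload);
    }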
The first, when set, triggers the marking of GSO-encapsulated packets
on ingress with the VNET_BUFFER_F_GSO flag, and the setting of
vnet_buffer2(b)->gso_size to the desired L4 payload size.
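A sketch of what that ingress marking amounts to; vif and vnet_hdr are
illustrative names, only VNET_BUFFER_F_GSO and vnet_buffer2(b)->gso_size
come from this commit:

    /* Sketch: propagate the virtio GSO metadata into the vlib buffer
       so downstream nodes can recognize oversized packets. */
    if (vif->gso_enabled && vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)
      {
        b->flags |= VNET_BUFFER_F_GSO;
        vnet_buffer2 (b)->gso_size = vnet_hdr->gso_size; /* L4 payload size */
      }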
VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO determines the egress processing
of such packets in interface-output:
when the flag is set, they are sent out almost as usual (just taking
care to set the vnet header for virtio);
when it is not set (the case for most interfaces),
the egress path re-segments them such that
the L4 payload of each transmitted packet equals gso_size.
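Schematically, the egress decision is a per-packet branch; this is a
sketch of the logic described above, not the actual interface-output
code:

    /* Sketch of the egress handling of a VNET_BUFFER_F_GSO packet. */
    if (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO)
      {
        /* GSO-capable interface: fill in the virtio vnet header and
           send the large packet as-is. */
      }
    else
      {
        /* All other interfaces: segment in software so that each
           transmitted packet carries at most gso_size bytes of
           L4 payload. */
      }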
The operations in the datapath are enabled only when there is at least
one GSO-compatible interface in the system; this is done by tracking
the count in interface_main.gso_interface_count, which minimizes the
impact of the conditional checks on setups that do not use GSO.
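The bookkeeping itself is a simple counter update on enable/disable;
the field name is from this commit, the surrounding code is a sketch:

    /* Sketch: count GSO-enabled interfaces globally so the datapath
       can skip the GSO branch entirely when none exist. */
    if (enable)
      vnm->interface_main.gso_interface_count += 1;
    else if (vnm->interface_main.gso_interface_count > 0)
      vnm->interface_main.gso_interface_count -= 1;

The ip4/ip6 rewrite wrappers in the diff below test this count under
PREDICT_FALSE, keeping the non-GSO path branch-predictable.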
"show tap" CLI shows the state of the GSO flag on the interface, and
the total count of GSO-enabled interfaces (which is used to enable
the GSO-related processing in the packet path).
This commit lacks IPv6 extension header traversal of any kind:
the L4 payload is assumed to immediately follow the IPv6 header.
It also performs the offload only for TCP (TSO - TCP segmentation
offload); UDP fragmentation offload (UFO) is not included.
For debugging purposes, it also adds a debug CLI:
"set tap gso {<interface> | sw_if_index <sw_idx>} <enable|disable>"
Change-Id: Ifd562db89adcc2208094b3d1032cee8c307aaef9
Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
Diffstat (limited to 'src/vnet/ip')
-rw-r--r--  src/vnet/ip/ip4_forward.c | 44
-rw-r--r--  src/vnet/ip/ip6_forward.c | 54
2 files changed, 81 insertions(+), 17 deletions(-)
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index b3ae29a2790..ec4eda4e96a 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -2186,10 +2186,11 @@ ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
 always_inline uword
-ip4_rewrite_inline (vlib_main_t * vm,
-                    vlib_node_runtime_t * node,
-                    vlib_frame_t * frame,
-                    int do_counters, int is_midchain, int is_mcast)
+ip4_rewrite_inline_with_gso (vlib_main_t * vm,
+                             vlib_node_runtime_t * node,
+                             vlib_frame_t * frame,
+                             int do_counters, int is_midchain, int is_mcast,
+                             int do_gso)
 {
   ip_lookup_main_t *lm = &ip4_main.lookup_main;
   u32 *from = vlib_frame_vector_args (frame);
@@ -2267,12 +2268,20 @@ ip4_rewrite_inline (vlib_main_t * vm,
           CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);

           /* Check MTU of outgoing interface. */
-          ip4_mtu_check (b[0], clib_net_to_host_u16 (ip0->length),
+          u16 ip0_len = clib_net_to_host_u16 (ip0->length);
+          u16 ip1_len = clib_net_to_host_u16 (ip1->length);
+
+          if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
+            ip0_len = gso_mtu_sz (b[0]);
+          if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
+            ip1_len = gso_mtu_sz (b[1]);
+
+          ip4_mtu_check (b[0], ip0_len,
                          adj0[0].rewrite_header.max_l3_packet_bytes,
                          ip0->flags_and_fragment_offset &
                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
                          next + 0, &error0);
-          ip4_mtu_check (b[1], clib_net_to_host_u16 (ip1->length),
+          ip4_mtu_check (b[1], ip1_len,
                          adj1[0].rewrite_header.max_l3_packet_bytes,
                          ip1->flags_and_fragment_offset &
                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
@@ -2395,7 +2404,11 @@ ip4_rewrite_inline (vlib_main_t * vm,
           vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;

           /* Check MTU of outgoing interface. */
-          ip4_mtu_check (b[0], clib_net_to_host_u16 (ip0->length),
+          u16 ip0_len = clib_net_to_host_u16 (ip0->length);
+          if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
+            ip0_len = gso_mtu_sz (b[0]);
+
+          ip4_mtu_check (b[0], ip0_len,
                          adj0[0].rewrite_header.max_l3_packet_bytes,
                          ip0->flags_and_fragment_offset &
                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
@@ -2465,6 +2478,23 @@ ip4_rewrite_inline (vlib_main_t * vm,
   return frame->n_vectors;
 }

+always_inline uword
+ip4_rewrite_inline (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * frame,
+                    int do_counters, int is_midchain, int is_mcast)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
+    return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
+                                        is_midchain, is_mcast,
+                                        1 /* do_gso */ );
+  else
+    return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
+                                        is_midchain, is_mcast,
+                                        0 /* no do_gso */ );
+}
+
 /** @brief IPv4 rewrite node.
     @node ip4-rewrite
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 8e96647f995..f599392742c 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -1622,10 +1622,11 @@ ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
 }

 always_inline uword
-ip6_rewrite_inline (vlib_main_t * vm,
-                    vlib_node_runtime_t * node,
-                    vlib_frame_t * frame,
-                    int do_counters, int is_midchain, int is_mcast)
+ip6_rewrite_inline_with_gso (vlib_main_t * vm,
+                             vlib_node_runtime_t * node,
+                             vlib_frame_t * frame,
+                             int do_counters, int is_midchain, int is_mcast,
+                             int do_gso)
 {
   ip_lookup_main_t *lm = &ip6_main.lookup_main;
   u32 *from = vlib_frame_vector_args (frame);
@@ -1771,12 +1772,23 @@ ip6_rewrite_inline (vlib_main_t * vm,
             }

           /* Check MTU of outgoing interface.
            */
-          ip6_mtu_check (p0, clib_net_to_host_u16 (ip0->payload_length) +
-                         sizeof (ip6_header_t),
+          u16 ip0_len =
+            clib_net_to_host_u16 (ip0->payload_length) +
+            sizeof (ip6_header_t);
+          u16 ip1_len =
+            clib_net_to_host_u16 (ip1->payload_length) +
+            sizeof (ip6_header_t);
+          if (do_gso && (p0->flags & VNET_BUFFER_F_GSO))
+            ip0_len = gso_mtu_sz (p0);
+          if (do_gso && (p1->flags & VNET_BUFFER_F_GSO))
+            ip1_len = gso_mtu_sz (p1);
+
+
+
+          ip6_mtu_check (p0, ip0_len,
                          adj0[0].rewrite_header.max_l3_packet_bytes,
                          is_locally_originated0, &next0, &error0);
-          ip6_mtu_check (p1, clib_net_to_host_u16 (ip1->payload_length) +
-                         sizeof (ip6_header_t),
+          ip6_mtu_check (p1, ip1_len,
                          adj1[0].rewrite_header.max_l3_packet_bytes,
                          is_locally_originated1, &next1, &error1);
@@ -1915,8 +1927,13 @@ ip6_rewrite_inline (vlib_main_t * vm,
             }

           /* Check MTU of outgoing interface. */
-          ip6_mtu_check (p0, clib_net_to_host_u16 (ip0->payload_length) +
-                         sizeof (ip6_header_t),
+          u16 ip0_len =
+            clib_net_to_host_u16 (ip0->payload_length) +
+            sizeof (ip6_header_t);
+          if (do_gso && (p0->flags & VNET_BUFFER_F_GSO))
+            ip0_len = gso_mtu_sz (p0);
+
+          ip6_mtu_check (p0, ip0_len,
                          adj0[0].rewrite_header.max_l3_packet_bytes,
                          is_locally_originated0, &next0, &error0);
@@ -1974,6 +1991,23 @@ ip6_rewrite_inline (vlib_main_t * vm,
   return frame->n_vectors;
 }

+always_inline uword
+ip6_rewrite_inline (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * frame,
+                    int do_counters, int is_midchain, int is_mcast)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
+    return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
+                                        is_midchain, is_mcast,
+                                        1 /* do_gso */ );
+  else
+    return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters,
+                                        is_midchain, is_mcast,
+                                        0 /* no do_gso */ );
+}
+
 VLIB_NODE_FN (ip6_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                                  vlib_frame_t * frame)
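The MTU checks above rely on a gso_mtu_sz() helper that is not part of
this diff. A plausible sketch of what it computes, assuming the intent
is that a GSO buffer is judged by the L3 size of its largest
post-segmentation packet rather than by its full length:

    /* Sketch (assumed, not copied from the tree): the largest packet
       produced by segmentation is L3 header + L4 header + one
       gso_size worth of L4 payload. */
    static_always_inline u16
    gso_mtu_sz (vlib_buffer_t * b)
    {
      return (vnet_buffer (b)->l4_hdr_offset - vnet_buffer (b)->l3_hdr_offset)
        + vnet_buffer2 (b)->gso_l4_hdr_sz + vnet_buffer2 (b)->gso_size;
    }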