diff options
author | Aloys Augustin <aloaugus@cisco.com> | 2021-09-15 16:06:04 +0200 |
---|---|---|
committer | Benoît Ganne <bganne@cisco.com> | 2021-09-27 09:54:56 +0000 |
commit | 86490da4ce4cc5b872dd625a9eb3f19ed10f848e (patch) | |
tree | ba2d7732d4c7eeab49ac3cf841be2ec35b2c91cd | |
parent | b21fd4b7e09455f199495e7160d5537195e36046 (diff) |
gso: do not gro on small packets
This has two benefits: fix an issue where small packets can be
padded to 64 bytes by the ethernet layer, and we included these bytes
in the tcp stream; and also if we receive a small packet, it signals
that this tcp stream is likely more latency-sensitive than throughput-
sensitive, and skipping gro buffering should be beneficial.
Type: fix
Change-Id: I165b97673d8cdce95ebf0a66c362ae9f9e6f3f34
Signed-off-by: Aloys Augustin <aloaugus@cisco.com>
-rw-r--r-- | src/vnet/gso/gro_func.h | 44 | ||||
-rw-r--r-- | test/test_gro.py | 102 |
2 files changed, 143 insertions, 3 deletions
diff --git a/src/vnet/gso/gro_func.h b/src/vnet/gso/gro_func.h index b29d4a5f944..c9464bdc063 100644 --- a/src/vnet/gso/gro_func.h +++ b/src/vnet/gso/gro_func.h @@ -21,10 +21,14 @@ #include <vnet/gso/hdr_offset_parser.h> #include <vnet/ip/ip4.h> #include <vnet/ip/ip6.h> +#include <vnet/ip/ip6_inlines.h> #include <vnet/udp/udp_packet.h> #include <vnet/tcp/tcp_packet.h> #include <vnet/vnet.h> +#define GRO_MIN_PACKET_SIZE 256 +#define GRO_PADDED_PACKET_SIZE 64 + static_always_inline u8 gro_is_bad_packet (vlib_buffer_t * b, u8 flags, i16 l234_sz) { @@ -160,6 +164,34 @@ gro_validate_checksum (vlib_main_t * vm, vlib_buffer_t * b0, } static_always_inline u32 +gro_fix_padded_packet_len (vlib_buffer_t *b0, generic_header_offset_t *gho0, + ip4_header_t *ip4_0, ip6_header_t *ip6_0, + u32 pkt_len0, u16 l234_sz0) +{ + u32 tcp_payload_len0 = 0; + if (gho0->gho_flags & GHO_F_IP4) + { + tcp_payload_len0 = clib_net_to_host_u16 (ip4_0->length) - + ip4_header_bytes (ip4_0) - gho0->l4_hdr_sz; + } + else + { + tcp_payload_len0 = + clib_net_to_host_u16 (ip6_0->payload_length) - gho0->l4_hdr_sz; + } + + ASSERT (l234_sz0 + tcp_payload_len0 <= pkt_len0); + + if (PREDICT_FALSE (l234_sz0 + tcp_payload_len0 < pkt_len0)) + { + /* small packet with padding at the end, remove padding */ + b0->current_length = l234_sz0 + tcp_payload_len0; + pkt_len0 = b0->current_length; + } + return pkt_len0; +} + +static_always_inline u32 gro_get_packet_data (vlib_main_t *vm, vlib_buffer_t *b0, generic_header_offset_t *gho0, gro_flow_key_t *flow_key0, u8 is_l2) @@ -222,6 +254,11 @@ gro_get_packet_data (vlib_main_t *vm, vlib_buffer_t *b0, if (PREDICT_FALSE (pkt_len0 >= TCP_MAX_GSO_SZ)) return 0; + if (PREDICT_FALSE (pkt_len0 <= GRO_PADDED_PACKET_SIZE)) + { + pkt_len0 = + gro_fix_padded_packet_len (b0, gho0, ip4_0, ip6_0, pkt_len0, l234_sz0); + } return pkt_len0; } @@ -264,8 +301,8 @@ gro_coalesce_buffers (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1, pkt_len0 = vlib_buffer_length_in_chain (vm, b0); 
pkt_len1 = vlib_buffer_length_in_chain (vm, b1); - if (((gho0.gho_flags & GHO_F_TCP) == 0) - || ((gho1.gho_flags & GHO_F_TCP) == 0)) + if (((gho0.gho_flags & GHO_F_TCP) == 0 || pkt_len0 <= GRO_MIN_PACKET_SIZE) || + ((gho1.gho_flags & GHO_F_TCP) == 0 || pkt_len1 <= GRO_MIN_PACKET_SIZE)) return 0; ip4_0 = @@ -483,7 +520,8 @@ vnet_gro_flow_table_inline (vlib_main_t * vm, gro_flow_table_t * flow_table, } tcp0 = (tcp_header_t *) (vlib_buffer_get_current (b0) + gho0.l4_hdr_offset); - if (PREDICT_TRUE ((tcp0->flags & TCP_FLAG_PSH) == 0)) + if (PREDICT_TRUE (((tcp0->flags & TCP_FLAG_PSH) == 0) && + (pkt_len0 > GRO_MIN_PACKET_SIZE))) gro_flow = gro_flow_table_find_or_add_flow (flow_table, &flow_key0); else { diff --git a/test/test_gro.py b/test/test_gro.py index 33215d65fa7..6c68299d7f4 100644 --- a/test/test_gro.py +++ b/test/test_gro.py @@ -138,5 +138,107 @@ class TestGRO(VppTestCase): self.assertEqual(rx[TCP].sport, 1234) self.assertEqual(rx[TCP].dport, 4321) + # + # Same test with IPv6 + # + p = [] + s = 0 + for n in range(0, 88): + p.append((Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / + IPv6(src=self.pg0.remote_ip6, dst=self.pg2.remote_ip6) / + TCP(sport=1234, dport=4321, seq=s, ack=n, flags='A') / + Raw(b'\xa5' * 1460))) + s += 1460 + p[-1][TCP].flags = 'AP' # push to flush second packet + + rxs = self.send_and_expect(self.pg0, p, self.pg2, n_rx=2) + + i = 0 + for rx in rxs: + i += 1 + self.assertEqual(rx[Ether].src, self.pg2.local_mac) + self.assertEqual(rx[Ether].dst, self.pg2.remote_mac) + self.assertEqual(rx[IPv6].src, self.pg0.remote_ip6) + self.assertEqual(rx[IPv6].dst, self.pg2.remote_ip6) + self.assertEqual(rx[IPv6].plen, 64260) # 1460 * 44 + 20 < 65536 + self.assertEqual(rx[TCP].sport, 1234) + self.assertEqual(rx[TCP].dport, 4321) + self.assertEqual(rx[TCP].ack, (44*i - 1)) + + # + # Send a series of 1500 bytes packets each followed by a packet with a + # PSH flag. 
Verify that GRO stops everytime a PSH flag is encountered + # + p = [] + s = 0 + for n in range(0, n_packets): + p.append((Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / + IP(src=self.pg0.remote_ip4, dst=self.pg2.remote_ip4, + flags='DF') / + TCP(sport=1234, dport=4321, seq=s, ack=2*n, flags='A') / + Raw(b'\xa5' * 1460))) + s += 1460 + p.append((Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / + IP(src=self.pg0.remote_ip4, dst=self.pg2.remote_ip4, + flags='DF') / + TCP(sport=1234, dport=4321, seq=s, ack=2*n+1, + flags='AP') / + Raw(b'\xa5' * 1340))) + s += 1340 + + rxs = self.send_and_expect(self.pg0, p, self.pg2, n_rx=n_packets) + + i = 0 + for rx in rxs: + self.assertEqual(rx[Ether].src, self.pg2.local_mac) + self.assertEqual(rx[Ether].dst, self.pg2.remote_mac) + self.assertEqual(rx[IP].src, self.pg0.remote_ip4) + self.assertEqual(rx[IP].dst, self.pg2.remote_ip4) + self.assertEqual(rx[IP].len, 40 + 1460 + 1340) + self.assertEqual(rx[TCP].sport, 1234) + self.assertEqual(rx[TCP].dport, 4321) + self.assertEqual(rx[TCP].ack, (2*i + 1)) + i += 1 + + # + # Send a series of 1500 bytes packets each followed by a short packet + # with padding. 
Verify that GRO removes the padding and stops on short + # packets + # + p = [] + s = 0 + for n in range(0, n_packets): + i = self.pg0 + p.append((Ether(src=i.remote_mac, dst=i.local_mac) / + IP(src=i.remote_ip4, dst=self.pg2.remote_ip4, + flags='DF') / + TCP(sport=1234, dport=4321, seq=s, ack=2*n, flags='A') / + Raw(b'\xa5' * 1459))) + s += 1459 + p2 = (Ether(src=i.remote_mac, dst=i.local_mac) / + IP(src=i.remote_ip4, dst=self.pg2.remote_ip4, + flags='DF', len=41) / + TCP(sport=1234, dport=4321, seq=s, ack=2*n+1, flags='A') / + Raw(b'\xa5')) + # first compute csum of pkt w/o padding to work around scapy bug + p2 = Ether(bytes(p2)) + p.append(p2 / Raw(b'\xa5' * 5)) # 1 byte data + 5 bytes padding + s += 1 + + rxs = self.send_and_expect(self.pg0, p, self.pg2, n_rx=n_packets) + + i = 0 + for rx in rxs: + self.assertEqual(rx[Ether].src, self.pg2.local_mac) + self.assertEqual(rx[Ether].dst, self.pg2.remote_mac) + self.assertEqual(rx[IP].src, self.pg0.remote_ip4) + self.assertEqual(rx[IP].dst, self.pg2.remote_ip4) + self.assertEqual(rx[IP].len, 40 + 1459 + 1) + self.assertEqual(rx[TCP].sport, 1234) + self.assertEqual(rx[TCP].dport, 4321) + self.assertEqual(rx[TCP].ack, (2*i + 1)) + i += 1 + + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) |