From 4146c65f0dd0b5412746064f230b70ec894d2980 Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Wed, 8 Aug 2018 22:23:19 +0200 Subject: IP fragmentation to handle buffer chains. Change-Id: Iff557f566ebc9ab170d75da1233997d83b8c8a66 Signed-off-by: Ole Troan --- src/vnet/ip/ip_frag.c | 185 +++++++++++++++++++++++++++++--------------------- src/vnet/ipip/ipip.h | 11 +-- src/vnet/ipip/node.c | 8 +++ test/test_ipip.py | 51 ++++++++++++++ 4 files changed, 171 insertions(+), 84 deletions(-) diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c index 8d495af63ed..667a92bbfa5 100644 --- a/src/vnet/ip/ip_frag.c +++ b/src/vnet/ip/ip_frag.c @@ -46,33 +46,41 @@ format_ip_frag_trace (u8 * s, va_list * args) static u32 running_fragment_id; +/* + * Limitation: Does follow buffer chains in the packet to fragment, + * but does not generate buffer chains. I.e. a fragment is always + * contained with in a single buffer and limited to the max buffer + * size. + */ void -ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer, +ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, ip_frag_error_t * error) { - vlib_buffer_t *p; + vlib_buffer_t *from_b; ip4_header_t *ip4; - u16 mtu, ptr, len, max, rem, offset, ip_frag_id, ip_frag_offset; - u8 *packet, more; - - vec_add1 (*buffer, pi); - p = vlib_get_buffer (vm, pi); - offset = vnet_buffer (p)->ip_frag.header_offset; - mtu = vnet_buffer (p)->ip_frag.mtu; - packet = (u8 *) vlib_buffer_get_current (p); - ip4 = (ip4_header_t *) (packet + offset); - - rem = clib_net_to_host_u16 (ip4->length) - sizeof (*ip4); - ptr = 0; - max = (mtu - sizeof (*ip4) - vnet_buffer (p)->ip_frag.header_offset) & ~0x7; - - if (rem > (p->current_length - offset - sizeof (*ip4))) + u16 mtu, len, max, rem, offset, ip_frag_id, ip_frag_offset; + u8 *org_from_packet, more; + + from_b = vlib_get_buffer (vm, from_bi); + offset = vnet_buffer (from_b)->ip_frag.header_offset; + mtu = vnet_buffer (from_b)->ip_frag.mtu; + org_from_packet = vlib_buffer_get_current (from_b); + ip4 = (ip4_header_t *) vlib_buffer_get_current (from_b) + offset; + + rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t); + max = + (mtu - sizeof (ip4_header_t) - + vnet_buffer (from_b)->ip_frag.header_offset) & ~0x7; + + if (rem > + (vlib_buffer_length_in_chain (vm, from_b) - offset - + sizeof (ip4_header_t))) { *error = IP_FRAG_ERROR_MALFORMED; return; } - if (mtu < sizeof (*ip4)) + if (mtu < sizeof (ip4_header_t)) { *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER; return; @@ -85,12 +93,6 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer, return; } - if (p->flags & VLIB_BUFFER_NEXT_PRESENT) - { - *error = IP_FRAG_ERROR_MALFORMED; - return; - } - if (ip4_is_fragment (ip4)) { ip_frag_id = ip4->fragment_id; @@ -106,84 +108,109 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer, more = 0; } - //Do the actual fragmentation + u8 *from_data = (void *) (ip4 + 1); + vlib_buffer_t *org_from_b = from_b; + u16 ptr = 0, fo = 0; + u16 left_in_from_buffer = + from_b->current_length - offset - sizeof (ip4_header_t); + + /* Do the actual fragmentation */ while (rem) { - u32 bi; - vlib_buffer_t *b; - ip4_header_t *fip4; - - len = - (rem > - (mtu - sizeof (*ip4) - - vnet_buffer (p)->ip_frag.header_offset)) ? max : rem; - - if (ptr == 0) + u32 to_bi; + vlib_buffer_t *to_b; + ip4_header_t *to_ip4; + u8 *to_data; + + len = (rem > (mtu - sizeof (ip4_header_t) - offset) ? max : rem); + if (len != rem) /* Last fragment does not need to divisible by 8 */ + len &= ~0x7; + if (!vlib_buffer_alloc (vm, &to_bi, 1)) { - bi = pi; - b = p; - fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset); + *error = IP_FRAG_ERROR_MEMORY; + /* XXX: Free already allocated buffers? */ + return; } - else + vec_add1 (*buffer, to_bi); + to_b = vlib_get_buffer (vm, to_bi); + vnet_buffer (to_b)->sw_if_index[VLIB_RX] = + vnet_buffer (org_from_b)->sw_if_index[VLIB_RX]; + vnet_buffer (to_b)->sw_if_index[VLIB_TX] = + vnet_buffer (org_from_b)->sw_if_index[VLIB_TX]; + /* Copy adj_index in case DPO based node is sending for the + * fragmentation, the packet would be sent back to the proper + * DPO next node and Index + */ + vnet_buffer (to_b)->ip.adj_index[VLIB_RX] = + vnet_buffer (org_from_b)->ip.adj_index[VLIB_RX]; + vnet_buffer (to_b)->ip.adj_index[VLIB_TX] = + vnet_buffer (org_from_b)->ip.adj_index[VLIB_TX]; + + /* Copy offset and ip4 header */ + clib_memcpy (to_b->data, org_from_packet, + offset + sizeof (ip4_header_t)); + to_ip4 = vlib_buffer_get_current (to_b) + offset; + to_data = (void *) (to_ip4 + 1); + + /* Spin through buffer chain copying data */ + // XXX: Make sure we don't overflow source buffer!!! + if (len > left_in_from_buffer) { - if (!vlib_buffer_alloc (vm, &bi, 1)) + clib_memcpy (to_data, from_data + ptr, left_in_from_buffer); + + /* Move buffer */ + if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT)) { - *error = IP_FRAG_ERROR_MEMORY; + *error = IP_FRAG_ERROR_MALFORMED; return; } - vec_add1 (*buffer, bi); - b = vlib_get_buffer (vm, bi); - vnet_buffer (b)->sw_if_index[VLIB_RX] = - vnet_buffer (p)->sw_if_index[VLIB_RX]; - vnet_buffer (b)->sw_if_index[VLIB_TX] = - vnet_buffer (p)->sw_if_index[VLIB_TX]; - /* Copy Adj_index in case DPO based node is sending for the fragmentation, - the packet would be sent back to the proper DPO next node and Index */ - vnet_buffer (b)->ip.adj_index[VLIB_RX] = - vnet_buffer (p)->ip.adj_index[VLIB_RX]; - vnet_buffer (b)->ip.adj_index[VLIB_TX] = - vnet_buffer (p)->ip.adj_index[VLIB_TX]; - fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset); - - //Copy offset and ip4 header - clib_memcpy (b->data, packet, offset + sizeof (*ip4)); - //Copy data - clib_memcpy (((u8 *) (fip4)) + sizeof (*fip4), - packet + offset + sizeof (*fip4) + ptr, len); + from_b = vlib_get_buffer (vm, from_b->next_buffer); + from_data = (u8 *) vlib_buffer_get_current (from_b); + clib_memcpy (to_data + left_in_from_buffer, from_data, + len - left_in_from_buffer); + ptr = len - left_in_from_buffer; + left_in_from_buffer = + from_b->current_length - (len - left_in_from_buffer); + } + else + { + clib_memcpy (to_data, from_data + ptr, len); + left_in_from_buffer -= len; + ptr += len; } - b->current_length = offset + len + sizeof (*fip4); + to_b->current_length = offset + len + sizeof (ip4_header_t); - fip4->fragment_id = ip_frag_id; - fip4->flags_and_fragment_offset = - clib_host_to_net_u16 ((ptr >> 3) + ip_frag_offset); - fip4->flags_and_fragment_offset |= + to_ip4->fragment_id = ip_frag_id; + to_ip4->flags_and_fragment_offset = + clib_host_to_net_u16 ((fo >> 3) + ip_frag_offset); + to_ip4->flags_and_fragment_offset |= clib_host_to_net_u16 (((len != rem) || more) << 13); - // ((len0 != rem0) || more0) << 13 is optimization for - // ((len0 != rem0) || more0) ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0 - fip4->length = clib_host_to_net_u16 (len + sizeof (*fip4)); - fip4->checksum = ip4_header_checksum (fip4); + to_ip4->length = clib_host_to_net_u16 (len + sizeof (ip4_header_t)); + to_ip4->checksum = ip4_header_checksum (to_ip4); - if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER) + if (vnet_buffer (org_from_b)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER) { - //Encapsulating ipv4 header + /* Encapsulating ipv4 header */ ip4_header_t *encap_header4 = - (ip4_header_t *) vlib_buffer_get_current (b); - encap_header4->length = clib_host_to_net_u16 (b->current_length); + (ip4_header_t *) vlib_buffer_get_current (to_b); + encap_header4->length = clib_host_to_net_u16 (to_b->current_length); encap_header4->checksum = ip4_header_checksum (encap_header4); } - else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER) + else if (vnet_buffer (org_from_b)-> + ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER) { - //Encapsulating ipv6 header + /* Encapsulating ipv6 header */ ip6_header_t *encap_header6 = - (ip6_header_t *) vlib_buffer_get_current (b); + (ip6_header_t *) vlib_buffer_get_current (to_b); encap_header6->payload_length = - clib_host_to_net_u16 (b->current_length - + clib_host_to_net_u16 (to_b->current_length - sizeof (*encap_header6)); } - rem -= len; - ptr += len; + fo += len; } + /* Free original packet chain */ + vlib_buffer_free_one (vm, from_bi); } void diff --git a/src/vnet/ipip/ipip.h b/src/vnet/ipip/ipip.h index 6afb188f8ee..28833df9755 100644 --- a/src/vnet/ipip/ipip.h +++ b/src/vnet/ipip/ipip.h @@ -26,11 +26,12 @@ extern vnet_hw_interface_class_t ipip_hw_interface_class; -#define foreach_ipip_error \ - /* Must be first. */ \ - _(DECAP_PKTS, "packets decapsulated") \ - _(BAD_PROTOCOL, "bad protocol") \ - _(NO_TUNNEL, "no tunnel") +#define foreach_ipip_error \ + /* Must be first. */ \ + _(DECAP_PKTS, "packets decapsulated") \ + _(BAD_PROTOCOL, "bad protocol") \ + _(NO_TUNNEL, "no tunnel") \ + _(FRAGMENTED_PACKET, "fragmented outer packet") typedef enum { diff --git a/src/vnet/ipip/node.c b/src/vnet/ipip/node.c index d55b91a0b93..60d6223d5f3 100644 --- a/src/vnet/ipip/node.c +++ b/src/vnet/ipip/node.c @@ -108,6 +108,14 @@ ipip_input (vlib_main_t * vm, vlib_node_runtime_t * node, else { ip40 = vlib_buffer_get_current (b0); + /* Check for outer fragmentation */ + if (ip40->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)) + { + next0 = IPIP_INPUT_NEXT_DROP; + b0->error = node->errors[IPIP_ERROR_FRAGMENTED_PACKET]; + goto drop; + } vlib_buffer_advance (b0, sizeof (*ip40)); ip_set (&src0, &ip40->src_address, true); ip_set (&dst0, &ip40->dst_address, true); diff --git a/test/test_ipip.py b/test/test_ipip.py index 00721ec90a0..582ab5be84c 100644 --- a/test/test_ipip.py +++ b/test/test_ipip.py @@ -3,9 +3,11 @@ import unittest from scapy.layers.inet6 import IPv6, Ether, IP, UDP +from scapy.all import fragment from framework import VppTestCase, VppTestRunner from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto from socket import AF_INET, AF_INET6, inet_pton +import StringIO """ Testipip is a subclass of VPPTestCase classes. @@ -14,6 +16,20 @@ IPIP tests. """ +def reassemble(listoffragments): + buffer = StringIO.StringIO() + first = listoffragments[0] + buffer.seek(20) + for pkt in listoffragments: + buffer.seek(pkt[IP].frag*8) + buffer.write(pkt[IP].payload) + first.len = len(buffer.getvalue()) + 20 + first.flags = 0 + del(first.chksum) + header = str(first[IP])[:20] + return first[IP].__class__(header + buffer.getvalue()) + + class TestIPIP(VppTestCase): """ IPIP Test Case """ @@ -126,6 +142,38 @@ class TestIPIP(VppTestCase): for p in rx: self.validate(p[1], p6_reply) + # Fragmentation / Reassembly and Re-fragmentation + rv = self.vapi.ip_reassembly_enable_disable( + sw_if_index=self.pg1.sw_if_index, + enable_ip4=1) + # Decapsulation + p_ether = Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac) + p_payload = UDP(sport=1234, dport=1234) / self.payload(3123) + p_ip4 = IP(src="1.2.3.4", dst=self.pg0.remote_ip4) + outer_ip4 = (p_ether / IP(src=self.pg1.remote_ip4, + dst=self.pg0.local_ip4) / p_ip4 / p_payload) + frags = fragment(outer_ip4, 1400) + p4_reply = (p_ip4 / p_payload) + p4_reply.ttl -= 1 + + self.pg_enable_capture() + self.pg1.add_stream(frags) + self.pg_start() + rx = self.pg0.get_capture(1) + for p in rx: + self.validate(p[1], p4_reply) + + # Now try with re-fragmentation + self.vapi.sw_interface_set_mtu(self.pg0.sw_if_index, [576, 0, 0, 0]) + self.pg_enable_capture() + self.pg1.add_stream(frags) + self.pg_start() + rx = self.pg0.get_capture(6) + reass_pkt = reassemble(rx) + p4_reply.ttl -= 1 + p4_reply.id = 256 + self.validate(reass_pkt, p4_reply) + def test_ipip6(self): """ ip{v4,v6} over ip6 test """ p_ether = Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) @@ -213,6 +261,9 @@ class TestIPIP(VppTestCase): sw_if_index = rv.sw_if_index self.vapi.ipip_del_tunnel(sw_if_index) + def payload(self, len): + return 'x' * len + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) -- cgit 1.2.3-korg