author     Ole Troan <ot@cisco.com>            2018-08-08 22:23:19 +0200
committer  Dave Barach <openvpp@barachs.net>   2018-08-10 12:02:51 +0000
commit     4146c65f0dd0b5412746064f230b70ec894d2980
tree       9266f7de360d808711002292f30f6e3db6aea4f6
parent     3074629b25556b04b8ac7e72a06849e39ed14ad4
IP fragmentation to handle buffer chains.
Change-Id: Iff557f566ebc9ab170d75da1233997d83b8c8a66
Signed-off-by: Ole Troan <ot@cisco.com>
-rw-r--r--  src/vnet/ip/ip_frag.c  185
-rw-r--r--  src/vnet/ipip/ipip.h    11
-rw-r--r--  src/vnet/ipip/node.c     8
-rw-r--r--  test/test_ipip.py       51
4 files changed, 171 insertions(+), 84 deletions(-)
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
index 8d495af63ed..667a92bbfa5 100644
--- a/src/vnet/ip/ip_frag.c
+++ b/src/vnet/ip/ip_frag.c
@@ -46,33 +46,41 @@ format_ip_frag_trace (u8 * s, va_list * args)
static u32 running_fragment_id;
+/*
+ * Limitation: Does follow buffer chains in the packet to fragment,
+ * but does not generate buffer chains. I.e., a fragment is always
+ * contained within a single buffer and limited to the max buffer
+ * size.
+ */
void
-ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
+ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
ip_frag_error_t * error)
{
- vlib_buffer_t *p;
+ vlib_buffer_t *from_b;
ip4_header_t *ip4;
- u16 mtu, ptr, len, max, rem, offset, ip_frag_id, ip_frag_offset;
- u8 *packet, more;
-
- vec_add1 (*buffer, pi);
- p = vlib_get_buffer (vm, pi);
- offset = vnet_buffer (p)->ip_frag.header_offset;
- mtu = vnet_buffer (p)->ip_frag.mtu;
- packet = (u8 *) vlib_buffer_get_current (p);
- ip4 = (ip4_header_t *) (packet + offset);
-
- rem = clib_net_to_host_u16 (ip4->length) - sizeof (*ip4);
- ptr = 0;
- max = (mtu - sizeof (*ip4) - vnet_buffer (p)->ip_frag.header_offset) & ~0x7;
-
- if (rem > (p->current_length - offset - sizeof (*ip4)))
+ u16 mtu, len, max, rem, offset, ip_frag_id, ip_frag_offset;
+ u8 *org_from_packet, more;
+
+ from_b = vlib_get_buffer (vm, from_bi);
+ offset = vnet_buffer (from_b)->ip_frag.header_offset;
+ mtu = vnet_buffer (from_b)->ip_frag.mtu;
+ org_from_packet = vlib_buffer_get_current (from_b);
+  ip4 = (ip4_header_t *) (org_from_packet + offset);
+
+ rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t);
+ max =
+ (mtu - sizeof (ip4_header_t) -
+ vnet_buffer (from_b)->ip_frag.header_offset) & ~0x7;
+
+ if (rem >
+ (vlib_buffer_length_in_chain (vm, from_b) - offset -
+ sizeof (ip4_header_t)))
{
*error = IP_FRAG_ERROR_MALFORMED;
return;
}
- if (mtu < sizeof (*ip4))
+ if (mtu < sizeof (ip4_header_t))
{
*error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
return;
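
The max computed above is the largest payload one fragment may carry: the MTU minus the IPv4 header and the bytes that precede it (ip_frag.header_offset), rounded down to a multiple of 8 because IPv4 fragment offsets are expressed in 8-byte units. A standalone sketch of that arithmetic (plain C; the constant 20 stands in for sizeof (ip4_header_t) without options, and the values are assumed examples):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint16_t mtu = 1500;          /* assumed link MTU */
  uint16_t header_offset = 0;   /* bytes before the IPv4 header, e.g. an encap header */
  uint16_t max;

  /* Round down to a multiple of 8: fragment offsets count 8-byte units. */
  max = (mtu - 20 - header_offset) & ~0x7;
  printf ("max payload per fragment: %u\n", max);   /* 1480 for a 1500-byte MTU */
  return 0;
}
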
@@ -85,12 +93,6 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
return;
}
- if (p->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
- }
-
if (ip4_is_fragment (ip4))
{
ip_frag_id = ip4->fragment_id;
@@ -106,84 +108,109 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
more = 0;
}
- //Do the actual fragmentation
+ u8 *from_data = (void *) (ip4 + 1);
+ vlib_buffer_t *org_from_b = from_b;
+ u16 ptr = 0, fo = 0;
+ u16 left_in_from_buffer =
+ from_b->current_length - offset - sizeof (ip4_header_t);
+
+ /* Do the actual fragmentation */
while (rem)
{
- u32 bi;
- vlib_buffer_t *b;
- ip4_header_t *fip4;
-
- len =
- (rem >
- (mtu - sizeof (*ip4) -
- vnet_buffer (p)->ip_frag.header_offset)) ? max : rem;
-
- if (ptr == 0)
+ u32 to_bi;
+ vlib_buffer_t *to_b;
+ ip4_header_t *to_ip4;
+ u8 *to_data;
+
+      len = (rem > (mtu - sizeof (ip4_header_t) - offset) ? max : rem);
+      if (len != rem)		/* Last fragment does not need to be divisible by 8 */
+ len &= ~0x7;
+ if (!vlib_buffer_alloc (vm, &to_bi, 1))
{
- bi = pi;
- b = p;
- fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset);
+ *error = IP_FRAG_ERROR_MEMORY;
+ /* XXX: Free already allocated buffers? */
+ return;
}
- else
+ vec_add1 (*buffer, to_bi);
+ to_b = vlib_get_buffer (vm, to_bi);
+ vnet_buffer (to_b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (org_from_b)->sw_if_index[VLIB_RX];
+ vnet_buffer (to_b)->sw_if_index[VLIB_TX] =
+ vnet_buffer (org_from_b)->sw_if_index[VLIB_TX];
+      /* Copy adj_index in case a DPO-based node is requesting the
+       * fragmentation, so the packet is sent back to the proper
+       * DPO next node and index.
+       */
+ vnet_buffer (to_b)->ip.adj_index[VLIB_RX] =
+ vnet_buffer (org_from_b)->ip.adj_index[VLIB_RX];
+ vnet_buffer (to_b)->ip.adj_index[VLIB_TX] =
+ vnet_buffer (org_from_b)->ip.adj_index[VLIB_TX];
+
+ /* Copy offset and ip4 header */
+ clib_memcpy (to_b->data, org_from_packet,
+ offset + sizeof (ip4_header_t));
+ to_ip4 = vlib_buffer_get_current (to_b) + offset;
+ to_data = (void *) (to_ip4 + 1);
+
+ /* Spin through buffer chain copying data */
+      /* XXX: Make sure we don't overflow the source buffer! */
+ if (len > left_in_from_buffer)
{
- if (!vlib_buffer_alloc (vm, &bi, 1))
+ clib_memcpy (to_data, from_data + ptr, left_in_from_buffer);
+
+ /* Move buffer */
+ if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
{
- *error = IP_FRAG_ERROR_MEMORY;
+ *error = IP_FRAG_ERROR_MALFORMED;
return;
}
- vec_add1 (*buffer, bi);
- b = vlib_get_buffer (vm, bi);
- vnet_buffer (b)->sw_if_index[VLIB_RX] =
- vnet_buffer (p)->sw_if_index[VLIB_RX];
- vnet_buffer (b)->sw_if_index[VLIB_TX] =
- vnet_buffer (p)->sw_if_index[VLIB_TX];
- /* Copy Adj_index in case DPO based node is sending for the fragmentation,
- the packet would be sent back to the proper DPO next node and Index */
- vnet_buffer (b)->ip.adj_index[VLIB_RX] =
- vnet_buffer (p)->ip.adj_index[VLIB_RX];
- vnet_buffer (b)->ip.adj_index[VLIB_TX] =
- vnet_buffer (p)->ip.adj_index[VLIB_TX];
- fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset);
-
- //Copy offset and ip4 header
- clib_memcpy (b->data, packet, offset + sizeof (*ip4));
- //Copy data
- clib_memcpy (((u8 *) (fip4)) + sizeof (*fip4),
- packet + offset + sizeof (*fip4) + ptr, len);
+ from_b = vlib_get_buffer (vm, from_b->next_buffer);
+ from_data = (u8 *) vlib_buffer_get_current (from_b);
+ clib_memcpy (to_data + left_in_from_buffer, from_data,
+ len - left_in_from_buffer);
+ ptr = len - left_in_from_buffer;
+ left_in_from_buffer =
+ from_b->current_length - (len - left_in_from_buffer);
+ }
+ else
+ {
+ clib_memcpy (to_data, from_data + ptr, len);
+ left_in_from_buffer -= len;
+ ptr += len;
}
- b->current_length = offset + len + sizeof (*fip4);
+ to_b->current_length = offset + len + sizeof (ip4_header_t);
- fip4->fragment_id = ip_frag_id;
- fip4->flags_and_fragment_offset =
- clib_host_to_net_u16 ((ptr >> 3) + ip_frag_offset);
- fip4->flags_and_fragment_offset |=
+ to_ip4->fragment_id = ip_frag_id;
+ to_ip4->flags_and_fragment_offset =
+ clib_host_to_net_u16 ((fo >> 3) + ip_frag_offset);
+ to_ip4->flags_and_fragment_offset |=
clib_host_to_net_u16 (((len != rem) || more) << 13);
- // ((len0 != rem0) || more0) << 13 is optimization for
- // ((len0 != rem0) || more0) ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0
- fip4->length = clib_host_to_net_u16 (len + sizeof (*fip4));
- fip4->checksum = ip4_header_checksum (fip4);
+ to_ip4->length = clib_host_to_net_u16 (len + sizeof (ip4_header_t));
+ to_ip4->checksum = ip4_header_checksum (to_ip4);
- if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
+ if (vnet_buffer (org_from_b)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
{
- //Encapsulating ipv4 header
+ /* Encapsulating ipv4 header */
ip4_header_t *encap_header4 =
- (ip4_header_t *) vlib_buffer_get_current (b);
- encap_header4->length = clib_host_to_net_u16 (b->current_length);
+ (ip4_header_t *) vlib_buffer_get_current (to_b);
+ encap_header4->length = clib_host_to_net_u16 (to_b->current_length);
encap_header4->checksum = ip4_header_checksum (encap_header4);
}
- else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
+ else if (vnet_buffer (org_from_b)->
+ ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
{
- //Encapsulating ipv6 header
+ /* Encapsulating ipv6 header */
ip6_header_t *encap_header6 =
- (ip6_header_t *) vlib_buffer_get_current (b);
+ (ip6_header_t *) vlib_buffer_get_current (to_b);
encap_header6->payload_length =
- clib_host_to_net_u16 (b->current_length -
+ clib_host_to_net_u16 (to_b->current_length -
sizeof (*encap_header6));
}
-
rem -= len;
- ptr += len;
+ fo += len;
}
+ /* Free original packet chain */
+ vlib_buffer_free_one (vm, from_bi);
}
void
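
Within the fragmentation loop, each fragment's flags_and_fragment_offset is rebuilt from the running byte offset fo: the offset field counts 8-byte units and is added to whatever offset the input header already carried (so re-fragmenting a fragment keeps its position within the original datagram), and More Fragments is set whenever this is not the last piece or the input itself had MF set, i.e. ((len != rem) || more). A small standalone sketch of that arithmetic (helper and parameter names are illustrative, not the VPP API):

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* fo: byte offset of this piece within the packet being split (a multiple of
 * 8 for all but the last piece); orig_offset: fragment offset field of the
 * input header in 8-byte units; orig_more: MF bit of the input header. */
static uint16_t
frag_offset_field (uint16_t fo, uint16_t orig_offset, int last_piece, int orig_more)
{
  uint16_t field = (fo >> 3) + orig_offset;
  field |= ((!last_piece) || orig_more) << 13;	/* More Fragments is bit 13 */
  return htons (field);
}

int
main (void)
{
  /* Second 1480-byte piece of an unfragmented datagram: offset 185 units, MF set. */
  printf ("0x%04x\n", ntohs (frag_offset_field (1480, 0, 0, 0)));
  return 0;
}
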
diff --git a/src/vnet/ipip/ipip.h b/src/vnet/ipip/ipip.h
index 6afb188f8ee..28833df9755 100644
--- a/src/vnet/ipip/ipip.h
+++ b/src/vnet/ipip/ipip.h
@@ -26,11 +26,12 @@
extern vnet_hw_interface_class_t ipip_hw_interface_class;
-#define foreach_ipip_error \
- /* Must be first. */ \
- _(DECAP_PKTS, "packets decapsulated") \
- _(BAD_PROTOCOL, "bad protocol") \
- _(NO_TUNNEL, "no tunnel")
+#define foreach_ipip_error \
+ /* Must be first. */ \
+ _(DECAP_PKTS, "packets decapsulated") \
+ _(BAD_PROTOCOL, "bad protocol") \
+ _(NO_TUNNEL, "no tunnel") \
+ _(FRAGMENTED_PACKET, "fragmented outer packet")
typedef enum
{
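
The new FRAGMENTED_PACKET entry is consumed by the usual error-counter expansion: the foreach list is typically instantiated once as an enum (yielding IPIP_ERROR_FRAGMENTED_PACKET, referenced in node.c below) and once as the counter strings. A minimal sketch of that X-macro convention under assumed demo names (not the exact ipip definitions):

#include <stdio.h>

#define foreach_demo_error                              \
  _(DECAP_PKTS, "packets decapsulated")                 \
  _(BAD_PROTOCOL, "bad protocol")                       \
  _(NO_TUNNEL, "no tunnel")                             \
  _(FRAGMENTED_PACKET, "fragmented outer packet")

typedef enum
{
#define _(sym, str) DEMO_ERROR_##sym,
  foreach_demo_error
#undef _
    DEMO_N_ERROR,
} demo_error_t;

static char *demo_error_strings[] = {
#define _(sym, str) str,
  foreach_demo_error
#undef _
};

int
main (void)
{
  printf ("%d: %s\n", DEMO_ERROR_FRAGMENTED_PACKET,
	  demo_error_strings[DEMO_ERROR_FRAGMENTED_PACKET]);
  return 0;
}
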
diff --git a/src/vnet/ipip/node.c b/src/vnet/ipip/node.c
index d55b91a0b93..60d6223d5f3 100644
--- a/src/vnet/ipip/node.c
+++ b/src/vnet/ipip/node.c
@@ -108,6 +108,14 @@ ipip_input (vlib_main_t * vm, vlib_node_runtime_t * node,
else
{
ip40 = vlib_buffer_get_current (b0);
+ /* Check for outer fragmentation */
+ if (ip40->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS))
+ {
+ next0 = IPIP_INPUT_NEXT_DROP;
+ b0->error = node->errors[IPIP_ERROR_FRAGMENTED_PACKET];
+ goto drop;
+ }
vlib_buffer_advance (b0, sizeof (*ip40));
ip_set (&src0, &ip40->src_address, true);
ip_set (&dst0, &ip40->dst_address, true);
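
The drop added above keys off the More Fragments bit of the outer IPv4 header; non-first fragments instead carry a non-zero fragment offset in the same 16-bit field. A short sketch of both tests on that field, using illustrative constants rather than VPP's IP4_HEADER_FLAG_* definitions:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

#define DEMO_MORE_FRAGMENTS   (1 << 13)   /* MF flag */
#define DEMO_OFFSET_MASK      0x1fff      /* fragment offset, 8-byte units */

/* field_net: flags_and_fragment_offset as found in the header (network order) */
static int
has_more_fragments (uint16_t field_net)
{
  return (field_net & htons (DEMO_MORE_FRAGMENTS)) != 0;
}

static int
is_fragment (uint16_t field_net)
{
  /* Any fragment has MF set, a non-zero offset, or both. */
  return (ntohs (field_net) & (DEMO_MORE_FRAGMENTS | DEMO_OFFSET_MASK)) != 0;
}

int
main (void)
{
  uint16_t last_frag = htons (185);   /* offset 185, MF clear */
  printf ("MF=%d is_fragment=%d\n", has_more_fragments (last_frag),
	  is_fragment (last_frag));
  return 0;
}
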
diff --git a/test/test_ipip.py b/test/test_ipip.py
index 00721ec90a0..582ab5be84c 100644
--- a/test/test_ipip.py
+++ b/test/test_ipip.py
@@ -3,9 +3,11 @@
import unittest
from scapy.layers.inet6 import IPv6, Ether, IP, UDP
+from scapy.all import fragment
from framework import VppTestCase, VppTestRunner
from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto
from socket import AF_INET, AF_INET6, inet_pton
+import StringIO
""" Testipip is a subclass of VPPTestCase classes.
@@ -14,6 +16,20 @@ IPIP tests.
"""
+def reassemble(listoffragments):
+ buffer = StringIO.StringIO()
+ first = listoffragments[0]
+ buffer.seek(20)
+ for pkt in listoffragments:
+ buffer.seek(pkt[IP].frag*8)
+ buffer.write(pkt[IP].payload)
+ first.len = len(buffer.getvalue()) + 20
+ first.flags = 0
+ del(first.chksum)
+ header = str(first[IP])[:20]
+ return first[IP].__class__(header + buffer.getvalue())
+
+
class TestIPIP(VppTestCase):
""" IPIP Test Case """
@@ -126,6 +142,38 @@ class TestIPIP(VppTestCase):
for p in rx:
self.validate(p[1], p6_reply)
+ # Fragmentation / Reassembly and Re-fragmentation
+ rv = self.vapi.ip_reassembly_enable_disable(
+ sw_if_index=self.pg1.sw_if_index,
+ enable_ip4=1)
+ # Decapsulation
+ p_ether = Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac)
+ p_payload = UDP(sport=1234, dport=1234) / self.payload(3123)
+ p_ip4 = IP(src="1.2.3.4", dst=self.pg0.remote_ip4)
+ outer_ip4 = (p_ether / IP(src=self.pg1.remote_ip4,
+ dst=self.pg0.local_ip4) / p_ip4 / p_payload)
+ frags = fragment(outer_ip4, 1400)
+ p4_reply = (p_ip4 / p_payload)
+ p4_reply.ttl -= 1
+
+ self.pg_enable_capture()
+ self.pg1.add_stream(frags)
+ self.pg_start()
+ rx = self.pg0.get_capture(1)
+ for p in rx:
+ self.validate(p[1], p4_reply)
+
+ # Now try with re-fragmentation
+ self.vapi.sw_interface_set_mtu(self.pg0.sw_if_index, [576, 0, 0, 0])
+ self.pg_enable_capture()
+ self.pg1.add_stream(frags)
+ self.pg_start()
+ rx = self.pg0.get_capture(6)
+ reass_pkt = reassemble(rx)
+ p4_reply.ttl -= 1
+ p4_reply.id = 256
+ self.validate(reass_pkt, p4_reply)
+
def test_ipip6(self):
""" ip{v4,v6} over ip6 test """
p_ether = Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac)
@@ -213,6 +261,9 @@ class TestIPIP(VppTestCase):
sw_if_index = rv.sw_if_index
self.vapi.ipip_del_tunnel(sw_if_index)
+ def payload(self, len):
+ return 'x' * len
+
if __name__ == '__main__':
unittest.main(testRunner=VppTestRunner)
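
The reassemble() helper in the test writes each fragment's payload at fragment-offset * 8 into one buffer and then re-derives the length and checksum from the first fragment's header. The placement step in a standalone C sketch (structure and names are illustrative, not part of the test framework):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct demo_fragment
{
  uint16_t offset_units;	/* IPv4 fragment offset field, 8-byte units */
  const uint8_t *payload;
  uint16_t payload_len;
};

static uint16_t
demo_reassemble (const struct demo_fragment *frags, int n_frags, uint8_t *out)
{
  uint16_t total = 0;
  for (int i = 0; i < n_frags; i++)
    {
      uint16_t off = frags[i].offset_units * 8;
      memcpy (out + off, frags[i].payload, frags[i].payload_len);
      if (off + frags[i].payload_len > total)
	total = off + frags[i].payload_len;
    }
  return total;			/* reassembled payload length; caller re-adds the IP header */
}

int
main (void)
{
  uint8_t out[16] = { 0 };
  const uint8_t a[] = "AAAAAAAA";	/* first fragment payload, offset 0 */
  const uint8_t b[] = "BBBB";		/* second fragment payload, offset 1 unit = byte 8 */
  struct demo_fragment frags[] = { { 0, a, 8 }, { 1, b, 4 } };
  printf ("%d bytes: %.12s\n", (int) demo_reassemble (frags, 2, out), (char *) out);
  return 0;
}
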