From ec5371e3e31b7860d6b3996fd10420566a4377f2 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Fri, 4 Mar 2022 11:45:41 +0000 Subject: ip: Fixes for IPv6 and MPLS fragmentation Type: fix - IPv6 fragmentation did not work if the packet spanned multiple buffers, because the 'len' calculation did not max out at the size of a buffer - IPv6 fragmentation did not work when the l2unfragmentable size was non-zero, it was not used in the correct places - IPv6oMPLS fragmentation would fragment all IPv6, it should do so only for locally generated packets - IPv6oMPLS should send back TooBig ICMP6 for non-locally generated packets Signed-off-by: Neale Ranns Change-Id: Ie8f02cdfdd7b7e8474e62b6d0acda8f20c371184 --- src/vnet/ip/ip_frag.c | 23 +++++----- src/vnet/mpls/mpls_output.c | 105 +++++++++++++++++++++++++++----------------- test/test_ip6.py | 7 +++ test/test_ipip.py | 53 ++++++++++++++++++++++ test/test_mpls.py | 56 ++++++++++++++++++++++- 5 files changed, 191 insertions(+), 53 deletions(-) diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c index b9bc90dcc11..9c223c3d0ae 100644 --- a/src/vnet/ip/ip_frag.c +++ b/src/vnet/ip/ip_frag.c @@ -25,10 +25,10 @@ typedef struct { - u8 ipv6; u16 mtu; u8 next; u16 n_fragments; + u16 pkt_size; } ip_frag_trace_t; static u8 * @@ -37,8 +37,8 @@ format_ip_frag_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *); - s = format (s, "IPv%s mtu: %u fragments: %u next: %d", - t->ipv6 ? "6" : "4", t->mtu, t->n_fragments, t->next); + s = format (s, "mtu: %u pkt-size: %u fragments: %u next: %d", t->mtu, + t->pkt_size, t->n_fragments, t->next); return s; } @@ -286,7 +286,7 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node, ip_frag_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); tr->mtu = mtu; - tr->ipv6 = is_ip6 ? 
1 : 0; + tr->pkt_size = vlib_buffer_length_in_chain (vm, p0); tr->n_fragments = vec_len (buffer); tr->next = vnet_buffer (p0)->ip_frag.next_index; } @@ -385,13 +385,17 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, ip6_header_t *ip6; u16 len, max, rem, ip_frag_id; u8 *org_from_packet; + u16 head_bytes; from_b = vlib_get_buffer (vm, from_bi); org_from_packet = vlib_buffer_get_current (from_b); ip6 = vlib_buffer_get_current (from_b) + l2unfragmentablesize; + head_bytes = + (sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t) + l2unfragmentablesize); rem = clib_net_to_host_u16 (ip6->payload_length); - max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7; // TODO: Is max correct?? + max = (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) - head_bytes) & + ~0x7; if (rem > (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t))) @@ -423,9 +427,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, ip6_frag_hdr_t *to_frag_hdr; u8 *to_data; - len = - (rem > - (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) ? max : rem); + len = (rem > max ? 
max : rem); if (len != rem) /* Last fragment does not need to divisible by 8 */ len &= ~0x7; if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0) @@ -438,7 +440,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, /* Copy ip6 header */ clib_memcpy_fast (to_b->data, org_from_packet, l2unfragmentablesize + sizeof (ip6_header_t)); - to_ip6 = vlib_buffer_get_current (to_b); + to_ip6 = vlib_buffer_get_current (to_b) + l2unfragmentablesize; to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1); to_data = (void *) (to_frag_hdr + 1); @@ -484,8 +486,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, to_ptr += bytes_to_copy; } - to_b->current_length = - len + sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t); + to_b->current_length = len + head_bytes; to_ip6->payload_length = clib_host_to_net_u16 (len + sizeof (ip6_frag_hdr_t)); to_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c index 3699163a155..faeba748e89 100644 --- a/src/vnet/mpls/mpls_output.c +++ b/src/vnet/mpls/mpls_output.c @@ -377,11 +377,12 @@ typedef struct mpls_frag_trace_t_ typedef enum { - MPLS_FRAG_NEXT_REWRITE, - MPLS_FRAG_NEXT_REWRITE_MIDCHAIN, - MPLS_FRAG_NEXT_ICMP_ERROR, - MPLS_FRAG_NEXT_DROP, - MPLS_FRAG_N_NEXT, + MPLS_FRAG_NEXT_REWRITE, + MPLS_FRAG_NEXT_REWRITE_MIDCHAIN, + MPLS_FRAG_NEXT_ICMP4_ERROR, + MPLS_FRAG_NEXT_ICMP6_ERROR, + MPLS_FRAG_NEXT_DROP, + MPLS_FRAG_N_NEXT, } mpls_frag_next_t; static uword @@ -390,9 +391,7 @@ mpls_frag (vlib_main_t * vm, vlib_frame_t * frame) { u32 n_left_from, next_index, * from, * to_next, n_left_to_next, *frags; - vlib_node_runtime_t * error_node; - error_node = vlib_node_get_runtime (vm, mpls_output_node.index); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; @@ -410,8 +409,7 @@ mpls_frag (vlib_main_t * vm, mpls_frag_next_t next0; u32 pi0, adj_index0; ip_frag_error_t error0 = IP_FRAG_ERROR_NONE; - i16 encap_size; 
- u16 mtu; + i16 encap_size, mtu; u8 is_ip4; pi0 = to_next[0] = from[0]; @@ -422,6 +420,7 @@ mpls_frag (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; adj0 = adj_get (adj_index0); + /* the size of the MPLS stack */ encap_size = vnet_buffer (p0)->l3_hdr_offset - p0->current_data; mtu = adj0->rewrite_header.max_l3_packet_bytes - encap_size; @@ -430,7 +429,18 @@ mpls_frag (vlib_main_t * vm, if (is_ip4) error0 = ip4_frag_do_fragment (vm, pi0, mtu, encap_size, &frags); else - error0 = ip6_frag_do_fragment (vm, pi0, mtu, encap_size, &frags); + { + if (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)) + { + /* only fragment locally generated IPv6 */ + error0 = IP_FRAG_ERROR_DONT_FRAGMENT_SET; + } + else + { + error0 = + ip6_frag_do_fragment (vm, pi0, mtu, encap_size, &frags); + } + } if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -443,24 +453,41 @@ mpls_frag (vlib_main_t * vm, if (PREDICT_TRUE (error0 == IP_FRAG_ERROR_NONE)) { /* Free original buffer chain */ - vlib_buffer_free_one (vm, pi0); /* Free original packet */ + vlib_buffer_free_one (vm, pi0); next0 = (IP_LOOKUP_NEXT_MIDCHAIN == adj0->lookup_next_index ? 
MPLS_FRAG_NEXT_REWRITE_MIDCHAIN : MPLS_FRAG_NEXT_REWRITE); } - else if (is_ip4 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET) - { - icmp4_error_set_vnet_buffer ( - p0, ICMP4_destination_unreachable, - ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, - vnet_buffer (p0)->ip_frag.mtu); - next0 = MPLS_FRAG_NEXT_ICMP_ERROR; - } else { - vlib_error_count (vm, mpls_output_node.index, error0, 1); - vec_add1 (frags, pi0); /* Get rid of the original buffer */ - next0 = MPLS_FRAG_NEXT_DROP; + vlib_error_count (vm, node->node_index, error0, 1); + + if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET) + { + vlib_buffer_advance (p0, encap_size); + if (is_ip4) + { + icmp4_error_set_vnet_buffer ( + p0, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, + vnet_buffer (p0)->ip_frag.mtu); + next0 = MPLS_FRAG_NEXT_ICMP4_ERROR; + } + else + { + icmp6_error_set_vnet_buffer ( + p0, ICMP6_packet_too_big, 0, + adj0->rewrite_header.max_l3_packet_bytes); + next0 = MPLS_FRAG_NEXT_ICMP6_ERROR; + } + } + else + { + next0 = MPLS_FRAG_NEXT_DROP; + } + + /* Get rid of the original buffer */ + vec_add1 (frags, pi0); } /* Send fragments that were added in the frame */ @@ -480,9 +507,6 @@ mpls_frag (vlib_main_t * vm, to_next += 1; n_left_to_next -= 1; - p0 = vlib_get_buffer (vm, i); - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 ( vm, node, next_index, to_next, n_left_to_next, i, next0); } @@ -511,22 +535,21 @@ format_mpls_frag_trace (u8 * s, va_list * args) } VLIB_REGISTER_NODE (mpls_frag_node) = { - .function = mpls_frag, - .name = "mpls-frag", - .vector_size = sizeof (u32), - .format_trace = format_mpls_frag_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = IP_FRAG_N_ERROR, - .error_strings = mpls_frag_error_strings, - - .n_next_nodes = MPLS_FRAG_N_NEXT, - .next_nodes = { - [MPLS_FRAG_NEXT_REWRITE] = "mpls-output", - [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain", - 
[MPLS_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [MPLS_FRAG_NEXT_DROP] = "mpls-drop" - }, + .function = mpls_frag, + .name = "mpls-frag", + .vector_size = sizeof (u32), + .format_trace = format_mpls_frag_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = IP_FRAG_N_ERROR, + .error_strings = mpls_frag_error_strings, + + .n_next_nodes = MPLS_FRAG_N_NEXT, + .next_nodes = { [MPLS_FRAG_NEXT_REWRITE] = "mpls-output", + [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain", + [MPLS_FRAG_NEXT_ICMP4_ERROR] = "ip4-icmp-error", + [MPLS_FRAG_NEXT_ICMP6_ERROR] = "ip6-icmp-error", + [MPLS_FRAG_NEXT_DROP] = "mpls-drop" }, }; /* diff --git a/test/test_ip6.py b/test/test_ip6.py index bac50a3df98..2e972db5dd3 100644 --- a/test/test_ip6.py +++ b/test/test_ip6.py @@ -3170,6 +3170,12 @@ class TestIPv6PathMTU(VppTestCase): self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, [2800, 0, 0, 0]) + p_6k = (Ether(dst=self.pg0.local_mac, + src=self.pg0.remote_mac) / + IPv6(src=self.pg0.remote_ip6, + dst=tun.remote_ip6) / + UDP(sport=1234, dport=5678) / + Raw(b'0xa' * 2000)) p_2k = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IPv6(src=self.pg0.remote_ip6, @@ -3189,6 +3195,7 @@ class TestIPv6PathMTU(VppTestCase): self.pg1.remote_ip6).add_vpp_config() # this is now the interface MTU frags + self.send_and_expect(self.pg0, [p_6k], self.pg1, n_rx=4) self.send_and_expect(self.pg0, [p_2k], self.pg1, n_rx=2) self.send_and_expect(self.pg0, [p_1k], self.pg1) diff --git a/test/test_ipip.py b/test/test_ipip.py index 83395e0bd72..8e16c80f05f 100644 --- a/test/test_ipip.py +++ b/test/test_ipip.py @@ -1150,6 +1150,59 @@ class TestIPIP6(VppTestCase): p6_reply.id = 256 self.validate(reass_pkt, p6_reply) + def test_ip6_mpls_frag(self): + """ Test fragmenting IPv6 over MPLS """ + + # IPv6 packets must be locally generated to be fragmented + # the use of tunnel encaps + tun_dst = VppIpRoute( + self, "1000::1", 128, + [VppRoutePath(self.pg1.remote_ip6, + self.pg1.sw_if_index, + 
labels=[VppMplsLabel(32)])]).add_vpp_config() + + tun = VppIpIpTunInterface( + self, + self.pg0, + self.pg0.local_ip6, + "1000::1").add_vpp_config() + + tun.admin_up() + tun.config_ip6() + tun.config_ip4() + + self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, + [2000, 0, 0, 0]) + + p_6k = (Ether(dst=self.pg0.local_mac, + src=self.pg0.remote_mac) / + IPv6(src=self.pg0.remote_ip6, + dst=tun.remote_ip6) / + UDP(sport=1234, dport=5678) / + Raw(b'0xa' * 2000)) + p_2k = (Ether(dst=self.pg0.local_mac, + src=self.pg0.remote_mac) / + IPv6(src=self.pg0.remote_ip6, + dst=tun.remote_ip6) / + UDP(sport=1234, dport=5678) / + Raw(b'0xa' * 1000)) + p_1k = (Ether(dst=self.pg0.local_mac, + src=self.pg0.remote_mac) / + IPv6(src=self.pg0.remote_ip6, + dst=tun.remote_ip6) / + UDP(sport=1234, dport=5678) / + Raw(b'0xa' * 600)) + + # this is now the interface MTU frags + rxs = self.send_and_expect(self.pg0, [p_6k], self.pg1, n_rx=4) + self.assertEqual(rxs[0][UDP].dport, 5678) + for rx in rxs: + self.assertEqual(rx[MPLS].label, 32) + self.assertEqual(rx[IPv6].dst, "1000::1") + self.assertEqual(rx[IPv6].dst, "1000::1") + self.send_and_expect(self.pg0, [p_2k], self.pg1, n_rx=2) + self.send_and_expect(self.pg0, [p_1k], self.pg1) + def test_ipip_create(self): """ ipip create / delete interface test """ rv = ipip_add_tunnel(self, '1.2.3.4', '2.3.4.5') diff --git a/test/test_mpls.py b/test/test_mpls.py index 4cc7a0759f2..d94676b67da 100644 --- a/test/test_mpls.py +++ b/test/test_mpls.py @@ -18,7 +18,8 @@ import scapy.compat from scapy.packet import Raw from scapy.layers.l2 import Ether, ARP from scapy.layers.inet import IP, UDP, ICMP -from scapy.layers.inet6 import IPv6, ICMPv6TimeExceeded, ICMPv6EchoRequest +from scapy.layers.inet6 import IPv6, ICMPv6TimeExceeded, ICMPv6EchoRequest, \ + ICMPv6PacketTooBig from scapy.contrib.mpls import MPLS NUM_PKTS = 67 @@ -425,6 +426,31 @@ class TestMPLS(VppTestCase): except: raise + def verify_capture_fragmented_labelled_ip6(self, src_if, capture, sent, 
+ mpls_labels, ip_ttl=None): + try: + capture = verify_filter(capture, sent) + + for i in range(len(capture)): + tx = sent[0] + rx = capture[i] + tx_ip = tx[IPv6] + rx.show() + rx_ip = IPv6(rx[MPLS].payload) + rx_ip.show() + + verify_mpls_stack(self, rx, mpls_labels) + + self.assertEqual(rx_ip.src, tx_ip.src) + self.assertEqual(rx_ip.dst, tx_ip.dst) + if not ip_ttl: + # IP processing post pop has decremented the hop-limit + self.assertEqual(rx_ip.hlim + 1, tx_ip.hlim) + else: + self.assertEqual(rx_ip.hlim, ip_ttl) + except: + raise + def test_swap(self): """ MPLS label swap tests """ @@ -908,6 +934,11 @@ class TestMPLS(VppTestCase): self.pg0.sw_if_index, labels=[VppMplsLabel(32)])]) route_10_0_0_1.add_vpp_config() + route_1000_1 = VppIpRoute(self, "1000::1", 128, + [VppRoutePath(self.pg0.remote_ip6, + self.pg0.sw_if_index, + labels=[VppMplsLabel(32)])]) + route_1000_1.add_vpp_config() # # a stream that matches the route for 10.0.0.1 @@ -924,6 +955,29 @@ class TestMPLS(VppTestCase): self.verify_capture_fragmented_labelled_ip4(self.pg0, rx, tx, [VppMplsLabel(32)]) + # packets with DF bit set generate ICMP + for t in tx: + t[IP].flags = 'DF' + rxs = self.send_and_expect_some(self.pg0, tx, self.pg0) + + for rx in rxs: + rx[ICMP].code = "fragmentation-needed" + + self.assertEqual(self.statistics.get_err_counter( + "/err/mpls-frag/can't fragment this packet"), + len(tx)) + # + # a stream that matches the route for 1000::1/128 + # PG0 is in the default table + # + tx = self.create_stream_ip6(self.pg0, "1000::1") + for i in range(0, 257): + self.extend_packet(tx[i], 10000) + + rxs = self.send_and_expect_some(self.pg0, tx, self.pg0) + for rx in rxs: + rx[ICMPv6PacketTooBig].mtu = 9000 + # # cleanup # -- cgit 1.2.3-korg