Diffstat (limited to 'src')
-rw-r--r--  src/plugins/map/ip4_map.c        | 268
-rw-r--r--  src/plugins/map/ip4_map_t.c      |   6
-rw-r--r--  src/plugins/map/ip6_map.c        |   8
-rw-r--r--  src/plugins/map/ip6_map_t.c      |   6
-rw-r--r--  src/plugins/map/test/test_map.py |  74
-rw-r--r--  src/vnet/ip/ip4_forward.c        |  16
-rw-r--r--  src/vnet/ip/ip6_forward.c        |  15
-rw-r--r--  src/vnet/ip/ip_frag.c            | 181
-rw-r--r--  src/vnet/ip/ip_frag.h            |  45
-rw-r--r--  src/vnet/mpls/mpls_output.c      | 218
10 files changed, 427 insertions, 410 deletions
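Editor's note: the heart of this change is that ip4_frag_do_fragment()/ip6_frag_do_fragment() no longer read MTU and next-node metadata from the buffer; the caller passes the MTU and the size of any unfragmentable encapsulation directly, gets an ip_frag_error_t back, and receives the fragments as a vector of buffer indices. A minimal caller sketch follows, written against the signatures visible in the hunks below; the helper name fragment_ip4_sketch and its error handling are illustrative, not part of the patch.

#include <vlib/vlib.h>
#include <vnet/ip/ip.h>
#include <vnet/ip/ip_frag.h>

/* Sketch only: fragment the IPv4 packet in buffer `bi` so each piece
 * fits `mtu`, collecting the fragment buffer indices in *fragments. */
static ip_frag_error_t
fragment_ip4_sketch (vlib_main_t * vm, u32 bi, u16 mtu, u32 ** fragments)
{
  /* 0: current_data already points at the IPv4 header, no encap bytes */
  ip_frag_error_t err = ip4_frag_do_fragment (vm, bi, mtu, 0, fragments);

  if (err == IP_FRAG_ERROR_NONE)
    vlib_buffer_free_one (vm, bi);	/* fragments replace the original */
  else
    vec_add1 (*fragments, bi);		/* keep the original to drop/count */

  return err;
}

The caller then enqueues every index in *fragments to its chosen next node and resets the vector, which is the pattern the reworked ip4_map() loop and the new mpls-frag node follow in the hunks below; ip6_frag_do_fragment() has the same shape for IPv6.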
diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c index ad94907e499..f2a00907a57 100644 --- a/src/plugins/map/ip4_map.c +++ b/src/plugins/map/ip4_map.c @@ -26,8 +26,6 @@ enum ip4_map_next_e #ifdef MAP_SKIP_IP6_LOOKUP IP4_MAP_NEXT_IP6_REWRITE, #endif - IP4_MAP_NEXT_IP4_FRAGMENT, - IP4_MAP_NEXT_IP6_FRAGMENT, IP4_MAP_NEXT_ICMP_ERROR, IP4_MAP_NEXT_DROP, IP4_MAP_N_NEXT, @@ -117,17 +115,26 @@ ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error) } static u32 -ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error) +ip4_map_fragment (vlib_main_t * vm, u32 bi, u16 mtu, bool df, u32 ** buffers, + u8 * error) { map_main_t *mm = &map_main; + vlib_buffer_t *b = vlib_get_buffer (vm, bi); if (mm->frag_inner) { - // TODO: Fix inner fragmentation after removed inner support from ip-frag. - ip_frag_set_vnet_buffer (b, /*sizeof (ip6_header_t), */ mtu, - IP4_FRAG_NEXT_IP6_LOOKUP, - IP_FRAG_FLAG_IP6_HEADER); - return (IP4_MAP_NEXT_IP4_FRAGMENT); + /* IPv4 fragmented packets inside of IPv6 */ + ip4_frag_do_fragment (vm, bi, mtu, sizeof (ip6_header_t), buffers); + + /* Fixup */ + u32 *i; + vec_foreach (i, *buffers) + { + vlib_buffer_t *p = vlib_get_buffer (vm, *i); + ip6_header_t *ip6 = vlib_buffer_get_current (p); + ip6->payload_length = + clib_host_to_net_u16 (p->current_length - sizeof (ip6_header_t)); + } } else { @@ -140,10 +147,11 @@ ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error) *error = MAP_ERROR_DF_SET; return (IP4_MAP_NEXT_ICMP_ERROR); } - ip_frag_set_vnet_buffer (b, mtu, IP6_FRAG_NEXT_IP6_LOOKUP, - IP_FRAG_FLAG_IP6_HEADER); - return (IP4_MAP_NEXT_IP6_FRAGMENT); + + /* Create IPv6 fragments here */ + ip6_frag_do_fragment (vm, bi, mtu, 0, buffers); } + return (IP4_MAP_NEXT_IP6_LOOKUP); } /* @@ -165,189 +173,6 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) while (n_left_from > 0) { vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Dual loop */ - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 pi0, pi1; - vlib_buffer_t *p0, *p1; - map_domain_t *d0, *d1; - u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE; - ip4_header_t *ip40, *ip41; - u16 port0 = 0, port1 = 0; - ip6_header_t *ip6h0, *ip6h1; - u32 map_domain_index0 = ~0, map_domain_index1 = ~0; - u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 = - IP4_MAP_NEXT_IP6_LOOKUP; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, STORE); - vlib_prefetch_buffer_header (p3, STORE); - /* IPv4 + 8 = 28. 
possibly plus -40 */ - CLIB_PREFETCH (p2->data - 40, 68, STORE); - CLIB_PREFETCH (p3->data - 40, 68, STORE); - } - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - ip40 = vlib_buffer_get_current (p0); - ip41 = vlib_buffer_get_current (p1); - d0 = - ip4_map_get_domain (&ip40->dst_address, &map_domain_index0, - &error0); - d1 = - ip4_map_get_domain (&ip41->dst_address, &map_domain_index1, - &error1); - - /* - * Shared IPv4 address - */ - port0 = ip4_map_port_and_security_check (d0, p0, &error0); - port1 = ip4_map_port_and_security_check (d1, p1, &error1); - - /* Decrement IPv4 TTL */ - ip4_map_decrement_ttl (ip40, &error0); - ip4_map_decrement_ttl (ip41, &error1); - bool df0 = - ip40->flags_and_fragment_offset & - clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); - bool df1 = - ip41->flags_and_fragment_offset & - clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); - - /* MAP calc */ - u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32); - u32 da41 = clib_net_to_host_u32 (ip41->dst_address.as_u32); - u16 dp40 = clib_net_to_host_u16 (port0); - u16 dp41 = clib_net_to_host_u16 (port1); - u64 dal60 = map_get_pfx (d0, da40, dp40); - u64 dal61 = map_get_pfx (d1, da41, dp41); - u64 dar60 = map_get_sfx (d0, da40, dp40); - u64 dar61 = map_get_sfx (d1, da41, dp41); - if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE) - error0 = MAP_ERROR_NO_BINDING; - if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE) - error1 = MAP_ERROR_NO_BINDING; - - /* construct ipv6 header */ - vlib_buffer_advance (p0, -sizeof (ip6_header_t)); - vlib_buffer_advance (p1, -sizeof (ip6_header_t)); - ip6h0 = vlib_buffer_get_current (p0); - ip6h1 = vlib_buffer_get_current (p1); - vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0; - - ip6h0->ip_version_traffic_class_and_flow_label = - ip4_map_vtcfl (ip40, p0); - ip6h1->ip_version_traffic_class_and_flow_label = - ip4_map_vtcfl (ip41, p1); - ip6h0->payload_length = ip40->length; - ip6h1->payload_length = ip41->length; - ip6h0->protocol = IP_PROTOCOL_IP_IN_IP; - ip6h1->protocol = IP_PROTOCOL_IP_IN_IP; - ip6h0->hop_limit = 0x40; - ip6h1->hop_limit = 0x40; - ip6h0->src_address = d0->ip6_src; - ip6h1->src_address = d1->ip6_src; - ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60); - ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60); - ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64 (dal61); - ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64 (dar61); - - /* - * Determine next node. Can be one of: - * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop - */ - if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) - { - if (PREDICT_FALSE - (d0->mtu - && (clib_net_to_host_u16 (ip6h0->payload_length) + - sizeof (*ip6h0) > d0->mtu))) - { - next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0); - } - else - { - next0 = - ip4_map_ip6_lookup_bypass (p0, - ip40) ? - IP4_MAP_NEXT_IP6_REWRITE : next0; - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - map_domain_index0, 1, - clib_net_to_host_u16 - (ip6h0->payload_length) + - 40); - } - } - else - { - next0 = IP4_MAP_NEXT_DROP; - } - - /* - * Determine next node. 
Can be one of: - * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop - */ - if (PREDICT_TRUE (error1 == MAP_ERROR_NONE)) - { - if (PREDICT_FALSE - (d1->mtu - && (clib_net_to_host_u16 (ip6h1->payload_length) + - sizeof (*ip6h1) > d1->mtu))) - { - next1 = ip4_map_fragment (p1, d1->mtu, df1, &error1); - } - else - { - next1 = - ip4_map_ip6_lookup_bypass (p1, - ip41) ? - IP4_MAP_NEXT_IP6_REWRITE : next1; - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - map_domain_index1, 1, - clib_net_to_host_u16 - (ip6h1->payload_length) + - 40); - } - } - else - { - next1 = IP4_MAP_NEXT_DROP; - } - - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - map_add_trace (vm, node, p0, map_domain_index0, port0); - } - if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED)) - { - map_add_trace (vm, node, p1, map_domain_index1, port0); - } - - p0->error = error_node->errors[error0]; - p1->error = error_node->errors[error1]; - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, pi0, pi1, next0, - next1); - } - while (n_left_from > 0 && n_left_to_next > 0) { u32 pi0; @@ -359,12 +184,13 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip6_header_t *ip6h0; u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP; u32 map_domain_index0 = ~0; + u32 *buffer0 = 0; + bool free_original_buffer0 = false; + u32 *frag_from0, frag_left0; pi0 = to_next[0] = from[0]; from += 1; n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; p0 = vlib_get_buffer (vm, pi0); ip40 = vlib_buffer_get_current (p0); @@ -413,7 +239,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) /* * Determine next node. Can be one of: - * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop + * ip6-lookup, ip6-rewrite, error-drop */ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) { @@ -422,7 +248,14 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) && (clib_net_to_host_u16 (ip6h0->payload_length) + sizeof (*ip6h0) > d0->mtu))) { - next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0); + next0 = + ip4_map_fragment (vm, pi0, d0->mtu, df0, &buffer0, + &error0); + + if (error0 == MAP_ERROR_NONE) + { + free_original_buffer0 = true; + } } else { @@ -450,8 +283,41 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0->error = error_node->errors[error0]; exit: - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); + /* Send fragments that were added in the frame */ + if (free_original_buffer0) + { + vlib_buffer_free_one (vm, pi0); /* Free original packet */ + } + else + { + vec_add1 (buffer0, pi0); + } + + frag_from0 = buffer0; + frag_left0 = vec_len (buffer0); + + while (frag_left0 > 0) + { + while (frag_left0 > 0 && n_left_to_next > 0) + { + u32 i0; + i0 = to_next[0] = frag_from0[0]; + frag_from0 += 1; + frag_left0 -= 1; + to_next += 1; + n_left_to_next -= 1; + + vlib_get_buffer (vm, i0)->error = + error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + i0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + vec_reset_length (buffer0); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } @@ -491,8 +357,6 @@ VLIB_REGISTER_NODE(ip4_map_node) = { #ifdef MAP_SKIP_IP6_LOOKUP [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance", #endif - [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag", - 
[IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag", [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error", [IP4_MAP_NEXT_DROP] = "error-drop", }, diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c index 621fb0615dc..c254efc78b9 100644 --- a/src/plugins/map/ip4_map_t.c +++ b/src/plugins/map/ip4_map_t.c @@ -168,7 +168,7 @@ ip4_map_t_icmp (vlib_main_t * vm, if (vnet_buffer (p0)->map_t.mtu < p0->current_length) { vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; + vnet_buffer (p0)->ip_frag.next_index = IP_FRAG_NEXT_IP6_LOOKUP; next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG; } err0: @@ -287,7 +287,7 @@ ip4_map_t_fragmented (vlib_main_t * vm, { vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; vnet_buffer (p0)->ip_frag.next_index = - IP6_FRAG_NEXT_IP6_LOOKUP; + IP_FRAG_NEXT_IP6_LOOKUP; next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG; } } @@ -453,7 +453,7 @@ ip4_map_t_tcp_udp (vlib_main_t * vm, //Send to fragmentation node if necessary vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; vnet_buffer (p0)->ip_frag.next_index = - IP6_FRAG_NEXT_IP6_LOOKUP; + IP_FRAG_NEXT_IP6_LOOKUP; next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; } } diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c index 96f81efc1cb..47958f92a38 100644 --- a/src/plugins/map/ip6_map.c +++ b/src/plugins/map/ip6_map.c @@ -314,7 +314,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { vnet_buffer (p0)->ip_frag.flags = 0; vnet_buffer (p0)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; + IP_FRAG_NEXT_IP4_LOOKUP; vnet_buffer (p0)->ip_frag.mtu = d0->mtu; next0 = IP6_MAP_NEXT_IP4_FRAGMENT; } @@ -346,7 +346,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { vnet_buffer (p1)->ip_frag.flags = 0; vnet_buffer (p1)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; + IP_FRAG_NEXT_IP4_LOOKUP; vnet_buffer (p1)->ip_frag.mtu = d1->mtu; next1 = IP6_MAP_NEXT_IP4_FRAGMENT; } @@ -497,7 +497,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { vnet_buffer (p0)->ip_frag.flags = 0; vnet_buffer (p0)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; + IP_FRAG_NEXT_IP4_LOOKUP; vnet_buffer (p0)->ip_frag.mtu = d0->mtu; next0 = IP6_MAP_NEXT_IP4_FRAGMENT; } @@ -622,7 +622,7 @@ ip6_map_post_ip4_reass (vlib_main_t * vm, && error0 == MAP_ERROR_NONE)) { vnet_buffer (p0)->ip_frag.flags = 0; - vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer (p0)->ip_frag.next_index = IP_FRAG_NEXT_IP4_LOOKUP; vnet_buffer (p0)->ip_frag.mtu = d0->mtu; next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT; } diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c index 6e9c0d7640c..ef7b91349e5 100644 --- a/src/plugins/map/ip6_map_t.c +++ b/src/plugins/map/ip6_map_t.c @@ -169,7 +169,7 @@ ip6_map_t_icmp (vlib_main_t * vm, { // Send to fragmentation node if necessary vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer (p0)->ip_frag.next_index = IP_FRAG_NEXT_IP4_LOOKUP; next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG; } err0: @@ -288,7 +288,7 @@ ip6_map_t_fragmented (vlib_main_t * vm, // Send to fragmentation node if necessary vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; vnet_buffer (p0)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; + IP_FRAG_NEXT_IP4_LOOKUP; next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; } } @@ -441,7 +441,7 @@ ip6_map_t_tcp_udp (vlib_main_t * vm, // Send to 
fragmentation node if necessary vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; vnet_buffer (p0)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; + IP_FRAG_NEXT_IP4_LOOKUP; next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; } } diff --git a/src/plugins/map/test/test_map.py b/src/plugins/map/test/test_map.py index a7e5f165576..c1fe05e1150 100644 --- a/src/plugins/map/test/test_map.py +++ b/src/plugins/map/test/test_map.py @@ -140,7 +140,7 @@ class TestMAP(VppTestCase): IP(src=self.pg0.remote_ip4, dst=self.pg0.remote_ip4) / UDP(sport=20000, dport=10000) / Raw(b'\xa5' * 100)) - rx = self.send_and_expect(self.pg0, v4*1, self.pg0) + rx = self.send_and_expect(self.pg0, v4 * 4, self.pg0) v4_reply = v4[1] v4_reply.ttl -= 1 for p in rx: @@ -154,7 +154,7 @@ class TestMAP(VppTestCase): UDP(sport=20000, dport=10000) / Raw(b'\xa5' * 100)) - self.send_and_assert_encapped_one(v4, "3000::1", map_translated_addr) + self.send_and_assert_encapped(v4 * 4, "3000::1", map_translated_addr) # # Verify reordered fragments are able to pass as well @@ -294,6 +294,76 @@ class TestMAP(VppTestCase): pre_res_route.remove_vpp_config() self.vapi.ppcli("map params pre-resolve del ip6-nh 4001::1") + def test_map_e_inner_frag(self): + """ MAP-E Inner fragmentation """ + + # + # Add a route to the MAP-BR + # + map_br_pfx = "2001::" + map_br_pfx_len = 32 + map_route = VppIpRoute(self, + map_br_pfx, + map_br_pfx_len, + [VppRoutePath(self.pg1.remote_ip6, + self.pg1.sw_if_index)]) + map_route.add_vpp_config() + + # + # Add a domain that maps from pg0 to pg1 + # + map_dst = '2001::/32' + map_src = '3000::1/128' + client_pfx = '192.168.0.0/16' + map_translated_addr = '2001:0:101:7000:0:c0a8:101:7' + tag = 'MAP-E tag.' + self.vapi.map_add_domain(ip4_prefix=client_pfx, + ip6_prefix=map_dst, + ip6_src=map_src, + ea_bits_len=20, + psid_offset=4, + psid_length=4, + mtu=1000, + tag=tag) + + # Enable MAP on interface. 
+ self.vapi.map_if_enable_disable(is_enable=1, + sw_if_index=self.pg0.sw_if_index, + is_translation=0) + + # Enable inner fragmentation + self.vapi.map_param_set_fragmentation(inner=1) + + v4 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IP(src=self.pg0.remote_ip4, dst='192.168.1.1') / + UDP(sport=20000, dport=10000) / + Raw(b'\xa5' * 1300)) + + self.pg_send(self.pg0, v4*1) + rx = self.pg1.get_capture(2) + + frags = fragment_rfc791(v4[1], 1000) + frags[0].id = 0 + frags[1].id = 0 + frags[0].ttl -= 1 + frags[1].ttl -= 1 + frags[0].chksum = 0 + frags[1].chksum = 0 + + v6_reply1 = (IPv6(src='3000::1', dst=map_translated_addr, hlim=63) / + frags[0]) + v6_reply2 = (IPv6(src='3000::1', dst=map_translated_addr, hlim=63) / + frags[1]) + rx[0][1].fl = 0 + rx[1][1].fl = 0 + rx[0][1][IP].id = 0 + rx[1][1][IP].id = 0 + rx[0][1][IP].chksum = 0 + rx[1][1][IP].chksum = 0 + + self.validate(rx[0][1], v6_reply1) + self.validate(rx[1][1], v6_reply2) + def validate(self, rx, expected): self.assertEqual(rx, expected.__class__(scapy.compat.raw(expected))) diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 1550b313915..44a681926b3 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -2293,8 +2293,8 @@ typedef enum always_inline void ip4_mtu_check (vlib_buffer_t * b, u16 packet_len, - u16 adj_packet_bytes, bool df, u16 * next, u32 * error, - u8 is_midchain) + u16 adj_packet_bytes, bool df, u16 * next, + u8 is_midchain, u32 * error) { if (packet_len > adj_packet_bytes) { @@ -2312,8 +2312,8 @@ ip4_mtu_check (vlib_buffer_t * b, u16 packet_len, /* IP fragmentation */ ip_frag_set_vnet_buffer (b, adj_packet_bytes, (is_midchain ? - IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN : - IP4_FRAG_NEXT_IP4_REWRITE), 0); + IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN : + IP_FRAG_NEXT_IP_REWRITE), 0); *next = IP4_REWRITE_NEXT_FRAGMENT; } } @@ -2486,12 +2486,12 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, adj0[0].rewrite_header.max_l3_packet_bytes, ip0->flags_and_fragment_offset & clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), - next + 0, &error0, is_midchain); + next + 0, is_midchain, &error0); ip4_mtu_check (b[1], ip1_len, adj1[0].rewrite_header.max_l3_packet_bytes, ip1->flags_and_fragment_offset & clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), - next + 1, &error1, is_midchain); + next + 1, is_midchain, &error1); if (is_mcast) { @@ -2660,7 +2660,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, adj0[0].rewrite_header.max_l3_packet_bytes, ip0->flags_and_fragment_offset & clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), - next + 0, &error0, is_midchain); + next + 0, is_midchain, &error0); if (is_mcast) { @@ -2758,7 +2758,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, adj0[0].rewrite_header.max_l3_packet_bytes, ip0->flags_and_fragment_offset & clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), - next + 0, &error0, is_midchain); + next + 0, is_midchain, &error0); if (is_mcast) { diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 50de501fe0d..9656621c13b 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -1652,7 +1652,7 @@ typedef enum always_inline void ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes, u16 adj_packet_bytes, bool is_locally_generated, - u32 * next, u32 * error) + u32 * next, u8 is_midchain, u32 * error) { if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes) { @@ -1660,7 +1660,9 @@ ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes, { /* IP fragmentation */ ip_frag_set_vnet_buffer (b, 
adj_packet_bytes, - IP6_FRAG_NEXT_IP6_REWRITE, 0); + (is_midchain ? + IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN : + IP_FRAG_NEXT_IP_REWRITE), 0); *next = IP6_REWRITE_NEXT_FRAGMENT; *error = IP6_ERROR_MTU_EXCEEDED; } @@ -1840,10 +1842,12 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm, ip6_mtu_check (p0, ip0_len, adj0[0].rewrite_header.max_l3_packet_bytes, - is_locally_originated0, &next0, &error0); + is_locally_originated0, &next0, is_midchain, + &error0); ip6_mtu_check (p1, ip1_len, adj1[0].rewrite_header.max_l3_packet_bytes, - is_locally_originated1, &next1, &error1); + is_locally_originated1, &next1, is_midchain, + &error1); /* Don't adjust the buffer for hop count issue; icmp-error node * wants to see the IP header */ @@ -2011,7 +2015,8 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm, ip6_mtu_check (p0, ip0_len, adj0[0].rewrite_header.max_l3_packet_bytes, - is_locally_originated0, &next0, &error0); + is_locally_originated0, &next0, is_midchain, + &error0); /* Don't adjust the buffer for hop count issue; icmp-error node * wants to see the IP header */ diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c index 54efb63c986..9aa8777514f 100644 --- a/src/vnet/ip/ip_frag.c +++ b/src/vnet/ip/ip_frag.c @@ -23,26 +23,6 @@ #include <vnet/ip/ip.h> -/* - * Copy the mpls header if present. - * The current is pointing to the ip header. - * Adjust the buffer and point to the mpls headers on these fragments - * before sending the packet back to mpls-output node. - */ -static inline void -copy_mpls_hdr (vlib_buffer_t * to_b, vlib_buffer_t * from_b) -{ - if ((vnet_buffer (from_b)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER) - { - u8 mpls_hdr_length = vnet_buffer (from_b)->mpls.mpls_hdr_length; - u8 *org_from_mpls_packet = - from_b->data + (from_b->current_data - mpls_hdr_length); - clib_memcpy_fast ((to_b->data - mpls_hdr_length), org_from_mpls_packet, - mpls_hdr_length); - vlib_buffer_advance (to_b, -vnet_buffer (to_b)->mpls.mpls_hdr_length); - } -} - typedef struct { u8 ipv6; @@ -87,14 +67,6 @@ frag_set_sw_if_index (vlib_buffer_t * to, vlib_buffer_t * from) vnet_buffer2 (to)->qos = vnet_buffer2 (from)->qos; to->flags |= VNET_BUFFER_F_QOS_DATA_VALID; } - - /* Copy mpls opaque data */ - if ((vnet_buffer (from)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER) - { - vnet_buffer (to)->mpls.pyld_proto = vnet_buffer (from)->mpls.pyld_proto; - vnet_buffer (to)->mpls.mpls_hdr_length = - vnet_buffer (from)->mpls.mpls_hdr_length; - } } static vlib_buffer_t * @@ -116,20 +88,20 @@ frag_buffer_alloc (vlib_buffer_t * org_b, u32 * bi) * but does not generate buffer chains. I.e. a fragment is always * contained with in a single buffer and limited to the max buffer * size. 
+ * from_bi: current pointer must point to IPv4 header */ -void -ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, - ip_frag_error_t * error) +ip_frag_error_t +ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, + u16 l2unfragmentablesize, u32 ** buffer) { vlib_buffer_t *from_b; ip4_header_t *ip4; - u16 mtu, len, max, rem, ip_frag_id, ip_frag_offset; + u16 len, max, rem, ip_frag_id, ip_frag_offset; u8 *org_from_packet, more; from_b = vlib_get_buffer (vm, from_bi); - mtu = vnet_buffer (from_b)->ip_frag.mtu; org_from_packet = vlib_buffer_get_current (from_b); - ip4 = (ip4_header_t *) vlib_buffer_get_current (from_b); + ip4 = vlib_buffer_get_current (from_b) + l2unfragmentablesize; rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t); max = @@ -139,21 +111,18 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, if (rem > (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t))) { - *error = IP_FRAG_ERROR_MALFORMED; - return; + return IP_FRAG_ERROR_MALFORMED; } if (mtu < sizeof (ip4_header_t)) { - *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER; - return; + return IP_FRAG_ERROR_CANT_FRAGMENT_HEADER; } if (ip4->flags_and_fragment_offset & clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT)) { - *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET; - return; + return IP_FRAG_ERROR_DONT_FRAGMENT_SET; } if (ip4_is_fragment (ip4)) @@ -174,7 +143,8 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, u8 *from_data = (void *) (ip4 + 1); vlib_buffer_t *org_from_b = from_b; u16 fo = 0; - u16 left_in_from_buffer = from_b->current_length - sizeof (ip4_header_t); + u16 left_in_from_buffer = + from_b->current_length - (l2unfragmentablesize + sizeof (ip4_header_t)); u16 ptr = 0; /* Do the actual fragmentation */ @@ -190,17 +160,19 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, len &= ~0x7; if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0) { - *error = IP_FRAG_ERROR_MEMORY; - return; + return IP_FRAG_ERROR_MEMORY; } vec_add1 (*buffer, to_bi); frag_set_sw_if_index (to_b, org_from_b); /* Copy ip4 header */ - clib_memcpy_fast (to_b->data, org_from_packet, sizeof (ip4_header_t)); - to_ip4 = vlib_buffer_get_current (to_b); + to_data = vlib_buffer_get_current (to_b); + clib_memcpy_fast (to_data, org_from_packet, + l2unfragmentablesize + sizeof (ip4_header_t)); + to_ip4 = (ip4_header_t *) (to_data + l2unfragmentablesize); to_data = (void *) (to_ip4 + 1); vnet_buffer (to_b)->l3_hdr_offset = to_b->current_data; + vlib_buffer_copy_trace_flag (vm, from_b, to_bi); to_b->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID; if (from_b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) @@ -232,8 +204,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, /* Move buffer */ if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT)) { - *error = IP_FRAG_ERROR_MALFORMED; - return; + return IP_FRAG_ERROR_MALFORMED; } from_b = vlib_get_buffer (vm, from_b->next_buffer); from_data = (u8 *) vlib_buffer_get_current (from_b); @@ -242,8 +213,9 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, to_ptr += bytes_to_copy; } - to_b->current_length = len + sizeof (ip4_header_t); to_b->flags |= VNET_BUFFER_F_IS_IP4; + to_b->current_length = + len + sizeof (ip4_header_t) + l2unfragmentablesize; to_ip4->fragment_id = ip_frag_id; to_ip4->flags_and_fragment_offset = @@ -256,31 +228,11 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, /* we've just done the IP checksum .. 
*/ to_b->flags &= ~VNET_BUFFER_F_OFFLOAD_IP_CKSUM; - if (vnet_buffer (org_from_b)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER) - { - /* Encapsulating ipv4 header */ - ip4_header_t *encap_header4 = - (ip4_header_t *) vlib_buffer_get_current (to_b); - encap_header4->length = clib_host_to_net_u16 (to_b->current_length); - encap_header4->checksum = ip4_header_checksum (encap_header4); - } - else if (vnet_buffer (org_from_b)-> - ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER) - { - /* Encapsulating ipv6 header */ - ip6_header_t *encap_header6 = - (ip6_header_t *) vlib_buffer_get_current (to_b); - encap_header6->payload_length = - clib_host_to_net_u16 (to_b->current_length - - sizeof (*encap_header6)); - } - - /* Copy mpls header if present */ - copy_mpls_hdr (to_b, org_from_b); - rem -= len; fo += len; } + + return IP_FRAG_ERROR_NONE; } void @@ -322,19 +274,19 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node, pi0 = from[0]; from += 1; n_left_from -= 1; - error0 = IP_FRAG_ERROR_NONE; p0 = vlib_get_buffer (vm, pi0); + u16 mtu = vnet_buffer (p0)->ip_frag.mtu; if (is_ip6) - ip6_frag_do_fragment (vm, pi0, &buffer, &error0); + error0 = ip6_frag_do_fragment (vm, pi0, mtu, 0, &buffer); else - ip4_frag_do_fragment (vm, pi0, &buffer, &error0); + error0 = ip4_frag_do_fragment (vm, pi0, mtu, 0, &buffer); if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) { ip_frag_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->mtu = vnet_buffer (p0)->ip_frag.mtu; + tr->mtu = mtu; tr->ipv6 = is_ip6 ? 1 : 0; tr->n_fragments = vec_len (buffer); tr->next = vnet_buffer (p0)->ip_frag.next_index; @@ -345,20 +297,13 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node, icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable, ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, vnet_buffer (p0)->ip_frag.mtu); - next0 = IP4_FRAG_NEXT_ICMP_ERROR; + next0 = IP_FRAG_NEXT_ICMP_ERROR; } else { - if (is_ip6) - next0 = - (error0 == - IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)-> - ip_frag.next_index : IP6_FRAG_NEXT_DROP; - else - next0 = - (error0 == - IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)-> - ip_frag.next_index : IP4_FRAG_NEXT_DROP; + next0 = (error0 == IP_FRAG_ERROR_NONE ? + vnet_buffer (p0)->ip_frag.next_index : + IP_FRAG_NEXT_DROP); } if (error0 == IP_FRAG_ERROR_NONE) @@ -431,18 +376,20 @@ ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) /* * Fragments the packet given in from_bi. Fragments are returned in the buffer vector. * Caller must ensure the original packet is freed. + * from_bi: current pointer must point to IPv6 header */ -void -ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, - ip_frag_error_t * error) +ip_frag_error_t +ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, + u16 l2unfragmentablesize, u32 ** buffer) { vlib_buffer_t *from_b; ip6_header_t *ip6; - u16 mtu, len, max, rem, ip_frag_id; + u16 len, max, rem, ip_frag_id; + u8 *org_from_packet; from_b = vlib_get_buffer (vm, from_bi); - mtu = vnet_buffer (from_b)->ip_frag.mtu; - ip6 = (ip6_header_t *) vlib_buffer_get_current (from_b); + org_from_packet = vlib_buffer_get_current (from_b); + ip6 = vlib_buffer_get_current (from_b) + l2unfragmentablesize; rem = clib_net_to_host_u16 (ip6->payload_length); max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7; // TODO: Is max correct?? 
@@ -450,21 +397,20 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, if (rem > (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t))) { - *error = IP_FRAG_ERROR_MALFORMED; - return; + return IP_FRAG_ERROR_MALFORMED; } /* TODO: Look through header chain for fragmentation header */ if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) { - *error = IP_FRAG_ERROR_MALFORMED; - return; + return IP_FRAG_ERROR_MALFORMED; } u8 *from_data = (void *) (ip6 + 1); vlib_buffer_t *org_from_b = from_b; u16 fo = 0; - u16 left_in_from_buffer = from_b->current_length - sizeof (ip6_header_t); + u16 left_in_from_buffer = + from_b->current_length - (l2unfragmentablesize + sizeof (ip6_header_t)); u16 ptr = 0; ip_frag_id = ++running_fragment_id; // Fix @@ -485,14 +431,14 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, len &= ~0x7; if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0) { - *error = IP_FRAG_ERROR_MEMORY; - return; + return IP_FRAG_ERROR_MEMORY; } vec_add1 (*buffer, to_bi); frag_set_sw_if_index (to_b, org_from_b); /* Copy ip6 header */ - clib_memcpy_fast (to_b->data, ip6, sizeof (ip6_header_t)); + clib_memcpy_fast (to_b->data, org_from_packet, + l2unfragmentablesize + sizeof (ip6_header_t)); to_ip6 = vlib_buffer_get_current (to_b); to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1); to_data = (void *) (to_frag_hdr + 1); @@ -530,8 +476,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, /* Move buffer */ if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT)) { - *error = IP_FRAG_ERROR_MALFORMED; - return; + return IP_FRAG_ERROR_MALFORMED; } from_b = vlib_get_buffer (vm, from_b->next_buffer); from_data = (u8 *) vlib_buffer_get_current (from_b); @@ -551,12 +496,11 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, to_frag_hdr->next_hdr = ip6->protocol; to_frag_hdr->rsv = 0; - /* Copy mpls header if present */ - copy_mpls_hdr (to_b, org_from_b); - rem -= len; fo += len; } + + return IP_FRAG_ERROR_NONE; } static char *ip4_frag_error_strings[] = { @@ -576,15 +520,14 @@ VLIB_REGISTER_NODE (ip4_frag_node) = { .n_errors = IP_FRAG_N_ERROR, .error_strings = ip4_frag_error_strings, - .n_next_nodes = IP4_FRAG_N_NEXT, + .n_next_nodes = IP_FRAG_N_NEXT, .next_nodes = { - [IP4_FRAG_NEXT_IP4_REWRITE] = "ip4-rewrite", - [IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN] = "ip4-midchain", - [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", - [IP4_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output", - [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [IP4_FRAG_NEXT_DROP] = "ip4-drop" + [IP_FRAG_NEXT_IP_REWRITE] = "ip4-rewrite", + [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip4-midchain", + [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [IP_FRAG_NEXT_DROP] = "ip4-drop" }, }; /* *INDENT-ON* */ @@ -600,14 +543,14 @@ VLIB_REGISTER_NODE (ip6_frag_node) = { .n_errors = IP_FRAG_N_ERROR, .error_strings = ip4_frag_error_strings, - .n_next_nodes = IP6_FRAG_N_NEXT, + .n_next_nodes = IP_FRAG_N_NEXT, .next_nodes = { - [IP6_FRAG_NEXT_IP6_REWRITE] = "ip6-rewrite", - [IP6_FRAG_NEXT_IP6_REWRITE_MIDCHAIN] = "ip6-midchain", - [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", - [IP6_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output", - [IP6_FRAG_NEXT_DROP] = "ip6-drop" + [IP_FRAG_NEXT_IP_REWRITE] = "ip6-rewrite", + [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip6-midchain", + [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", + 
[IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop", + [IP_FRAG_NEXT_DROP] = "ip6-drop" }, }; /* *INDENT-ON* */ diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h index ce4236b8465..86462e6c7d2 100644 --- a/src/vnet/ip/ip_frag.h +++ b/src/vnet/ip/ip_frag.h @@ -39,7 +39,6 @@ #define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header #define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header -#define IP_FRAG_FLAG_MPLS_HEADER 0x04 //Encapsulating MPLS header #define IP4_FRAG_NODE_NAME "ip4-frag" #define IP6_FRAG_NODE_NAME "ip6-frag" @@ -49,26 +48,14 @@ extern vlib_node_registration_t ip6_frag_node; typedef enum { - IP4_FRAG_NEXT_IP4_REWRITE, - IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN, - IP4_FRAG_NEXT_IP4_LOOKUP, - IP4_FRAG_NEXT_IP6_LOOKUP, - IP4_FRAG_NEXT_MPLS_OUTPUT, - IP4_FRAG_NEXT_ICMP_ERROR, - IP4_FRAG_NEXT_DROP, - IP4_FRAG_N_NEXT -} ip4_frag_next_t; - -typedef enum -{ - IP6_FRAG_NEXT_IP4_LOOKUP, - IP6_FRAG_NEXT_IP6_LOOKUP, - IP6_FRAG_NEXT_IP6_REWRITE, - IP6_FRAG_NEXT_IP6_REWRITE_MIDCHAIN, - IP6_FRAG_NEXT_MPLS_OUTPUT, - IP6_FRAG_NEXT_DROP, - IP6_FRAG_N_NEXT -} ip6_frag_next_t; + IP_FRAG_NEXT_IP_REWRITE, + IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN, + IP_FRAG_NEXT_IP4_LOOKUP, + IP_FRAG_NEXT_IP6_LOOKUP, + IP_FRAG_NEXT_ICMP_ERROR, + IP_FRAG_NEXT_DROP, + IP_FRAG_N_NEXT +} ip_frag_next_t; #define foreach_ip_frag_error \ /* Must be first. */ \ @@ -91,12 +78,16 @@ typedef enum void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu, u8 next_index, u8 flags); -void -ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer, - ip_frag_error_t * error); -void -ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer, - ip_frag_error_t * error); + +extern ip_frag_error_t ip4_frag_do_fragment (vlib_main_t * vm, + u32 from_bi, + u16 mtu, + u16 encapsize, u32 ** buffer); +extern ip_frag_error_t ip6_frag_do_fragment (vlib_main_t * vm, + u32 from_bi, + u16 mtu, + u16 encapsize, u32 ** buffer); + #endif /* ifndef IP_FRAG_H */ /* diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c index 5ede22aa410..247f531df9f 100644 --- a/src/vnet/mpls/mpls_output.c +++ b/src/vnet/mpls/mpls_output.c @@ -34,8 +34,7 @@ typedef enum { #define foreach_mpls_output_next \ _(DROP, "error-drop") \ -_(IP4_FRAG, "ip4-frag") \ -_(IP6_FRAG, "ip6-frag") +_(FRAG, "mpls-frag") typedef enum { #define _(s,n) MPLS_OUTPUT_NEXT_##s, @@ -58,31 +57,6 @@ format_mpls_output_trace (u8 * s, va_list * args) return s; } -/* - * Save the mpls header length and adjust the current to ip header - */ -static inline u32 -set_mpls_fragmentation(vlib_buffer_t * p0, ip_adjacency_t * adj0) -{ - u32 next0; - - /* advance size of (all) mpls header to ip header before fragmenting */ - /* save the current pointing to first mpls header. */ - vnet_buffer (p0)->mpls.mpls_hdr_length = vnet_buffer(p0)->l3_hdr_offset - p0->current_data; - vlib_buffer_advance (p0, vnet_buffer (p0)->mpls.mpls_hdr_length); - - /* IP fragmentation */ - ip_frag_set_vnet_buffer (p0, adj0[0].rewrite_header.max_l3_packet_bytes, - IP4_FRAG_NEXT_MPLS_OUTPUT, - ((vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4) ? IP_FRAG_FLAG_IP4_HEADER:IP_FRAG_FLAG_IP6_HEADER)); - - /* Tell ip_frag to retain certain mpls parameters after fragmentation of mpls packet */ - vnet_buffer (p0)->ip_frag.flags = (vnet_buffer (p0)->ip_frag.flags | IP_FRAG_FLAG_MPLS_HEADER); - next0 = (vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4)? 
MPLS_OUTPUT_NEXT_IP4_FRAG:MPLS_OUTPUT_NEXT_IP6_FRAG; - - return next0; -} - static inline uword mpls_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -196,7 +170,7 @@ mpls_output_inline (vlib_main_t * vm, else { error0 = IP4_ERROR_MTU_EXCEEDED; - next0 = set_mpls_fragmentation (p0, adj0); + next0 = MPLS_OUTPUT_NEXT_FRAG; vlib_node_increment_counter (vm, mpls_output_node.index, MPLS_ERROR_PKTS_NEED_FRAG, 1); @@ -219,7 +193,7 @@ mpls_output_inline (vlib_main_t * vm, else { error1 = IP4_ERROR_MTU_EXCEEDED; - next1 = set_mpls_fragmentation (p1, adj1); + next1 = MPLS_OUTPUT_NEXT_FRAG; vlib_node_increment_counter (vm, mpls_output_node.index, MPLS_ERROR_PKTS_NEED_FRAG, 1); @@ -308,7 +282,7 @@ mpls_output_inline (vlib_main_t * vm, else { error0 = IP4_ERROR_MTU_EXCEEDED; - next0 = set_mpls_fragmentation (p0, adj0); + next0 = MPLS_OUTPUT_NEXT_FRAG; vlib_node_increment_counter (vm, mpls_output_node.index, MPLS_ERROR_PKTS_NEED_FRAG, 1); @@ -371,11 +345,9 @@ VLIB_REGISTER_NODE (mpls_output_node) = { .n_next_nodes = MPLS_OUTPUT_N_NEXT, .next_nodes = { -#define _(s,n) [MPLS_OUTPUT_NEXT_##s] = n, - foreach_mpls_output_next -#undef _ - }, - + [MPLS_OUTPUT_NEXT_DROP] = "mpls-drop", + [MPLS_OUTPUT_NEXT_FRAG] = "mpls-frag", + }, .format_trace = format_mpls_output_trace, }; @@ -390,12 +362,184 @@ VLIB_REGISTER_NODE (mpls_midchain_node) = { .name = "mpls-midchain", .vector_size = sizeof (u32), - .format_trace = format_mpls_output_trace, + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, .sibling_of = "mpls-output", + .format_trace = format_mpls_output_trace, }; -/** +static char *mpls_frag_error_strings[] = { +#define _(sym,string) string, + foreach_ip_frag_error +#undef _ +}; + +typedef struct mpls_frag_trace_t_ +{ + u16 pkt_size; + u16 mtu; +} mpls_frag_trace_t; + +typedef enum +{ + MPLS_FRAG_NEXT_REWRITE, + MPLS_FRAG_NEXT_REWRITE_MIDCHAIN, + MPLS_FRAG_NEXT_ICMP_ERROR, + MPLS_FRAG_NEXT_DROP, + MPLS_FRAG_N_NEXT, +} mpls_frag_next_t; + +static uword +mpls_frag (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, next_index, * from, * to_next, n_left_to_next, *frags; + vlib_node_runtime_t * error_node; + + error_node = vlib_node_get_runtime (vm, mpls_output_node.index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + frags = NULL; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_adjacency_t * adj0; + vlib_buffer_t * p0; + mpls_frag_next_t next0; + u32 pi0, adj_index0; + ip_frag_error_t error0 = IP_FRAG_ERROR_NONE; + i16 encap_size; + u8 is_ip4; + + pi0 = to_next[0] = from[0]; + p0 = vlib_get_buffer (vm, pi0); + from += 1; + n_left_from -= 1; + is_ip4 = vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4; + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + adj0 = adj_get(adj_index0); + + /* the size of the MPLS stack */ + encap_size = vnet_buffer(p0)->l3_hdr_offset - p0->current_data; + + /* IP fragmentation */ + if (is_ip4) + error0 = ip4_frag_do_fragment (vm, pi0, + adj0->rewrite_header.max_l3_packet_bytes, + encap_size, &frags); + else + error0 = ip6_frag_do_fragment (vm, pi0, + adj0->rewrite_header.max_l3_packet_bytes, + encap_size, &frags); + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_frag_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->mtu = adj0->rewrite_header.max_l3_packet_bytes; + tr->pkt_size = 
vlib_buffer_length_in_chain(vm, p0); + } + + if (PREDICT_TRUE(error0 == IP_FRAG_ERROR_NONE)) + { + /* Free original buffer chain */ + vlib_buffer_free_one (vm, pi0); /* Free original packet */ + next0 = (IP_LOOKUP_NEXT_MIDCHAIN == adj0->lookup_next_index ? + MPLS_FRAG_NEXT_REWRITE_MIDCHAIN : + MPLS_FRAG_NEXT_REWRITE); + } + else if (is_ip4 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET) + { + icmp4_error_set_vnet_buffer ( + p0, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, + vnet_buffer (p0)->ip_frag.mtu); + next0 = MPLS_FRAG_NEXT_ICMP_ERROR; + } + else + { + vlib_error_count (vm, next_index, error0, 1); + vec_add1 (frags, pi0); /* Get rid of the original buffer */ + next0 = MPLS_FRAG_NEXT_DROP; + } + + /* Send fragments that were added in the frame */ + u32 *frag_from, frag_left; + + frag_from = frags; + frag_left = vec_len (frags); + + while (frag_left > 0) + { + while (frag_left > 0 && n_left_to_next > 0) + { + u32 i; + i = to_next[0] = frag_from[0]; + frag_from += 1; + frag_left -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, i); + p0->error = error_node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, i, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + vec_reset_length (frags); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vec_free (frags); + + return frame->n_vectors; +} + +static u8 * +format_mpls_frag_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_frag_trace_t *t = va_arg (*args, mpls_frag_trace_t *); + + s = format (s, "mtu:%d pkt-size:%d", t->mtu, t->pkt_size); + return s; +} + +VLIB_REGISTER_NODE (mpls_frag_node) = { + .function = mpls_frag, + .name = "mpls-frag", + .vector_size = sizeof (u32), + .format_trace = format_mpls_frag_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = IP_FRAG_N_ERROR, + .error_strings = mpls_frag_error_strings, + + .n_next_nodes = MPLS_FRAG_N_NEXT, + .next_nodes = { + [MPLS_FRAG_NEXT_REWRITE] = "mpls-output", + [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain", + [MPLS_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [MPLS_FRAG_NEXT_DROP] = "mpls-drop" + }, +}; + +/* * @brief Next index values from the MPLS incomplete adj node */ #define foreach_mpls_adj_incomplete_next \ |