about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/plugins/map/ip4_map.c | 268
-rw-r--r-- src/plugins/map/ip4_map_t.c | 6
-rw-r--r-- src/plugins/map/ip6_map.c | 8
-rw-r--r-- src/plugins/map/ip6_map_t.c | 6
-rw-r--r-- src/plugins/map/test/test_map.py | 74
-rw-r--r-- src/vnet/ip/ip4_forward.c | 16
-rw-r--r-- src/vnet/ip/ip6_forward.c | 15
-rw-r--r-- src/vnet/ip/ip_frag.c | 181
-rw-r--r-- src/vnet/ip/ip_frag.h | 45
-rw-r--r-- src/vnet/mpls/mpls_output.c | 218
-rw-r--r-- test/test_mpls.py | 39
-rw-r--r-- test/vpp_interface.py | 12
12 files changed, 476 insertions, 412 deletions
diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c
index ad94907e499..f2a00907a57 100644
--- a/src/plugins/map/ip4_map.c
+++ b/src/plugins/map/ip4_map.c
@@ -26,8 +26,6 @@ enum ip4_map_next_e
#ifdef MAP_SKIP_IP6_LOOKUP
IP4_MAP_NEXT_IP6_REWRITE,
#endif
- IP4_MAP_NEXT_IP4_FRAGMENT,
- IP4_MAP_NEXT_IP6_FRAGMENT,
IP4_MAP_NEXT_ICMP_ERROR,
IP4_MAP_NEXT_DROP,
IP4_MAP_N_NEXT,
@@ -117,17 +115,26 @@ ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error)
}
static u32
-ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
+ip4_map_fragment (vlib_main_t * vm, u32 bi, u16 mtu, bool df, u32 ** buffers,
+ u8 * error)
{
map_main_t *mm = &map_main;
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
if (mm->frag_inner)
{
- // TODO: Fix inner fragmentation after removed inner support from ip-frag.
- ip_frag_set_vnet_buffer (b, /*sizeof (ip6_header_t), */ mtu,
- IP4_FRAG_NEXT_IP6_LOOKUP,
- IP_FRAG_FLAG_IP6_HEADER);
- return (IP4_MAP_NEXT_IP4_FRAGMENT);
+ /* IPv4 fragmented packets inside of IPv6 */
+ ip4_frag_do_fragment (vm, bi, mtu, sizeof (ip6_header_t), buffers);
+
+ /* Fix up the outer IPv6 payload length of every fragment */
+ u32 *i;
+ vec_foreach (i, *buffers)
+ {
+ vlib_buffer_t *p = vlib_get_buffer (vm, *i);
+ ip6_header_t *ip6 = vlib_buffer_get_current (p);
+ ip6->payload_length =
+ clib_host_to_net_u16 (p->current_length - sizeof (ip6_header_t));
+ }
}
else
{
@@ -140,10 +147,11 @@ ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
*error = MAP_ERROR_DF_SET;
return (IP4_MAP_NEXT_ICMP_ERROR);
}
- ip_frag_set_vnet_buffer (b, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
- IP_FRAG_FLAG_IP6_HEADER);
- return (IP4_MAP_NEXT_IP6_FRAGMENT);
+
+ /* Create IPv6 fragments here */
+ ip6_frag_do_fragment (vm, bi, mtu, 0, buffers);
}
+ return (IP4_MAP_NEXT_IP6_LOOKUP);
}
/*
@@ -165,189 +173,6 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
while (n_left_from > 0)
{
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- /* Dual loop */
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 pi0, pi1;
- vlib_buffer_t *p0, *p1;
- map_domain_t *d0, *d1;
- u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE;
- ip4_header_t *ip40, *ip41;
- u16 port0 = 0, port1 = 0;
- ip6_header_t *ip6h0, *ip6h1;
- u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
- u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 =
- IP4_MAP_NEXT_IP6_LOOKUP;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, STORE);
- vlib_prefetch_buffer_header (p3, STORE);
- /* IPv4 + 8 = 28. possibly plus -40 */
- CLIB_PREFETCH (p2->data - 40, 68, STORE);
- CLIB_PREFETCH (p3->data - 40, 68, STORE);
- }
-
- pi0 = to_next[0] = from[0];
- pi1 = to_next[1] = from[1];
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
-
- p0 = vlib_get_buffer (vm, pi0);
- p1 = vlib_get_buffer (vm, pi1);
- ip40 = vlib_buffer_get_current (p0);
- ip41 = vlib_buffer_get_current (p1);
- d0 =
- ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
- &error0);
- d1 =
- ip4_map_get_domain (&ip41->dst_address, &map_domain_index1,
- &error1);
-
- /*
- * Shared IPv4 address
- */
- port0 = ip4_map_port_and_security_check (d0, p0, &error0);
- port1 = ip4_map_port_and_security_check (d1, p1, &error1);
-
- /* Decrement IPv4 TTL */
- ip4_map_decrement_ttl (ip40, &error0);
- ip4_map_decrement_ttl (ip41, &error1);
- bool df0 =
- ip40->flags_and_fragment_offset &
- clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
- bool df1 =
- ip41->flags_and_fragment_offset &
- clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
-
- /* MAP calc */
- u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
- u32 da41 = clib_net_to_host_u32 (ip41->dst_address.as_u32);
- u16 dp40 = clib_net_to_host_u16 (port0);
- u16 dp41 = clib_net_to_host_u16 (port1);
- u64 dal60 = map_get_pfx (d0, da40, dp40);
- u64 dal61 = map_get_pfx (d1, da41, dp41);
- u64 dar60 = map_get_sfx (d0, da40, dp40);
- u64 dar61 = map_get_sfx (d1, da41, dp41);
- if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
- error0 = MAP_ERROR_NO_BINDING;
- if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE)
- error1 = MAP_ERROR_NO_BINDING;
-
- /* construct ipv6 header */
- vlib_buffer_advance (p0, -sizeof (ip6_header_t));
- vlib_buffer_advance (p1, -sizeof (ip6_header_t));
- ip6h0 = vlib_buffer_get_current (p0);
- ip6h1 = vlib_buffer_get_current (p1);
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
-
- ip6h0->ip_version_traffic_class_and_flow_label =
- ip4_map_vtcfl (ip40, p0);
- ip6h1->ip_version_traffic_class_and_flow_label =
- ip4_map_vtcfl (ip41, p1);
- ip6h0->payload_length = ip40->length;
- ip6h1->payload_length = ip41->length;
- ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
- ip6h1->protocol = IP_PROTOCOL_IP_IN_IP;
- ip6h0->hop_limit = 0x40;
- ip6h1->hop_limit = 0x40;
- ip6h0->src_address = d0->ip6_src;
- ip6h1->src_address = d1->ip6_src;
- ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
- ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
- ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64 (dal61);
- ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64 (dar61);
-
- /*
- * Determine next node. Can be one of:
- * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
- */
- if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
- {
- if (PREDICT_FALSE
- (d0->mtu
- && (clib_net_to_host_u16 (ip6h0->payload_length) +
- sizeof (*ip6h0) > d0->mtu)))
- {
- next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
- }
- else
- {
- next0 =
- ip4_map_ip6_lookup_bypass (p0,
- ip40) ?
- IP4_MAP_NEXT_IP6_REWRITE : next0;
- vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
- thread_index,
- map_domain_index0, 1,
- clib_net_to_host_u16
- (ip6h0->payload_length) +
- 40);
- }
- }
- else
- {
- next0 = IP4_MAP_NEXT_DROP;
- }
-
- /*
- * Determine next node. Can be one of:
- * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
- */
- if (PREDICT_TRUE (error1 == MAP_ERROR_NONE))
- {
- if (PREDICT_FALSE
- (d1->mtu
- && (clib_net_to_host_u16 (ip6h1->payload_length) +
- sizeof (*ip6h1) > d1->mtu)))
- {
- next1 = ip4_map_fragment (p1, d1->mtu, df1, &error1);
- }
- else
- {
- next1 =
- ip4_map_ip6_lookup_bypass (p1,
- ip41) ?
- IP4_MAP_NEXT_IP6_REWRITE : next1;
- vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
- thread_index,
- map_domain_index1, 1,
- clib_net_to_host_u16
- (ip6h1->payload_length) +
- 40);
- }
- }
- else
- {
- next1 = IP4_MAP_NEXT_DROP;
- }
-
- if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- map_add_trace (vm, node, p0, map_domain_index0, port0);
- }
- if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
- {
- map_add_trace (vm, node, p1, map_domain_index1, port0);
- }
-
- p0->error = error_node->errors[error0];
- p1->error = error_node->errors[error1];
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
- n_left_to_next, pi0, pi1, next0,
- next1);
- }
-
while (n_left_from > 0 && n_left_to_next > 0)
{
u32 pi0;
@@ -359,12 +184,13 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
ip6_header_t *ip6h0;
u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
u32 map_domain_index0 = ~0;
+ u32 *buffer0 = 0;
+ bool free_original_buffer0 = false;
+ u32 *frag_from0, frag_left0;
pi0 = to_next[0] = from[0];
from += 1;
n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
p0 = vlib_get_buffer (vm, pi0);
ip40 = vlib_buffer_get_current (p0);
@@ -413,7 +239,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
/*
* Determine next node. Can be one of:
- * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
+ * ip6-lookup, ip6-rewrite, error-drop
*/
if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
{
@@ -422,7 +248,14 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
&& (clib_net_to_host_u16 (ip6h0->payload_length) +
sizeof (*ip6h0) > d0->mtu)))
{
- next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
+ next0 =
+ ip4_map_fragment (vm, pi0, d0->mtu, df0, &buffer0,
+ &error0);
+
+ if (error0 == MAP_ERROR_NONE)
+ {
+ free_original_buffer0 = true;
+ }
}
else
{
@@ -450,8 +283,41 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
p0->error = error_node->errors[error0];
exit:
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, pi0, next0);
+ /* Enqueue the fragments, or the original packet when none replaced it */
+ if (free_original_buffer0)
+ {
+ vlib_buffer_free_one (vm, pi0); /* Free original packet */
+ }
+ else
+ {
+ vec_add1 (buffer0, pi0);
+ }
+
+ frag_from0 = buffer0;
+ frag_left0 = vec_len (buffer0);
+
+ while (frag_left0 > 0)
+ {
+ while (frag_left0 > 0 && n_left_to_next > 0)
+ {
+ u32 i0;
+ i0 = to_next[0] = frag_from0[0];
+ frag_from0 += 1;
+ frag_left0 -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_get_buffer (vm, i0)->error =
+ error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ i0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ vec_reset_length (buffer0);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
@@ -491,8 +357,6 @@ VLIB_REGISTER_NODE(ip4_map_node) = {
#ifdef MAP_SKIP_IP6_LOOKUP
[IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance",
#endif
- [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
- [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag",
[IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
[IP4_MAP_NEXT_DROP] = "error-drop",
},
diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c
index 621fb0615dc..c254efc78b9 100644
--- a/src/plugins/map/ip4_map_t.c
+++ b/src/plugins/map/ip4_map_t.c
@@ -168,7 +168,7 @@ ip4_map_t_icmp (vlib_main_t * vm,
if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
{
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
- vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+ vnet_buffer (p0)->ip_frag.next_index = IP_FRAG_NEXT_IP6_LOOKUP;
next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG;
}
err0:
@@ -287,7 +287,7 @@ ip4_map_t_fragmented (vlib_main_t * vm,
{
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
vnet_buffer (p0)->ip_frag.next_index =
- IP6_FRAG_NEXT_IP6_LOOKUP;
+ IP_FRAG_NEXT_IP6_LOOKUP;
next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG;
}
}
@@ -453,7 +453,7 @@ ip4_map_t_tcp_udp (vlib_main_t * vm,
//Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
vnet_buffer (p0)->ip_frag.next_index =
- IP6_FRAG_NEXT_IP6_LOOKUP;
+ IP_FRAG_NEXT_IP6_LOOKUP;
next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
}
}
diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c
index 96f81efc1cb..47958f92a38 100644
--- a/src/plugins/map/ip6_map.c
+++ b/src/plugins/map/ip6_map.c
@@ -314,7 +314,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
vnet_buffer (p0)->ip_frag.flags = 0;
vnet_buffer (p0)->ip_frag.next_index =
- IP4_FRAG_NEXT_IP4_LOOKUP;
+ IP_FRAG_NEXT_IP4_LOOKUP;
vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
}
@@ -346,7 +346,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
vnet_buffer (p1)->ip_frag.flags = 0;
vnet_buffer (p1)->ip_frag.next_index =
- IP4_FRAG_NEXT_IP4_LOOKUP;
+ IP_FRAG_NEXT_IP4_LOOKUP;
vnet_buffer (p1)->ip_frag.mtu = d1->mtu;
next1 = IP6_MAP_NEXT_IP4_FRAGMENT;
}
@@ -497,7 +497,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
vnet_buffer (p0)->ip_frag.flags = 0;
vnet_buffer (p0)->ip_frag.next_index =
- IP4_FRAG_NEXT_IP4_LOOKUP;
+ IP_FRAG_NEXT_IP4_LOOKUP;
vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
}
@@ -622,7 +622,7 @@ ip6_map_post_ip4_reass (vlib_main_t * vm,
&& error0 == MAP_ERROR_NONE))
{
vnet_buffer (p0)->ip_frag.flags = 0;
- vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ vnet_buffer (p0)->ip_frag.next_index = IP_FRAG_NEXT_IP4_LOOKUP;
vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT;
}
diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c
index 6e9c0d7640c..ef7b91349e5 100644
--- a/src/plugins/map/ip6_map_t.c
+++ b/src/plugins/map/ip6_map_t.c
@@ -169,7 +169,7 @@ ip6_map_t_icmp (vlib_main_t * vm,
{
// Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
- vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ vnet_buffer (p0)->ip_frag.next_index = IP_FRAG_NEXT_IP4_LOOKUP;
next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG;
}
err0:
@@ -288,7 +288,7 @@ ip6_map_t_fragmented (vlib_main_t * vm,
// Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
vnet_buffer (p0)->ip_frag.next_index =
- IP4_FRAG_NEXT_IP4_LOOKUP;
+ IP_FRAG_NEXT_IP4_LOOKUP;
next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
}
}
@@ -441,7 +441,7 @@ ip6_map_t_tcp_udp (vlib_main_t * vm,
// Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
vnet_buffer (p0)->ip_frag.next_index =
- IP4_FRAG_NEXT_IP4_LOOKUP;
+ IP_FRAG_NEXT_IP4_LOOKUP;
next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
}
}
diff --git a/src/plugins/map/test/test_map.py b/src/plugins/map/test/test_map.py
index a7e5f165576..c1fe05e1150 100644
--- a/src/plugins/map/test/test_map.py
+++ b/src/plugins/map/test/test_map.py
@@ -140,7 +140,7 @@ class TestMAP(VppTestCase):
IP(src=self.pg0.remote_ip4, dst=self.pg0.remote_ip4) /
UDP(sport=20000, dport=10000) /
Raw(b'\xa5' * 100))
- rx = self.send_and_expect(self.pg0, v4*1, self.pg0)
+ rx = self.send_and_expect(self.pg0, v4 * 4, self.pg0)
v4_reply = v4[1]
v4_reply.ttl -= 1
for p in rx:
@@ -154,7 +154,7 @@ class TestMAP(VppTestCase):
UDP(sport=20000, dport=10000) /
Raw(b'\xa5' * 100))
- self.send_and_assert_encapped_one(v4, "3000::1", map_translated_addr)
+ self.send_and_assert_encapped(v4 * 4, "3000::1", map_translated_addr)
#
# Verify reordered fragments are able to pass as well
@@ -294,6 +294,76 @@ class TestMAP(VppTestCase):
pre_res_route.remove_vpp_config()
self.vapi.ppcli("map params pre-resolve del ip6-nh 4001::1")
+ def test_map_e_inner_frag(self):
+ """ MAP-E Inner fragmentation """
+
+ #
+ # Add a route to the MAP-BR
+ #
+ map_br_pfx = "2001::"
+ map_br_pfx_len = 32
+ map_route = VppIpRoute(self,
+ map_br_pfx,
+ map_br_pfx_len,
+ [VppRoutePath(self.pg1.remote_ip6,
+ self.pg1.sw_if_index)])
+ map_route.add_vpp_config()
+
+ #
+ # Add a domain that maps from pg0 to pg1
+ #
+ map_dst = '2001::/32'
+ map_src = '3000::1/128'
+ client_pfx = '192.168.0.0/16'
+ map_translated_addr = '2001:0:101:7000:0:c0a8:101:7'
+ tag = 'MAP-E tag.'
+ self.vapi.map_add_domain(ip4_prefix=client_pfx,
+ ip6_prefix=map_dst,
+ ip6_src=map_src,
+ ea_bits_len=20,
+ psid_offset=4,
+ psid_length=4,
+ mtu=1000,
+ tag=tag)
+
+ # Enable MAP on interface.
+ self.vapi.map_if_enable_disable(is_enable=1,
+ sw_if_index=self.pg0.sw_if_index,
+ is_translation=0)
+
+ # Enable inner fragmentation
+ self.vapi.map_param_set_fragmentation(inner=1)
+
+ v4 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
+ IP(src=self.pg0.remote_ip4, dst='192.168.1.1') /
+ UDP(sport=20000, dport=10000) /
+ Raw(b'\xa5' * 1300))
+
+ self.pg_send(self.pg0, v4*1)
+ rx = self.pg1.get_capture(2)
+
+ frags = fragment_rfc791(v4[1], 1000)
+ frags[0].id = 0
+ frags[1].id = 0
+ frags[0].ttl -= 1
+ frags[1].ttl -= 1
+ frags[0].chksum = 0
+ frags[1].chksum = 0
+
+ v6_reply1 = (IPv6(src='3000::1', dst=map_translated_addr, hlim=63) /
+ frags[0])
+ v6_reply2 = (IPv6(src='3000::1', dst=map_translated_addr, hlim=63) /
+ frags[1])
+ rx[0][1].fl = 0
+ rx[1][1].fl = 0
+ rx[0][1][IP].id = 0
+ rx[1][1][IP].id = 0
+ rx[0][1][IP].chksum = 0
+ rx[1][1][IP].chksum = 0
+
+ self.validate(rx[0][1], v6_reply1)
+ self.validate(rx[1][1], v6_reply2)
+
def validate(self, rx, expected):
self.assertEqual(rx, expected.__class__(scapy.compat.raw(expected)))
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 1550b313915..44a681926b3 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -2293,8 +2293,8 @@ typedef enum
always_inline void
ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
- u16 adj_packet_bytes, bool df, u16 * next, u32 * error,
- u8 is_midchain)
+ u16 adj_packet_bytes, bool df, u16 * next,
+ u8 is_midchain, u32 * error)
{
if (packet_len > adj_packet_bytes)
{
@@ -2312,8 +2312,8 @@ ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
/* IP fragmentation */
ip_frag_set_vnet_buffer (b, adj_packet_bytes,
(is_midchain ?
- IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN :
- IP4_FRAG_NEXT_IP4_REWRITE), 0);
+ IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
+ IP_FRAG_NEXT_IP_REWRITE), 0);
*next = IP4_REWRITE_NEXT_FRAGMENT;
}
}
@@ -2486,12 +2486,12 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 0, &error0, is_midchain);
+ next + 0, is_midchain, &error0);
ip4_mtu_check (b[1], ip1_len,
adj1[0].rewrite_header.max_l3_packet_bytes,
ip1->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 1, &error1, is_midchain);
+ next + 1, is_midchain, &error1);
if (is_mcast)
{
@@ -2660,7 +2660,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 0, &error0, is_midchain);
+ next + 0, is_midchain, &error0);
if (is_mcast)
{
@@ -2758,7 +2758,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 0, &error0, is_midchain);
+ next + 0, is_midchain, &error0);
if (is_mcast)
{
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 50de501fe0d..9656621c13b 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -1652,7 +1652,7 @@ typedef enum
always_inline void
ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
u16 adj_packet_bytes, bool is_locally_generated,
- u32 * next, u32 * error)
+ u32 * next, u8 is_midchain, u32 * error)
{
if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes)
{
@@ -1660,7 +1660,9 @@ ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
{
/* IP fragmentation */
ip_frag_set_vnet_buffer (b, adj_packet_bytes,
- IP6_FRAG_NEXT_IP6_REWRITE, 0);
+ (is_midchain ?
+ IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
+ IP_FRAG_NEXT_IP_REWRITE), 0);
*next = IP6_REWRITE_NEXT_FRAGMENT;
*error = IP6_ERROR_MTU_EXCEEDED;
}
@@ -1840,10 +1842,12 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
ip6_mtu_check (p0, ip0_len,
adj0[0].rewrite_header.max_l3_packet_bytes,
- is_locally_originated0, &next0, &error0);
+ is_locally_originated0, &next0, is_midchain,
+ &error0);
ip6_mtu_check (p1, ip1_len,
adj1[0].rewrite_header.max_l3_packet_bytes,
- is_locally_originated1, &next1, &error1);
+ is_locally_originated1, &next1, is_midchain,
+ &error1);
/* Don't adjust the buffer for hop count issue; icmp-error node
* wants to see the IP header */
@@ -2011,7 +2015,8 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
ip6_mtu_check (p0, ip0_len,
adj0[0].rewrite_header.max_l3_packet_bytes,
- is_locally_originated0, &next0, &error0);
+ is_locally_originated0, &next0, is_midchain,
+ &error0);
/* Don't adjust the buffer for hop count issue; icmp-error node
* wants to see the IP header */
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
index 54efb63c986..9aa8777514f 100644
--- a/src/vnet/ip/ip_frag.c
+++ b/src/vnet/ip/ip_frag.c
@@ -23,26 +23,6 @@
#include <vnet/ip/ip.h>
-/*
- * Copy the mpls header if present.
- * The current is pointing to the ip header.
- * Adjust the buffer and point to the mpls headers on these fragments
- * before sending the packet back to mpls-output node.
- */
-static inline void
-copy_mpls_hdr (vlib_buffer_t * to_b, vlib_buffer_t * from_b)
-{
- if ((vnet_buffer (from_b)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER)
- {
- u8 mpls_hdr_length = vnet_buffer (from_b)->mpls.mpls_hdr_length;
- u8 *org_from_mpls_packet =
- from_b->data + (from_b->current_data - mpls_hdr_length);
- clib_memcpy_fast ((to_b->data - mpls_hdr_length), org_from_mpls_packet,
- mpls_hdr_length);
- vlib_buffer_advance (to_b, -vnet_buffer (to_b)->mpls.mpls_hdr_length);
- }
-}
-
typedef struct
{
u8 ipv6;
@@ -87,14 +67,6 @@ frag_set_sw_if_index (vlib_buffer_t * to, vlib_buffer_t * from)
vnet_buffer2 (to)->qos = vnet_buffer2 (from)->qos;
to->flags |= VNET_BUFFER_F_QOS_DATA_VALID;
}
-
- /* Copy mpls opaque data */
- if ((vnet_buffer (from)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER)
- {
- vnet_buffer (to)->mpls.pyld_proto = vnet_buffer (from)->mpls.pyld_proto;
- vnet_buffer (to)->mpls.mpls_hdr_length =
- vnet_buffer (from)->mpls.mpls_hdr_length;
- }
}
static vlib_buffer_t *
@@ -116,20 +88,20 @@ frag_buffer_alloc (vlib_buffer_t * org_b, u32 * bi)
* but does not generate buffer chains. I.e. a fragment is always
* contained with in a single buffer and limited to the max buffer
* size.
+ * from_bi: current must point l2unfragmentablesize bytes before IPv4 header
*/
-void
-ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
- ip_frag_error_t * error)
+ip_frag_error_t
+ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
+ u16 l2unfragmentablesize, u32 ** buffer)
{
vlib_buffer_t *from_b;
ip4_header_t *ip4;
- u16 mtu, len, max, rem, ip_frag_id, ip_frag_offset;
+ u16 len, max, rem, ip_frag_id, ip_frag_offset;
u8 *org_from_packet, more;
from_b = vlib_get_buffer (vm, from_bi);
- mtu = vnet_buffer (from_b)->ip_frag.mtu;
org_from_packet = vlib_buffer_get_current (from_b);
- ip4 = (ip4_header_t *) vlib_buffer_get_current (from_b);
+ ip4 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t);
max =
@@ -139,21 +111,18 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
if (rem >
(vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t)))
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
if (mtu < sizeof (ip4_header_t))
{
- *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
- return;
+ return IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
}
if (ip4->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT))
{
- *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
- return;
+ return IP_FRAG_ERROR_DONT_FRAGMENT_SET;
}
if (ip4_is_fragment (ip4))
@@ -174,7 +143,8 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
u8 *from_data = (void *) (ip4 + 1);
vlib_buffer_t *org_from_b = from_b;
u16 fo = 0;
- u16 left_in_from_buffer = from_b->current_length - sizeof (ip4_header_t);
+ u16 left_in_from_buffer =
+ from_b->current_length - (l2unfragmentablesize + sizeof (ip4_header_t));
u16 ptr = 0;
/* Do the actual fragmentation */
@@ -190,17 +160,19 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
len &= ~0x7;
if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
{
- *error = IP_FRAG_ERROR_MEMORY;
- return;
+ return IP_FRAG_ERROR_MEMORY;
}
vec_add1 (*buffer, to_bi);
frag_set_sw_if_index (to_b, org_from_b);
/* Copy ip4 header */
- clib_memcpy_fast (to_b->data, org_from_packet, sizeof (ip4_header_t));
- to_ip4 = vlib_buffer_get_current (to_b);
+ to_data = vlib_buffer_get_current (to_b);
+ clib_memcpy_fast (to_data, org_from_packet,
+ l2unfragmentablesize + sizeof (ip4_header_t));
+ to_ip4 = (ip4_header_t *) (to_data + l2unfragmentablesize);
to_data = (void *) (to_ip4 + 1);
vnet_buffer (to_b)->l3_hdr_offset = to_b->current_data;
+ vlib_buffer_copy_trace_flag (vm, from_b, to_bi);
to_b->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
if (from_b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID)
@@ -232,8 +204,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
/* Move buffer */
if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
from_b = vlib_get_buffer (vm, from_b->next_buffer);
from_data = (u8 *) vlib_buffer_get_current (from_b);
@@ -242,8 +213,9 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
to_ptr += bytes_to_copy;
}
- to_b->current_length = len + sizeof (ip4_header_t);
to_b->flags |= VNET_BUFFER_F_IS_IP4;
+ to_b->current_length =
+ len + sizeof (ip4_header_t) + l2unfragmentablesize;
to_ip4->fragment_id = ip_frag_id;
to_ip4->flags_and_fragment_offset =
@@ -256,31 +228,11 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
/* we've just done the IP checksum .. */
to_b->flags &= ~VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
- if (vnet_buffer (org_from_b)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
- {
- /* Encapsulating ipv4 header */
- ip4_header_t *encap_header4 =
- (ip4_header_t *) vlib_buffer_get_current (to_b);
- encap_header4->length = clib_host_to_net_u16 (to_b->current_length);
- encap_header4->checksum = ip4_header_checksum (encap_header4);
- }
- else if (vnet_buffer (org_from_b)->
- ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
- {
- /* Encapsulating ipv6 header */
- ip6_header_t *encap_header6 =
- (ip6_header_t *) vlib_buffer_get_current (to_b);
- encap_header6->payload_length =
- clib_host_to_net_u16 (to_b->current_length -
- sizeof (*encap_header6));
- }
-
- /* Copy mpls header if present */
- copy_mpls_hdr (to_b, org_from_b);
-
rem -= len;
fo += len;
}
+
+ return IP_FRAG_ERROR_NONE;
}
void
@@ -322,19 +274,19 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
pi0 = from[0];
from += 1;
n_left_from -= 1;
- error0 = IP_FRAG_ERROR_NONE;
p0 = vlib_get_buffer (vm, pi0);
+ u16 mtu = vnet_buffer (p0)->ip_frag.mtu;
if (is_ip6)
- ip6_frag_do_fragment (vm, pi0, &buffer, &error0);
+ error0 = ip6_frag_do_fragment (vm, pi0, mtu, 0, &buffer);
else
- ip4_frag_do_fragment (vm, pi0, &buffer, &error0);
+ error0 = ip4_frag_do_fragment (vm, pi0, mtu, 0, &buffer);
if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
{
ip_frag_trace_t *tr =
vlib_add_trace (vm, node, p0, sizeof (*tr));
- tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
+ tr->mtu = mtu;
tr->ipv6 = is_ip6 ? 1 : 0;
tr->n_fragments = vec_len (buffer);
tr->next = vnet_buffer (p0)->ip_frag.next_index;
@@ -345,20 +297,13 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
vnet_buffer (p0)->ip_frag.mtu);
- next0 = IP4_FRAG_NEXT_ICMP_ERROR;
+ next0 = IP_FRAG_NEXT_ICMP_ERROR;
}
else
{
- if (is_ip6)
- next0 =
- (error0 ==
- IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
- ip_frag.next_index : IP6_FRAG_NEXT_DROP;
- else
- next0 =
- (error0 ==
- IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
- ip_frag.next_index : IP4_FRAG_NEXT_DROP;
+ next0 = (error0 == IP_FRAG_ERROR_NONE ?
+ vnet_buffer (p0)->ip_frag.next_index :
+ IP_FRAG_NEXT_DROP);
}
if (error0 == IP_FRAG_ERROR_NONE)
@@ -431,18 +376,20 @@ ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
/*
* Fragments the packet given in from_bi. Fragments are returned in the buffer vector.
* Caller must ensure the original packet is freed.
+ * from_bi: current must point l2unfragmentablesize bytes before IPv6 header
*/
-void
-ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
- ip_frag_error_t * error)
+ip_frag_error_t
+ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
+ u16 l2unfragmentablesize, u32 ** buffer)
{
vlib_buffer_t *from_b;
ip6_header_t *ip6;
- u16 mtu, len, max, rem, ip_frag_id;
+ u16 len, max, rem, ip_frag_id;
+ u8 *org_from_packet;
from_b = vlib_get_buffer (vm, from_bi);
- mtu = vnet_buffer (from_b)->ip_frag.mtu;
- ip6 = (ip6_header_t *) vlib_buffer_get_current (from_b);
+ org_from_packet = vlib_buffer_get_current (from_b);
+ ip6 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
rem = clib_net_to_host_u16 (ip6->payload_length);
max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7; // TODO: Is max correct??
@@ -450,21 +397,20 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
if (rem >
(vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t)))
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
/* TODO: Look through header chain for fragmentation header */
if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
u8 *from_data = (void *) (ip6 + 1);
vlib_buffer_t *org_from_b = from_b;
u16 fo = 0;
- u16 left_in_from_buffer = from_b->current_length - sizeof (ip6_header_t);
+ u16 left_in_from_buffer =
+ from_b->current_length - (l2unfragmentablesize + sizeof (ip6_header_t));
u16 ptr = 0;
ip_frag_id = ++running_fragment_id; // Fix
@@ -485,14 +431,14 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
len &= ~0x7;
if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
{
- *error = IP_FRAG_ERROR_MEMORY;
- return;
+ return IP_FRAG_ERROR_MEMORY;
}
vec_add1 (*buffer, to_bi);
frag_set_sw_if_index (to_b, org_from_b);
/* Copy ip6 header */
- clib_memcpy_fast (to_b->data, ip6, sizeof (ip6_header_t));
+ clib_memcpy_fast (to_b->data, org_from_packet,
+ l2unfragmentablesize + sizeof (ip6_header_t));
to_ip6 = vlib_buffer_get_current (to_b);
to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1);
to_data = (void *) (to_frag_hdr + 1);
@@ -530,8 +476,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
/* Move buffer */
if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
from_b = vlib_get_buffer (vm, from_b->next_buffer);
from_data = (u8 *) vlib_buffer_get_current (from_b);
@@ -551,12 +496,11 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
to_frag_hdr->next_hdr = ip6->protocol;
to_frag_hdr->rsv = 0;
- /* Copy mpls header if present */
- copy_mpls_hdr (to_b, org_from_b);
-
rem -= len;
fo += len;
}
+
+ return IP_FRAG_ERROR_NONE;
}
static char *ip4_frag_error_strings[] = {
@@ -576,15 +520,14 @@ VLIB_REGISTER_NODE (ip4_frag_node) = {
.n_errors = IP_FRAG_N_ERROR,
.error_strings = ip4_frag_error_strings,
- .n_next_nodes = IP4_FRAG_N_NEXT,
+ .n_next_nodes = IP_FRAG_N_NEXT,
.next_nodes = {
- [IP4_FRAG_NEXT_IP4_REWRITE] = "ip4-rewrite",
- [IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN] = "ip4-midchain",
- [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [IP4_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output",
- [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [IP4_FRAG_NEXT_DROP] = "ip4-drop"
+ [IP_FRAG_NEXT_IP_REWRITE] = "ip4-rewrite",
+ [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip4-midchain",
+ [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [IP_FRAG_NEXT_DROP] = "ip4-drop"
},
};
/* *INDENT-ON* */
@@ -600,14 +543,14 @@ VLIB_REGISTER_NODE (ip6_frag_node) = {
.n_errors = IP_FRAG_N_ERROR,
.error_strings = ip4_frag_error_strings,
- .n_next_nodes = IP6_FRAG_N_NEXT,
+ .n_next_nodes = IP_FRAG_N_NEXT,
.next_nodes = {
- [IP6_FRAG_NEXT_IP6_REWRITE] = "ip6-rewrite",
- [IP6_FRAG_NEXT_IP6_REWRITE_MIDCHAIN] = "ip6-midchain",
- [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [IP6_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output",
- [IP6_FRAG_NEXT_DROP] = "ip6-drop"
+ [IP_FRAG_NEXT_IP_REWRITE] = "ip6-rewrite",
+ [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip6-midchain",
+ [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop",
+ [IP_FRAG_NEXT_DROP] = "ip6-drop"
},
};
/* *INDENT-ON* */
diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h
index ce4236b8465..86462e6c7d2 100644
--- a/src/vnet/ip/ip_frag.h
+++ b/src/vnet/ip/ip_frag.h
@@ -39,7 +39,6 @@
#define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header
#define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header
-#define IP_FRAG_FLAG_MPLS_HEADER 0x04 //Encapsulating MPLS header
#define IP4_FRAG_NODE_NAME "ip4-frag"
#define IP6_FRAG_NODE_NAME "ip6-frag"
@@ -49,26 +48,14 @@ extern vlib_node_registration_t ip6_frag_node;
typedef enum
{
- IP4_FRAG_NEXT_IP4_REWRITE,
- IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN,
- IP4_FRAG_NEXT_IP4_LOOKUP,
- IP4_FRAG_NEXT_IP6_LOOKUP,
- IP4_FRAG_NEXT_MPLS_OUTPUT,
- IP4_FRAG_NEXT_ICMP_ERROR,
- IP4_FRAG_NEXT_DROP,
- IP4_FRAG_N_NEXT
-} ip4_frag_next_t;
-
-typedef enum
-{
- IP6_FRAG_NEXT_IP4_LOOKUP,
- IP6_FRAG_NEXT_IP6_LOOKUP,
- IP6_FRAG_NEXT_IP6_REWRITE,
- IP6_FRAG_NEXT_IP6_REWRITE_MIDCHAIN,
- IP6_FRAG_NEXT_MPLS_OUTPUT,
- IP6_FRAG_NEXT_DROP,
- IP6_FRAG_N_NEXT
-} ip6_frag_next_t;
+ IP_FRAG_NEXT_IP_REWRITE,
+ IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN,
+ IP_FRAG_NEXT_IP4_LOOKUP,
+ IP_FRAG_NEXT_IP6_LOOKUP,
+ IP_FRAG_NEXT_ICMP_ERROR,
+ IP_FRAG_NEXT_DROP,
+ IP_FRAG_N_NEXT
+} ip_frag_next_t;
#define foreach_ip_frag_error \
/* Must be first. */ \
@@ -91,12 +78,16 @@ typedef enum
void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu,
u8 next_index, u8 flags);
-void
-ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
- ip_frag_error_t * error);
-void
-ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
- ip_frag_error_t * error);
+
+extern ip_frag_error_t ip4_frag_do_fragment (vlib_main_t * vm, /* IPv4 fragmenter; result is now returned instead of written through an error pointer */
+ u32 from_bi, /* index of the buffer to fragment */
+ u16 mtu, /* MTU the fragments must fit in */
+ u16 encapsize, u32 ** buffer); /* encapsize: bytes of encapsulation in front of the IP header (callers pass an IPv6 header size or an MPLS stack size); buffer: vector receiving the fragment buffer indices */
+extern ip_frag_error_t ip6_frag_do_fragment (vlib_main_t * vm, /* IPv6 fragmenter; returns IP_FRAG_ERROR_NONE on success, IP_FRAG_ERROR_MEMORY / IP_FRAG_ERROR_MALFORMED on failure */
+ u32 from_bi, /* index of the buffer to fragment */
+ u16 mtu, /* MTU the fragments must fit in */
+ u16 encapsize, u32 ** buffer); /* encapsize: bytes of encapsulation in front of the IPv6 header; buffer: vector to which each fragment's buffer index is appended */
+
#endif /* ifndef IP_FRAG_H */
/*
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index 5ede22aa410..247f531df9f 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -34,8 +34,7 @@ typedef enum {
#define foreach_mpls_output_next \
_(DROP, "error-drop") \
-_(IP4_FRAG, "ip4-frag") \
-_(IP6_FRAG, "ip6-frag")
+_(FRAG, "mpls-frag")
typedef enum {
#define _(s,n) MPLS_OUTPUT_NEXT_##s,
@@ -58,31 +57,6 @@ format_mpls_output_trace (u8 * s, va_list * args)
return s;
}
-/*
- * Save the mpls header length and adjust the current to ip header
- */
-static inline u32
-set_mpls_fragmentation(vlib_buffer_t * p0, ip_adjacency_t * adj0)
-{
- u32 next0;
-
- /* advance size of (all) mpls header to ip header before fragmenting */
- /* save the current pointing to first mpls header. */
- vnet_buffer (p0)->mpls.mpls_hdr_length = vnet_buffer(p0)->l3_hdr_offset - p0->current_data;
- vlib_buffer_advance (p0, vnet_buffer (p0)->mpls.mpls_hdr_length);
-
- /* IP fragmentation */
- ip_frag_set_vnet_buffer (p0, adj0[0].rewrite_header.max_l3_packet_bytes,
- IP4_FRAG_NEXT_MPLS_OUTPUT,
- ((vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4) ? IP_FRAG_FLAG_IP4_HEADER:IP_FRAG_FLAG_IP6_HEADER));
-
- /* Tell ip_frag to retain certain mpls parameters after fragmentation of mpls packet */
- vnet_buffer (p0)->ip_frag.flags = (vnet_buffer (p0)->ip_frag.flags | IP_FRAG_FLAG_MPLS_HEADER);
- next0 = (vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4)? MPLS_OUTPUT_NEXT_IP4_FRAG:MPLS_OUTPUT_NEXT_IP6_FRAG;
-
- return next0;
-}
-
static inline uword
mpls_output_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -196,7 +170,7 @@ mpls_output_inline (vlib_main_t * vm,
else
{
error0 = IP4_ERROR_MTU_EXCEEDED;
- next0 = set_mpls_fragmentation (p0, adj0);
+ next0 = MPLS_OUTPUT_NEXT_FRAG;
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_NEED_FRAG,
1);
@@ -219,7 +193,7 @@ mpls_output_inline (vlib_main_t * vm,
else
{
error1 = IP4_ERROR_MTU_EXCEEDED;
- next1 = set_mpls_fragmentation (p1, adj1);
+ next1 = MPLS_OUTPUT_NEXT_FRAG;
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_NEED_FRAG,
1);
@@ -308,7 +282,7 @@ mpls_output_inline (vlib_main_t * vm,
else
{
error0 = IP4_ERROR_MTU_EXCEEDED;
- next0 = set_mpls_fragmentation (p0, adj0);
+ next0 = MPLS_OUTPUT_NEXT_FRAG;
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_NEED_FRAG,
1);
@@ -371,11 +345,9 @@ VLIB_REGISTER_NODE (mpls_output_node) = {
.n_next_nodes = MPLS_OUTPUT_N_NEXT,
.next_nodes = {
-#define _(s,n) [MPLS_OUTPUT_NEXT_##s] = n,
- foreach_mpls_output_next
-#undef _
- },
-
+ [MPLS_OUTPUT_NEXT_DROP] = "mpls-drop",
+ [MPLS_OUTPUT_NEXT_FRAG] = "mpls-frag",
+ },
.format_trace = format_mpls_output_trace,
};
@@ -390,12 +362,184 @@ VLIB_REGISTER_NODE (mpls_midchain_node) = {
.name = "mpls-midchain",
.vector_size = sizeof (u32),
- .format_trace = format_mpls_output_trace,
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
.sibling_of = "mpls-output",
+ .format_trace = format_mpls_output_trace,
};
-/**
+static char *mpls_frag_error_strings[] = { /* error strings of the mpls-frag node: one string per foreach_ip_frag_error entry */
+#define _(sym,string) string,
+ foreach_ip_frag_error
+#undef _
+};
+
+typedef struct mpls_frag_trace_t_ /* per-packet trace record written by mpls_frag */
+{
+ u16 pkt_size; /* vlib_buffer_length_in_chain() of the traced buffer */
+ u16 mtu; /* max_l3_packet_bytes of the output adjacency */
+} mpls_frag_trace_t;
+
+typedef enum /* next-node slots of the mpls-frag node */
+{
+ MPLS_FRAG_NEXT_REWRITE, /* fragments for a non-midchain adjacency: "mpls-output" */
+ MPLS_FRAG_NEXT_REWRITE_MIDCHAIN, /* fragments for a midchain adjacency: "mpls-midchain" */
+ MPLS_FRAG_NEXT_ICMP_ERROR, /* IPv4 payload with DF set: "ip4-icmp-error" */
+ MPLS_FRAG_NEXT_DROP, /* any other fragmentation error: "mpls-drop" */
+ MPLS_FRAG_N_NEXT, /* number of next slots */
+} mpls_frag_next_t;
+
+/**
+ * @brief Node function of "mpls-frag": fragment the IP payload carried under
+ * an MPLS label stack.
+ *
+ * For every buffer of the input frame:
+ *  - the label-stack size is computed as l3_hdr_offset - current_data and
+ *    handed to ip4_frag_do_fragment() / ip6_frag_do_fragment() as the
+ *    encapsulation size, with the adjacency's max_l3_packet_bytes as MTU;
+ *  - on success the original buffer is freed and the fragments are sent to
+ *    mpls-output or mpls-midchain, depending on the adjacency type;
+ *  - an IPv4 payload with DF set is handed, unfragmented, to ip4-icmp-error;
+ *  - on any other error the original buffer and whatever fragments were
+ *    produced are sent to mpls-drop.
+ *
+ * Errors are counted against, and buffers are tagged from, this node's own
+ * error table (the ip-frag error strings it registers), not mpls-output's.
+ *
+ * @param vm    vlib main
+ * @param node  runtime of this node
+ * @param frame input frame of buffer indices
+ * @return number of buffers in the input frame
+ */
+static uword
+mpls_frag (vlib_main_t * vm,
+           vlib_node_runtime_t * node,
+           vlib_frame_t * frame)
+{
+  u32 n_left_from, next_index, *from, *to_next, n_left_to_next, *frags;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  frags = NULL;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          ip_adjacency_t *adj0;
+          vlib_buffer_t *p0;
+          mpls_frag_next_t next0;
+          u32 pi0, adj_index0;
+          ip_frag_error_t error0 = IP_FRAG_ERROR_NONE;
+          i16 encap_size;
+          u16 mtu0;
+          u8 is_ip4;
+
+          /* The original is only enqueued (via frags) on the error paths */
+          pi0 = from[0];
+          p0 = vlib_get_buffer (vm, pi0);
+          from += 1;
+          n_left_from -= 1;
+          is_ip4 = vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4;
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+          adj0 = adj_get (adj_index0);
+          mtu0 = adj0->rewrite_header.max_l3_packet_bytes;
+
+          /* the size of the MPLS stack */
+          encap_size = vnet_buffer (p0)->l3_hdr_offset - p0->current_data;
+
+          /* IP fragmentation */
+          if (is_ip4)
+            error0 = ip4_frag_do_fragment (vm, pi0, mtu0, encap_size, &frags);
+          else
+            error0 = ip6_frag_do_fragment (vm, pi0, mtu0, encap_size, &frags);
+
+          if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              mpls_frag_trace_t *tr =
+                vlib_add_trace (vm, node, p0, sizeof (*tr));
+              tr->mtu = mtu0;
+              tr->pkt_size = vlib_buffer_length_in_chain (vm, p0);
+            }
+
+          if (PREDICT_TRUE (error0 == IP_FRAG_ERROR_NONE))
+            {
+              /* Fragments replace the original packet */
+              vlib_buffer_free_one (vm, pi0);
+              next0 = (IP_LOOKUP_NEXT_MIDCHAIN == adj0->lookup_next_index ?
+                       MPLS_FRAG_NEXT_REWRITE_MIDCHAIN :
+                       MPLS_FRAG_NEXT_REWRITE);
+            }
+          else if (is_ip4 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
+            {
+              /*
+               * Skip the label stack so the buffer's current data is the
+               * IPv4 header (presumably what ip4-icmp-error parses -
+               * confirm against that node).
+               */
+              vlib_buffer_advance (p0, encap_size);
+
+              /*
+               * ip_frag.mtu is not set on this path, so report the MTU that
+               * was given to the fragmenter.
+               * NOTE(review): this MTU still includes the label stack;
+               * confirm whether encap_size should be subtracted.
+               */
+              icmp4_error_set_vnet_buffer (
+                p0, ICMP4_destination_unreachable,
+                ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+                mtu0);
+
+              /* The original must be enqueued, or it is neither sent nor freed */
+              vec_add1 (frags, pi0);
+              next0 = MPLS_FRAG_NEXT_ICMP_ERROR;
+            }
+          else
+            {
+              /* vlib_error_count() takes a node index, not a next index */
+              vlib_error_count (vm, node->node_index, error0, 1);
+              vec_add1 (frags, pi0); /* Get rid of the original buffer */
+              next0 = MPLS_FRAG_NEXT_DROP;
+            }
+
+          /* Send fragments that were added in the frame */
+          u32 *frag_from, frag_left;
+
+          frag_from = frags;
+          frag_left = vec_len (frags);
+
+          while (frag_left > 0)
+            {
+              while (frag_left > 0 && n_left_to_next > 0)
+                {
+                  u32 i;
+                  i = to_next[0] = frag_from[0];
+                  frag_from += 1;
+                  frag_left -= 1;
+                  to_next += 1;
+                  n_left_to_next -= 1;
+
+                  p0 = vlib_get_buffer (vm, i);
+                  /* error0 indexes this node's (ip-frag) error table */
+                  p0->error = node->errors[error0];
+
+                  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                                   to_next, n_left_to_next, i,
+                                                   next0);
+                }
+              /* Frame full (or done): hand it over and get a fresh one */
+              vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+              vlib_get_next_frame (vm, node, next_index, to_next,
+                                   n_left_to_next);
+            }
+          vec_reset_length (frags);
+        }
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  vec_free (frags);
+
+  return frame->n_vectors;
+}
+
+/* Trace formatter for mpls-frag: appends "mtu:<mtu> pkt-size:<size>" to s. */
+static u8 *
+format_mpls_frag_trace (u8 * s, va_list * args)
+{
+  /* vm and node must still be consumed to reach the trace record */
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  mpls_frag_trace_t *trace = va_arg (*args, mpls_frag_trace_t *);
+
+  return format (s, "mtu:%d pkt-size:%d", trace->mtu, trace->pkt_size);
+}
+
+VLIB_REGISTER_NODE (mpls_frag_node) = { /* registers "mpls-frag", the next node mpls-output selects via MPLS_OUTPUT_NEXT_FRAG */
+ .function = mpls_frag,
+ .name = "mpls-frag",
+ .vector_size = sizeof (u32),
+ .format_trace = format_mpls_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = IP_FRAG_N_ERROR, /* error codes are ip_frag_error_t values */
+ .error_strings = mpls_frag_error_strings,
+
+ .n_next_nodes = MPLS_FRAG_N_NEXT,
+ .next_nodes = {
+ [MPLS_FRAG_NEXT_REWRITE] = "mpls-output", /* fragments, non-midchain adjacency */
+ [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain", /* fragments, midchain adjacency */
+ [MPLS_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", /* IPv4 payload with DF set */
+ [MPLS_FRAG_NEXT_DROP] = "mpls-drop" /* any other fragmentation error */
+ },
+};
+
+/**
* @brief Next index values from the MPLS incomplete adj node
*/
#define foreach_mpls_adj_incomplete_next \
diff --git a/test/test_mpls.py b/test/test_mpls.py
index ebeea5fb367..8ed047df5f4 100644
--- a/test/test_mpls.py
+++ b/test/test_mpls.py
@@ -154,7 +154,8 @@ class TestMPLS(VppTestCase):
pkts.append(p)
return pkts
- def create_stream_ip4(self, src_if, dst_ip, ip_ttl=64, ip_dscp=0):
+ def create_stream_ip4(self, src_if, dst_ip, ip_ttl=64,
+ ip_dscp=0, payload_size=None):
self.reset_packet_infos()
pkts = []
for i in range(0, 257):
@@ -166,6 +167,8 @@ class TestMPLS(VppTestCase):
UDP(sport=1234, dport=1234) /
Raw(payload))
info.data = p.copy()
+ if payload_size:
+ self.extend_packet(p, payload_size)
pkts.append(p)
return pkts
@@ -911,7 +914,7 @@ class TestMPLS(VppTestCase):
""" MPLS Tunnel Tests - Pipe """
#
- # Create a tunnel with a single out label
+ # Create a tunnel with two out labels
#
mpls_tun = VppMPLSTunnelInterface(
self,
@@ -964,6 +967,38 @@ class TestMPLS(VppTestCase):
VppMplsLabel(46),
VppMplsLabel(33, ttl=255)])
+ #
+ # change tunnel's MTU to a low value
+ #
+ mpls_tun.set_l3_mtu(1200)
+
+ # send IP into the tunnel to be fragmented
+ tx = self.create_stream_ip4(self.pg0, "10.0.0.3",
+ payload_size=1500)
+ rx = self.send_and_expect(self.pg0, tx, self.pg0, len(tx)*2)
+
+ fake_tx = []
+ for p in tx:
+ fake_tx.append(p)
+ fake_tx.append(p)
+ self.verify_capture_tunneled_ip4(self.pg0, rx, fake_tx,
+ [VppMplsLabel(44),
+ VppMplsLabel(46)])
+
+ # send MPLS into the tunnel to be fragmented
+ tx = self.create_stream_ip4(self.pg0, "10.0.0.4",
+ payload_size=1500)
+ rx = self.send_and_expect(self.pg0, tx, self.pg0, len(tx)*2)
+
+ fake_tx = []
+ for p in tx:
+ fake_tx.append(p)
+ fake_tx.append(p)
+ self.verify_capture_tunneled_ip4(self.pg0, rx, fake_tx,
+ [VppMplsLabel(44),
+ VppMplsLabel(46),
+ VppMplsLabel(33, ttl=255)])
+
def test_tunnel_uniform(self):
""" MPLS Tunnel Tests - Uniform """
diff --git a/test/vpp_interface.py b/test/vpp_interface.py
index 431a03a6858..a5f6f45fdaf 100644
--- a/test/vpp_interface.py
+++ b/test/vpp_interface.py
@@ -495,3 +495,15 @@ class VppInterface(object):
def get_tx_stats(self):
c = self.test.statistics.get_counter("^/if/tx$")
return c[0][self.sw_if_index]
+
+ def set_l3_mtu(self, mtu):  # set this interface's L3 MTU: mtu goes in index 0 of the 4-entry MTU list
+ self.test.vapi.sw_interface_set_mtu(self.sw_if_index, [mtu, 0, 0, 0])  # the other three entries are passed as 0
+
+ def set_ip4_mtu(self, mtu):  # set this interface's IPv4 MTU: mtu goes in index 1 of the 4-entry MTU list
+ self.test.vapi.sw_interface_set_mtu(self.sw_if_index, [0, mtu, 0, 0])  # the other three entries are passed as 0
+
+ def set_ip6_mtu(self, mtu):  # set this interface's IPv6 MTU: mtu goes in index 2 of the 4-entry MTU list
+ self.test.vapi.sw_interface_set_mtu(self.sw_if_index, [0, 0, mtu, 0])  # the other three entries are passed as 0
+
+ def set_mpls_mtu(self, mtu):  # set this interface's MPLS MTU: mtu goes in index 3 of the 4-entry MTU list
+ self.test.vapi.sw_interface_set_mtu(self.sw_if_index, [0, 0, 0, mtu])  # the other three entries are passed as 0