summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOle Troan <ot@cisco.com>2019-10-09 13:33:19 +0200
committerNeale Ranns <nranns@cisco.com>2019-11-11 12:33:36 +0000
commiteb284a1f8f10d752285a0a59e75bc54acae50779 (patch)
tree4ce777b223a51fa70f9e0dc18e7842b488b81cd6
parentd318a996b7bdcf0246b2d9927a918a3773a88fa6 (diff)
ip: functional interface to ip fragmentation
This provides a functional interface to IP fragmentation. Allowing external features to fragment. Supports arbitrary encap size, for e.g. MPLS or inner fragmentation of tunnels. This also removed dual loop in MAP that was fundamentally broken. Type: fix Signed-off-by: Ole Troan <ot@cisco.com> Change-Id: Ia89ecec8ee3cbe2416edbe87630fdb714898c2a8 Signed-off-by: Ole Troan <ot@cisco.com>
-rw-r--r--src/plugins/map/ip4_map.c268
-rw-r--r--src/plugins/map/ip4_map_t.c6
-rw-r--r--src/plugins/map/ip6_map.c8
-rw-r--r--src/plugins/map/ip6_map_t.c6
-rw-r--r--src/plugins/map/test/test_map.py74
-rw-r--r--src/vnet/ip/ip4_forward.c16
-rw-r--r--src/vnet/ip/ip6_forward.c15
-rw-r--r--src/vnet/ip/ip_frag.c181
-rw-r--r--src/vnet/ip/ip_frag.h45
-rw-r--r--src/vnet/mpls/mpls_output.c218
-rw-r--r--test/test_mpls.py39
-rw-r--r--test/vpp_interface.py12
12 files changed, 476 insertions, 412 deletions
diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c
index ad94907e499..f2a00907a57 100644
--- a/src/plugins/map/ip4_map.c
+++ b/src/plugins/map/ip4_map.c
@@ -26,8 +26,6 @@ enum ip4_map_next_e
#ifdef MAP_SKIP_IP6_LOOKUP
IP4_MAP_NEXT_IP6_REWRITE,
#endif
- IP4_MAP_NEXT_IP4_FRAGMENT,
- IP4_MAP_NEXT_IP6_FRAGMENT,
IP4_MAP_NEXT_ICMP_ERROR,
IP4_MAP_NEXT_DROP,
IP4_MAP_N_NEXT,
@@ -117,17 +115,26 @@ ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error)
}
static u32
-ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
+ip4_map_fragment (vlib_main_t * vm, u32 bi, u16 mtu, bool df, u32 ** buffers,
+ u8 * error)
{
map_main_t *mm = &map_main;
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
if (mm->frag_inner)
{
- // TODO: Fix inner fragmentation after removed inner support from ip-frag.
- ip_frag_set_vnet_buffer (b, /*sizeof (ip6_header_t), */ mtu,
- IP4_FRAG_NEXT_IP6_LOOKUP,
- IP_FRAG_FLAG_IP6_HEADER);
- return (IP4_MAP_NEXT_IP4_FRAGMENT);
+ /* IPv4 fragmented packets inside of IPv6 */
+ ip4_frag_do_fragment (vm, bi, mtu, sizeof (ip6_header_t), buffers);
+
+ /* Fixup */
+ u32 *i;
+ vec_foreach (i, *buffers)
+ {
+ vlib_buffer_t *p = vlib_get_buffer (vm, *i);
+ ip6_header_t *ip6 = vlib_buffer_get_current (p);
+ ip6->payload_length =
+ clib_host_to_net_u16 (p->current_length - sizeof (ip6_header_t));
+ }
}
else
{
@@ -140,10 +147,11 @@ ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
*error = MAP_ERROR_DF_SET;
return (IP4_MAP_NEXT_ICMP_ERROR);
}
- ip_frag_set_vnet_buffer (b, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
- IP_FRAG_FLAG_IP6_HEADER);
- return (IP4_MAP_NEXT_IP6_FRAGMENT);
+
+ /* Create IPv6 fragments here */
+ ip6_frag_do_fragment (vm, bi, mtu, 0, buffers);
}
+ return (IP4_MAP_NEXT_IP6_LOOKUP);
}
/*
@@ -165,189 +173,6 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
while (n_left_from > 0)
{
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- /* Dual loop */
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 pi0, pi1;
- vlib_buffer_t *p0, *p1;
- map_domain_t *d0, *d1;
- u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE;
- ip4_header_t *ip40, *ip41;
- u16 port0 = 0, port1 = 0;
- ip6_header_t *ip6h0, *ip6h1;
- u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
- u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 =
- IP4_MAP_NEXT_IP6_LOOKUP;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, STORE);
- vlib_prefetch_buffer_header (p3, STORE);
- /* IPv4 + 8 = 28. possibly plus -40 */
- CLIB_PREFETCH (p2->data - 40, 68, STORE);
- CLIB_PREFETCH (p3->data - 40, 68, STORE);
- }
-
- pi0 = to_next[0] = from[0];
- pi1 = to_next[1] = from[1];
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
-
- p0 = vlib_get_buffer (vm, pi0);
- p1 = vlib_get_buffer (vm, pi1);
- ip40 = vlib_buffer_get_current (p0);
- ip41 = vlib_buffer_get_current (p1);
- d0 =
- ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
- &error0);
- d1 =
- ip4_map_get_domain (&ip41->dst_address, &map_domain_index1,
- &error1);
-
- /*
- * Shared IPv4 address
- */
- port0 = ip4_map_port_and_security_check (d0, p0, &error0);
- port1 = ip4_map_port_and_security_check (d1, p1, &error1);
-
- /* Decrement IPv4 TTL */
- ip4_map_decrement_ttl (ip40, &error0);
- ip4_map_decrement_ttl (ip41, &error1);
- bool df0 =
- ip40->flags_and_fragment_offset &
- clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
- bool df1 =
- ip41->flags_and_fragment_offset &
- clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
-
- /* MAP calc */
- u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
- u32 da41 = clib_net_to_host_u32 (ip41->dst_address.as_u32);
- u16 dp40 = clib_net_to_host_u16 (port0);
- u16 dp41 = clib_net_to_host_u16 (port1);
- u64 dal60 = map_get_pfx (d0, da40, dp40);
- u64 dal61 = map_get_pfx (d1, da41, dp41);
- u64 dar60 = map_get_sfx (d0, da40, dp40);
- u64 dar61 = map_get_sfx (d1, da41, dp41);
- if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
- error0 = MAP_ERROR_NO_BINDING;
- if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE)
- error1 = MAP_ERROR_NO_BINDING;
-
- /* construct ipv6 header */
- vlib_buffer_advance (p0, -sizeof (ip6_header_t));
- vlib_buffer_advance (p1, -sizeof (ip6_header_t));
- ip6h0 = vlib_buffer_get_current (p0);
- ip6h1 = vlib_buffer_get_current (p1);
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
-
- ip6h0->ip_version_traffic_class_and_flow_label =
- ip4_map_vtcfl (ip40, p0);
- ip6h1->ip_version_traffic_class_and_flow_label =
- ip4_map_vtcfl (ip41, p1);
- ip6h0->payload_length = ip40->length;
- ip6h1->payload_length = ip41->length;
- ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
- ip6h1->protocol = IP_PROTOCOL_IP_IN_IP;
- ip6h0->hop_limit = 0x40;
- ip6h1->hop_limit = 0x40;
- ip6h0->src_address = d0->ip6_src;
- ip6h1->src_address = d1->ip6_src;
- ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
- ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
- ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64 (dal61);
- ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64 (dar61);
-
- /*
- * Determine next node. Can be one of:
- * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
- */
- if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
- {
- if (PREDICT_FALSE
- (d0->mtu
- && (clib_net_to_host_u16 (ip6h0->payload_length) +
- sizeof (*ip6h0) > d0->mtu)))
- {
- next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
- }
- else
- {
- next0 =
- ip4_map_ip6_lookup_bypass (p0,
- ip40) ?
- IP4_MAP_NEXT_IP6_REWRITE : next0;
- vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
- thread_index,
- map_domain_index0, 1,
- clib_net_to_host_u16
- (ip6h0->payload_length) +
- 40);
- }
- }
- else
- {
- next0 = IP4_MAP_NEXT_DROP;
- }
-
- /*
- * Determine next node. Can be one of:
- * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
- */
- if (PREDICT_TRUE (error1 == MAP_ERROR_NONE))
- {
- if (PREDICT_FALSE
- (d1->mtu
- && (clib_net_to_host_u16 (ip6h1->payload_length) +
- sizeof (*ip6h1) > d1->mtu)))
- {
- next1 = ip4_map_fragment (p1, d1->mtu, df1, &error1);
- }
- else
- {
- next1 =
- ip4_map_ip6_lookup_bypass (p1,
- ip41) ?
- IP4_MAP_NEXT_IP6_REWRITE : next1;
- vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
- thread_index,
- map_domain_index1, 1,
- clib_net_to_host_u16
- (ip6h1->payload_length) +
- 40);
- }
- }
- else
- {
- next1 = IP4_MAP_NEXT_DROP;
- }
-
- if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- map_add_trace (vm, node, p0, map_domain_index0, port0);
- }
- if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
- {
- map_add_trace (vm, node, p1, map_domain_index1, port0);
- }
-
- p0->error = error_node->errors[error0];
- p1->error = error_node->errors[error1];
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
- n_left_to_next, pi0, pi1, next0,
- next1);
- }
-
while (n_left_from > 0 && n_left_to_next > 0)
{
u32 pi0;
@@ -359,12 +184,13 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
ip6_header_t *ip6h0;
u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
u32 map_domain_index0 = ~0;
+ u32 *buffer0 = 0;
+ bool free_original_buffer0 = false;
+ u32 *frag_from0, frag_left0;
pi0 = to_next[0] = from[0];
from += 1;
n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
p0 = vlib_get_buffer (vm, pi0);
ip40 = vlib_buffer_get_current (p0);
@@ -413,7 +239,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
/*
* Determine next node. Can be one of:
- * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
+ * ip6-lookup, ip6-rewrite, error-drop
*/
if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
{
@@ -422,7 +248,14 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
&& (clib_net_to_host_u16 (ip6h0->payload_length) +
sizeof (*ip6h0) > d0->mtu)))
{
- next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
+ next0 =
+ ip4_map_fragment (vm, pi0, d0->mtu, df0, &buffer0,
+ &error0);
+
+ if (error0 == MAP_ERROR_NONE)
+ {
+ free_original_buffer0 = true;
+ }
}
else
{
@@ -450,8 +283,41 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
p0->error = error_node->errors[error0];
exit:
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, pi0, next0);
+ /* Send fragments that were added in the frame */
+ if (free_original_buffer0)
+ {
+ vlib_buffer_free_one (vm, pi0); /* Free original packet */
+ }
+ else
+ {
+ vec_add1 (buffer0, pi0);
+ }
+
+ frag_from0 = buffer0;
+ frag_left0 = vec_len (buffer0);
+
+ while (frag_left0 > 0)
+ {
+ while (frag_left0 > 0 && n_left_to_next > 0)
+ {
+ u32 i0;
+ i0 = to_next[0] = frag_from0[0];
+ frag_from0 += 1;
+ frag_left0 -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_get_buffer (vm, i0)->error =
+ error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ i0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ vec_reset_length (buffer0);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
@@ -491,8 +357,6 @@ VLIB_REGISTER_NODE(ip4_map_node) = {
#ifdef MAP_SKIP_IP6_LOOKUP
[IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance",
#endif
- [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
- [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag",
[IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
[IP4_MAP_NEXT_DROP] = "error-drop",
},
diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c
index 621fb0615dc..c254efc78b9 100644
--- a/src/plugins/map/ip4_map_t.c
+++ b/src/plugins/map/ip4_map_t.c
@@ -168,7 +168,7 @@ ip4_map_t_icmp (vlib_main_t * vm,
if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
{
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
- vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+ vnet_buffer (p0)->ip_frag.next_index = IP_FRAG_NEXT_IP6_LOOKUP;
next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG;
}
err0:
@@ -287,7 +287,7 @@ ip4_map_t_fragmented (vlib_main_t * vm,
{
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
vnet_buffer (p0)->ip_frag.next_index =
- IP6_FRAG_NEXT_IP6_LOOKUP;
+ IP_FRAG_NEXT_IP6_LOOKUP;
next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG;
}
}
@@ -453,7 +453,7 @@ ip4_map_t_tcp_udp (vlib_main_t * vm,
//Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
vnet_buffer (p0)->ip_frag.next_index =
- IP6_FRAG_NEXT_IP6_LOOKUP;
+ IP_FRAG_NEXT_IP6_LOOKUP;
next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
}
}
diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c
index 96f81efc1cb..47958f92a38 100644
--- a/src/plugins/map/ip6_map.c
+++ b/src/plugins/map/ip6_map.c
@@ -314,7 +314,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
vnet_buffer (p0)->ip_frag.flags = 0;
vnet_buffer (p0)->ip_frag.next_index =
- IP4_FRAG_NEXT_IP4_LOOKUP;
+ IP_FRAG_NEXT_IP4_LOOKUP;
vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
}
@@ -346,7 +346,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
vnet_buffer (p1)->ip_frag.flags = 0;
vnet_buffer (p1)->ip_frag.next_index =
- IP4_FRAG_NEXT_IP4_LOOKUP;
+ IP_FRAG_NEXT_IP4_LOOKUP;
vnet_buffer (p1)->ip_frag.mtu = d1->mtu;
next1 = IP6_MAP_NEXT_IP4_FRAGMENT;
}
@@ -497,7 +497,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
vnet_buffer (p0)->ip_frag.flags = 0;
vnet_buffer (p0)->ip_frag.next_index =
- IP4_FRAG_NEXT_IP4_LOOKUP;
+ IP_FRAG_NEXT_IP4_LOOKUP;
vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
}
@@ -622,7 +622,7 @@ ip6_map_post_ip4_reass (vlib_main_t * vm,
&& error0 == MAP_ERROR_NONE))
{
vnet_buffer (p0)->ip_frag.flags = 0;
- vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ vnet_buffer (p0)->ip_frag.next_index = IP_FRAG_NEXT_IP4_LOOKUP;
vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT;
}
diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c
index 6e9c0d7640c..ef7b91349e5 100644
--- a/src/plugins/map/ip6_map_t.c
+++ b/src/plugins/map/ip6_map_t.c
@@ -169,7 +169,7 @@ ip6_map_t_icmp (vlib_main_t * vm,
{
// Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
- vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ vnet_buffer (p0)->ip_frag.next_index = IP_FRAG_NEXT_IP4_LOOKUP;
next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG;
}
err0:
@@ -288,7 +288,7 @@ ip6_map_t_fragmented (vlib_main_t * vm,
// Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
vnet_buffer (p0)->ip_frag.next_index =
- IP4_FRAG_NEXT_IP4_LOOKUP;
+ IP_FRAG_NEXT_IP4_LOOKUP;
next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
}
}
@@ -441,7 +441,7 @@ ip6_map_t_tcp_udp (vlib_main_t * vm,
// Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
vnet_buffer (p0)->ip_frag.next_index =
- IP4_FRAG_NEXT_IP4_LOOKUP;
+ IP_FRAG_NEXT_IP4_LOOKUP;
next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
}
}
diff --git a/src/plugins/map/test/test_map.py b/src/plugins/map/test/test_map.py
index a7e5f165576..c1fe05e1150 100644
--- a/src/plugins/map/test/test_map.py
+++ b/src/plugins/map/test/test_map.py
@@ -140,7 +140,7 @@ class TestMAP(VppTestCase):
IP(src=self.pg0.remote_ip4, dst=self.pg0.remote_ip4) /
UDP(sport=20000, dport=10000) /
Raw(b'\xa5' * 100))
- rx = self.send_and_expect(self.pg0, v4*1, self.pg0)
+ rx = self.send_and_expect(self.pg0, v4 * 4, self.pg0)
v4_reply = v4[1]
v4_reply.ttl -= 1
for p in rx:
@@ -154,7 +154,7 @@ class TestMAP(VppTestCase):
UDP(sport=20000, dport=10000) /
Raw(b'\xa5' * 100))
- self.send_and_assert_encapped_one(v4, "3000::1", map_translated_addr)
+ self.send_and_assert_encapped(v4 * 4, "3000::1", map_translated_addr)
#
# Verify reordered fragments are able to pass as well
@@ -294,6 +294,76 @@ class TestMAP(VppTestCase):
pre_res_route.remove_vpp_config()
self.vapi.ppcli("map params pre-resolve del ip6-nh 4001::1")
+ def test_map_e_inner_frag(self):
+ """ MAP-E Inner fragmentation """
+
+ #
+ # Add a route to the MAP-BR
+ #
+ map_br_pfx = "2001::"
+ map_br_pfx_len = 32
+ map_route = VppIpRoute(self,
+ map_br_pfx,
+ map_br_pfx_len,
+ [VppRoutePath(self.pg1.remote_ip6,
+ self.pg1.sw_if_index)])
+ map_route.add_vpp_config()
+
+ #
+ # Add a domain that maps from pg0 to pg1
+ #
+ map_dst = '2001::/32'
+ map_src = '3000::1/128'
+ client_pfx = '192.168.0.0/16'
+ map_translated_addr = '2001:0:101:7000:0:c0a8:101:7'
+ tag = 'MAP-E tag.'
+ self.vapi.map_add_domain(ip4_prefix=client_pfx,
+ ip6_prefix=map_dst,
+ ip6_src=map_src,
+ ea_bits_len=20,
+ psid_offset=4,
+ psid_length=4,
+ mtu=1000,
+ tag=tag)
+
+ # Enable MAP on interface.
+ self.vapi.map_if_enable_disable(is_enable=1,
+ sw_if_index=self.pg0.sw_if_index,
+ is_translation=0)
+
+ # Enable inner fragmentation
+ self.vapi.map_param_set_fragmentation(inner=1)
+
+ v4 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
+ IP(src=self.pg0.remote_ip4, dst='192.168.1.1') /
+ UDP(sport=20000, dport=10000) /
+ Raw(b'\xa5' * 1300))
+
+ self.pg_send(self.pg0, v4*1)
+ rx = self.pg1.get_capture(2)
+
+ frags = fragment_rfc791(v4[1], 1000)
+ frags[0].id = 0
+ frags[1].id = 0
+ frags[0].ttl -= 1
+ frags[1].ttl -= 1
+ frags[0].chksum = 0
+ frags[1].chksum = 0
+
+ v6_reply1 = (IPv6(src='3000::1', dst=map_translated_addr, hlim=63) /
+ frags[0])
+ v6_reply2 = (IPv6(src='3000::1', dst=map_translated_addr, hlim=63) /
+ frags[1])
+ rx[0][1].fl = 0
+ rx[1][1].fl = 0
+ rx[0][1][IP].id = 0
+ rx[1][1][IP].id = 0
+ rx[0][1][IP].chksum = 0
+ rx[1][1][IP].chksum = 0
+
+ self.validate(rx[0][1], v6_reply1)
+ self.validate(rx[1][1], v6_reply2)
+
def validate(self, rx, expected):
self.assertEqual(rx, expected.__class__(scapy.compat.raw(expected)))
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 1550b313915..44a681926b3 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -2293,8 +2293,8 @@ typedef enum
always_inline void
ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
- u16 adj_packet_bytes, bool df, u16 * next, u32 * error,
- u8 is_midchain)
+ u16 adj_packet_bytes, bool df, u16 * next,
+ u8 is_midchain, u32 * error)
{
if (packet_len > adj_packet_bytes)
{
@@ -2312,8 +2312,8 @@ ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
/* IP fragmentation */
ip_frag_set_vnet_buffer (b, adj_packet_bytes,
(is_midchain ?
- IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN :
- IP4_FRAG_NEXT_IP4_REWRITE), 0);
+ IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
+ IP_FRAG_NEXT_IP_REWRITE), 0);
*next = IP4_REWRITE_NEXT_FRAGMENT;
}
}
@@ -2486,12 +2486,12 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 0, &error0, is_midchain);
+ next + 0, is_midchain, &error0);
ip4_mtu_check (b[1], ip1_len,
adj1[0].rewrite_header.max_l3_packet_bytes,
ip1->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 1, &error1, is_midchain);
+ next + 1, is_midchain, &error1);
if (is_mcast)
{
@@ -2660,7 +2660,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 0, &error0, is_midchain);
+ next + 0, is_midchain, &error0);
if (is_mcast)
{
@@ -2758,7 +2758,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 0, &error0, is_midchain);
+ next + 0, is_midchain, &error0);
if (is_mcast)
{
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 50de501fe0d..9656621c13b 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -1652,7 +1652,7 @@ typedef enum
always_inline void
ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
u16 adj_packet_bytes, bool is_locally_generated,
- u32 * next, u32 * error)
+ u32 * next, u8 is_midchain, u32 * error)
{
if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes)
{
@@ -1660,7 +1660,9 @@ ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
{
/* IP fragmentation */
ip_frag_set_vnet_buffer (b, adj_packet_bytes,
- IP6_FRAG_NEXT_IP6_REWRITE, 0);
+ (is_midchain ?
+ IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
+ IP_FRAG_NEXT_IP_REWRITE), 0);
*next = IP6_REWRITE_NEXT_FRAGMENT;
*error = IP6_ERROR_MTU_EXCEEDED;
}
@@ -1840,10 +1842,12 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
ip6_mtu_check (p0, ip0_len,
adj0[0].rewrite_header.max_l3_packet_bytes,
- is_locally_originated0, &next0, &error0);
+ is_locally_originated0, &next0, is_midchain,
+ &error0);
ip6_mtu_check (p1, ip1_len,
adj1[0].rewrite_header.max_l3_packet_bytes,
- is_locally_originated1, &next1, &error1);
+ is_locally_originated1, &next1, is_midchain,
+ &error1);
/* Don't adjust the buffer for hop count issue; icmp-error node
* wants to see the IP header */
@@ -2011,7 +2015,8 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
ip6_mtu_check (p0, ip0_len,
adj0[0].rewrite_header.max_l3_packet_bytes,
- is_locally_originated0, &next0, &error0);
+ is_locally_originated0, &next0, is_midchain,
+ &error0);
/* Don't adjust the buffer for hop count issue; icmp-error node
* wants to see the IP header */
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
index 54efb63c986..9aa8777514f 100644
--- a/src/vnet/ip/ip_frag.c
+++ b/src/vnet/ip/ip_frag.c
@@ -23,26 +23,6 @@
#include <vnet/ip/ip.h>
-/*
- * Copy the mpls header if present.
- * The current is pointing to the ip header.
- * Adjust the buffer and point to the mpls headers on these fragments
- * before sending the packet back to mpls-output node.
- */
-static inline void
-copy_mpls_hdr (vlib_buffer_t * to_b, vlib_buffer_t * from_b)
-{
- if ((vnet_buffer (from_b)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER)
- {
- u8 mpls_hdr_length = vnet_buffer (from_b)->mpls.mpls_hdr_length;
- u8 *org_from_mpls_packet =
- from_b->data + (from_b->current_data - mpls_hdr_length);
- clib_memcpy_fast ((to_b->data - mpls_hdr_length), org_from_mpls_packet,
- mpls_hdr_length);
- vlib_buffer_advance (to_b, -vnet_buffer (to_b)->mpls.mpls_hdr_length);
- }
-}
-
typedef struct
{
u8 ipv6;
@@ -87,14 +67,6 @@ frag_set_sw_if_index (vlib_buffer_t * to, vlib_buffer_t * from)
vnet_buffer2 (to)->qos = vnet_buffer2 (from)->qos;
to->flags |= VNET_BUFFER_F_QOS_DATA_VALID;
}
-
- /* Copy mpls opaque data */
- if ((vnet_buffer (from)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER)
- {
- vnet_buffer (to)->mpls.pyld_proto = vnet_buffer (from)->mpls.pyld_proto;
- vnet_buffer (to)->mpls.mpls_hdr_length =
- vnet_buffer (from)->mpls.mpls_hdr_length;
- }
}
static vlib_buffer_t *
@@ -116,20 +88,20 @@ frag_buffer_alloc (vlib_buffer_t * org_b, u32 * bi)
* but does not generate buffer chains. I.e. a fragment is always
* contained with in a single buffer and limited to the max buffer
* size.
+ * from_bi: current pointer must point to IPv4 header
*/
-void
-ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
- ip_frag_error_t * error)
+ip_frag_error_t
+ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
+ u16 l2unfragmentablesize, u32 ** buffer)
{
vlib_buffer_t *from_b;
ip4_header_t *ip4;
- u16 mtu, len, max, rem, ip_frag_id, ip_frag_offset;
+ u16 len, max, rem, ip_frag_id, ip_frag_offset;
u8 *org_from_packet, more;
from_b = vlib_get_buffer (vm, from_bi);
- mtu = vnet_buffer (from_b)->ip_frag.mtu;
org_from_packet = vlib_buffer_get_current (from_b);
- ip4 = (ip4_header_t *) vlib_buffer_get_current (from_b);
+ ip4 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t);
max =
@@ -139,21 +111,18 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
if (rem >
(vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t)))
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
if (mtu < sizeof (ip4_header_t))
{
- *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
- return;
+ return IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
}
if (ip4->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT))
{
- *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
- return;
+ return IP_FRAG_ERROR_DONT_FRAGMENT_SET;
}
if (ip4_is_fragment (ip4))
@@ -174,7 +143,8 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
u8 *from_data = (void *) (ip4 + 1);
vlib_buffer_t *org_from_b = from_b;
u16 fo = 0;
- u16 left_in_from_buffer = from_b->current_length - sizeof (ip4_header_t);
+ u16 left_in_from_buffer =
+ from_b->current_length - (l2unfragmentablesize + sizeof (ip4_header_t));
u16 ptr = 0;
/* Do the actual fragmentation */
@@ -190,17 +160,19 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
len &= ~0x7;
if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
{
- *error = IP_FRAG_ERROR_MEMORY;
- return;
+ return IP_FRAG_ERROR_MEMORY;
}
vec_add1 (*buffer, to_bi);
frag_set_sw_if_index (to_b, org_from_b);
/* Copy ip4 header */
- clib_memcpy_fast (to_b->data, org_from_packet, sizeof (ip4_header_t));
- to_ip4 = vlib_buffer_get_current (to_b);
+ to_data = vlib_buffer_get_current (to_b);
+ clib_memcpy_fast (to_data, org_from_packet,
+ l2unfragmentablesize + sizeof (ip4_header_t));
+ to_ip4 = (ip4_header_t *) (to_data + l2unfragmentablesize);
to_data = (void *) (to_ip4 + 1);
vnet_buffer (to_b)->l3_hdr_offset = to_b->current_data;
+ vlib_buffer_copy_trace_flag (vm, from_b, to_bi);
to_b->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
if (from_b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID)
@@ -232,8 +204,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
/* Move buffer */
if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
from_b = vlib_get_buffer (vm, from_b->next_buffer);
from_data = (u8 *) vlib_buffer_get_current (from_b);
@@ -242,8 +213,9 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
to_ptr += bytes_to_copy;
}
- to_b->current_length = len + sizeof (ip4_header_t);
to_b->flags |= VNET_BUFFER_F_IS_IP4;
+ to_b->current_length =
+ len + sizeof (ip4_header_t) + l2unfragmentablesize;
to_ip4->fragment_id = ip_frag_id;
to_ip4->flags_and_fragment_offset =
@@ -256,31 +228,11 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
/* we've just done the IP checksum .. */
to_b->flags &= ~VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
- if (vnet_buffer (org_from_b)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
- {
- /* Encapsulating ipv4 header */
- ip4_header_t *encap_header4 =
- (ip4_header_t *) vlib_buffer_get_current (to_b);
- encap_header4->length = clib_host_to_net_u16 (to_b->current_length);
- encap_header4->checksum = ip4_header_checksum (encap_header4);
- }
- else if (vnet_buffer (org_from_b)->
- ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
- {
- /* Encapsulating ipv6 header */
- ip6_header_t *encap_header6 =
- (ip6_header_t *) vlib_buffer_get_current (to_b);
- encap_header6->payload_length =
- clib_host_to_net_u16 (to_b->current_length -
- sizeof (*encap_header6));
- }
-
- /* Copy mpls header if present */
- copy_mpls_hdr (to_b, org_from_b);
-
rem -= len;
fo += len;
}
+
+ return IP_FRAG_ERROR_NONE;
}
void
@@ -322,19 +274,19 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
pi0 = from[0];
from += 1;
n_left_from -= 1;
- error0 = IP_FRAG_ERROR_NONE;
p0 = vlib_get_buffer (vm, pi0);
+ u16 mtu = vnet_buffer (p0)->ip_frag.mtu;
if (is_ip6)
- ip6_frag_do_fragment (vm, pi0, &buffer, &error0);
+ error0 = ip6_frag_do_fragment (vm, pi0, mtu, 0, &buffer);
else
- ip4_frag_do_fragment (vm, pi0, &buffer, &error0);
+ error0 = ip4_frag_do_fragment (vm, pi0, mtu, 0, &buffer);
if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
{
ip_frag_trace_t *tr =
vlib_add_trace (vm, node, p0, sizeof (*tr));
- tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
+ tr->mtu = mtu;
tr->ipv6 = is_ip6 ? 1 : 0;
tr->n_fragments = vec_len (buffer);
tr->next = vnet_buffer (p0)->ip_frag.next_index;
@@ -345,20 +297,13 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
vnet_buffer (p0)->ip_frag.mtu);
- next0 = IP4_FRAG_NEXT_ICMP_ERROR;
+ next0 = IP_FRAG_NEXT_ICMP_ERROR;
}
else
{
- if (is_ip6)
- next0 =
- (error0 ==
- IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
- ip_frag.next_index : IP6_FRAG_NEXT_DROP;
- else
- next0 =
- (error0 ==
- IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
- ip_frag.next_index : IP4_FRAG_NEXT_DROP;
+ next0 = (error0 == IP_FRAG_ERROR_NONE ?
+ vnet_buffer (p0)->ip_frag.next_index :
+ IP_FRAG_NEXT_DROP);
}
if (error0 == IP_FRAG_ERROR_NONE)
@@ -431,18 +376,20 @@ ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
/*
* Fragments the packet given in from_bi. Fragments are returned in the buffer vector.
* Caller must ensure the original packet is freed.
+ * from_bi: current pointer must point to IPv6 header
*/
-void
-ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
- ip_frag_error_t * error)
+ip_frag_error_t
+ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
+ u16 l2unfragmentablesize, u32 ** buffer)
{
vlib_buffer_t *from_b;
ip6_header_t *ip6;
- u16 mtu, len, max, rem, ip_frag_id;
+ u16 len, max, rem, ip_frag_id;
+ u8 *org_from_packet;
from_b = vlib_get_buffer (vm, from_bi);
- mtu = vnet_buffer (from_b)->ip_frag.mtu;
- ip6 = (ip6_header_t *) vlib_buffer_get_current (from_b);
+ org_from_packet = vlib_buffer_get_current (from_b);
+ ip6 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
rem = clib_net_to_host_u16 (ip6->payload_length);
max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7; // TODO: Is max correct??
@@ -450,21 +397,20 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
if (rem >
(vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t)))
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
/* TODO: Look through header chain for fragmentation header */
if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
u8 *from_data = (void *) (ip6 + 1);
vlib_buffer_t *org_from_b = from_b;
u16 fo = 0;
- u16 left_in_from_buffer = from_b->current_length - sizeof (ip6_header_t);
+ u16 left_in_from_buffer =
+ from_b->current_length - (l2unfragmentablesize + sizeof (ip6_header_t));
u16 ptr = 0;
ip_frag_id = ++running_fragment_id; // Fix
@@ -485,14 +431,14 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
len &= ~0x7;
if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
{
- *error = IP_FRAG_ERROR_MEMORY;
- return;
+ return IP_FRAG_ERROR_MEMORY;
}
vec_add1 (*buffer, to_bi);
frag_set_sw_if_index (to_b, org_from_b);
/* Copy ip6 header */
- clib_memcpy_fast (to_b->data, ip6, sizeof (ip6_header_t));
+ clib_memcpy_fast (to_b->data, org_from_packet,
+ l2unfragmentablesize + sizeof (ip6_header_t));
to_ip6 = vlib_buffer_get_current (to_b);
to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1);
to_data = (void *) (to_frag_hdr + 1);
@@ -530,8 +476,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
/* Move buffer */
if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
from_b = vlib_get_buffer (vm, from_b->next_buffer);
from_data = (u8 *) vlib_buffer_get_current (from_b);
@@ -551,12 +496,11 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
to_frag_hdr->next_hdr = ip6->protocol;
to_frag_hdr->rsv = 0;
- /* Copy mpls header if present */
- copy_mpls_hdr (to_b, org_from_b);
-
rem -= len;
fo += len;
}
+
+ return IP_FRAG_ERROR_NONE;
}
static char *ip4_frag_error_strings[] = {
@@ -576,15 +520,14 @@ VLIB_REGISTER_NODE (ip4_frag_node) = {
.n_errors = IP_FRAG_N_ERROR,
.error_strings = ip4_frag_error_strings,
- .n_next_nodes = IP4_FRAG_N_NEXT,
+ .n_next_nodes = IP_FRAG_N_NEXT,
.next_nodes = {
- [IP4_FRAG_NEXT_IP4_REWRITE] = "ip4-rewrite",
- [IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN] = "ip4-midchain",
- [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [IP4_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output",
- [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [IP4_FRAG_NEXT_DROP] = "ip4-drop"
+ [IP_FRAG_NEXT_IP_REWRITE] = "ip4-rewrite",
+ [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip4-midchain",
+ [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [IP_FRAG_NEXT_DROP] = "ip4-drop"
},
};
/* *INDENT-ON* */
@@ -600,14 +543,14 @@ VLIB_REGISTER_NODE (ip6_frag_node) = {
.n_errors = IP_FRAG_N_ERROR,
.error_strings = ip4_frag_error_strings,
- .n_next_nodes = IP6_FRAG_N_NEXT,
+ .n_next_nodes = IP_FRAG_N_NEXT,
.next_nodes = {
- [IP6_FRAG_NEXT_IP6_REWRITE] = "ip6-rewrite",
- [IP6_FRAG_NEXT_IP6_REWRITE_MIDCHAIN] = "ip6-midchain",
- [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [IP6_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output",
- [IP6_FRAG_NEXT_DROP] = "ip6-drop"
+ [IP_FRAG_NEXT_IP_REWRITE] = "ip6-rewrite",
+ [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip6-midchain",
+ [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop",
+ [IP_FRAG_NEXT_DROP] = "ip6-drop"
},
};
/* *INDENT-ON* */
diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h
index ce4236b8465..86462e6c7d2 100644
--- a/src/vnet/ip/ip_frag.h
+++ b/src/vnet/ip/ip_frag.h
@@ -39,7 +39,6 @@
#define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header
#define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header
-#define IP_FRAG_FLAG_MPLS_HEADER 0x04 //Encapsulating MPLS header
#define IP4_FRAG_NODE_NAME "ip4-frag"
#define IP6_FRAG_NODE_NAME "ip6-frag"
@@ -49,26 +48,14 @@ extern vlib_node_registration_t ip6_frag_node;
typedef enum
{
- IP4_FRAG_NEXT_IP4_REWRITE,
- IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN,
- IP4_FRAG_NEXT_IP4_LOOKUP,
- IP4_FRAG_NEXT_IP6_LOOKUP,
- IP4_FRAG_NEXT_MPLS_OUTPUT,
- IP4_FRAG_NEXT_ICMP_ERROR,
- IP4_FRAG_NEXT_DROP,
- IP4_FRAG_N_NEXT
-} ip4_frag_next_t;
-
-typedef enum
-{
- IP6_FRAG_NEXT_IP4_LOOKUP,
- IP6_FRAG_NEXT_IP6_LOOKUP,
- IP6_FRAG_NEXT_IP6_REWRITE,
- IP6_FRAG_NEXT_IP6_REWRITE_MIDCHAIN,
- IP6_FRAG_NEXT_MPLS_OUTPUT,
- IP6_FRAG_NEXT_DROP,
- IP6_FRAG_N_NEXT
-} ip6_frag_next_t;
+ IP_FRAG_NEXT_IP_REWRITE,
+ IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN,
+ IP_FRAG_NEXT_IP4_LOOKUP,
+ IP_FRAG_NEXT_IP6_LOOKUP,
+ IP_FRAG_NEXT_ICMP_ERROR,
+ IP_FRAG_NEXT_DROP,
+ IP_FRAG_N_NEXT
+} ip_frag_next_t;
#define foreach_ip_frag_error \
/* Must be first. */ \
@@ -91,12 +78,16 @@ typedef enum
void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu,
u8 next_index, u8 flags);
-void
-ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
- ip_frag_error_t * error);
-void
-ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
- ip_frag_error_t * error);
+
+extern ip_frag_error_t ip4_frag_do_fragment (vlib_main_t * vm,
+ u32 from_bi,
+ u16 mtu,
+ u16 encapsize, u32 ** buffer);
+extern ip_frag_error_t ip6_frag_do_fragment (vlib_main_t * vm,
+ u32 from_bi,
+ u16 mtu,
+ u16 encapsize, u32 ** buffer);
+
#endif /* ifndef IP_FRAG_H */
/*
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index 5ede22aa410..247f531df9f 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -34,8 +34,7 @@ typedef enum {
#define foreach_mpls_output_next \
_(DROP, "error-drop") \
-_(IP4_FRAG, "ip4-frag") \
-_(IP6_FRAG, "ip6-frag")
+_(FRAG, "mpls-frag")
typedef enum {
#define _(s,n) MPLS_OUTPUT_NEXT_##s,
@@ -58,31 +57,6 @@ format_mpls_output_trace (u8 * s, va_list * args)
return s;
}
-/*
- * Save the mpls header length and adjust the current to ip header
- */
-static inline u32
-set_mpls_fragmentation(vlib_buffer_t * p0, ip_adjacency_t * adj0)
-{
- u32 next0;
-
- /* advance size of (all) mpls header to ip header before fragmenting */
- /* save the current pointing to first mpls header. */
- vnet_buffer (p0)->mpls.mpls_hdr_length = vnet_buffer(p0)->l3_hdr_offset - p0->current_data;
- vlib_buffer_advance (p0, vnet_buffer (p0)->mpls.mpls_hdr_length);
-
- /* IP fragmentation */
- ip_frag_set_vnet_buffer (p0, adj0[0].rewrite_header.max_l3_packet_bytes,
- IP4_FRAG_NEXT_MPLS_OUTPUT,
- ((vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4) ? IP_FRAG_FLAG_IP4_HEADER:IP_FRAG_FLAG_IP6_HEADER));
-
- /* Tell ip_frag to retain certain mpls parameters after fragmentation of mpls packet */
- vnet_buffer (p0)->ip_frag.flags = (vnet_buffer (p0)->ip_frag.flags | IP_FRAG_FLAG_MPLS_HEADER);
- next0 = (vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4)? MPLS_OUTPUT_NEXT_IP4_FRAG:MPLS_OUTPUT_NEXT_IP6_FRAG;
-
- return next0;
-}
-
static inline uword
mpls_output_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -196,7 +170,7 @@ mpls_output_inline (vlib_main_t * vm,
else
{
error0 = IP4_ERROR_MTU_EXCEEDED;
- next0 = set_mpls_fragmentation (p0, adj0);
+ next0 = MPLS_OUTPUT_NEXT_FRAG;
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_NEED_FRAG,
1);
@@ -219,7 +193,7 @@ mpls_output_inline (vlib_main_t * vm,
else
{
error1 = IP4_ERROR_MTU_EXCEEDED;
- next1 = set_mpls_fragmentation (p1, adj1);
+ next1 = MPLS_OUTPUT_NEXT_FRAG;
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_NEED_FRAG,
1);
@@ -308,7 +282,7 @@ mpls_output_inline (vlib_main_t * vm,
else
{
error0 = IP4_ERROR_MTU_EXCEEDED;
- next0 = set_mpls_fragmentation (p0, adj0);
+ next0 = MPLS_OUTPUT_NEXT_FRAG;
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_NEED_FRAG,
1);
@@ -371,11 +345,9 @@ VLIB_REGISTER_NODE (mpls_output_node) = {
.n_next_nodes = MPLS_OUTPUT_N_NEXT,
.next_nodes = {
-#define _(s,n) [MPLS_OUTPUT_NEXT_##s] = n,
- foreach_mpls_output_next
-#undef _
- },
-
+ [MPLS_OUTPUT_NEXT_DROP] = "mpls-drop",
+ [MPLS_OUTPUT_NEXT_FRAG] = "mpls-frag",
+ },
.format_trace = format_mpls_output_trace,
};
@@ -390,12 +362,184 @@ VLIB_REGISTER_NODE (mpls_midchain_node) = {
.name = "mpls-midchain",
.vector_size = sizeof (u32),
- .format_trace = format_mpls_output_trace,
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
.sibling_of = "mpls-output",
+ .format_trace = format_mpls_output_trace,
};
-/**
+static char *mpls_frag_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip_frag_error
+#undef _
+};
+
+typedef struct mpls_frag_trace_t_
+{
+ u16 pkt_size;
+ u16 mtu;
+} mpls_frag_trace_t;
+
+typedef enum
+{
+ MPLS_FRAG_NEXT_REWRITE,
+ MPLS_FRAG_NEXT_REWRITE_MIDCHAIN,
+ MPLS_FRAG_NEXT_ICMP_ERROR,
+ MPLS_FRAG_NEXT_DROP,
+ MPLS_FRAG_N_NEXT,
+} mpls_frag_next_t;
+
+static uword
+mpls_frag (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, next_index, * from, * to_next, n_left_to_next, *frags;
+ vlib_node_runtime_t * error_node;
+
+ error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ frags = NULL;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_adjacency_t * adj0;
+ vlib_buffer_t * p0;
+ mpls_frag_next_t next0;
+ u32 pi0, adj_index0;
+ ip_frag_error_t error0 = IP_FRAG_ERROR_NONE;
+ i16 encap_size;
+ u8 is_ip4;
+
+ pi0 = to_next[0] = from[0];
+ p0 = vlib_get_buffer (vm, pi0);
+ from += 1;
+ n_left_from -= 1;
+ is_ip4 = vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4;
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get(adj_index0);
+
+ /* the size of the MPLS stack */
+ encap_size = vnet_buffer(p0)->l3_hdr_offset - p0->current_data;
+
+ /* IP fragmentation */
+ if (is_ip4)
+ error0 = ip4_frag_do_fragment (vm, pi0,
+ adj0->rewrite_header.max_l3_packet_bytes,
+ encap_size, &frags);
+ else
+ error0 = ip6_frag_do_fragment (vm, pi0,
+ adj0->rewrite_header.max_l3_packet_bytes,
+ encap_size, &frags);
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_frag_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->mtu = adj0->rewrite_header.max_l3_packet_bytes;
+ tr->pkt_size = vlib_buffer_length_in_chain(vm, p0);
+ }
+
+ if (PREDICT_TRUE(error0 == IP_FRAG_ERROR_NONE))
+ {
+ /* Free original buffer chain */
+ vlib_buffer_free_one (vm, pi0); /* Free original packet */
+ next0 = (IP_LOOKUP_NEXT_MIDCHAIN == adj0->lookup_next_index ?
+ MPLS_FRAG_NEXT_REWRITE_MIDCHAIN :
+ MPLS_FRAG_NEXT_REWRITE);
+ }
+ else if (is_ip4 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
+ {
+ icmp4_error_set_vnet_buffer (
+ p0, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+ vnet_buffer (p0)->ip_frag.mtu);
+ next0 = MPLS_FRAG_NEXT_ICMP_ERROR;
+ }
+ else
+ {
+ vlib_error_count (vm, next_index, error0, 1);
+ vec_add1 (frags, pi0); /* Get rid of the original buffer */
+ next0 = MPLS_FRAG_NEXT_DROP;
+ }
+
+ /* Send fragments that were added in the frame */
+ u32 *frag_from, frag_left;
+
+ frag_from = frags;
+ frag_left = vec_len (frags);
+
+ while (frag_left > 0)
+ {
+ while (frag_left > 0 && n_left_to_next > 0)
+ {
+ u32 i;
+ i = to_next[0] = frag_from[0];
+ frag_from += 1;
+ frag_left -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, i);
+ p0->error = error_node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, i,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ vec_reset_length (frags);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vec_free (frags);
+
+ return frame->n_vectors;
+}
+
+static u8 *
+format_mpls_frag_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_frag_trace_t *t = va_arg (*args, mpls_frag_trace_t *);
+
+ s = format (s, "mtu:%d pkt-size:%d", t->mtu, t->pkt_size);
+ return s;
+}
+
+VLIB_REGISTER_NODE (mpls_frag_node) = {
+ .function = mpls_frag,
+ .name = "mpls-frag",
+ .vector_size = sizeof (u32),
+ .format_trace = format_mpls_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_strings = mpls_frag_error_strings,
+
+ .n_next_nodes = MPLS_FRAG_N_NEXT,
+ .next_nodes = {
+ [MPLS_FRAG_NEXT_REWRITE] = "mpls-output",
+ [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain",
+ [MPLS_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [MPLS_FRAG_NEXT_DROP] = "mpls-drop"
+ },
+};
+
+/*
* @brief Next index values from the MPLS incomplete adj node
*/
#define foreach_mpls_adj_incomplete_next \
diff --git a/test/test_mpls.py b/test/test_mpls.py
index ebeea5fb367..8ed047df5f4 100644
--- a/test/test_mpls.py
+++ b/test/test_mpls.py
@@ -154,7 +154,8 @@ class TestMPLS(VppTestCase):
pkts.append(p)
return pkts
- def create_stream_ip4(self, src_if, dst_ip, ip_ttl=64, ip_dscp=0):
+ def create_stream_ip4(self, src_if, dst_ip, ip_ttl=64,
+ ip_dscp=0, payload_size=None):
self.reset_packet_infos()
pkts = []
for i in range(0, 257):
@@ -166,6 +167,8 @@ class TestMPLS(VppTestCase):
UDP(sport=1234, dport=1234) /
Raw(payload))
info.data = p.copy()
+ if payload_size:
+ self.extend_packet(p, payload_size)
pkts.append(p)
return pkts
@@ -911,7 +914,7 @@ class TestMPLS(VppTestCase):
""" MPLS Tunnel Tests - Pipe """
#
- # Create a tunnel with a single out label
+ # Create a tunnel with two out labels
#
mpls_tun = VppMPLSTunnelInterface(
self,
@@ -964,6 +967,38 @@ class TestMPLS(VppTestCase):
VppMplsLabel(46),
VppMplsLabel(33, ttl=255)])
+ #
+ # change tunnel's MTU to a low value
+ #
+ mpls_tun.set_l3_mtu(1200)
+
+ # send IP into the tunnel to be fragmented
+ tx = self.create_stream_ip4(self.pg0, "10.0.0.3",
+ payload_size=1500)
+ rx = self.send_and_expect(self.pg0, tx, self.pg0, len(tx)*2)
+
+ fake_tx = []
+ for p in tx:
+ fake_tx.append(p)
+ fake_tx.append(p)
+ self.verify_capture_tunneled_ip4(self.pg0, rx, fake_tx,
+ [VppMplsLabel(44),
+ VppMplsLabel(46)])
+
+ # send MPLS into the tunnel to be fragmented
+ tx = self.create_stream_ip4(self.pg0, "10.0.0.4",
+ payload_size=1500)
+ rx = self.send_and_expect(self.pg0, tx, self.pg0, len(tx)*2)
+
+ fake_tx = []
+ for p in tx:
+ fake_tx.append(p)
+ fake_tx.append(p)
+ self.verify_capture_tunneled_ip4(self.pg0, rx, fake_tx,
+ [VppMplsLabel(44),
+ VppMplsLabel(46),
+ VppMplsLabel(33, ttl=255)])
+
def test_tunnel_uniform(self):
""" MPLS Tunnel Tests - Uniform """
diff --git a/test/vpp_interface.py b/test/vpp_interface.py
index 431a03a6858..a5f6f45fdaf 100644
--- a/test/vpp_interface.py
+++ b/test/vpp_interface.py
@@ -495,3 +495,15 @@ class VppInterface(object):
def get_tx_stats(self):
c = self.test.statistics.get_counter("^/if/tx$")
return c[0][self.sw_if_index]
+
+ def set_l3_mtu(self, mtu):
+ self.test.vapi.sw_interface_set_mtu(self.sw_if_index, [mtu, 0, 0, 0])
+
+ def set_ip4_mtu(self, mtu):
+ self.test.vapi.sw_interface_set_mtu(self.sw_if_index, [0, mtu, 0, 0])
+
+ def set_ip6_mtu(self, mtu):
+ self.test.vapi.sw_interface_set_mtu(self.sw_if_index, [0, 0, mtu, 0])
+
+ def set_mpls_mtu(self, mtu):
+ self.test.vapi.sw_interface_set_mtu(self.sw_if_index, [0, 0, 0, mtu])