aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet
diff options
context:
space:
mode:
authorOle Troan <ot@cisco.com>2019-10-09 13:33:19 +0200
committerNeale Ranns <nranns@cisco.com>2019-11-11 12:33:36 +0000
commiteb284a1f8f10d752285a0a59e75bc54acae50779 (patch)
tree4ce777b223a51fa70f9e0dc18e7842b488b81cd6 /src/vnet
parentd318a996b7bdcf0246b2d9927a918a3773a88fa6 (diff)
ip: functional interface to ip fragmentation
This provides a functional interface to IP fragmentation, allowing external features to fragment packets. It supports an arbitrary encap size, e.g. for MPLS or for inner fragmentation of tunnels. This also removes the dual loop in MAP, which was fundamentally broken. Type: fix Signed-off-by: Ole Troan <ot@cisco.com> Change-Id: Ia89ecec8ee3cbe2416edbe87630fdb714898c2a8 Signed-off-by: Ole Troan <ot@cisco.com>
Diffstat (limited to 'src/vnet')
-rw-r--r--src/vnet/ip/ip4_forward.c16
-rw-r--r--src/vnet/ip/ip6_forward.c15
-rw-r--r--src/vnet/ip/ip_frag.c181
-rw-r--r--src/vnet/ip/ip_frag.h45
-rw-r--r--src/vnet/mpls/mpls_output.c218
5 files changed, 279 insertions, 196 deletions
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 1550b313915..44a681926b3 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -2293,8 +2293,8 @@ typedef enum
always_inline void
ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
- u16 adj_packet_bytes, bool df, u16 * next, u32 * error,
- u8 is_midchain)
+ u16 adj_packet_bytes, bool df, u16 * next,
+ u8 is_midchain, u32 * error)
{
if (packet_len > adj_packet_bytes)
{
@@ -2312,8 +2312,8 @@ ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
/* IP fragmentation */
ip_frag_set_vnet_buffer (b, adj_packet_bytes,
(is_midchain ?
- IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN :
- IP4_FRAG_NEXT_IP4_REWRITE), 0);
+ IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
+ IP_FRAG_NEXT_IP_REWRITE), 0);
*next = IP4_REWRITE_NEXT_FRAGMENT;
}
}
@@ -2486,12 +2486,12 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 0, &error0, is_midchain);
+ next + 0, is_midchain, &error0);
ip4_mtu_check (b[1], ip1_len,
adj1[0].rewrite_header.max_l3_packet_bytes,
ip1->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 1, &error1, is_midchain);
+ next + 1, is_midchain, &error1);
if (is_mcast)
{
@@ -2660,7 +2660,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 0, &error0, is_midchain);
+ next + 0, is_midchain, &error0);
if (is_mcast)
{
@@ -2758,7 +2758,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 0, &error0, is_midchain);
+ next + 0, is_midchain, &error0);
if (is_mcast)
{
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 50de501fe0d..9656621c13b 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -1652,7 +1652,7 @@ typedef enum
always_inline void
ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
u16 adj_packet_bytes, bool is_locally_generated,
- u32 * next, u32 * error)
+ u32 * next, u8 is_midchain, u32 * error)
{
if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes)
{
@@ -1660,7 +1660,9 @@ ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
{
/* IP fragmentation */
ip_frag_set_vnet_buffer (b, adj_packet_bytes,
- IP6_FRAG_NEXT_IP6_REWRITE, 0);
+ (is_midchain ?
+ IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
+ IP_FRAG_NEXT_IP_REWRITE), 0);
*next = IP6_REWRITE_NEXT_FRAGMENT;
*error = IP6_ERROR_MTU_EXCEEDED;
}
@@ -1840,10 +1842,12 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
ip6_mtu_check (p0, ip0_len,
adj0[0].rewrite_header.max_l3_packet_bytes,
- is_locally_originated0, &next0, &error0);
+ is_locally_originated0, &next0, is_midchain,
+ &error0);
ip6_mtu_check (p1, ip1_len,
adj1[0].rewrite_header.max_l3_packet_bytes,
- is_locally_originated1, &next1, &error1);
+ is_locally_originated1, &next1, is_midchain,
+ &error1);
/* Don't adjust the buffer for hop count issue; icmp-error node
* wants to see the IP header */
@@ -2011,7 +2015,8 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
ip6_mtu_check (p0, ip0_len,
adj0[0].rewrite_header.max_l3_packet_bytes,
- is_locally_originated0, &next0, &error0);
+ is_locally_originated0, &next0, is_midchain,
+ &error0);
/* Don't adjust the buffer for hop count issue; icmp-error node
* wants to see the IP header */
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
index 54efb63c986..9aa8777514f 100644
--- a/src/vnet/ip/ip_frag.c
+++ b/src/vnet/ip/ip_frag.c
@@ -23,26 +23,6 @@
#include <vnet/ip/ip.h>
-/*
- * Copy the mpls header if present.
- * The current is pointing to the ip header.
- * Adjust the buffer and point to the mpls headers on these fragments
- * before sending the packet back to mpls-output node.
- */
-static inline void
-copy_mpls_hdr (vlib_buffer_t * to_b, vlib_buffer_t * from_b)
-{
- if ((vnet_buffer (from_b)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER)
- {
- u8 mpls_hdr_length = vnet_buffer (from_b)->mpls.mpls_hdr_length;
- u8 *org_from_mpls_packet =
- from_b->data + (from_b->current_data - mpls_hdr_length);
- clib_memcpy_fast ((to_b->data - mpls_hdr_length), org_from_mpls_packet,
- mpls_hdr_length);
- vlib_buffer_advance (to_b, -vnet_buffer (to_b)->mpls.mpls_hdr_length);
- }
-}
-
typedef struct
{
u8 ipv6;
@@ -87,14 +67,6 @@ frag_set_sw_if_index (vlib_buffer_t * to, vlib_buffer_t * from)
vnet_buffer2 (to)->qos = vnet_buffer2 (from)->qos;
to->flags |= VNET_BUFFER_F_QOS_DATA_VALID;
}
-
- /* Copy mpls opaque data */
- if ((vnet_buffer (from)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER)
- {
- vnet_buffer (to)->mpls.pyld_proto = vnet_buffer (from)->mpls.pyld_proto;
- vnet_buffer (to)->mpls.mpls_hdr_length =
- vnet_buffer (from)->mpls.mpls_hdr_length;
- }
}
static vlib_buffer_t *
@@ -116,20 +88,20 @@ frag_buffer_alloc (vlib_buffer_t * org_b, u32 * bi)
* but does not generate buffer chains. I.e. a fragment is always
* contained with in a single buffer and limited to the max buffer
* size.
+ * from_bi: current pointer must point to IPv4 header
*/
-void
-ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
- ip_frag_error_t * error)
+ip_frag_error_t
+ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
+ u16 l2unfragmentablesize, u32 ** buffer)
{
vlib_buffer_t *from_b;
ip4_header_t *ip4;
- u16 mtu, len, max, rem, ip_frag_id, ip_frag_offset;
+ u16 len, max, rem, ip_frag_id, ip_frag_offset;
u8 *org_from_packet, more;
from_b = vlib_get_buffer (vm, from_bi);
- mtu = vnet_buffer (from_b)->ip_frag.mtu;
org_from_packet = vlib_buffer_get_current (from_b);
- ip4 = (ip4_header_t *) vlib_buffer_get_current (from_b);
+ ip4 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t);
max =
@@ -139,21 +111,18 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
if (rem >
(vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t)))
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
if (mtu < sizeof (ip4_header_t))
{
- *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
- return;
+ return IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
}
if (ip4->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT))
{
- *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
- return;
+ return IP_FRAG_ERROR_DONT_FRAGMENT_SET;
}
if (ip4_is_fragment (ip4))
@@ -174,7 +143,8 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
u8 *from_data = (void *) (ip4 + 1);
vlib_buffer_t *org_from_b = from_b;
u16 fo = 0;
- u16 left_in_from_buffer = from_b->current_length - sizeof (ip4_header_t);
+ u16 left_in_from_buffer =
+ from_b->current_length - (l2unfragmentablesize + sizeof (ip4_header_t));
u16 ptr = 0;
/* Do the actual fragmentation */
@@ -190,17 +160,19 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
len &= ~0x7;
if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
{
- *error = IP_FRAG_ERROR_MEMORY;
- return;
+ return IP_FRAG_ERROR_MEMORY;
}
vec_add1 (*buffer, to_bi);
frag_set_sw_if_index (to_b, org_from_b);
/* Copy ip4 header */
- clib_memcpy_fast (to_b->data, org_from_packet, sizeof (ip4_header_t));
- to_ip4 = vlib_buffer_get_current (to_b);
+ to_data = vlib_buffer_get_current (to_b);
+ clib_memcpy_fast (to_data, org_from_packet,
+ l2unfragmentablesize + sizeof (ip4_header_t));
+ to_ip4 = (ip4_header_t *) (to_data + l2unfragmentablesize);
to_data = (void *) (to_ip4 + 1);
vnet_buffer (to_b)->l3_hdr_offset = to_b->current_data;
+ vlib_buffer_copy_trace_flag (vm, from_b, to_bi);
to_b->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
if (from_b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID)
@@ -232,8 +204,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
/* Move buffer */
if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
from_b = vlib_get_buffer (vm, from_b->next_buffer);
from_data = (u8 *) vlib_buffer_get_current (from_b);
@@ -242,8 +213,9 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
to_ptr += bytes_to_copy;
}
- to_b->current_length = len + sizeof (ip4_header_t);
to_b->flags |= VNET_BUFFER_F_IS_IP4;
+ to_b->current_length =
+ len + sizeof (ip4_header_t) + l2unfragmentablesize;
to_ip4->fragment_id = ip_frag_id;
to_ip4->flags_and_fragment_offset =
@@ -256,31 +228,11 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
/* we've just done the IP checksum .. */
to_b->flags &= ~VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
- if (vnet_buffer (org_from_b)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
- {
- /* Encapsulating ipv4 header */
- ip4_header_t *encap_header4 =
- (ip4_header_t *) vlib_buffer_get_current (to_b);
- encap_header4->length = clib_host_to_net_u16 (to_b->current_length);
- encap_header4->checksum = ip4_header_checksum (encap_header4);
- }
- else if (vnet_buffer (org_from_b)->
- ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
- {
- /* Encapsulating ipv6 header */
- ip6_header_t *encap_header6 =
- (ip6_header_t *) vlib_buffer_get_current (to_b);
- encap_header6->payload_length =
- clib_host_to_net_u16 (to_b->current_length -
- sizeof (*encap_header6));
- }
-
- /* Copy mpls header if present */
- copy_mpls_hdr (to_b, org_from_b);
-
rem -= len;
fo += len;
}
+
+ return IP_FRAG_ERROR_NONE;
}
void
@@ -322,19 +274,19 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
pi0 = from[0];
from += 1;
n_left_from -= 1;
- error0 = IP_FRAG_ERROR_NONE;
p0 = vlib_get_buffer (vm, pi0);
+ u16 mtu = vnet_buffer (p0)->ip_frag.mtu;
if (is_ip6)
- ip6_frag_do_fragment (vm, pi0, &buffer, &error0);
+ error0 = ip6_frag_do_fragment (vm, pi0, mtu, 0, &buffer);
else
- ip4_frag_do_fragment (vm, pi0, &buffer, &error0);
+ error0 = ip4_frag_do_fragment (vm, pi0, mtu, 0, &buffer);
if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
{
ip_frag_trace_t *tr =
vlib_add_trace (vm, node, p0, sizeof (*tr));
- tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
+ tr->mtu = mtu;
tr->ipv6 = is_ip6 ? 1 : 0;
tr->n_fragments = vec_len (buffer);
tr->next = vnet_buffer (p0)->ip_frag.next_index;
@@ -345,20 +297,13 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
vnet_buffer (p0)->ip_frag.mtu);
- next0 = IP4_FRAG_NEXT_ICMP_ERROR;
+ next0 = IP_FRAG_NEXT_ICMP_ERROR;
}
else
{
- if (is_ip6)
- next0 =
- (error0 ==
- IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
- ip_frag.next_index : IP6_FRAG_NEXT_DROP;
- else
- next0 =
- (error0 ==
- IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
- ip_frag.next_index : IP4_FRAG_NEXT_DROP;
+ next0 = (error0 == IP_FRAG_ERROR_NONE ?
+ vnet_buffer (p0)->ip_frag.next_index :
+ IP_FRAG_NEXT_DROP);
}
if (error0 == IP_FRAG_ERROR_NONE)
@@ -431,18 +376,20 @@ ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
/*
* Fragments the packet given in from_bi. Fragments are returned in the buffer vector.
* Caller must ensure the original packet is freed.
+ * from_bi: current pointer must point to IPv6 header
*/
-void
-ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
- ip_frag_error_t * error)
+ip_frag_error_t
+ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
+ u16 l2unfragmentablesize, u32 ** buffer)
{
vlib_buffer_t *from_b;
ip6_header_t *ip6;
- u16 mtu, len, max, rem, ip_frag_id;
+ u16 len, max, rem, ip_frag_id;
+ u8 *org_from_packet;
from_b = vlib_get_buffer (vm, from_bi);
- mtu = vnet_buffer (from_b)->ip_frag.mtu;
- ip6 = (ip6_header_t *) vlib_buffer_get_current (from_b);
+ org_from_packet = vlib_buffer_get_current (from_b);
+ ip6 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
rem = clib_net_to_host_u16 (ip6->payload_length);
max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7; // TODO: Is max correct??
@@ -450,21 +397,20 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
if (rem >
(vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t)))
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
/* TODO: Look through header chain for fragmentation header */
if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
u8 *from_data = (void *) (ip6 + 1);
vlib_buffer_t *org_from_b = from_b;
u16 fo = 0;
- u16 left_in_from_buffer = from_b->current_length - sizeof (ip6_header_t);
+ u16 left_in_from_buffer =
+ from_b->current_length - (l2unfragmentablesize + sizeof (ip6_header_t));
u16 ptr = 0;
ip_frag_id = ++running_fragment_id; // Fix
@@ -485,14 +431,14 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
len &= ~0x7;
if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
{
- *error = IP_FRAG_ERROR_MEMORY;
- return;
+ return IP_FRAG_ERROR_MEMORY;
}
vec_add1 (*buffer, to_bi);
frag_set_sw_if_index (to_b, org_from_b);
/* Copy ip6 header */
- clib_memcpy_fast (to_b->data, ip6, sizeof (ip6_header_t));
+ clib_memcpy_fast (to_b->data, org_from_packet,
+ l2unfragmentablesize + sizeof (ip6_header_t));
to_ip6 = vlib_buffer_get_current (to_b);
to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1);
to_data = (void *) (to_frag_hdr + 1);
@@ -530,8 +476,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
/* Move buffer */
if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
{
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
+ return IP_FRAG_ERROR_MALFORMED;
}
from_b = vlib_get_buffer (vm, from_b->next_buffer);
from_data = (u8 *) vlib_buffer_get_current (from_b);
@@ -551,12 +496,11 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
to_frag_hdr->next_hdr = ip6->protocol;
to_frag_hdr->rsv = 0;
- /* Copy mpls header if present */
- copy_mpls_hdr (to_b, org_from_b);
-
rem -= len;
fo += len;
}
+
+ return IP_FRAG_ERROR_NONE;
}
static char *ip4_frag_error_strings[] = {
@@ -576,15 +520,14 @@ VLIB_REGISTER_NODE (ip4_frag_node) = {
.n_errors = IP_FRAG_N_ERROR,
.error_strings = ip4_frag_error_strings,
- .n_next_nodes = IP4_FRAG_N_NEXT,
+ .n_next_nodes = IP_FRAG_N_NEXT,
.next_nodes = {
- [IP4_FRAG_NEXT_IP4_REWRITE] = "ip4-rewrite",
- [IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN] = "ip4-midchain",
- [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [IP4_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output",
- [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [IP4_FRAG_NEXT_DROP] = "ip4-drop"
+ [IP_FRAG_NEXT_IP_REWRITE] = "ip4-rewrite",
+ [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip4-midchain",
+ [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [IP_FRAG_NEXT_DROP] = "ip4-drop"
},
};
/* *INDENT-ON* */
@@ -600,14 +543,14 @@ VLIB_REGISTER_NODE (ip6_frag_node) = {
.n_errors = IP_FRAG_N_ERROR,
.error_strings = ip4_frag_error_strings,
- .n_next_nodes = IP6_FRAG_N_NEXT,
+ .n_next_nodes = IP_FRAG_N_NEXT,
.next_nodes = {
- [IP6_FRAG_NEXT_IP6_REWRITE] = "ip6-rewrite",
- [IP6_FRAG_NEXT_IP6_REWRITE_MIDCHAIN] = "ip6-midchain",
- [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [IP6_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output",
- [IP6_FRAG_NEXT_DROP] = "ip6-drop"
+ [IP_FRAG_NEXT_IP_REWRITE] = "ip6-rewrite",
+ [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip6-midchain",
+ [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop",
+ [IP_FRAG_NEXT_DROP] = "ip6-drop"
},
};
/* *INDENT-ON* */
diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h
index ce4236b8465..86462e6c7d2 100644
--- a/src/vnet/ip/ip_frag.h
+++ b/src/vnet/ip/ip_frag.h
@@ -39,7 +39,6 @@
#define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header
#define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header
-#define IP_FRAG_FLAG_MPLS_HEADER 0x04 //Encapsulating MPLS header
#define IP4_FRAG_NODE_NAME "ip4-frag"
#define IP6_FRAG_NODE_NAME "ip6-frag"
@@ -49,26 +48,14 @@ extern vlib_node_registration_t ip6_frag_node;
typedef enum
{
- IP4_FRAG_NEXT_IP4_REWRITE,
- IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN,
- IP4_FRAG_NEXT_IP4_LOOKUP,
- IP4_FRAG_NEXT_IP6_LOOKUP,
- IP4_FRAG_NEXT_MPLS_OUTPUT,
- IP4_FRAG_NEXT_ICMP_ERROR,
- IP4_FRAG_NEXT_DROP,
- IP4_FRAG_N_NEXT
-} ip4_frag_next_t;
-
-typedef enum
-{
- IP6_FRAG_NEXT_IP4_LOOKUP,
- IP6_FRAG_NEXT_IP6_LOOKUP,
- IP6_FRAG_NEXT_IP6_REWRITE,
- IP6_FRAG_NEXT_IP6_REWRITE_MIDCHAIN,
- IP6_FRAG_NEXT_MPLS_OUTPUT,
- IP6_FRAG_NEXT_DROP,
- IP6_FRAG_N_NEXT
-} ip6_frag_next_t;
+ IP_FRAG_NEXT_IP_REWRITE,
+ IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN,
+ IP_FRAG_NEXT_IP4_LOOKUP,
+ IP_FRAG_NEXT_IP6_LOOKUP,
+ IP_FRAG_NEXT_ICMP_ERROR,
+ IP_FRAG_NEXT_DROP,
+ IP_FRAG_N_NEXT
+} ip_frag_next_t;
#define foreach_ip_frag_error \
/* Must be first. */ \
@@ -91,12 +78,16 @@ typedef enum
void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu,
u8 next_index, u8 flags);
-void
-ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
- ip_frag_error_t * error);
-void
-ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
- ip_frag_error_t * error);
+
+/**
+ * Fragment the IPv4 packet held in buffer from_bi.
+ *
+ * @param vm                   vlib main
+ * @param from_bi              index of the buffer (chain) to fragment; the
+ *                             IPv4 header is expected l2unfragmentablesize
+ *                             bytes after the buffer's current position
+ * @param mtu                  size limit used to compute the per-fragment
+ *                             payload length
+ * @param l2unfragmentablesize number of bytes in front of the IP header
+ *                             (e.g. an MPLS label stack) that are copied
+ *                             unchanged onto every fragment; 0 when the
+ *                             buffer starts at the IP header
+ * @param buffer               vector to which the buffer indices of the
+ *                             generated fragments are appended
+ * @return IP_FRAG_ERROR_NONE on success, otherwise the reason the packet
+ *         could not be fragmented. The original buffer is never freed here;
+ *         the caller owns it in both cases.
+ */
+extern ip_frag_error_t ip4_frag_do_fragment (vlib_main_t * vm,
+                                             u32 from_bi,
+                                             u16 mtu,
+                                             u16 l2unfragmentablesize,
+                                             u32 ** buffer);
+
+/**
+ * Fragment the IPv6 packet held in buffer from_bi.
+ *
+ * Parameters and return value are as for ip4_frag_do_fragment(), with the
+ * IPv6 header expected l2unfragmentablesize bytes after the buffer's
+ * current position. The caller must ensure the original packet is freed.
+ */
+extern ip_frag_error_t ip6_frag_do_fragment (vlib_main_t * vm,
+                                             u32 from_bi,
+                                             u16 mtu,
+                                             u16 l2unfragmentablesize,
+                                             u32 ** buffer);
+
#endif /* ifndef IP_FRAG_H */
/*
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index 5ede22aa410..247f531df9f 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -34,8 +34,7 @@ typedef enum {
#define foreach_mpls_output_next \
_(DROP, "error-drop") \
-_(IP4_FRAG, "ip4-frag") \
-_(IP6_FRAG, "ip6-frag")
+_(FRAG, "mpls-frag")
typedef enum {
#define _(s,n) MPLS_OUTPUT_NEXT_##s,
@@ -58,31 +57,6 @@ format_mpls_output_trace (u8 * s, va_list * args)
return s;
}
-/*
- * Save the mpls header length and adjust the current to ip header
- */
-static inline u32
-set_mpls_fragmentation(vlib_buffer_t * p0, ip_adjacency_t * adj0)
-{
- u32 next0;
-
- /* advance size of (all) mpls header to ip header before fragmenting */
- /* save the current pointing to first mpls header. */
- vnet_buffer (p0)->mpls.mpls_hdr_length = vnet_buffer(p0)->l3_hdr_offset - p0->current_data;
- vlib_buffer_advance (p0, vnet_buffer (p0)->mpls.mpls_hdr_length);
-
- /* IP fragmentation */
- ip_frag_set_vnet_buffer (p0, adj0[0].rewrite_header.max_l3_packet_bytes,
- IP4_FRAG_NEXT_MPLS_OUTPUT,
- ((vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4) ? IP_FRAG_FLAG_IP4_HEADER:IP_FRAG_FLAG_IP6_HEADER));
-
- /* Tell ip_frag to retain certain mpls parameters after fragmentation of mpls packet */
- vnet_buffer (p0)->ip_frag.flags = (vnet_buffer (p0)->ip_frag.flags | IP_FRAG_FLAG_MPLS_HEADER);
- next0 = (vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4)? MPLS_OUTPUT_NEXT_IP4_FRAG:MPLS_OUTPUT_NEXT_IP6_FRAG;
-
- return next0;
-}
-
static inline uword
mpls_output_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -196,7 +170,7 @@ mpls_output_inline (vlib_main_t * vm,
else
{
error0 = IP4_ERROR_MTU_EXCEEDED;
- next0 = set_mpls_fragmentation (p0, adj0);
+ next0 = MPLS_OUTPUT_NEXT_FRAG;
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_NEED_FRAG,
1);
@@ -219,7 +193,7 @@ mpls_output_inline (vlib_main_t * vm,
else
{
error1 = IP4_ERROR_MTU_EXCEEDED;
- next1 = set_mpls_fragmentation (p1, adj1);
+ next1 = MPLS_OUTPUT_NEXT_FRAG;
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_NEED_FRAG,
1);
@@ -308,7 +282,7 @@ mpls_output_inline (vlib_main_t * vm,
else
{
error0 = IP4_ERROR_MTU_EXCEEDED;
- next0 = set_mpls_fragmentation (p0, adj0);
+ next0 = MPLS_OUTPUT_NEXT_FRAG;
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_NEED_FRAG,
1);
@@ -371,11 +345,9 @@ VLIB_REGISTER_NODE (mpls_output_node) = {
.n_next_nodes = MPLS_OUTPUT_N_NEXT,
.next_nodes = {
-#define _(s,n) [MPLS_OUTPUT_NEXT_##s] = n,
- foreach_mpls_output_next
-#undef _
- },
-
+ [MPLS_OUTPUT_NEXT_DROP] = "mpls-drop",
+ [MPLS_OUTPUT_NEXT_FRAG] = "mpls-frag",
+ },
.format_trace = format_mpls_output_trace,
};
@@ -390,12 +362,184 @@ VLIB_REGISTER_NODE (mpls_midchain_node) = {
.name = "mpls-midchain",
.vector_size = sizeof (u32),
- .format_trace = format_mpls_output_trace,
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
.sibling_of = "mpls-output",
+ .format_trace = format_mpls_output_trace,
};
-/**
+static char *mpls_frag_error_strings[] = { /* one string per foreach_ip_frag_error entry; registered as the mpls-frag node's error strings */
+#define _(sym,string) string,
+ foreach_ip_frag_error
+#undef _
+};
+
+typedef struct mpls_frag_trace_t_ /* trace record written by mpls_frag() for traced buffers */
+{
+ u16 pkt_size; /* vlib_buffer_length_in_chain() of the original packet */
+ u16 mtu; /* max_l3_packet_bytes of the TX adjacency's rewrite header */
+} mpls_frag_trace_t;
+
+typedef enum /* next-node slots of the mpls-frag node */
+{
+ MPLS_FRAG_NEXT_REWRITE, /* fragments whose adjacency is not a midchain */
+ MPLS_FRAG_NEXT_REWRITE_MIDCHAIN, /* fragments whose adjacency is IP_LOOKUP_NEXT_MIDCHAIN */
+ MPLS_FRAG_NEXT_ICMP_ERROR, /* IPv4 packet with don't-fragment set */
+ MPLS_FRAG_NEXT_DROP, /* any other fragmentation failure */
+ MPLS_FRAG_N_NEXT, /* number of next slots; keep last */
+} mpls_frag_next_t;
+
+/**
+ * @brief Fragment the IP payload of MPLS packets that exceed the MTU of
+ *        their TX adjacency.
+ *
+ * For each buffer in the frame, the bytes between the buffer's current
+ * position and l3_hdr_offset (the MPLS label stack) are passed to
+ * ip4/ip6_frag_do_fragment() as unfragmentable encap, so they are copied
+ * onto every fragment.
+ *
+ *  - Success: the original buffer is freed and the fragments go to
+ *    MPLS_FRAG_NEXT_REWRITE or MPLS_FRAG_NEXT_REWRITE_MIDCHAIN, depending
+ *    on the adjacency's lookup_next_index.
+ *  - IPv4 with don't-fragment set: the original buffer is sent to
+ *    MPLS_FRAG_NEXT_ICMP_ERROR with a "fragmentation needed" error.
+ *  - Any other failure: the error is counted against this node and the
+ *    original buffer, together with any fragments already produced, is
+ *    sent to MPLS_FRAG_NEXT_DROP.
+ *
+ * @return the number of buffers consumed from the frame
+ */
+static uword
+mpls_frag (vlib_main_t * vm,
+           vlib_node_runtime_t * node,
+           vlib_frame_t * frame)
+{
+    u32 n_left_from, next_index, * from, * to_next, n_left_to_next, *frags;
+
+    from = vlib_frame_vector_args (frame);
+    n_left_from = frame->n_vectors;
+    next_index = node->cached_next_index;
+    frags = NULL;
+
+    while (n_left_from > 0)
+    {
+        vlib_get_next_frame (vm, node, next_index,
+                             to_next, n_left_to_next);
+
+        while (n_left_from > 0 && n_left_to_next > 0)
+        {
+            ip_adjacency_t * adj0;
+            vlib_buffer_t * p0;
+            mpls_frag_next_t next0;
+            u32 pi0, adj_index0;
+            ip_frag_error_t error0 = IP_FRAG_ERROR_NONE;
+            i16 encap_size;
+            u16 mtu;
+            u8 is_ip4;
+
+            pi0 = from[0];
+            p0 = vlib_get_buffer (vm, pi0);
+            from += 1;
+            n_left_from -= 1;
+            is_ip4 = vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4;
+
+            adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+            adj0 = adj_get(adj_index0);
+            mtu = adj0->rewrite_header.max_l3_packet_bytes;
+
+            /* the size of the MPLS stack */
+            encap_size = vnet_buffer(p0)->l3_hdr_offset - p0->current_data;
+
+            /* IP fragmentation */
+            if (is_ip4)
+                error0 = ip4_frag_do_fragment (vm, pi0, mtu,
+                                               encap_size, &frags);
+            else
+                error0 = ip6_frag_do_fragment (vm, pi0, mtu,
+                                               encap_size, &frags);
+
+            if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_frag_trace_t *tr =
+                    vlib_add_trace (vm, node, p0, sizeof (*tr));
+                tr->mtu = mtu;
+                tr->pkt_size = vlib_buffer_length_in_chain(vm, p0);
+            }
+
+            if (PREDICT_TRUE(error0 == IP_FRAG_ERROR_NONE))
+            {
+                /* The fragments replace the original buffer chain */
+                vlib_buffer_free_one (vm, pi0);
+                next0 = (IP_LOOKUP_NEXT_MIDCHAIN == adj0->lookup_next_index ?
+                         MPLS_FRAG_NEXT_REWRITE_MIDCHAIN :
+                         MPLS_FRAG_NEXT_REWRITE);
+            }
+            else if (is_ip4 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
+            {
+                /*
+                 * Report the MTU that was actually enforced; nothing on
+                 * this path populates vnet_buffer (p0)->ip_frag.mtu.
+                 */
+                icmp4_error_set_vnet_buffer (
+                    p0, ICMP4_destination_unreachable,
+                    ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+                    mtu);
+                /* the icmp-error node wants to see the IP header, so step
+                 * over the MPLS label stack */
+                vlib_buffer_advance (p0, encap_size);
+                /* hand the original packet to the ICMP error node; without
+                 * this it is neither forwarded nor freed */
+                vec_add1 (frags, pi0);
+                next0 = MPLS_FRAG_NEXT_ICMP_ERROR;
+            }
+            else
+            {
+                /* count against this node, which registers the IP frag
+                 * error strings */
+                vlib_error_count (vm, node->node_index, error0, 1);
+                vec_add1 (frags, pi0); /* Get rid of the original buffer */
+                next0 = MPLS_FRAG_NEXT_DROP;
+            }
+
+            /* Send fragments that were added in the frame */
+            u32 *frag_from, frag_left;
+
+            frag_from = frags;
+            frag_left = vec_len (frags);
+
+            while (frag_left > 0)
+            {
+                while (frag_left > 0 && n_left_to_next > 0)
+                {
+                    u32 i;
+                    i = to_next[0] = frag_from[0];
+                    frag_from += 1;
+                    frag_left -= 1;
+                    to_next += 1;
+                    n_left_to_next -= 1;
+
+                    p0 = vlib_get_buffer (vm, i);
+                    /* error0 indexes this node's own error table */
+                    p0->error = node->errors[error0];
+
+                    vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                                     to_next, n_left_to_next, i,
+                                                     next0);
+                }
+                /* the current frame may be full; flush it and get a new one */
+                vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+                vlib_get_next_frame (vm, node, next_index, to_next,
+                                     n_left_to_next);
+            }
+            /* keep the vector's storage for the next packet */
+            vec_reset_length (frags);
+        }
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+    vec_free (frags);
+
+    return frame->n_vectors;
+}
+
+/* Trace formatter for the mpls-frag node: prints the recorded MTU and
+ * packet size of an mpls_frag_trace_t. */
+static u8 *
+format_mpls_frag_trace (u8 * s, va_list * args)
+{
+    /* vm and node are unused, but must still be consumed from the va_list
+     * to reach the trace record that follows them */
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    mpls_frag_trace_t *trace = va_arg (*args, mpls_frag_trace_t *);
+
+    return format (s, "mtu:%d pkt-size:%d", trace->mtu, trace->pkt_size);
+}
+
+VLIB_REGISTER_NODE (mpls_frag_node) = { /* graph node that fragments the IP payload of MPLS packets */
+ .function = mpls_frag,
+ .name = "mpls-frag",
+ .vector_size = sizeof (u32),
+ .format_trace = format_mpls_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = IP_FRAG_N_ERROR, /* errors are the generic IP fragmentation errors */
+ .error_strings = mpls_frag_error_strings,
+
+ .n_next_nodes = MPLS_FRAG_N_NEXT,
+ .next_nodes = { /* indexed by mpls_frag_next_t */
+ [MPLS_FRAG_NEXT_REWRITE] = "mpls-output",
+ [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain",
+ [MPLS_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [MPLS_FRAG_NEXT_DROP] = "mpls-drop"
+ },
+};
+
+/*
* @brief Next index values from the MPLS incomplete adj node
*/
#define foreach_mpls_adj_incomplete_next \