aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rajesh Goel <rajegoel@cisco.com> 2019-10-06 13:17:36 +0530
committer Andrew Yourtchenko <ayourtch@gmail.com> 2019-10-22 11:15:17 +0000
commit be2d536f87e996ba0067af0ea5fab6dd45d20822 (patch)
tree c8f564fe9283e7addb0895de153a96ceadfe470b
parent d3b170254bc41495cca90c6bfe77ffb02aaa661a (diff)
mpls: support fragmentation of mpls output packet
Type: fix
Signed-off-by: Rajesh Goel <rajegoel@cisco.com>
Change-Id: Ie4372c5cf58ab215cdec5ce56f8a994daaba2844
(cherry picked from commit d6f1c9c5141c177a14d011a514e392a9357398fb)
-rw-r--r-- src/vnet/buffer.h 4
-rw-r--r-- src/vnet/dpo/mpls_label_dpo.c 18
-rw-r--r-- src/vnet/ip/ip_frag.c 40
-rw-r--r-- src/vnet/ip/ip_frag.h 3
-rw-r--r-- src/vnet/mpls/error.def 1
-rw-r--r-- src/vnet/mpls/mpls_output.c 70
-rw-r--r-- test/test_mpls.py 56
7 files changed, 176 insertions, 16 deletions
diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h
index 5a3bc3fc279..413d721222d 100644
--- a/src/vnet/buffer.h
+++ b/src/vnet/buffer.h
@@ -221,8 +221,12 @@ typedef struct
u8 ttl;
u8 exp;
u8 first;
+ u8 pyld_proto:3; /* dpo_proto_t */
+ u8 rsvd:5;
/* Rewrite length */
u32 save_rewrite_length;
+ /* Save the mpls header length including all label stack */
+ u8 mpls_hdr_length;
/*
* BIER - the number of bytes in the header.
* the len field in the header is not authoritative. It's the
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
index 1074a959310..9d147f98f13 100644
--- a/src/vnet/dpo/mpls_label_dpo.c
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -484,6 +484,12 @@ mpls_label_imposition_inline (vlib_main_t * vm,
exp2 = ip_dscp_to_mpls_exp(ip2->tos);
exp3 = ip_dscp_to_mpls_exp(ip3->tos);
}
+
+ /* save the payload proto information in mpls opaque */
+ vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP4;
+ vnet_buffer(b1)->mpls.pyld_proto = DPO_PROTO_IP4;
+ vnet_buffer(b2)->mpls.pyld_proto = DPO_PROTO_IP4;
+ vnet_buffer(b3)->mpls.pyld_proto = DPO_PROTO_IP4;
}
else if (DPO_PROTO_IP6 == dproto)
{
@@ -518,6 +524,12 @@ mpls_label_imposition_inline (vlib_main_t * vm,
exp3 = ip_dscp_to_mpls_exp(
ip6_traffic_class_network_order(ip3));
}
+
+ /* save the payload proto information in mpls opaque */
+ vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP6;
+ vnet_buffer(b1)->mpls.pyld_proto = DPO_PROTO_IP6;
+ vnet_buffer(b2)->mpls.pyld_proto = DPO_PROTO_IP6;
+ vnet_buffer(b3)->mpls.pyld_proto = DPO_PROTO_IP6;
}
else
{
@@ -787,6 +799,9 @@ mpls_label_imposition_inline (vlib_main_t * vm,
ttl0 = ip0->ttl;
exp0 = ip_dscp_to_mpls_exp(ip0->tos);
}
+
+ /* save the payload proto information in mpls opaque */
+ vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP4;
}
else if (DPO_PROTO_IP6 == dproto)
{
@@ -805,6 +820,9 @@ mpls_label_imposition_inline (vlib_main_t * vm,
exp0 = ip_dscp_to_mpls_exp(
ip6_traffic_class_network_order(ip0));
}
+
+ /* save the payload proto information in mpls opaque */
+ vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP6;
}
else
{
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
index fd5bc6fa0ba..230722c45db 100644
--- a/src/vnet/ip/ip_frag.c
+++ b/src/vnet/ip/ip_frag.c
@@ -23,6 +23,25 @@
#include <vnet/ip/ip.h>
+/*
+ * Re-attach the MPLS label stack to a fragment, if the original had one.
+ *
+ * Nothing is done unless IP_FRAG_FLAG_MPLS_HEADER is set in from_b's
+ * ip_frag flags.  When it is set, from_b's current position is at the ip
+ * header and the mpls.mpls_hdr_length bytes immediately in front of it hold
+ * the mpls headers.  Those bytes are copied to immediately in front of
+ * to_b's current position, and to_b is then rewound by the same amount so
+ * that it points at the first mpls header before the fragment is sent back
+ * to the mpls-output node.
+ *
+ * @param to_b   fragment buffer that receives the mpls headers
+ * @param from_b original (pre-fragmentation) buffer the headers are read from
+ */
+static inline void
+copy_mpls_hdr (vlib_buffer_t * to_b, vlib_buffer_t * from_b)
+{
+  if ((vnet_buffer (from_b)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER)
+    {
+      u8 mpls_hdr_length = vnet_buffer (from_b)->mpls.mpls_hdr_length;
+      u8 *from_mpls =
+	(u8 *) vlib_buffer_get_current (from_b) - mpls_hdr_length;
+      /*
+       * The destination must be relative to to_b's current position, not to
+       * the start of its data area: the rewind below is relative to current,
+       * so the two only coincide when to_b->current_data happens to be 0.
+       */
+      u8 *to_mpls = (u8 *) vlib_buffer_get_current (to_b) - mpls_hdr_length;
+
+      clib_memcpy_fast (to_mpls, from_mpls, mpls_hdr_length);
+
+      /* rewind by exactly the number of bytes that were just copied */
+      vlib_buffer_advance (to_b, -mpls_hdr_length);
+    }
+}
typedef struct
{
@@ -38,8 +57,8 @@ format_ip_frag_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
- s = format (s, "IPv%s mtu: %u fragments: %u",
- t->ipv6 ? "6" : "4", t->mtu, t->n_fragments);
+ s = format (s, "IPv%s mtu: %u fragments: %u next: %d",
+ t->ipv6 ? "6" : "4", t->mtu, t->n_fragments, t->next);
return s;
}
@@ -68,6 +87,14 @@ frag_set_sw_if_index (vlib_buffer_t * to, vlib_buffer_t * from)
vnet_buffer2 (to)->qos = vnet_buffer2 (from)->qos;
to->flags |= VNET_BUFFER_F_QOS_DATA_VALID;
}
+
+ /* Copy mpls opaque data */
+ if ((vnet_buffer (from)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER)
+ {
+ vnet_buffer (to)->mpls.pyld_proto = vnet_buffer (from)->mpls.pyld_proto;
+ vnet_buffer (to)->mpls.mpls_hdr_length =
+ vnet_buffer (from)->mpls.mpls_hdr_length;
+ }
}
static vlib_buffer_t *
@@ -232,6 +259,10 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
clib_host_to_net_u16 (to_b->current_length -
sizeof (*encap_header6));
}
+
+ /* Copy mpls header if present */
+ copy_mpls_hdr (to_b, org_from_b);
+
rem -= len;
fo += len;
}
@@ -492,6 +523,9 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
to_frag_hdr->next_hdr = ip6->protocol;
to_frag_hdr->rsv = 0;
+ /* Copy mpls header if present */
+ copy_mpls_hdr (to_b, org_from_b);
+
rem -= len;
fo += len;
}
@@ -519,6 +553,7 @@ VLIB_REGISTER_NODE (ip4_frag_node) = {
[IP4_FRAG_NEXT_IP4_REWRITE] = "ip4-rewrite",
[IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
[IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output",
[IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
[IP4_FRAG_NEXT_DROP] = "ip4-drop"
},
@@ -541,6 +576,7 @@ VLIB_REGISTER_NODE (ip6_frag_node) = {
[IP6_FRAG_NEXT_IP6_REWRITE] = "ip6-rewrite",
[IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
[IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP6_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output",
[IP6_FRAG_NEXT_DROP] = "ip6-drop"
},
};
diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h
index 06eeee82bc6..b66db416129 100644
--- a/src/vnet/ip/ip_frag.h
+++ b/src/vnet/ip/ip_frag.h
@@ -39,6 +39,7 @@
#define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header
#define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header
+#define IP_FRAG_FLAG_MPLS_HEADER 0x04 //Encapsulating MPLS header
#define IP4_FRAG_NODE_NAME "ip4-frag"
#define IP6_FRAG_NODE_NAME "ip6-frag"
@@ -51,6 +52,7 @@ typedef enum
IP4_FRAG_NEXT_IP4_REWRITE,
IP4_FRAG_NEXT_IP4_LOOKUP,
IP4_FRAG_NEXT_IP6_LOOKUP,
+ IP4_FRAG_NEXT_MPLS_OUTPUT,
IP4_FRAG_NEXT_ICMP_ERROR,
IP4_FRAG_NEXT_DROP,
IP4_FRAG_N_NEXT
@@ -61,6 +63,7 @@ typedef enum
IP6_FRAG_NEXT_IP4_LOOKUP,
IP6_FRAG_NEXT_IP6_LOOKUP,
IP6_FRAG_NEXT_IP6_REWRITE,
+ IP6_FRAG_NEXT_MPLS_OUTPUT,
IP6_FRAG_NEXT_DROP,
IP6_FRAG_N_NEXT
} ip6_frag_next_t;
diff --git a/src/vnet/mpls/error.def b/src/vnet/mpls/error.def
index 34a46522894..9941b18baf4 100644
--- a/src/vnet/mpls/error.def
+++ b/src/vnet/mpls/error.def
@@ -20,6 +20,7 @@ mpls_error (UNKNOWN_PROTOCOL, "unknown protocol")
mpls_error (UNSUPPORTED_VERSION, "unsupported version")
mpls_error (PKTS_DECAP, "MPLS input packets decapsulated")
mpls_error (PKTS_ENCAP, "MPLS output packets encapsulated")
+mpls_error (PKTS_NEED_FRAG, "MPLS output packets needs fragmentation")
mpls_error (NO_LABEL, "MPLS no label for fib/dst")
mpls_error (TTL_EXPIRED, "MPLS ttl expired")
mpls_error (S_NOT_SET, "MPLS s-bit not set")
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index 68577e711cc..5ede22aa410 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -19,6 +19,7 @@
#include <vnet/pg/pg.h>
#include <vnet/ip/ip.h>
#include <vnet/mpls/mpls.h>
+#include <vnet/ip/ip_frag.h>
typedef struct {
/* Adjacency taken. */
@@ -26,8 +27,15 @@ typedef struct {
u32 flow_hash;
} mpls_output_trace_t;
+/*
+ * Selects the variant of mpls_output_inline to run.
+ */
+typedef enum {
+ /* passed by the mpls_output_node function */
+ MPLS_OUTPUT_MODE,
+ /* passed by the mpls_midchain_node function; in this mode
+  * mpls_output_inline also calls the adjacency's midchain fixup_func */
+ MPLS_OUTPUT_MIDCHAIN_MODE
+}mpls_output_mode_t;
+
#define foreach_mpls_output_next \
-_(DROP, "error-drop")
+_(DROP, "error-drop") \
+_(IP4_FRAG, "ip4-frag") \
+_(IP6_FRAG, "ip6-frag")
typedef enum {
#define _(s,n) MPLS_OUTPUT_NEXT_##s,
@@ -50,11 +58,36 @@ format_mpls_output_trace (u8 * s, va_list * args)
return s;
}
+/*
+ * Prepare an MPLS packet for IP fragmentation.
+ *
+ * The length of the mpls headers in front of the ip header (the distance
+ * from the buffer's current position to l3_hdr_offset) is saved in the mpls
+ * opaque and the buffer is advanced over them, so that current points at
+ * the ip header.  The ip_frag opaque is then filled in with the adjacency's
+ * max_l3_packet_bytes as MTU and with the fragmentation node's next index
+ * for mpls-output, and IP_FRAG_FLAG_MPLS_HEADER is set so that the
+ * fragmentation node retains the mpls parameters on every fragment.
+ *
+ * @param p0   buffer whose current position is at the first mpls header
+ * @param adj0 adjacency supplying the MTU
+ * @return MPLS_OUTPUT_NEXT_IP4_FRAG when the saved payload protocol is
+ *         DPO_PROTO_IP4, MPLS_OUTPUT_NEXT_IP6_FRAG otherwise
+ */
+static inline u32
+set_mpls_fragmentation(vlib_buffer_t * p0, ip_adjacency_t * adj0)
+{
+    u32 next0;
+    u8 frag_next, frag_flags;
+
+    /* advance size of (all) mpls header to ip header before fragmenting */
+    /* save the current pointing to first mpls header. */
+    vnet_buffer (p0)->mpls.mpls_hdr_length = vnet_buffer(p0)->l3_hdr_offset - p0->current_data;
+    vlib_buffer_advance (p0, vnet_buffer (p0)->mpls.mpls_hdr_length);
+
+    /*
+     * ip4-frag and ip6-frag each have their own next-index enumeration, so
+     * the mpls-output next must be named from the enumeration of the node
+     * the packet is actually handed to.
+     */
+    if (vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4)
+      {
+        next0 = MPLS_OUTPUT_NEXT_IP4_FRAG;
+        frag_next = IP4_FRAG_NEXT_MPLS_OUTPUT;
+        frag_flags = IP_FRAG_FLAG_IP4_HEADER;
+      }
+    else
+      {
+        next0 = MPLS_OUTPUT_NEXT_IP6_FRAG;
+        frag_next = IP6_FRAG_NEXT_MPLS_OUTPUT;
+        frag_flags = IP_FRAG_FLAG_IP6_HEADER;
+      }
+
+    /* IP fragmentation */
+    ip_frag_set_vnet_buffer (p0, adj0[0].rewrite_header.max_l3_packet_bytes,
+                             frag_next, frag_flags);
+
+    /* Tell ip_frag to retain certain mpls parameters after fragmentation of mpls packet */
+    vnet_buffer (p0)->ip_frag.flags |= IP_FRAG_FLAG_MPLS_HEADER;
+
+    return next0;
+}
+
static inline uword
mpls_output_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame,
- int is_midchain)
+ mpls_output_mode_t mode)
{
u32 n_left_from, next_index, * from, * to_next, thread_index;
vlib_node_runtime_t * error_node;
@@ -162,8 +195,11 @@ mpls_output_inline (vlib_main_t * vm,
}
else
{
- error0 = IP4_ERROR_MTU_EXCEEDED;
- next0 = MPLS_OUTPUT_NEXT_DROP;
+ error0 = IP4_ERROR_MTU_EXCEEDED;
+ next0 = set_mpls_fragmentation (p0, adj0);
+ vlib_node_increment_counter (vm, mpls_output_node.index,
+ MPLS_ERROR_PKTS_NEED_FRAG,
+ 1);
}
if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <=
adj1[0].rewrite_header.max_l3_packet_bytes))
@@ -182,10 +218,13 @@ mpls_output_inline (vlib_main_t * vm,
}
else
{
- error1 = IP4_ERROR_MTU_EXCEEDED;
- next1 = MPLS_OUTPUT_NEXT_DROP;
+ error1 = IP4_ERROR_MTU_EXCEEDED;
+ next1 = set_mpls_fragmentation (p1, adj1);
+ vlib_node_increment_counter (vm, mpls_output_node.index,
+ MPLS_ERROR_PKTS_NEED_FRAG,
+ 1);
}
- if (is_midchain)
+ if (mode == MPLS_OUTPUT_MIDCHAIN_MODE)
{
adj0->sub_type.midchain.fixup_func
(vm, adj0, p0,
@@ -221,7 +260,7 @@ mpls_output_inline (vlib_main_t * vm,
while (n_left_from > 0 && n_left_to_next > 0)
{
ip_adjacency_t * adj0;
- mpls_unicast_header_t *hdr0;
+ mpls_unicast_header_t *hdr0;
vlib_buffer_t * p0;
u32 pi0, adj_index0, next0, error0;
word rw_len0;
@@ -233,7 +272,7 @@ mpls_output_inline (vlib_main_t * vm,
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
adj0 = adj_get(adj_index0);
- hdr0 = vlib_buffer_get_current (p0);
+ hdr0 = vlib_buffer_get_current (p0);
/* Guess we are only writing on simple Ethernet header. */
vnet_rewrite_one_header (adj0[0], hdr0,
@@ -268,10 +307,13 @@ mpls_output_inline (vlib_main_t * vm,
}
else
{
- error0 = IP4_ERROR_MTU_EXCEEDED;
- next0 = MPLS_OUTPUT_NEXT_DROP;
+ error0 = IP4_ERROR_MTU_EXCEEDED;
+ next0 = set_mpls_fragmentation (p0, adj0);
+ vlib_node_increment_counter (vm, mpls_output_node.index,
+ MPLS_ERROR_PKTS_NEED_FRAG,
+ 1);
}
- if (is_midchain)
+ if (mode == MPLS_OUTPUT_MIDCHAIN_MODE)
{
adj0->sub_type.midchain.fixup_func
(vm, adj0, p0,
@@ -317,7 +359,7 @@ VLIB_NODE_FN (mpls_output_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
{
- return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 0));
+ return (mpls_output_inline(vm, node, from_frame, MPLS_OUTPUT_MODE));
}
VLIB_REGISTER_NODE (mpls_output_node) = {
@@ -341,7 +383,7 @@ VLIB_NODE_FN (mpls_midchain_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
{
- return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 1));
+ return (mpls_output_inline(vm, node, from_frame, MPLS_OUTPUT_MIDCHAIN_MODE));
}
VLIB_REGISTER_NODE (mpls_midchain_node) = {
diff --git a/test/test_mpls.py b/test/test_mpls.py
index 7388cf46c73..5b3054801e7 100644
--- a/test/test_mpls.py
+++ b/test/test_mpls.py
@@ -379,6 +379,30 @@ class TestMPLS(VppTestCase):
except:
raise
+    def verify_capture_fragmented_labelled_ip4(self, src_if, capture, sent,
+                                               mpls_labels, ip_ttl=None):
+        """Check every captured fragment of a labelled IPv4 packet.
+
+        The capture is first passed through verify_filter. Each remaining
+        packet must carry the label stack ``mpls_labels`` and the same IP
+        source and destination as the first packet in ``sent``. Its TTL
+        must equal ``ip_ttl`` when one is given, otherwise it must be one
+        less than the TTL that was sent. ``src_if`` is not used.
+        """
+        fragments = verify_filter(capture, sent)
+
+        for rx in fragments:
+            # every fragment is compared against the first packet sent
+            tx_ip = sent[0][IP]
+            rx_ip = rx[IP]
+
+            verify_mpls_stack(self, rx, mpls_labels)
+
+            self.assertEqual(rx_ip.src, tx_ip.src)
+            self.assertEqual(rx_ip.dst, tx_ip.dst)
+
+            if ip_ttl:
+                self.assertEqual(rx_ip.ttl, ip_ttl)
+            else:
+                # no explicit TTL expected: it must have dropped by one
+                self.assertEqual(rx_ip.ttl + 1, tx_ip.ttl)
+
def test_swap(self):
""" MPLS label swap tests """
@@ -851,6 +875,38 @@ class TestMPLS(VppTestCase):
route_10_0_0_2.remove_vpp_config()
route_10_0_0_1.remove_vpp_config()
+    def test_imposition_fragmentation(self):
+        """ MPLS label imposition fragmentation test """
+
+        #
+        # Add a ipv4 non-recursive route with a single out label
+        #
+        route_10_0_0_1 = VppIpRoute(self, "10.0.0.1", 32,
+                                    [VppRoutePath(self.pg0.remote_ip4,
+                                                  self.pg0.sw_if_index,
+                                                  labels=[VppMplsLabel(32)])])
+        route_10_0_0_1.add_vpp_config()
+
+        #
+        # a stream that matches the route for 10.0.0.1
+        # PG0 is in the default table
+        #
+        tx = self.create_stream_ip4(self.pg0, "10.0.0.1")
+
+        #
+        # grow every packet of the stream to 10000 bytes; the packet count
+        # is taken from the stream itself rather than hard-coded
+        #
+        for pkt in tx:
+            self.extend_packet(pkt, 10000)
+
+        #
+        # 5 fragments are expected per packet sent
+        #
+        frags_per_packet = 5
+        rx = self.send_and_expect(self.pg0, tx, self.pg0,
+                                  len(tx) * frags_per_packet)
+        self.verify_capture_fragmented_labelled_ip4(self.pg0, rx, tx,
+                                                    [VppMplsLabel(32)])
+
+        #
+        # cleanup
+        #
+        route_10_0_0_1.remove_vpp_config()
+
def test_tunnel_pipe(self):
""" MPLS Tunnel Tests - Pipe """