From d6f1c9c5141c177a14d011a514e392a9357398fb Mon Sep 17 00:00:00 2001 From: Rajesh Goel Date: Sun, 6 Oct 2019 13:17:36 +0530 Subject: mpls: support fragmentation of mpls output packet Type: fix Signed-off-by: Rajesh Goel Change-Id: Ie4372c5cf58ab215cdec5ce56f8a994daaba2844 --- src/vnet/buffer.h | 4 +++ src/vnet/dpo/mpls_label_dpo.c | 18 +++++++++++ src/vnet/ip/ip_frag.c | 40 +++++++++++++++++++++++-- src/vnet/ip/ip_frag.h | 3 ++ src/vnet/mpls/error.def | 1 + src/vnet/mpls/mpls_output.c | 70 ++++++++++++++++++++++++++++++++++--------- test/test_mpls.py | 56 ++++++++++++++++++++++++++++++++++ 7 files changed, 176 insertions(+), 16 deletions(-) diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 708e399047d..d160ae8c9af 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -246,8 +246,12 @@ typedef struct u8 ttl; u8 exp; u8 first; + u8 pyld_proto:3; /* dpo_proto_t */ + u8 rsvd:5; /* Rewrite length */ u32 save_rewrite_length; + /* Save the mpls header length including all label stack */ + u8 mpls_hdr_length; /* * BIER - the number of bytes in the header. * the len field in the header is not authoritative. 
It's the diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c index 1074a959310..9d147f98f13 100644 --- a/src/vnet/dpo/mpls_label_dpo.c +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -484,6 +484,12 @@ mpls_label_imposition_inline (vlib_main_t * vm, exp2 = ip_dscp_to_mpls_exp(ip2->tos); exp3 = ip_dscp_to_mpls_exp(ip3->tos); } + + /* save the payload proto information in mpls opaque */ + vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP4; + vnet_buffer(b1)->mpls.pyld_proto = DPO_PROTO_IP4; + vnet_buffer(b2)->mpls.pyld_proto = DPO_PROTO_IP4; + vnet_buffer(b3)->mpls.pyld_proto = DPO_PROTO_IP4; } else if (DPO_PROTO_IP6 == dproto) { @@ -518,6 +524,12 @@ mpls_label_imposition_inline (vlib_main_t * vm, exp3 = ip_dscp_to_mpls_exp( ip6_traffic_class_network_order(ip3)); } + + /* save the payload proto information in mpls opaque */ + vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP6; + vnet_buffer(b1)->mpls.pyld_proto = DPO_PROTO_IP6; + vnet_buffer(b2)->mpls.pyld_proto = DPO_PROTO_IP6; + vnet_buffer(b3)->mpls.pyld_proto = DPO_PROTO_IP6; } else { @@ -787,6 +799,9 @@ mpls_label_imposition_inline (vlib_main_t * vm, ttl0 = ip0->ttl; exp0 = ip_dscp_to_mpls_exp(ip0->tos); } + + /* save the payload proto information in mpls opaque */ + vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP4; } else if (DPO_PROTO_IP6 == dproto) { @@ -805,6 +820,9 @@ mpls_label_imposition_inline (vlib_main_t * vm, exp0 = ip_dscp_to_mpls_exp( ip6_traffic_class_network_order(ip0)); } + + /* save the payload proto information in mpls opaque */ + vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP6; } else { diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c index fd5bc6fa0ba..230722c45db 100644 --- a/src/vnet/ip/ip_frag.c +++ b/src/vnet/ip/ip_frag.c @@ -23,6 +23,25 @@ #include +/* + * Copy the mpls header if present. + * The current is pointing to the ip header. + * Adjust the buffer and point to the mpls headers on these fragments + * before sending the packet back to mpls-output node. 
+ */ +static inline void +copy_mpls_hdr (vlib_buffer_t * to_b, vlib_buffer_t * from_b) +{ + if ((vnet_buffer (from_b)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER) + { + u8 mpls_hdr_length = vnet_buffer (from_b)->mpls.mpls_hdr_length; /* label stack bytes saved on the original packet */ + u8 *org_from_mpls_packet = + from_b->data + (from_b->current_data - mpls_hdr_length); /* the labels sit immediately before the original's current data */ + clib_memcpy_fast ((to_b->data - mpls_hdr_length), org_from_mpls_packet, + mpls_hdr_length); + vlib_buffer_advance (to_b, -mpls_hdr_length); /* rewind by exactly the length copied above, not by to_b's own opaque copy */ + } +} typedef struct { @@ -38,8 +57,8 @@ format_ip_frag_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *); - s = format (s, "IPv%s mtu: %u fragments: %u", - t->ipv6 ? "6" : "4", t->mtu, t->n_fragments); + s = format (s, "IPv%s mtu: %u fragments: %u next: %d", + t->ipv6 ? "6" : "4", t->mtu, t->n_fragments, t->next); return s; } @@ -68,6 +87,14 @@ frag_set_sw_if_index (vlib_buffer_t * to, vlib_buffer_t * from) vnet_buffer2 (to)->qos = vnet_buffer2 (from)->qos; to->flags |= VNET_BUFFER_F_QOS_DATA_VALID; } + + /* Copy mpls opaque data */ + if ((vnet_buffer (from)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER) + { + vnet_buffer (to)->mpls.pyld_proto = vnet_buffer (from)->mpls.pyld_proto; + vnet_buffer (to)->mpls.mpls_hdr_length = + vnet_buffer (from)->mpls.mpls_hdr_length; + } } static vlib_buffer_t * @@ -232,6 +259,10 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, clib_host_to_net_u16 (to_b->current_length - sizeof (*encap_header6)); } + + /* Copy mpls header if present */ + copy_mpls_hdr (to_b, org_from_b); + rem -= len; fo += len; } @@ -492,6 +523,9 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, to_frag_hdr->next_hdr = ip6->protocol; to_frag_hdr->rsv = 0; + /* Copy mpls header if present */ + copy_mpls_hdr (to_b, org_from_b); + rem -= len; fo += len; } @@ -519,6 +553,7 @@ 
VLIB_REGISTER_NODE (ip4_frag_node) = { [IP4_FRAG_NEXT_IP4_REWRITE] = "ip4-rewrite", [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output", [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", [IP4_FRAG_NEXT_DROP] = "ip4-drop" }, @@ -541,6 +576,7 @@ VLIB_REGISTER_NODE (ip6_frag_node) = { [IP6_FRAG_NEXT_IP6_REWRITE] = "ip6-rewrite", [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP6_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output", [IP6_FRAG_NEXT_DROP] = "ip6-drop" }, }; diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h index 06eeee82bc6..b66db416129 100644 --- a/src/vnet/ip/ip_frag.h +++ b/src/vnet/ip/ip_frag.h @@ -39,6 +39,7 @@ #define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header #define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header +#define IP_FRAG_FLAG_MPLS_HEADER 0x04 //Encapsulating MPLS header #define IP4_FRAG_NODE_NAME "ip4-frag" #define IP6_FRAG_NODE_NAME "ip6-frag" @@ -51,6 +52,7 @@ typedef enum IP4_FRAG_NEXT_IP4_REWRITE, IP4_FRAG_NEXT_IP4_LOOKUP, IP4_FRAG_NEXT_IP6_LOOKUP, + IP4_FRAG_NEXT_MPLS_OUTPUT, IP4_FRAG_NEXT_ICMP_ERROR, IP4_FRAG_NEXT_DROP, IP4_FRAG_N_NEXT @@ -61,6 +63,7 @@ typedef enum IP6_FRAG_NEXT_IP4_LOOKUP, IP6_FRAG_NEXT_IP6_LOOKUP, IP6_FRAG_NEXT_IP6_REWRITE, + IP6_FRAG_NEXT_MPLS_OUTPUT, IP6_FRAG_NEXT_DROP, IP6_FRAG_N_NEXT } ip6_frag_next_t; diff --git a/src/vnet/mpls/error.def b/src/vnet/mpls/error.def index 34a46522894..9941b18baf4 100644 --- a/src/vnet/mpls/error.def +++ b/src/vnet/mpls/error.def @@ -20,6 +20,7 @@ mpls_error (UNKNOWN_PROTOCOL, "unknown protocol") mpls_error (UNSUPPORTED_VERSION, "unsupported version") mpls_error (PKTS_DECAP, "MPLS input packets decapsulated") mpls_error (PKTS_ENCAP, "MPLS output packets encapsulated") +mpls_error (PKTS_NEED_FRAG, "MPLS output packets needs fragmentation") mpls_error (NO_LABEL, "MPLS no label for fib/dst") mpls_error (TTL_EXPIRED, "MPLS ttl expired") mpls_error 
(S_NOT_SET, "MPLS s-bit not set") diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c index 68577e711cc..5ede22aa410 100644 --- a/src/vnet/mpls/mpls_output.c +++ b/src/vnet/mpls/mpls_output.c @@ -19,6 +19,7 @@ #include #include #include +#include typedef struct { /* Adjacency taken. */ @@ -26,8 +27,15 @@ typedef struct { u32 flow_hash; } mpls_output_trace_t; +typedef enum { + MPLS_OUTPUT_MODE, + MPLS_OUTPUT_MIDCHAIN_MODE +}mpls_output_mode_t; + #define foreach_mpls_output_next \ -_(DROP, "error-drop") +_(DROP, "error-drop") \ +_(IP4_FRAG, "ip4-frag") \ +_(IP6_FRAG, "ip6-frag") typedef enum { #define _(s,n) MPLS_OUTPUT_NEXT_##s, @@ -50,11 +58,36 @@ format_mpls_output_trace (u8 * s, va_list * args) return s; } +/* + * Prepare an over-MTU mpls packet for IP fragmentation: save the mpls header length (whole label stack), adjust the current to the ip header, fill in the ip_frag opaque. + * Returns the mpls-output next index of the frag node matching the payload protocol. + */ +static inline u32 +set_mpls_fragmentation(vlib_buffer_t * p0, ip_adjacency_t * adj0) +{ + u32 next0, frag_next0; + u8 is_ip4 = (vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4); /* read once; anything else is handled as IPv6, as before */ + + /* advance size of (all) mpls header to ip header before fragmenting */ + /* save the header length so the labels can be restored on each fragment. */ + vnet_buffer (p0)->mpls.mpls_hdr_length = vnet_buffer(p0)->l3_hdr_offset - p0->current_data; + vlib_buffer_advance (p0, vnet_buffer (p0)->mpls.mpls_hdr_length); + + /* ip4-frag and ip6-frag each have their own next-index enum: hand over the index that belongs to the node selected below */ + frag_next0 = is_ip4 ? (u32) IP4_FRAG_NEXT_MPLS_OUTPUT : (u32) IP6_FRAG_NEXT_MPLS_OUTPUT; + + /* IP fragmentation */ + ip_frag_set_vnet_buffer (p0, adj0[0].rewrite_header.max_l3_packet_bytes, + frag_next0, + (is_ip4 ? IP_FRAG_FLAG_IP4_HEADER:IP_FRAG_FLAG_IP6_HEADER)); + + /* Tell ip_frag to retain certain mpls parameters after fragmentation of mpls packet */ + vnet_buffer (p0)->ip_frag.flags = (vnet_buffer (p0)->ip_frag.flags | IP_FRAG_FLAG_MPLS_HEADER); + next0 = is_ip4 ? MPLS_OUTPUT_NEXT_IP4_FRAG:MPLS_OUTPUT_NEXT_IP6_FRAG; + + return next0; +} + 
static inline uword mpls_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, - int is_midchain) + mpls_output_mode_t mode) { u32 n_left_from, next_index, * from, * to_next, thread_index; vlib_node_runtime_t * error_node; @@ -162,8 +195,11 @@ mpls_output_inline (vlib_main_t * vm, } else { - error0 = IP4_ERROR_MTU_EXCEEDED; - next0 = MPLS_OUTPUT_NEXT_DROP; + error0 = IP4_ERROR_MTU_EXCEEDED; + next0 = set_mpls_fragmentation (p0, adj0); + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_NEED_FRAG, + 1); } if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <= adj1[0].rewrite_header.max_l3_packet_bytes)) @@ -182,10 +218,13 @@ mpls_output_inline (vlib_main_t * vm, } else { - error1 = IP4_ERROR_MTU_EXCEEDED; - next1 = MPLS_OUTPUT_NEXT_DROP; + error1 = IP4_ERROR_MTU_EXCEEDED; + next1 = set_mpls_fragmentation (p1, adj1); + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_NEED_FRAG, + 1); } - if (is_midchain) + if (mode == MPLS_OUTPUT_MIDCHAIN_MODE) { adj0->sub_type.midchain.fixup_func (vm, adj0, p0, @@ -221,7 +260,7 @@ mpls_output_inline (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next > 0) { ip_adjacency_t * adj0; - mpls_unicast_header_t *hdr0; + mpls_unicast_header_t *hdr0; vlib_buffer_t * p0; u32 pi0, adj_index0, next0, error0; word rw_len0; @@ -233,7 +272,7 @@ mpls_output_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; adj0 = adj_get(adj_index0); - hdr0 = vlib_buffer_get_current (p0); + hdr0 = vlib_buffer_get_current (p0); /* Guess we are only writing on simple Ethernet header. 
*/ vnet_rewrite_one_header (adj0[0], hdr0, @@ -268,10 +307,13 @@ mpls_output_inline (vlib_main_t * vm, } else { - error0 = IP4_ERROR_MTU_EXCEEDED; - next0 = MPLS_OUTPUT_NEXT_DROP; + error0 = IP4_ERROR_MTU_EXCEEDED; + next0 = set_mpls_fragmentation (p0, adj0); + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_NEED_FRAG, + 1); } - if (is_midchain) + if (mode == MPLS_OUTPUT_MIDCHAIN_MODE) { adj0->sub_type.midchain.fixup_func (vm, adj0, p0, @@ -317,7 +359,7 @@ VLIB_NODE_FN (mpls_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { - return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 0)); + return (mpls_output_inline(vm, node, from_frame, MPLS_OUTPUT_MODE)); } VLIB_REGISTER_NODE (mpls_output_node) = { @@ -341,7 +383,7 @@ VLIB_NODE_FN (mpls_midchain_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { - return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 1)); + return (mpls_output_inline(vm, node, from_frame, MPLS_OUTPUT_MIDCHAIN_MODE)); } VLIB_REGISTER_NODE (mpls_midchain_node) = { diff --git a/test/test_mpls.py b/test/test_mpls.py index 7388cf46c73..5b3054801e7 100644 --- a/test/test_mpls.py +++ b/test/test_mpls.py @@ -379,6 +379,30 @@ class TestMPLS(VppTestCase): except: raise + def verify_capture_fragmented_labelled_ip4(self, src_if, capture, sent, + mpls_labels, ip_ttl=None): + try: + capture = verify_filter(capture, sent) + + for i in range(len(capture)): + tx = sent[0] + rx = capture[i] + tx_ip = tx[IP] + rx_ip = rx[IP] + + verify_mpls_stack(self, rx, mpls_labels) + + self.assertEqual(rx_ip.src, tx_ip.src) + self.assertEqual(rx_ip.dst, tx_ip.dst) + if not ip_ttl: + # no ip_ttl given: every received fragment must carry the first sent packet's IP TTL minus one + self.assertEqual(rx_ip.ttl + 1, tx_ip.ttl) + else: + self.assertEqual(rx_ip.ttl, ip_ttl) + + except: + raise + def test_swap(self): """ MPLS label swap tests """ @@ -851,6 +875,38 @@ class TestMPLS(VppTestCase): 
route_10_0_0_2.remove_vpp_config() route_10_0_0_1.remove_vpp_config() + def test_imposition_fragmentation(self): + """ MPLS label imposition fragmentation test """ + + # + # Add an IPv4 non-recursive route with a single out label + # + route_10_0_0_1 = VppIpRoute(self, "10.0.0.1", 32, + [VppRoutePath(self.pg0.remote_ip4, + self.pg0.sw_if_index, + labels=[VppMplsLabel(32)])]) + route_10_0_0_1.add_vpp_config() + + # + # a stream that matches the route for 10.0.0.1, each packet extended via extend_packet(..., 10000) + # PG0 is in the default table + # + tx = self.create_stream_ip4(self.pg0, "10.0.0.1") + for i in range(0, 257): + self.extend_packet(tx[i], 10000) + + # + # expect 5 fragments per sent packet: 257 * 5 = 1285 received packets + # + rx = self.send_and_expect(self.pg0, tx, self.pg0, 1285) + self.verify_capture_fragmented_labelled_ip4(self.pg0, rx, tx, + [VppMplsLabel(32)]) + + # + # cleanup + # + route_10_0_0_1.remove_vpp_config() + def test_tunnel_pipe(self): """ MPLS Tunnel Tests - Pipe """ -- cgit 1.2.3-korg