diff options
author | Rajesh Goel <rajegoel@cisco.com> | 2019-10-06 13:17:36 +0530 |
---|---|---|
committer | Ole Trøan <otroan@employees.org> | 2019-10-09 09:20:51 +0000 |
commit | d6f1c9c5141c177a14d011a514e392a9357398fb (patch) | |
tree | c4a08dd88f69035ae659301980a97fe83923f85a /src | |
parent | ab96454e3aba8e60094e773a39e9d28f50d7192e (diff) | |
mpls: support fragmentation of mpls output packet
Type: fix
Signed-off-by: Rajesh Goel <rajegoel@cisco.com>
Change-Id: Ie4372c5cf58ab215cdec5ce56f8a994daaba2844
Diffstat (limited to 'src')
-rw-r--r-- | src/vnet/buffer.h | 4 |
-rw-r--r-- | src/vnet/dpo/mpls_label_dpo.c | 18 |
-rw-r--r-- | src/vnet/ip/ip_frag.c | 40 |
-rw-r--r-- | src/vnet/ip/ip_frag.h | 3 |
-rw-r--r-- | src/vnet/mpls/error.def | 1 |
-rw-r--r-- | src/vnet/mpls/mpls_output.c | 70 |
6 files changed, 120 insertions, 16 deletions
diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 708e399047d..d160ae8c9af 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -246,8 +246,12 @@ typedef struct u8 ttl; u8 exp; u8 first; + u8 pyld_proto:3; /* dpo_proto_t */ + u8 rsvd:5; /* Rewrite length */ u32 save_rewrite_length; + /* Save the mpls header length including all label stack */ + u8 mpls_hdr_length; /* * BIER - the number of bytes in the header. * the len field in the header is not authoritative. It's the diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c index 1074a959310..9d147f98f13 100644 --- a/src/vnet/dpo/mpls_label_dpo.c +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -484,6 +484,12 @@ mpls_label_imposition_inline (vlib_main_t * vm, exp2 = ip_dscp_to_mpls_exp(ip2->tos); exp3 = ip_dscp_to_mpls_exp(ip3->tos); } + + /* save the payload proto information in mpls opaque */ + vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP4; + vnet_buffer(b1)->mpls.pyld_proto = DPO_PROTO_IP4; + vnet_buffer(b2)->mpls.pyld_proto = DPO_PROTO_IP4; + vnet_buffer(b3)->mpls.pyld_proto = DPO_PROTO_IP4; } else if (DPO_PROTO_IP6 == dproto) { @@ -518,6 +524,12 @@ mpls_label_imposition_inline (vlib_main_t * vm, exp3 = ip_dscp_to_mpls_exp( ip6_traffic_class_network_order(ip3)); } + + /* save the payload proto information in mpls opaque */ + vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP6; + vnet_buffer(b1)->mpls.pyld_proto = DPO_PROTO_IP6; + vnet_buffer(b2)->mpls.pyld_proto = DPO_PROTO_IP6; + vnet_buffer(b3)->mpls.pyld_proto = DPO_PROTO_IP6; } else { @@ -787,6 +799,9 @@ mpls_label_imposition_inline (vlib_main_t * vm, ttl0 = ip0->ttl; exp0 = ip_dscp_to_mpls_exp(ip0->tos); } + + /* save the payload proto information in mpls opaque */ + vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP4; } else if (DPO_PROTO_IP6 == dproto) { @@ -805,6 +820,9 @@ mpls_label_imposition_inline (vlib_main_t * vm, exp0 = ip_dscp_to_mpls_exp( ip6_traffic_class_network_order(ip0)); } + + /* save the payload proto information in 
mpls opaque */ + vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP6; } else { diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c index fd5bc6fa0ba..230722c45db 100644 --- a/src/vnet/ip/ip_frag.c +++ b/src/vnet/ip/ip_frag.c @@ -23,6 +23,25 @@ #include <vnet/ip/ip.h> +/* + * Copy the mpls header if present. + * The current is pointing to the ip header. + * Adjust the buffer and point to the mpls headers on these fragments + * before sending the packet back to mpls-output node. + */ +static inline void +copy_mpls_hdr (vlib_buffer_t * to_b, vlib_buffer_t * from_b) +{ + if ((vnet_buffer (from_b)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER) + { + u8 mpls_hdr_length = vnet_buffer (from_b)->mpls.mpls_hdr_length; + u8 *org_from_mpls_packet = + from_b->data + (from_b->current_data - mpls_hdr_length); + clib_memcpy_fast ((to_b->data - mpls_hdr_length), org_from_mpls_packet, + mpls_hdr_length); + vlib_buffer_advance (to_b, -vnet_buffer (to_b)->mpls.mpls_hdr_length); + } +} typedef struct { @@ -38,8 +57,8 @@ format_ip_frag_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *); - s = format (s, "IPv%s mtu: %u fragments: %u", - t->ipv6 ? "6" : "4", t->mtu, t->n_fragments); + s = format (s, "IPv%s mtu: %u fragments: %u next: %d", + t->ipv6 ? 
"6" : "4", t->mtu, t->n_fragments, t->next); return s; } @@ -68,6 +87,14 @@ frag_set_sw_if_index (vlib_buffer_t * to, vlib_buffer_t * from) vnet_buffer2 (to)->qos = vnet_buffer2 (from)->qos; to->flags |= VNET_BUFFER_F_QOS_DATA_VALID; } + + /* Copy mpls opaque data */ + if ((vnet_buffer (from)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER) + { + vnet_buffer (to)->mpls.pyld_proto = vnet_buffer (from)->mpls.pyld_proto; + vnet_buffer (to)->mpls.mpls_hdr_length = + vnet_buffer (from)->mpls.mpls_hdr_length; + } } static vlib_buffer_t * @@ -232,6 +259,10 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, clib_host_to_net_u16 (to_b->current_length - sizeof (*encap_header6)); } + + /* Copy mpls header if present */ + copy_mpls_hdr (to_b, org_from_b); + rem -= len; fo += len; } @@ -492,6 +523,9 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, to_frag_hdr->next_hdr = ip6->protocol; to_frag_hdr->rsv = 0; + /* Copy mpls header if present */ + copy_mpls_hdr (to_b, org_from_b); + rem -= len; fo += len; } @@ -519,6 +553,7 @@ VLIB_REGISTER_NODE (ip4_frag_node) = { [IP4_FRAG_NEXT_IP4_REWRITE] = "ip4-rewrite", [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output", [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", [IP4_FRAG_NEXT_DROP] = "ip4-drop" }, @@ -541,6 +576,7 @@ VLIB_REGISTER_NODE (ip6_frag_node) = { [IP6_FRAG_NEXT_IP6_REWRITE] = "ip6-rewrite", [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP6_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output", [IP6_FRAG_NEXT_DROP] = "ip6-drop" }, }; diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h index 06eeee82bc6..b66db416129 100644 --- a/src/vnet/ip/ip_frag.h +++ b/src/vnet/ip/ip_frag.h @@ -39,6 +39,7 @@ #define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header #define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header +#define IP_FRAG_FLAG_MPLS_HEADER 0x04 //Encapsulating 
MPLS header #define IP4_FRAG_NODE_NAME "ip4-frag" #define IP6_FRAG_NODE_NAME "ip6-frag" @@ -51,6 +52,7 @@ typedef enum IP4_FRAG_NEXT_IP4_REWRITE, IP4_FRAG_NEXT_IP4_LOOKUP, IP4_FRAG_NEXT_IP6_LOOKUP, + IP4_FRAG_NEXT_MPLS_OUTPUT, IP4_FRAG_NEXT_ICMP_ERROR, IP4_FRAG_NEXT_DROP, IP4_FRAG_N_NEXT @@ -61,6 +63,7 @@ typedef enum IP6_FRAG_NEXT_IP4_LOOKUP, IP6_FRAG_NEXT_IP6_LOOKUP, IP6_FRAG_NEXT_IP6_REWRITE, + IP6_FRAG_NEXT_MPLS_OUTPUT, IP6_FRAG_NEXT_DROP, IP6_FRAG_N_NEXT } ip6_frag_next_t; diff --git a/src/vnet/mpls/error.def b/src/vnet/mpls/error.def index 34a46522894..9941b18baf4 100644 --- a/src/vnet/mpls/error.def +++ b/src/vnet/mpls/error.def @@ -20,6 +20,7 @@ mpls_error (UNKNOWN_PROTOCOL, "unknown protocol") mpls_error (UNSUPPORTED_VERSION, "unsupported version") mpls_error (PKTS_DECAP, "MPLS input packets decapsulated") mpls_error (PKTS_ENCAP, "MPLS output packets encapsulated") +mpls_error (PKTS_NEED_FRAG, "MPLS output packets needs fragmentation") mpls_error (NO_LABEL, "MPLS no label for fib/dst") mpls_error (TTL_EXPIRED, "MPLS ttl expired") mpls_error (S_NOT_SET, "MPLS s-bit not set") diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c index 68577e711cc..5ede22aa410 100644 --- a/src/vnet/mpls/mpls_output.c +++ b/src/vnet/mpls/mpls_output.c @@ -19,6 +19,7 @@ #include <vnet/pg/pg.h> #include <vnet/ip/ip.h> #include <vnet/mpls/mpls.h> +#include <vnet/ip/ip_frag.h> typedef struct { /* Adjacency taken. 
*/ @@ -26,8 +27,15 @@ typedef struct { u32 flow_hash; } mpls_output_trace_t; +typedef enum { + MPLS_OUTPUT_MODE, + MPLS_OUTPUT_MIDCHAIN_MODE +}mpls_output_mode_t; + #define foreach_mpls_output_next \ -_(DROP, "error-drop") +_(DROP, "error-drop") \ +_(IP4_FRAG, "ip4-frag") \ +_(IP6_FRAG, "ip6-frag") typedef enum { #define _(s,n) MPLS_OUTPUT_NEXT_##s, @@ -50,11 +58,36 @@ format_mpls_output_trace (u8 * s, va_list * args) return s; } +/* + * Save the mpls header length and adjust the current to ip header + */ +static inline u32 +set_mpls_fragmentation(vlib_buffer_t * p0, ip_adjacency_t * adj0) +{ + u32 next0; + + /* advance size of (all) mpls header to ip header before fragmenting */ + /* save the current pointing to first mpls header. */ + vnet_buffer (p0)->mpls.mpls_hdr_length = vnet_buffer(p0)->l3_hdr_offset - p0->current_data; + vlib_buffer_advance (p0, vnet_buffer (p0)->mpls.mpls_hdr_length); + + /* IP fragmentation */ + ip_frag_set_vnet_buffer (p0, adj0[0].rewrite_header.max_l3_packet_bytes, + IP4_FRAG_NEXT_MPLS_OUTPUT, + ((vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4) ? IP_FRAG_FLAG_IP4_HEADER:IP_FRAG_FLAG_IP6_HEADER)); + + /* Tell ip_frag to retain certain mpls parameters after fragmentation of mpls packet */ + vnet_buffer (p0)->ip_frag.flags = (vnet_buffer (p0)->ip_frag.flags | IP_FRAG_FLAG_MPLS_HEADER); + next0 = (vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4)? 
MPLS_OUTPUT_NEXT_IP4_FRAG:MPLS_OUTPUT_NEXT_IP6_FRAG; + + return next0; +} + static inline uword mpls_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, - int is_midchain) + mpls_output_mode_t mode) { u32 n_left_from, next_index, * from, * to_next, thread_index; vlib_node_runtime_t * error_node; @@ -162,8 +195,11 @@ mpls_output_inline (vlib_main_t * vm, } else { - error0 = IP4_ERROR_MTU_EXCEEDED; - next0 = MPLS_OUTPUT_NEXT_DROP; + error0 = IP4_ERROR_MTU_EXCEEDED; + next0 = set_mpls_fragmentation (p0, adj0); + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_NEED_FRAG, + 1); } if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <= adj1[0].rewrite_header.max_l3_packet_bytes)) @@ -182,10 +218,13 @@ mpls_output_inline (vlib_main_t * vm, } else { - error1 = IP4_ERROR_MTU_EXCEEDED; - next1 = MPLS_OUTPUT_NEXT_DROP; + error1 = IP4_ERROR_MTU_EXCEEDED; + next1 = set_mpls_fragmentation (p1, adj1); + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_NEED_FRAG, + 1); } - if (is_midchain) + if (mode == MPLS_OUTPUT_MIDCHAIN_MODE) { adj0->sub_type.midchain.fixup_func (vm, adj0, p0, @@ -221,7 +260,7 @@ mpls_output_inline (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next > 0) { ip_adjacency_t * adj0; - mpls_unicast_header_t *hdr0; + mpls_unicast_header_t *hdr0; vlib_buffer_t * p0; u32 pi0, adj_index0, next0, error0; word rw_len0; @@ -233,7 +272,7 @@ mpls_output_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; adj0 = adj_get(adj_index0); - hdr0 = vlib_buffer_get_current (p0); + hdr0 = vlib_buffer_get_current (p0); /* Guess we are only writing on simple Ethernet header. 
*/ vnet_rewrite_one_header (adj0[0], hdr0, @@ -268,10 +307,13 @@ mpls_output_inline (vlib_main_t * vm, } else { - error0 = IP4_ERROR_MTU_EXCEEDED; - next0 = MPLS_OUTPUT_NEXT_DROP; + error0 = IP4_ERROR_MTU_EXCEEDED; + next0 = set_mpls_fragmentation (p0, adj0); + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_NEED_FRAG, + 1); } - if (is_midchain) + if (mode == MPLS_OUTPUT_MIDCHAIN_MODE) { adj0->sub_type.midchain.fixup_func (vm, adj0, p0, @@ -317,7 +359,7 @@ VLIB_NODE_FN (mpls_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { - return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 0)); + return (mpls_output_inline(vm, node, from_frame, MPLS_OUTPUT_MODE)); } VLIB_REGISTER_NODE (mpls_output_node) = { @@ -341,7 +383,7 @@ VLIB_NODE_FN (mpls_midchain_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { - return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 1)); + return (mpls_output_inline(vm, node, from_frame, MPLS_OUTPUT_MIDCHAIN_MODE)); } VLIB_REGISTER_NODE (mpls_midchain_node) = { |