From f126e746fc01c75bc99329d10ce9127b26b23814 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Thu, 10 Oct 2019 09:46:06 +0000 Subject: nat: use SVR Remove NAT's implementation of shallow virtual reassembly with corresponding CLIs, APIs & tests. Replace with standalone shallow virtual reassembly provided by ipX-sv-reass* nodes. Type: refactor Change-Id: I7e6c7487a5a500d591f6871474a359e0993e59b6 Signed-off-by: Klement Sekera --- src/vnet/ip/ip4_packet.h | 4 +- src/vnet/ip/ip4_to_ip6.h | 225 +-------------------------- src/vnet/ip/ip6_packet.h | 61 ++++++++ src/vnet/ip/ip6_to_ip4.h | 325 ++++++++++----------------------------- src/vnet/ip/reass/ip4_sv_reass.c | 208 ++++++++++++++++++++++--- src/vnet/ip/reass/ip4_sv_reass.h | 2 + src/vnet/ip/reass/ip6_sv_reass.c | 100 +++++++++--- 7 files changed, 413 insertions(+), 512 deletions(-) (limited to 'src/vnet/ip') diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h index 79cf22c4d70..1d3607ea34c 100644 --- a/src/vnet/ip/ip4_packet.h +++ b/src/vnet/ip/ip4_packet.h @@ -196,13 +196,13 @@ typedef union #define IP4_ROUTER_ALERT_OPTION 20 -always_inline int +always_inline u16 ip4_get_fragment_offset (const ip4_header_t * i) { return clib_net_to_host_u16 (i->flags_and_fragment_offset) & 0x1fff; } -always_inline int +always_inline u16 ip4_get_fragment_more (const ip4_header_t * i) { return clib_net_to_host_u16 (i->flags_and_fragment_offset) & diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h index b1905e4154b..21538a90e44 100644 --- a/src/vnet/ip/ip4_to_ip6.h +++ b/src/vnet/ip/ip4_to_ip6.h @@ -25,8 +25,8 @@ /** * IPv4 to IPv6 set call back function type */ -typedef int (*ip4_to_ip6_set_fn_t) (ip4_header_t * ip4, ip6_header_t * ip6, - void *ctx); +typedef int (*ip4_to_ip6_set_fn_t) (vlib_buffer_t * b, ip4_header_t * ip4, + ip6_header_t * ip6, void *ctx); /* *INDENT-OFF* */ static u8 icmp_to_icmp6_updater_pointer_table[] = @@ -261,7 +261,7 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, 
void *ctx, sizeof (*inner_frag)); ip6 = vlib_buffer_get_current (p); memmove (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4, - 20 + 8); + 20 + 8); ip4 = (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); icmp = (icmp46_header_t *) (ip4 + 1); @@ -287,7 +287,7 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx, vlib_buffer_advance (p, -2 * (sizeof (*ip6) - sizeof (*ip4))); ip6 = vlib_buffer_get_current (p); memmove (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4, - 20 + 8); + 20 + 8); ip4 = (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); icmp = (icmp46_header_t *) u8_ptr_add (ip4, sizeof (*ip4)); @@ -340,7 +340,7 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx, inner_ip6->hop_limit = inner_ip4->ttl; inner_ip6->protocol = inner_ip4->protocol; - if ((rv = inner_fn (inner_ip4, inner_ip6, inner_ctx)) != 0) + if ((rv = inner_fn (p, inner_ip4, inner_ip6, inner_ctx)) != 0) return rv; if (PREDICT_FALSE (inner_frag != NULL)) @@ -411,7 +411,7 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx, ip6->hop_limit = ip4->ttl; ip6->protocol = IP_PROTOCOL_ICMP6; - if ((rv = fn (ip4, ip6, ctx)) != 0) + if ((rv = fn (p, ip4, ip6, ctx)) != 0) return rv; //Truncate when the packet exceeds the minimal IPv6 MTU @@ -437,217 +437,4 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx, return 0; } -/** - * @brief Translate IPv4 fragmented packet to IPv6. - * - * @param p Buffer to translate. - * @param fn The function to translate header. - * @param ctx A context passed in the header translate function. - * - * @returns 0 on success, non-zero value otherwise. 
- */ -always_inline int -ip4_to_ip6_fragmented (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx) -{ - ip4_header_t *ip4; - ip6_header_t *ip6; - ip6_frag_hdr_t *frag; - int rv; - - ip4 = vlib_buffer_get_current (p); - frag = (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); - ip6 = - (ip6_header_t *) u8_ptr_add (ip4, - sizeof (*ip4) - sizeof (*frag) - - sizeof (*ip6)); - vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); - - //We know that the protocol was one of ICMP, TCP or UDP - //because the first fragment was found and cached - frag->next_hdr = - (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol; - frag->identification = frag_id_4to6 (ip4->fragment_id); - frag->rsv = 0; - frag->fragment_offset_and_more = - ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (ip4), - clib_net_to_host_u16 - (ip4->flags_and_fragment_offset) & - IP4_HEADER_FLAG_MORE_FRAGMENTS); - - ip6->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); - ip6->payload_length = - clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - - sizeof (*ip4) + sizeof (*frag)); - ip6->hop_limit = ip4->ttl; - ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; - - if ((rv = fn (ip4, ip6, ctx)) != 0) - return rv; - - return 0; -} - -/** - * @brief Translate IPv4 UDP/TCP packet to IPv6. - * - * @param p Buffer to translate. - * @param fn The function to translate header. - * @param ctx A context passed in the header translate function. - * - * @returns 0 on success, non-zero value otherwise. 
- */ -always_inline int -ip4_to_ip6_tcp_udp (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx) -{ - ip4_header_t *ip4; - ip6_header_t *ip6; - ip_csum_t csum; - u16 *checksum; - ip6_frag_hdr_t *frag; - u32 frag_id; - int rv; - ip4_address_t old_src, old_dst; - - ip4 = vlib_buffer_get_current (p); - - if (ip4->protocol == IP_PROTOCOL_UDP) - { - udp_header_t *udp = ip4_next_header (ip4); - checksum = &udp->checksum; - - //UDP checksum is optional over IPv4 but mandatory for IPv6 - //We do not check udp->length sanity but use our safe computed value instead - if (PREDICT_FALSE (!*checksum)) - { - u16 udp_len = clib_host_to_net_u16 (ip4->length) - sizeof (*ip4); - csum = ip_incremental_checksum (0, udp, udp_len); - csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len)); - csum = - ip_csum_with_carry (csum, clib_host_to_net_u16 (IP_PROTOCOL_UDP)); - csum = ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address))); - *checksum = ~ip_csum_fold (csum); - } - } - else - { - tcp_header_t *tcp = ip4_next_header (ip4); - checksum = &tcp->checksum; - } - - old_src.as_u32 = ip4->src_address.as_u32; - old_dst.as_u32 = ip4->dst_address.as_u32; - - // Deal with fragmented packets - if (PREDICT_FALSE (ip4->flags_and_fragment_offset & - clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS))) - { - ip6 = - (ip6_header_t *) u8_ptr_add (ip4, - sizeof (*ip4) - sizeof (*ip6) - - sizeof (*frag)); - frag = - (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); - frag_id = frag_id_4to6 (ip4->fragment_id); - vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); - } - else - { - ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); - vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); - frag = NULL; - } - - ip6->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); - ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); - ip6->hop_limit = ip4->ttl; - ip6->protocol = 
ip4->protocol; - - if (PREDICT_FALSE (frag != NULL)) - { - frag->next_hdr = ip6->protocol; - frag->identification = frag_id; - frag->rsv = 0; - frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1); - ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; - ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); - } - - if ((rv = fn (ip4, ip6, ctx)) != 0) - return rv; - - csum = ip_csum_sub_even (*checksum, old_src.as_u32); - csum = ip_csum_sub_even (csum, old_dst.as_u32); - csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); - csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); - *checksum = ip_csum_fold (csum); - - return 0; -} - -/** - * @brief Translate IPv4 packet to IPv6 (IP header only). - * - * @param p Buffer to translate. - * @param fn The function to translate header. - * @param ctx A context passed in the header translate function. - * - * @returns 0 on success, non-zero value otherwise. 
- */ -always_inline int -ip4_to_ip6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx) -{ - ip4_header_t *ip4; - ip6_header_t *ip6; - ip6_frag_hdr_t *frag; - u32 frag_id; - int rv; - - ip4 = vlib_buffer_get_current (p); - - // Deal with fragmented packets - if (PREDICT_FALSE (ip4->flags_and_fragment_offset & - clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS))) - { - ip6 = - (ip6_header_t *) u8_ptr_add (ip4, - sizeof (*ip4) - sizeof (*ip6) - - sizeof (*frag)); - frag = - (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); - frag_id = frag_id_4to6 (ip4->fragment_id); - vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); - } - else - { - ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); - vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); - frag = NULL; - } - - ip6->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); - ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); - ip6->hop_limit = ip4->ttl; - ip6->protocol = ip4->protocol; - - if (PREDICT_FALSE (frag != NULL)) - { - frag->next_hdr = ip6->protocol; - frag->identification = frag_id; - frag->rsv = 0; - frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1); - ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; - ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); - } - - if ((rv = fn (ip4, ip6, ctx)) != 0) - return rv; - - return 0; -} - #endif /* __included_ip4_to_ip6_h__ */ diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index 2ba55b75e09..e823214dac9 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -579,6 +579,67 @@ ip6_ext_header_find (vlib_main_t * vm, vlib_buffer_t * b, return result; } +/* + * walk extension headers, looking for a specific extension header and last + * extension header, calculating length of all extension headers + * + * @param vm + * @param b buffer to limit search to + * @param ip6_header 
ipv6 header + * @param find_hdr extension header to look for (ignored if ext_hdr is NULL) + * @param length[out] length of all extension headers + * @param ext_hdr[out] extension header of type find_hdr (may be NULL) + * @param last_ext_hdr[out] last extension header (may be NULL) + * + * @return 0 on success, -1 on failure (ext headers crossing buffer boundary) + */ +always_inline int +ip6_walk_ext_hdr (vlib_main_t * vm, vlib_buffer_t * b, + const ip6_header_t * ip6_header, u8 find_hdr, u32 * length, + ip6_ext_header_t ** ext_hdr, + ip6_ext_header_t ** last_ext_hdr) +{ + if (!ip6_ext_hdr (ip6_header->protocol)) + { + *length = 0; + *ext_hdr = NULL; + *last_ext_hdr = NULL; + return 0; + } + *length = 0; + ip6_ext_header_t *h = (void *) (ip6_header + 1); + if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h))) + { + return -1; + } + *length += ip6_ext_header_len (h); + *last_ext_hdr = h; + *ext_hdr = NULL; + if (ip6_header->protocol == find_hdr) + { + *ext_hdr = h; + } + while (ip6_ext_hdr (h->next_hdr)) + { + if (h->next_hdr == find_hdr) + { + h = ip6_ext_next_header (h); + *ext_hdr = h; + } + else + { + h = ip6_ext_next_header (h); + } + if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h))) + { + return -1; + } + *length += ip6_ext_header_len (h); + *last_ext_hdr = h; + } + return 0; +} + /* *INDENT-OFF* */ typedef CLIB_PACKED (struct { u8 next_hdr; diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h index d13a0c1f104..17a11e6df0a 100644 --- a/src/vnet/ip/ip6_to_ip4.h +++ b/src/vnet/ip/ip6_to_ip4.h @@ -24,8 +24,12 @@ /** * IPv6 to IPv4 set call back function type */ -typedef int (*ip6_to_ip4_set_fn_t) (ip6_header_t * ip6, ip4_header_t * ip4, - void *ctx); +typedef int (*ip6_to_ip4_icmp_set_fn_t) (ip6_header_t * ip6, + ip4_header_t * ip4, void *ctx); + +typedef int (*ip6_to_ip4_tcp_udp_set_fn_t) (vlib_buffer_t * b, + ip6_header_t * ip6, + ip4_header_t * ip4, void *ctx); /* *INDENT-OFF* */ static u8 
icmp6_to_icmp_updater_pointer_table[] = @@ -47,6 +51,8 @@ static u8 icmp6_to_icmp_updater_pointer_table[] = /** * @brief Parse some useful information from IPv6 header. * + * @param vm vlib main + * @param b vlib buffer * @param ip6 IPv6 header. * @param buff_len Buffer length. * @param l4_protocol L4 protocol number. @@ -56,21 +62,37 @@ static u8 icmp6_to_icmp_updater_pointer_table[] = * @returns 0 on success, non-zero value otherwise. */ static_always_inline int -ip6_parse (const ip6_header_t * ip6, u32 buff_len, - u8 * l4_protocol, u16 * l4_offset, u16 * frag_hdr_offset) +ip6_parse (vlib_main_t * vm, vlib_buffer_t * b, const ip6_header_t * ip6, + u32 buff_len, u8 * l4_protocol, u16 * l4_offset, + u16 * frag_hdr_offset) { - if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) + ip6_ext_header_t *last_hdr, *frag_hdr; + u32 length; + if (ip6_walk_ext_hdr + (vm, b, ip6, IP_PROTOCOL_IPV6_FRAGMENTATION, &length, &frag_hdr, + &last_hdr)) { - *l4_protocol = ((ip6_frag_hdr_t *) (ip6 + 1))->next_hdr; - *frag_hdr_offset = sizeof (*ip6); - *l4_offset = sizeof (*ip6) + sizeof (ip6_frag_hdr_t); + return -1; + } + + if (length > 0) + { + if (frag_hdr) + { + *frag_hdr_offset = (u8 *) frag_hdr - (u8 *) ip6; + } + else + { + *frag_hdr_offset = 0; + } + *l4_protocol = last_hdr->next_hdr; } else { - *l4_protocol = ip6->protocol; *frag_hdr_offset = 0; - *l4_offset = sizeof (*ip6); + *l4_protocol = ip6->protocol; } + *l4_offset = sizeof (*ip6) + length; return (buff_len < (*l4_offset + 4)) || (clib_net_to_host_u16 (ip6->payload_length) < @@ -78,23 +100,32 @@ ip6_parse (const ip6_header_t * ip6, u32 buff_len, } /** - * @brief Get TCP/UDP port number or ICMP id from IPv6 packet. + * @brief Get L4 information like port number or ICMP id from IPv6 packet. * * @param ip6 IPv6 header. - * @param sender 1 get sender port, 0 get receiver port. * @param buffer_len Buffer length. 
+ * @param ip_protocol L4 protocol + * @param src_port L4 src port or icmp id + * @param dst_port L4 dst port or icmp id + * @param icmp_type_or_tcp_flags ICMP type or TCP flags, if applicable + * @param tcp_ack_number TCP ack number, if applicable + * @param tcp_seq_number TCP seq number, if applicable * - * @returns Port number on success, 0 otherwise. + * @returns 1 on success, 0 otherwise. */ always_inline u16 -ip6_get_port (ip6_header_t * ip6, u8 sender, u16 buffer_len) +ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6, + u16 buffer_len, u8 * ip_protocol, u16 * src_port, + u16 * dst_port, u8 * icmp_type_or_tcp_flags, + u32 * tcp_ack_number, u32 * tcp_seq_number) { u8 l4_protocol; u16 l4_offset; u16 frag_offset; u8 *l4; - if (ip6_parse (ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset)) + if (ip6_parse + (vm, b, ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset)) return 0; if (frag_offset && @@ -102,26 +133,45 @@ ip6_get_port (ip6_header_t * ip6, u8 sender, u16 buffer_len) u8_ptr_add (ip6, frag_offset)))) return 0; //Can't deal with non-first fragment for now + if (ip_protocol) + { + *ip_protocol = l4_protocol; + } l4 = u8_ptr_add (ip6, l4_offset); if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP) { - return (sender) ? 
((udp_header_t *) (l4))->src_port : ((udp_header_t - *) - (l4))->dst_port; + if (src_port) + *src_port = ((udp_header_t *) (l4))->src_port; + if (dst_port) + *dst_port = ((udp_header_t *) (l4))->dst_port; + if (icmp_type_or_tcp_flags && l4_protocol == IP_PROTOCOL_TCP) + *icmp_type_or_tcp_flags = ((tcp_header_t *) (l4))->flags; + if (tcp_ack_number && l4_protocol == IP_PROTOCOL_TCP) + *tcp_ack_number = ((tcp_header_t *) (l4))->ack_number; + if (tcp_seq_number && l4_protocol == IP_PROTOCOL_TCP) + *tcp_seq_number = ((tcp_header_t *) (l4))->seq_number; } else if (l4_protocol == IP_PROTOCOL_ICMP6) { icmp46_header_t *icmp = (icmp46_header_t *) (l4); + if (icmp_type_or_tcp_flags) + *icmp_type_or_tcp_flags = ((icmp46_header_t *) (l4))->type; if (icmp->type == ICMP6_echo_request) { - return (sender) ? ((u16 *) (icmp))[2] : -1; + if (src_port) + *src_port = ((u16 *) (icmp))[2]; + if (dst_port) + *dst_port = ((u16 *) (icmp))[2]; } else if (icmp->type == ICMP6_echo_reply) { - return (sender) ? -1 : ((u16 *) (icmp))[2]; + if (src_port) + *src_port = ((u16 *) (icmp))[2]; + if (dst_port) + *dst_port = ((u16 *) (icmp))[2]; } } - return 0; + return 1; } /** @@ -223,14 +273,14 @@ icmp6_to_icmp_header (icmp46_header_t * icmp, ip6_header_t ** inner_ip6) /** * @brief Translate TOS value from IPv6 to IPv4. * - * @param ip6 IPv6 header. + * @param ip_version_traffic_class_and_flow_label in network byte order * * @returns IPv4 TOS value. */ static_always_inline u8 -ip6_translate_tos (const ip6_header_t * ip6) +ip6_translate_tos (u32 ip_version_traffic_class_and_flow_label) { - return (clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label) + return (clib_net_to_host_u32 (ip_version_traffic_class_and_flow_label) & 0x0ff00000) >> 20; } @@ -246,8 +296,9 @@ ip6_translate_tos (const ip6_header_t * ip6) * @returns 0 on success, non-zero value otherwise. 
*/ always_inline int -icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, - ip6_to_ip4_set_fn_t inner_fn, void *inner_ctx) +icmp6_to_icmp (vlib_main_t * vm, vlib_buffer_t * p, + ip6_to_ip4_icmp_set_fn_t fn, void *ctx, + ip6_to_ip4_icmp_set_fn_t inner_fn, void *inner_ctx) { ip6_header_t *ip6, *inner_ip6; ip4_header_t *ip4, *inner_ip4; @@ -285,7 +336,7 @@ icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, // TO // [ IPv4][IC][ IPv4][L4 header ... - if (ip6_parse (inner_ip6, ip6_pay_len - 8, + if (ip6_parse (vm, p, inner_ip6, ip6_pay_len - 8, &inner_protocol, &inner_l4_offset, &inner_frag_offset)) return -1; @@ -336,7 +387,9 @@ icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, inner_ip4->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - inner_ip4->tos = ip6_translate_tos (inner_ip6); + inner_ip4->tos = + ip6_translate_tos + (inner_ip6->ip_version_traffic_class_and_flow_label); inner_ip4->length = u16_net_add (inner_ip6->payload_length, sizeof (*ip4) + sizeof (*ip6) - inner_l4_offset); @@ -389,7 +442,7 @@ icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, ip4->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip4->tos = ip6_translate_tos (ip6); + ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label); ip4->fragment_id = 0; ip4->flags_and_fragment_offset = 0; ip4->ttl = ip6->hop_limit; @@ -412,220 +465,6 @@ icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, return 0; } -/** - * @brief Translate IPv6 fragmented packet to IPv4. - * - * @param p Buffer to translate. - * @param fn The function to translate header. - * @param ctx A context passed in the header translate function. - * - * @returns 0 on success, non-zero value otherwise. 
- */ -always_inline int -ip6_to_ip4_fragmented (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx) -{ - ip6_header_t *ip6; - ip6_frag_hdr_t *frag; - ip4_header_t *ip4; - u16 frag_id; - u8 frag_more; - u16 frag_offset; - u8 l4_protocol; - u16 l4_offset; - int rv; - - ip6 = vlib_buffer_get_current (p); - - if (ip6_parse - (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) - return -1; - - frag = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); - ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); - vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); - - frag_id = frag_id_6to4 (frag->identification); - frag_more = ip6_frag_hdr_more (frag); - frag_offset = ip6_frag_hdr_offset (frag); - - if ((rv = fn (ip6, ip4, ctx)) != 0) - return rv; - - ip4->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip4->tos = ip6_translate_tos (ip6); - ip4->length = u16_net_add (ip6->payload_length, - sizeof (*ip4) - l4_offset + sizeof (*ip6)); - ip4->fragment_id = frag_id; - ip4->flags_and_fragment_offset = - clib_host_to_net_u16 (frag_offset | - (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0)); - ip4->ttl = ip6->hop_limit; - ip4->protocol = - (l4_protocol == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : l4_protocol; - ip4->checksum = ip4_header_checksum (ip4); - - return 0; -} - -/** - * @brief Translate IPv6 UDP/TCP packet to IPv4. - * - * @param p Buffer to translate. - * @param fn The function to translate header. - * @param ctx A context passed in the header translate function. - * - * @returns 0 on success, non-zero value otherwise. 
- */ -always_inline int -ip6_to_ip4_tcp_udp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, - u8 udp_checksum) -{ - ip6_header_t *ip6; - u16 *checksum; - ip_csum_t csum = 0; - ip4_header_t *ip4; - u16 fragment_id; - u16 flags; - u16 frag_offset; - u8 l4_protocol; - u16 l4_offset; - int rv; - ip6_address_t old_src, old_dst; - - ip6 = vlib_buffer_get_current (p); - - if (ip6_parse - (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) - return -1; - - if (l4_protocol == IP_PROTOCOL_TCP) - { - tcp_header_t *tcp = ip6_next_header (ip6); - checksum = &tcp->checksum; - } - else - { - udp_header_t *udp = ip6_next_header (ip6); - checksum = &udp->checksum; - } - - old_src.as_u64[0] = ip6->src_address.as_u64[0]; - old_src.as_u64[1] = ip6->src_address.as_u64[1]; - old_dst.as_u64[0] = ip6->dst_address.as_u64[0]; - old_dst.as_u64[1] = ip6->dst_address.as_u64[1]; - - ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); - - vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); - - if (PREDICT_FALSE (frag_offset)) - { - //Only the first fragment - ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); - fragment_id = frag_id_6to4 (hdr->identification); - flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); - } - else - { - fragment_id = 0; - flags = 0; - } - - if ((rv = fn (ip6, ip4, ctx)) != 0) - return rv; - - ip4->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip4->tos = ip6_translate_tos (ip6); - ip4->length = u16_net_add (ip6->payload_length, - sizeof (*ip4) + sizeof (*ip6) - l4_offset); - ip4->fragment_id = fragment_id; - ip4->flags_and_fragment_offset = flags; - ip4->ttl = ip6->hop_limit; - ip4->protocol = l4_protocol; - ip4->checksum = ip4_header_checksum (ip4); - - //UDP checksum is optional over IPv4 - if (!udp_checksum && l4_protocol == IP_PROTOCOL_UDP) - { - *checksum = 0; - } - else - { - csum = ip_csum_sub_even (*checksum, old_src.as_u64[0]); - csum = ip_csum_sub_even (csum, 
old_src.as_u64[1]); - csum = ip_csum_sub_even (csum, old_dst.as_u64[0]); - csum = ip_csum_sub_even (csum, old_dst.as_u64[1]); - csum = ip_csum_add_even (csum, ip4->dst_address.as_u32); - csum = ip_csum_add_even (csum, ip4->src_address.as_u32); - *checksum = ip_csum_fold (csum); - } - - return 0; -} - -/** - * @brief Translate IPv6 packet to IPv4 (IP header only). - * - * @param p Buffer to translate. - * @param fn The function to translate header. - * @param ctx A context passed in the header translate function. - * - * @returns 0 on success, non-zero value otherwise. - */ -always_inline int -ip6_to_ip4 (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx) -{ - ip6_header_t *ip6; - ip4_header_t *ip4; - u16 fragment_id; - u16 flags; - u16 frag_offset; - u8 l4_protocol; - u16 l4_offset; - int rv; - - ip6 = vlib_buffer_get_current (p); - - if (ip6_parse - (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) - return -1; - - ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); - - vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); - - if (PREDICT_FALSE (frag_offset)) - { - //Only the first fragment - ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); - fragment_id = frag_id_6to4 (hdr->identification); - flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); - } - else - { - fragment_id = 0; - flags = 0; - } - - if ((rv = fn (ip6, ip4, ctx)) != 0) - return rv; - - ip4->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip4->tos = ip6_translate_tos (ip6); - ip4->length = u16_net_add (ip6->payload_length, - sizeof (*ip4) + sizeof (*ip6) - l4_offset); - ip4->fragment_id = fragment_id; - ip4->flags_and_fragment_offset = flags; - ip4->ttl = ip6->hop_limit; - ip4->protocol = l4_protocol; - ip4->checksum = ip4_header_checksum (ip4); - - return 0; -} - #endif /* __included_ip6_to_ip4_h__ */ /* diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c index 
d7130629219..b94e9b28cea 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.c +++ b/src/vnet/ip/reass/ip4_sv_reass.c @@ -98,6 +98,9 @@ typedef struct bool is_complete; // ip protocol u8 ip_proto; + u8 icmp_type_or_tcp_flags; + u32 tcp_ack_number; + u32 tcp_seq_number; // l4 src port u16 l4_src_port; // l4 dst port @@ -151,6 +154,9 @@ typedef struct // reference count for enabling/disabling feature - per interface u32 *feature_use_refcount_per_intf; + // reference count for enabling/disabling feature - per interface + u32 *output_feature_use_refcount_per_intf; + } ip4_sv_reass_main_t; extern ip4_sv_reass_main_t ip4_sv_reass_main; @@ -172,6 +178,7 @@ typedef enum REASS_FRAGMENT_CACHE, REASS_FINISH, REASS_FRAGMENT_FORWARD, + REASS_PASSTHROUGH, } ip4_sv_reass_trace_operation_e; typedef struct @@ -193,7 +200,10 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_sv_reass_trace_t *t = va_arg (*args, ip4_sv_reass_trace_t *); - s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id); + if (REASS_PASSTHROUGH != t->action) + { + s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id); + } switch (t->action) { case REASS_FRAGMENT_CACHE: @@ -211,6 +221,9 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args) t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), clib_net_to_host_u16 (t->l4_dst_port)); break; + case REASS_PASSTHROUGH: + s = format (s, "[not-fragmented]"); + break; } return s; } @@ -223,13 +236,16 @@ ip4_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node, { vlib_buffer_t *b = vlib_get_buffer (vm, bi); ip4_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0])); - t->reass_id = reass->id; + if (reass) + { + t->reass_id = reass->id; + t->op_id = reass->trace_op_counter; + ++reass->trace_op_counter; + } t->action = action; - t->op_id = reass->trace_op_counter; t->ip_proto = ip_proto; t->l4_src_port 
= l4_src_port; t->l4_dst_port = l4_dst_port; - ++reass->trace_op_counter; #if 0 static u8 *s = NULL; s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t); @@ -358,19 +374,29 @@ ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm, always_inline ip4_sv_reass_rc_t ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, ip4_sv_reass_main_t * rm, ip4_sv_reass_per_thread_t * rt, - ip4_sv_reass_t * reass, u32 bi0) + ip4_header_t * ip0, ip4_sv_reass_t * reass, u32 bi0) { - vlib_buffer_t *fb = vlib_get_buffer (vm, bi0); + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK; - ip4_header_t *fip = vlib_buffer_get_current (fb); - const u32 fragment_first = ip4_get_fragment_offset_bytes (fip); + const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0); if (0 == fragment_first) { - reass->ip_proto = fip->protocol; - reass->l4_src_port = ip4_get_port (fip, 1); - reass->l4_dst_port = ip4_get_port (fip, 0); + reass->ip_proto = ip0->protocol; + reass->l4_src_port = ip4_get_port (ip0, 1); + reass->l4_dst_port = ip4_get_port (ip0, 0); if (!reass->l4_src_port || !reass->l4_dst_port) return IP4_SV_REASS_RC_UNSUPP_IP_PROTO; + if (IP_PROTOCOL_TCP == reass->ip_proto) + { + reass->icmp_type_or_tcp_flags = ((tcp_header_t *) (ip0 + 1))->flags; + reass->tcp_ack_number = ((tcp_header_t *) (ip0 + 1))->ack_number; + reass->tcp_seq_number = ((tcp_header_t *) (ip0 + 1))->seq_number; + } + else if (IP_PROTOCOL_ICMP == reass->ip_proto) + { + reass->icmp_type_or_tcp_flags = + ((icmp46_header_t *) (ip0 + 1))->type; + } reass->is_complete = true; vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -383,7 +409,7 @@ ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, vec_add1 (reass->cached_buffers, bi0); if (!reass->is_complete) { - if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { 
ip4_sv_reass_add_trace (vm, node, rm, reass, bi0, REASS_FRAGMENT_CACHE, ~0, ~0, ~0); @@ -398,7 +424,8 @@ ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, always_inline uword ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_feature, bool is_custom) + vlib_frame_t * frame, bool is_feature, + bool is_output_feature, bool is_custom) { u32 *from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, *to_next, next_index; @@ -423,14 +450,52 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, bi0 = from[0]; b0 = vlib_get_buffer (vm, bi0); - ip4_header_t *ip0 = vlib_buffer_get_current (b0); + ip4_header_t *ip0 = + (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0), + is_output_feature * + vnet_buffer (b0)-> + ip.save_rewrite_length); if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0)) { // this is a regular packet - no fragmentation + if (is_custom) + { + next0 = vnet_buffer (b0)->ip.reass.next_index; + } + else + { + next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; + } + vnet_buffer (b0)->ip.reass.save_rewrite_length = + vnet_buffer (b0)->ip.save_rewrite_length; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; + if (IP_PROTOCOL_TCP == ip0->protocol) + { + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + ((tcp_header_t *) (ip0 + 1))->flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + ((tcp_header_t *) (ip0 + 1))->ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + ((tcp_header_t *) (ip0 + 1))->seq_number; + } + else if (IP_PROTOCOL_ICMP == ip0->protocol) + { + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + ((icmp46_header_t *) (ip0 + 1))->type; + } vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0); - next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + 
ip4_sv_reass_add_trace (vm, node, rm, NULL, bi0, + REASS_PASSTHROUGH, + vnet_buffer (b0)->ip.reass.ip_proto, + vnet_buffer (b0)->ip. + reass.l4_src_port, + vnet_buffer (b0)->ip. + reass.l4_dst_port); + } goto packet_enqueue; } const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0); @@ -474,10 +539,27 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (reass->is_complete) { + if (is_custom) + { + next0 = vnet_buffer (b0)->ip.reass.next_index; + } + else + { + next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; + } + vnet_buffer (b0)->ip.reass.save_rewrite_length = + vnet_buffer (b0)->ip.save_rewrite_length; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = + ! !fragment_first; vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + reass->icmp_type_or_tcp_flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + reass->tcp_ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; - next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; error0 = IP4_ERROR_NONE; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -491,7 +573,7 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } ip4_sv_reass_rc_t rc = - ip4_sv_reass_update (vm, node, rm, rt, reass, bi0); + ip4_sv_reass_update (vm, node, rm, rt, ip0, reass, bi0); switch (rc) { case IP4_SV_REASS_RC_OK: @@ -538,7 +620,18 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, to_next[0] = bi0; to_next += 1; n_left_to_next -= 1; + ASSERT (vnet_buffer (b0)->ip.save_rewrite_length < (2 << 14)); + vnet_buffer (b0)->ip.reass.save_rewrite_length = + vnet_buffer (b0)->ip.save_rewrite_length; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = + ! 
!ip4_get_fragment_offset (vlib_buffer_get_current (b0)); vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + reass->icmp_type_or_tcp_flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + reass->tcp_ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -568,10 +661,6 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); vnet_feature_next (&next0, b0); } - if (is_custom) - { - next0 = vnet_buffer (b0)->ip.reass.next_index; - } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); @@ -599,6 +688,7 @@ VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ , + false /* is_output_feature */ , false /* is_custom */ ); } @@ -625,6 +715,7 @@ VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ , + false /* is_output_feature */ , false /* is_custom */ ); } @@ -654,6 +745,42 @@ VNET_FEATURE_INIT (ip4_sv_reass_feature) = { }; /* *INDENT-ON* */ +VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ , + true /* is_output_feature */ , + false /* is_custom */ ); +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = { + .name = "ip4-sv-reassembly-output-feature", + .vector_size = sizeof (u32), + .format_trace = format_ip4_sv_reass_trace, + .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings), + .error_strings = ip4_sv_reass_error_strings, + .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT, + 
.next_nodes = + { + [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input", + [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop", + [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = { + .arc_name = "ip4-output", + .node_name = "ip4-sv-reassembly-output-feature", + .runs_before = 0, + .runs_after = 0, +}; +/* *INDENT-ON* */ + /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = { .name = "ip4-sv-reassembly-custom-next", @@ -677,6 +804,7 @@ VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ , + false /* is_output_feature */ , true /* is_custom */ ); } @@ -824,6 +952,7 @@ ip4_sv_reass_init_function (vlib_main_t * vm) vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0); rm->feature_use_refcount_per_intf = NULL; + rm->output_feature_use_refcount_per_intf = NULL; return error; } @@ -1010,9 +1139,8 @@ VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = { vnet_api_error_t ip4_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable) { - return vnet_feature_enable_disable ("ip4-unicast", - "ip4-sv-reassembly-feature", - sw_if_index, enable_disable, 0, 0); + return ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, + enable_disable); } #endif /* CLIB_MARCH_VARIANT */ @@ -1177,7 +1305,8 @@ ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) } else { - --rm->feature_use_refcount_per_intf[sw_if_index]; + if (rm->feature_use_refcount_per_intf[sw_if_index]) + --rm->feature_use_refcount_per_intf[sw_if_index]; if (!rm->feature_use_refcount_per_intf[sw_if_index]) return vnet_feature_enable_disable ("ip4-unicast", "ip4-sv-reassembly-feature", @@ -1192,6 +1321,35 @@ ip4_sv_reass_custom_register_next_node (uword node_index) return vlib_node_add_next (vlib_get_main (), ip4_sv_reass_custom_node.index, node_index); } + +int 
+ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, + int is_enable) +{ + ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; + vec_validate (rm->output_feature_use_refcount_per_intf, sw_if_index); + if (is_enable) + { + if (!rm->output_feature_use_refcount_per_intf[sw_if_index]) + { + ++rm->output_feature_use_refcount_per_intf[sw_if_index]; + return vnet_feature_enable_disable ("ip4-output", + "ip4-sv-reassembly-output-feature", + sw_if_index, 1, 0, 0); + } + ++rm->output_feature_use_refcount_per_intf[sw_if_index]; + } + else + { + if (rm->output_feature_use_refcount_per_intf[sw_if_index]) + --rm->output_feature_use_refcount_per_intf[sw_if_index]; + if (!rm->output_feature_use_refcount_per_intf[sw_if_index]) + return vnet_feature_enable_disable ("ip4-output", + "ip4-sv-reassembly-output-feature", + sw_if_index, 0, 0, 0); + } + return 0; +} #endif /* diff --git a/src/vnet/ip/reass/ip4_sv_reass.h b/src/vnet/ip/reass/ip4_sv_reass.h index cf9f36502a0..e926dbeebcc 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.h +++ b/src/vnet/ip/reass/ip4_sv_reass.h @@ -45,6 +45,8 @@ vnet_api_error_t ip4_sv_reass_enable_disable (u32 sw_if_index, int ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); +int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, + int is_enable); uword ip4_sv_reass_custom_register_next_node (uword node_index); diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c index 0837f0606ec..442617703a1 100644 --- a/src/vnet/ip/reass/ip6_sv_reass.c +++ b/src/vnet/ip/reass/ip6_sv_reass.c @@ -96,6 +96,9 @@ typedef struct bool is_complete; // ip protocol u8 ip_proto; + u8 icmp_type_or_tcp_flags; + u32 tcp_ack_number; + u32 tcp_seq_number; // l4 src port u16 l4_src_port; // l4 dst port @@ -170,6 +173,7 @@ typedef enum REASS_FRAGMENT_CACHE, REASS_FINISH, REASS_FRAGMENT_FORWARD, + REASS_PASSTHROUGH, } ip6_sv_reass_trace_operation_e; typedef struct @@ -188,7 +192,10 @@ format_ip6_sv_reass_trace (u8 * s, 
va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip6_sv_reass_trace_t *t = va_arg (*args, ip6_sv_reass_trace_t *); - s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id); + if (REASS_PASSTHROUGH != t->action) + { + s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id); + } switch (t->action) { case REASS_FRAGMENT_CACHE: @@ -206,6 +213,9 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args) t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), clib_net_to_host_u16 (t->l4_dst_port)); break; + case REASS_PASSTHROUGH: + s = format (s, "[not-fragmented]"); + break; } return s; } @@ -219,13 +229,16 @@ ip6_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node, { vlib_buffer_t *b = vlib_get_buffer (vm, bi); ip6_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0])); - t->reass_id = reass->id; + if (reass) + { + t->reass_id = reass->id; + t->op_id = reass->trace_op_counter; + ++reass->trace_op_counter; + } t->action = action; - t->op_id = reass->trace_op_counter; t->ip_proto = ip_proto; t->l4_src_port = l4_src_port; t->l4_dst_port = l4_dst_port; - ++reass->trace_op_counter; #if 0 static u8 *s = NULL; s = format (s, "%U", format_ip6_sv_reass_trace, NULL, NULL, t); @@ -391,18 +404,13 @@ ip6_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, fvnb->ip.reass.next_range_bi = ~0; if (0 == fragment_first) { - ip6_ext_header_t *ext_hdr = (void *) frag_hdr; - while (ip6_ext_hdr (ext_hdr->next_hdr) - && vlib_object_within_buffer_data (vm, fb, ext_hdr, - ext_hdr->n_data_u64s * 8)) - { - ext_hdr = ip6_ext_next_header (ext_hdr); - } - reass->ip_proto = ext_hdr->next_hdr; - reass->l4_src_port = ip6_get_port (fip, 1, fb->current_length); - reass->l4_dst_port = ip6_get_port (fip, 0, fb->current_length); - if (!reass->l4_src_port || !reass->l4_dst_port) + if (!ip6_get_port + (vm, fb, fip, fb->current_length, &reass->ip_proto, + 
&reass->l4_src_port, &reass->l4_dst_port, + &reass->icmp_type_or_tcp_flags, &reass->tcp_ack_number, + &reass->tcp_seq_number)) return IP6_SV_REASS_RC_UNSUPP_IP_PROTO; + reass->is_complete = true; vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -538,12 +546,34 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, if (!frag_hdr) { // this is a regular packet - no fragmentation - vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; - vnet_buffer (b0)->ip.reass.l4_src_port = - ip6_get_port (ip0, 1, b0->current_length); - vnet_buffer (b0)->ip.reass.l4_dst_port = - ip6_get_port (ip0, 0, b0->current_length); + if (!ip6_get_port + (vm, b0, ip0, b0->current_length, + &(vnet_buffer (b0)->ip.reass.ip_proto), + &(vnet_buffer (b0)->ip.reass.l4_src_port), + &(vnet_buffer (b0)->ip.reass.l4_dst_port), + &(vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags), + &(vnet_buffer (b0)->ip.reass.tcp_ack_number), + &(vnet_buffer (b0)->ip.reass.tcp_seq_number))) + { + error0 = IP6_ERROR_REASS_UNSUPP_IP_PROTO; + next0 = IP6_SV_REASSEMBLY_NEXT_DROP; + goto packet_enqueue; + } + ASSERT (vnet_buffer (b0)->ip.save_rewrite_length < (2 << 14)); + vnet_buffer (b0)->ip.reass.save_rewrite_length = + vnet_buffer (b0)->ip.save_rewrite_length; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; next0 = IP6_SV_REASSEMBLY_NEXT_INPUT; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip6_sv_reass_add_trace (vm, node, rm, NULL, bi0, + REASS_PASSTHROUGH, + vnet_buffer (b0)->ip.reass.ip_proto, + vnet_buffer (b0)->ip. + reass.l4_src_port, + vnet_buffer (b0)->ip. 
+ reass.l4_dst_port); + } goto packet_enqueue; } vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset = @@ -601,7 +631,18 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, if (reass->is_complete) { + ASSERT (vnet_buffer (b0)->ip.save_rewrite_length < (2 << 14)); + vnet_buffer (b0)->ip.reass.save_rewrite_length = + vnet_buffer (b0)->ip.save_rewrite_length; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = + ! !ip6_frag_hdr_offset (frag_hdr); vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + reass->icmp_type_or_tcp_flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + reass->tcp_ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; next0 = IP6_SV_REASSEMBLY_NEXT_INPUT; @@ -668,7 +709,21 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, { vnet_feature_next (&next0, b0); } + frag_hdr = + vlib_buffer_get_current (b0) + + vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset; + ASSERT (vnet_buffer (b0)->ip.save_rewrite_length < (2 << 14)); + vnet_buffer (b0)->ip.reass.save_rewrite_length = + vnet_buffer (b0)->ip.save_rewrite_length; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = + ! 
!ip6_frag_hdr_offset (frag_hdr); vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + reass->icmp_type_or_tcp_flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + reass->tcp_ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -1124,9 +1179,8 @@ VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = { vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable) { - return vnet_feature_enable_disable ("ip6-unicast", - "ip6-sv-reassembly-feature", - sw_if_index, enable_disable, 0, 0); + return ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, + enable_disable); } #endif /* CLIB_MARCH_VARIANT */ -- cgit 1.2.3-korg