diff options
author | Ole Troan <ot@cisco.com> | 2019-10-09 13:33:19 +0200 |
---|---|---|
committer | Neale Ranns <nranns@cisco.com> | 2019-11-11 12:33:36 +0000 |
commit | eb284a1f8f10d752285a0a59e75bc54acae50779 (patch) | |
tree | 4ce777b223a51fa70f9e0dc18e7842b488b81cd6 /src/plugins/map/ip4_map.c | |
parent | d318a996b7bdcf0246b2d9927a918a3773a88fa6 (diff) |
ip: functional interface to ip fragmentation
This provides a functional interface to IP fragmentation.
Allowing external features to fragment. Supports
arbitrary encap size, for e.g. MPLS or inner fragmentation
of tunnels.
This also removed dual loop in MAP that was fundamentally broken.
Type: fix
Signed-off-by: Ole Troan <ot@cisco.com>
Change-Id: Ia89ecec8ee3cbe2416edbe87630fdb714898c2a8
Signed-off-by: Ole Troan <ot@cisco.com>
Diffstat (limited to 'src/plugins/map/ip4_map.c')
-rw-r--r-- | src/plugins/map/ip4_map.c | 268 |
1 files changed, 66 insertions, 202 deletions
diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c index ad94907e499..f2a00907a57 100644 --- a/src/plugins/map/ip4_map.c +++ b/src/plugins/map/ip4_map.c @@ -26,8 +26,6 @@ enum ip4_map_next_e #ifdef MAP_SKIP_IP6_LOOKUP IP4_MAP_NEXT_IP6_REWRITE, #endif - IP4_MAP_NEXT_IP4_FRAGMENT, - IP4_MAP_NEXT_IP6_FRAGMENT, IP4_MAP_NEXT_ICMP_ERROR, IP4_MAP_NEXT_DROP, IP4_MAP_N_NEXT, @@ -117,17 +115,26 @@ ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error) } static u32 -ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error) +ip4_map_fragment (vlib_main_t * vm, u32 bi, u16 mtu, bool df, u32 ** buffers, + u8 * error) { map_main_t *mm = &map_main; + vlib_buffer_t *b = vlib_get_buffer (vm, bi); if (mm->frag_inner) { - // TODO: Fix inner fragmentation after removed inner support from ip-frag. - ip_frag_set_vnet_buffer (b, /*sizeof (ip6_header_t), */ mtu, - IP4_FRAG_NEXT_IP6_LOOKUP, - IP_FRAG_FLAG_IP6_HEADER); - return (IP4_MAP_NEXT_IP4_FRAGMENT); + /* IPv4 fragmented packets inside of IPv6 */ + ip4_frag_do_fragment (vm, bi, mtu, sizeof (ip6_header_t), buffers); + + /* Fixup */ + u32 *i; + vec_foreach (i, *buffers) + { + vlib_buffer_t *p = vlib_get_buffer (vm, *i); + ip6_header_t *ip6 = vlib_buffer_get_current (p); + ip6->payload_length = + clib_host_to_net_u16 (p->current_length - sizeof (ip6_header_t)); + } } else { @@ -140,10 +147,11 @@ ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error) *error = MAP_ERROR_DF_SET; return (IP4_MAP_NEXT_ICMP_ERROR); } - ip_frag_set_vnet_buffer (b, mtu, IP6_FRAG_NEXT_IP6_LOOKUP, - IP_FRAG_FLAG_IP6_HEADER); - return (IP4_MAP_NEXT_IP6_FRAGMENT); + + /* Create IPv6 fragments here */ + ip6_frag_do_fragment (vm, bi, mtu, 0, buffers); } + return (IP4_MAP_NEXT_IP6_LOOKUP); } /* @@ -165,189 +173,6 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) while (n_left_from > 0) { vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Dual loop */ - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 pi0, pi1; - vlib_buffer_t *p0, *p1; - map_domain_t *d0, *d1; - u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE; - ip4_header_t *ip40, *ip41; - u16 port0 = 0, port1 = 0; - ip6_header_t *ip6h0, *ip6h1; - u32 map_domain_index0 = ~0, map_domain_index1 = ~0; - u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 = - IP4_MAP_NEXT_IP6_LOOKUP; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, STORE); - vlib_prefetch_buffer_header (p3, STORE); - /* IPv4 + 8 = 28. possibly plus -40 */ - CLIB_PREFETCH (p2->data - 40, 68, STORE); - CLIB_PREFETCH (p3->data - 40, 68, STORE); - } - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - ip40 = vlib_buffer_get_current (p0); - ip41 = vlib_buffer_get_current (p1); - d0 = - ip4_map_get_domain (&ip40->dst_address, &map_domain_index0, - &error0); - d1 = - ip4_map_get_domain (&ip41->dst_address, &map_domain_index1, - &error1); - - /* - * Shared IPv4 address - */ - port0 = ip4_map_port_and_security_check (d0, p0, &error0); - port1 = ip4_map_port_and_security_check (d1, p1, &error1); - - /* Decrement IPv4 TTL */ - ip4_map_decrement_ttl (ip40, &error0); - ip4_map_decrement_ttl (ip41, &error1); - bool df0 = - ip40->flags_and_fragment_offset & - clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); - bool df1 = - ip41->flags_and_fragment_offset & - clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); - - /* MAP calc */ - u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32); - u32 da41 = clib_net_to_host_u32 (ip41->dst_address.as_u32); - u16 dp40 = clib_net_to_host_u16 (port0); - u16 dp41 = clib_net_to_host_u16 (port1); - u64 dal60 = map_get_pfx (d0, da40, dp40); - u64 dal61 = map_get_pfx (d1, da41, dp41); - u64 dar60 = map_get_sfx (d0, da40, dp40); - u64 dar61 = map_get_sfx (d1, da41, dp41); - if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE) - error0 = MAP_ERROR_NO_BINDING; - if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE) - error1 = MAP_ERROR_NO_BINDING; - - /* construct ipv6 header */ - vlib_buffer_advance (p0, -sizeof (ip6_header_t)); - vlib_buffer_advance (p1, -sizeof (ip6_header_t)); - ip6h0 = vlib_buffer_get_current (p0); - ip6h1 = vlib_buffer_get_current (p1); - vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0; - - ip6h0->ip_version_traffic_class_and_flow_label = - ip4_map_vtcfl (ip40, p0); - ip6h1->ip_version_traffic_class_and_flow_label = - ip4_map_vtcfl (ip41, p1); - ip6h0->payload_length = ip40->length; - ip6h1->payload_length = ip41->length; - ip6h0->protocol = IP_PROTOCOL_IP_IN_IP; - ip6h1->protocol = IP_PROTOCOL_IP_IN_IP; - ip6h0->hop_limit = 0x40; - ip6h1->hop_limit = 0x40; - ip6h0->src_address = d0->ip6_src; - ip6h1->src_address = d1->ip6_src; - ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60); - ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60); - ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64 (dal61); - ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64 (dar61); - - /* - * Determine next node. Can be one of: - * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop - */ - if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) - { - if (PREDICT_FALSE - (d0->mtu - && (clib_net_to_host_u16 (ip6h0->payload_length) + - sizeof (*ip6h0) > d0->mtu))) - { - next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0); - } - else - { - next0 = - ip4_map_ip6_lookup_bypass (p0, - ip40) ? - IP4_MAP_NEXT_IP6_REWRITE : next0; - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - map_domain_index0, 1, - clib_net_to_host_u16 - (ip6h0->payload_length) + - 40); - } - } - else - { - next0 = IP4_MAP_NEXT_DROP; - } - - /* - * Determine next node. Can be one of: - * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop - */ - if (PREDICT_TRUE (error1 == MAP_ERROR_NONE)) - { - if (PREDICT_FALSE - (d1->mtu - && (clib_net_to_host_u16 (ip6h1->payload_length) + - sizeof (*ip6h1) > d1->mtu))) - { - next1 = ip4_map_fragment (p1, d1->mtu, df1, &error1); - } - else - { - next1 = - ip4_map_ip6_lookup_bypass (p1, - ip41) ? - IP4_MAP_NEXT_IP6_REWRITE : next1; - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - map_domain_index1, 1, - clib_net_to_host_u16 - (ip6h1->payload_length) + - 40); - } - } - else - { - next1 = IP4_MAP_NEXT_DROP; - } - - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - map_add_trace (vm, node, p0, map_domain_index0, port0); - } - if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED)) - { - map_add_trace (vm, node, p1, map_domain_index1, port0); - } - - p0->error = error_node->errors[error0]; - p1->error = error_node->errors[error1]; - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, pi0, pi1, next0, - next1); - } - while (n_left_from > 0 && n_left_to_next > 0) { u32 pi0; @@ -359,12 +184,13 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip6_header_t *ip6h0; u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP; u32 map_domain_index0 = ~0; + u32 *buffer0 = 0; + bool free_original_buffer0 = false; + u32 *frag_from0, frag_left0; pi0 = to_next[0] = from[0]; from += 1; n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; p0 = vlib_get_buffer (vm, pi0); ip40 = vlib_buffer_get_current (p0); @@ -413,7 +239,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) /* * Determine next node. Can be one of: - * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop + * ip6-lookup, ip6-rewrite, error-drop */ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) { @@ -422,7 +248,14 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) && (clib_net_to_host_u16 (ip6h0->payload_length) + sizeof (*ip6h0) > d0->mtu))) { - next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0); + next0 = + ip4_map_fragment (vm, pi0, d0->mtu, df0, &buffer0, + &error0); + + if (error0 == MAP_ERROR_NONE) + { + free_original_buffer0 = true; + } } else { @@ -450,8 +283,41 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0->error = error_node->errors[error0]; exit: - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); + /* Send fragments that were added in the frame */ + if (free_original_buffer0) + { + vlib_buffer_free_one (vm, pi0); /* Free original packet */ + } + else + { + vec_add1 (buffer0, pi0); + } + + frag_from0 = buffer0; + frag_left0 = vec_len (buffer0); + + while (frag_left0 > 0) + { + while (frag_left0 > 0 && n_left_to_next > 0) + { + u32 i0; + i0 = to_next[0] = frag_from0[0]; + frag_from0 += 1; + frag_left0 -= 1; + to_next += 1; + n_left_to_next -= 1; + + vlib_get_buffer (vm, i0)->error = + error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + i0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + vec_reset_length (buffer0); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } @@ -491,8 +357,6 @@ VLIB_REGISTER_NODE(ip4_map_node) = { #ifdef MAP_SKIP_IP6_LOOKUP [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance", #endif - [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag", - [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag", [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error", [IP4_MAP_NEXT_DROP] = "error-drop", }, |