diff options
author | Klement Sekera <ksekera@cisco.com> | 2019-08-21 10:53:14 +0000 |
---|---|---|
committer | Ole Trøan <otroan@employees.org> | 2019-09-26 16:34:02 +0000 |
commit | a025b3ea353b5c5c356efda0888d75a2ab8979e0 (patch) | |
tree | c0521ada5f6e20875b42c5f2bf7b320700e77cf8 | |
parent | de34c35fc73226943538149fae9dbc5cfbdc6e75 (diff) |
map: use SVR for MAP-E
This change is part of an effort to unify reassembly code. By removing
shallow virtual reassembly functionality in MAP and using the common
vnet-provided shallow virtual reassembly, code size and complexity are
reduced.
Type: refactor
Change-Id: I431f47d4db97154fecaeaecd6719cfc3b83cfc4a
Signed-off-by: Klement Sekera <ksekera@cisco.com>
-rw-r--r-- | src/plugins/map/ip4_map.c | 314 | ||||
-rw-r--r-- | src/plugins/map/ip6_map.c | 213 | ||||
-rw-r--r-- | src/plugins/map/map.h | 2 | ||||
-rw-r--r-- | src/plugins/map/map_api.c | 3 | ||||
-rw-r--r-- | src/plugins/map/test/test_map.py | 96 |
5 files changed, 172 insertions, 456 deletions
diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c index 64da602e838..2466f533f37 100644 --- a/src/plugins/map/ip4_map.c +++ b/src/plugins/map/ip4_map.c @@ -20,8 +20,6 @@ #include <vnet/ip/ip_frag.h> #include <vnet/ip/ip4_to_ip6.h> -vlib_node_registration_t ip4_map_reass_node; - enum ip4_map_next_e { IP4_MAP_NEXT_IP6_LOOKUP, @@ -30,79 +28,38 @@ enum ip4_map_next_e #endif IP4_MAP_NEXT_IP4_FRAGMENT, IP4_MAP_NEXT_IP6_FRAGMENT, - IP4_MAP_NEXT_REASS, IP4_MAP_NEXT_ICMP_ERROR, IP4_MAP_NEXT_DROP, IP4_MAP_N_NEXT, }; -enum ip4_map_reass_next_t -{ - IP4_MAP_REASS_NEXT_IP6_LOOKUP, - IP4_MAP_REASS_NEXT_IP4_FRAGMENT, - IP4_MAP_REASS_NEXT_DROP, - IP4_MAP_REASS_N_NEXT, -}; - -typedef struct +static_always_inline u16 +ip4_map_port_and_security_check (map_domain_t * d, vlib_buffer_t * b0, + u8 * error) { - u32 map_domain_index; u16 port; - u8 cached; -} map_ip4_map_reass_trace_t; + if (d->psid_length > 0) + { + ip4_header_t *ip = vlib_buffer_get_current (b0); -u8 * -format_ip4_map_reass_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - map_ip4_map_reass_trace_t *t = va_arg (*args, map_ip4_map_reass_trace_t *); - return format (s, "MAP domain index: %d L4 port: %u Status: %s", - t->map_domain_index, t->port, - t->cached ? 
"cached" : "forwarded"); -} + if (PREDICT_FALSE + ((ip->ip_version_and_header_length != 0x45) + || clib_host_to_net_u16 (ip->length) < 28)) + { + return 0; + } -static_always_inline u16 -ip4_map_port_and_security_check (map_domain_t * d, ip4_header_t * ip, - u32 * next, u8 * error) -{ - u16 port = 0; + port = vnet_buffer (b0)->ip.reass.l4_dst_port; - if (d->psid_length > 0) - { - if (ip4_get_fragment_offset (ip) == 0) + /* Verify that port is not among the well-known ports */ + if ((d->psid_offset > 0) + && (clib_net_to_host_u16 (port) < (0x1 << (16 - d->psid_offset)))) { - if (PREDICT_FALSE - ((ip->ip_version_and_header_length != 0x45) - || clib_host_to_net_u16 (ip->length) < 28)) - { - return 0; - } - port = ip4_get_port (ip, 0); - if (port) - { - /* Verify that port is not among the well-known ports */ - if ((d->psid_offset > 0) - && (clib_net_to_host_u16 (port) < - (0x1 << (16 - d->psid_offset)))) - { - *error = MAP_ERROR_ENCAP_SEC_CHECK; - } - else - { - if (ip4_get_fragment_more (ip)) - *next = IP4_MAP_NEXT_REASS; - return (port); - } - } - else - { - *error = MAP_ERROR_BAD_PROTOCOL; - } + *error = MAP_ERROR_ENCAP_SEC_CHECK; } else { - *next = IP4_MAP_NEXT_REASS; + return port; } } return (0); @@ -258,8 +215,8 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) /* * Shared IPv4 address */ - port0 = ip4_map_port_and_security_check (d0, ip40, &next0, &error0); - port1 = ip4_map_port_and_security_check (d1, ip41, &next1, &error1); + port0 = ip4_map_port_and_security_check (d0, p0, &error0); + port1 = ip4_map_port_and_security_check (d1, p1, &error1); /* Decrement IPv4 TTL */ ip4_map_decrement_ttl (ip40, &error0); @@ -280,11 +237,9 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) u64 dal61 = map_get_pfx (d1, da41, dp41); u64 dar60 = map_get_sfx (d0, da40, dp40); u64 dar61 = map_get_sfx (d1, da41, dp41); - if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE - && next0 != IP4_MAP_NEXT_REASS) + if 
(dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE) error0 = MAP_ERROR_NO_BINDING; - if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE - && next1 != IP4_MAP_NEXT_REASS) + if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE) error1 = MAP_ERROR_NO_BINDING; /* construct ipv6 header */ @@ -314,7 +269,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) /* * Determine next node. Can be one of: - * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop + * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop */ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) { @@ -346,7 +301,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) /* * Determine next node. Can be one of: - * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop + * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop */ if (PREDICT_TRUE (error1 == MAP_ERROR_NONE)) { @@ -430,7 +385,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) /* * Shared IPv4 address */ - port0 = ip4_map_port_and_security_check (d0, ip40, &next0, &error0); + port0 = ip4_map_port_and_security_check (d0, p0, &error0); /* Decrement IPv4 TTL */ ip4_map_decrement_ttl (ip40, &error0); @@ -443,8 +398,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) u16 dp40 = clib_net_to_host_u16 (port0); u64 dal60 = map_get_pfx (d0, da40, dp40); u64 dar60 = map_get_sfx (d0, da40, dp40); - if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE - && next0 != IP4_MAP_NEXT_REASS) + if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE) error0 = MAP_ERROR_NO_BINDING; /* construct ipv6 header */ @@ -463,7 +417,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) /* * Determine next node. 
Can be one of: - * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop + * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop */ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) { @@ -511,195 +465,6 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) return frame->n_vectors; } -/* - * ip4_map_reass - */ -static uword -ip4_map_reass (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip4_map_reass_node.index); - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - map_main_t *mm = &map_main; - vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 thread_index = vm->thread_index; - u32 *fragments_to_drop = NULL; - u32 *fragments_to_loopback = NULL; - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - map_domain_t *d0; - u8 error0 = MAP_ERROR_NONE; - ip4_header_t *ip40; - i32 port0 = 0; - ip6_header_t *ip60; - u32 next0 = IP4_MAP_REASS_NEXT_IP6_LOOKUP; - u32 map_domain_index0 = ~0; - u8 cached = 0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - p0 = vlib_get_buffer (vm, pi0); - ip60 = vlib_buffer_get_current (p0); - ip40 = (ip4_header_t *) (ip60 + 1); - d0 = - ip4_map_get_domain (&ip40->dst_address, &map_domain_index0, - &error0); - - map_ip4_reass_lock (); - map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32, - ip40->dst_address.as_u32, - ip40->fragment_id, - ip40->protocol, - &fragments_to_drop); - if (PREDICT_FALSE (!r)) - { - // Could not create a caching entry - error0 = MAP_ERROR_FRAGMENT_MEMORY; - } - else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40))) - { - if (r->port >= 0) - { - // We know the 
port already - port0 = r->port; - } - else if (map_ip4_reass_add_fragment (r, pi0)) - { - // Not enough space for caching - error0 = MAP_ERROR_FRAGMENT_MEMORY; - map_ip4_reass_free (r, &fragments_to_drop); - } - else - { - cached = 1; - } - } - else if ((port0 = ip4_get_port (ip40, 0)) == 0) - { - // Could not find port. We'll free the reassembly. - error0 = MAP_ERROR_BAD_PROTOCOL; - port0 = 0; - map_ip4_reass_free (r, &fragments_to_drop); - } - else - { - r->port = port0; - map_ip4_reass_get_fragments (r, &fragments_to_loopback); - } - -#ifdef MAP_IP4_REASS_COUNT_BYTES - if (!cached && r) - { - r->forwarded += clib_host_to_net_u16 (ip40->length) - 20; - if (!ip4_get_fragment_more (ip40)) - r->expected_total = - ip4_get_fragment_offset (ip40) * 8 + - clib_host_to_net_u16 (ip40->length) - 20; - if (r->forwarded >= r->expected_total) - map_ip4_reass_free (r, &fragments_to_drop); - } -#endif - - map_ip4_reass_unlock (); - - // NOTE: Most operations have already been performed by ip4_map - // All we need is the right destination address - ip60->dst_address.as_u64[0] = - map_get_pfx_net (d0, ip40->dst_address.as_u32, port0); - ip60->dst_address.as_u64[1] = - map_get_sfx_net (d0, ip40->dst_address.as_u32, port0); - - if (PREDICT_FALSE - (d0->mtu - && (clib_net_to_host_u16 (ip60->payload_length) + - sizeof (*ip60) > d0->mtu))) - { - // TODO: vnet_buffer (p0)->ip_frag.header_offset = sizeof (*ip60); - vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; - vnet_buffer (p0)->ip_frag.mtu = d0->mtu; - vnet_buffer (p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; - next0 = IP4_MAP_REASS_NEXT_IP4_FRAGMENT; - } - - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - map_ip4_map_reass_trace_t *tr = - vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->map_domain_index = map_domain_index0; - tr->port = port0; - tr->cached = cached; - } - - if (cached) - { - //Dequeue the packet - n_left_to_next++; - to_next--; - } - else - { - if (error0 == MAP_ERROR_NONE) - 
vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - map_domain_index0, 1, - clib_net_to_host_u16 - (ip60->payload_length) + 40); - next0 = - (error0 == MAP_ERROR_NONE) ? next0 : IP4_MAP_REASS_NEXT_DROP; - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); - } - - //Loopback when we reach the end of the inpu vector - if (n_left_from == 0 && vec_len (fragments_to_loopback)) - { - from = vlib_frame_vector_args (frame); - u32 len = vec_len (fragments_to_loopback); - if (len <= VLIB_FRAME_SIZE) - { - clib_memcpy_fast (from, fragments_to_loopback, - sizeof (u32) * len); - n_left_from = len; - vec_reset_length (fragments_to_loopback); - } - else - { - clib_memcpy_fast (from, fragments_to_loopback + - (len - VLIB_FRAME_SIZE), - sizeof (u32) * VLIB_FRAME_SIZE); - n_left_from = VLIB_FRAME_SIZE; - _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; - } - } - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - map_send_all_to_node (vm, fragments_to_drop, node, - &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], - IP4_MAP_REASS_NEXT_DROP); - - vec_free (fragments_to_drop); - vec_free (fragments_to_loopback); - return frame->n_vectors; -} - static char *map_error_strings[] = { #define _(sym,string) string, foreach_map_error @@ -712,8 +477,8 @@ VNET_FEATURE_INIT (ip4_map_feature, static) = { .arc_name = "ip4-unicast", .node_name = "ip4-map", - .runs_before = - VNET_FEATURES ("ip4-flow-classify"), + .runs_before = VNET_FEATURES ("ip4-flow-classify"), + .runs_after = VNET_FEATURES("ip4-sv-reassembly-feature"), }; VLIB_REGISTER_NODE(ip4_map_node) = { @@ -734,33 +499,12 @@ VLIB_REGISTER_NODE(ip4_map_node) = { #endif [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag", [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag", - [IP4_MAP_NEXT_REASS] = "ip4-map-reass", [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error", [IP4_MAP_NEXT_DROP] = "error-drop", }, }; /* *INDENT-ON* */ -/* 
*INDENT-OFF* */ -VLIB_REGISTER_NODE(ip4_map_reass_node) = { - .function = ip4_map_reass, - .name = "ip4-map-reass", - .vector_size = sizeof(u32), - .format_trace = format_ip4_map_reass_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = MAP_N_ERROR, - .error_strings = map_error_strings, - - .n_next_nodes = IP4_MAP_REASS_N_NEXT, - .next_nodes = { - [IP4_MAP_REASS_NEXT_IP6_LOOKUP] = "ip6-lookup", - [IP4_MAP_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag", - [IP4_MAP_REASS_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c index d9db602bad8..492d5f83927 100644 --- a/src/plugins/map/ip6_map.c +++ b/src/plugins/map/ip6_map.c @@ -17,6 +17,7 @@ #include <vnet/ip/ip_frag.h> #include <vnet/ip/ip4_to_ip6.h> #include <vnet/ip/ip6_to_ip4.h> +#include <vnet/ip/reass/ip4_sv_reass.h> enum ip6_map_next_e { @@ -41,12 +42,12 @@ enum ip6_map_ip6_reass_next_e IP6_MAP_IP6_REASS_N_NEXT, }; -enum ip6_map_ip4_reass_next_e +enum ip6_map_post_ip4_reass_next_e { - IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP, - IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT, - IP6_MAP_IP4_REASS_NEXT_DROP, - IP6_MAP_IP4_REASS_N_NEXT, + IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP, + IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT, + IP6_MAP_POST_IP4_REASS_NEXT_DROP, + IP6_MAP_POST_IP4_REASS_N_NEXT, }; enum ip6_icmp_relay_next_e @@ -56,7 +57,7 @@ enum ip6_icmp_relay_next_e IP6_ICMP_RELAY_N_NEXT, }; -vlib_node_registration_t ip6_map_ip4_reass_node; +vlib_node_registration_t ip6_map_post_ip4_reass_node; vlib_node_registration_t ip6_map_ip6_reass_node; static vlib_node_registration_t ip6_map_icmp_relay_node; @@ -68,14 +69,14 @@ typedef struct } map_ip6_map_ip4_reass_trace_t; u8 * -format_ip6_map_ip4_reass_trace (u8 * s, va_list * args) +format_ip6_map_post_ip4_reass_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); 
map_ip6_map_ip4_reass_trace_t *t = va_arg (*args, map_ip6_map_ip4_reass_trace_t *); return format (s, "MAP domain index: %d L4 port: %u Status: %s", - t->map_domain_index, t->port, + t->map_domain_index, clib_net_to_host_u16 (t->port), t->cached ? "cached" : "forwarded"); } @@ -117,8 +118,9 @@ ip6_map_sec_check (map_domain_t * d, u16 port, ip4_header_t * ip4, } static_always_inline void -ip6_map_security_check (map_domain_t * d, ip4_header_t * ip4, - ip6_header_t * ip6, u32 * next, u8 * error) +ip6_map_security_check (map_domain_t * d, vlib_buffer_t * b0, + ip4_header_t * ip4, ip6_header_t * ip6, u32 * next, + u8 * error) { map_main_t *mm = &map_main; if (d->ea_bits_len || d->rules) @@ -143,7 +145,12 @@ ip6_map_security_check (map_domain_t * d, ip4_header_t * ip4, } else { - *next = mm->sec_check_frag ? IP6_MAP_NEXT_IP4_REASS : *next; + if (mm->sec_check_frag) + { + vnet_buffer (b0)->ip.reass.next_index = + map_main.ip4_sv_reass_custom_next_index; + *next = IP6_MAP_NEXT_IP4_REASS; + } } } } @@ -297,7 +304,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) if (d0) { /* MAP inbound security check */ - ip6_map_security_check (d0, ip40, ip60, &next0, &error0); + ip6_map_security_check (d0, p0, ip40, ip60, &next0, &error0); if (PREDICT_TRUE (error0 == MAP_ERROR_NONE && next0 == IP6_MAP_NEXT_IP4_LOOKUP)) @@ -329,7 +336,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) if (d1) { /* MAP inbound security check */ - ip6_map_security_check (d1, ip41, ip61, &next1, &error1); + ip6_map_security_check (d1, p1, ip41, ip61, &next1, &error1); if (PREDICT_TRUE (error1 == MAP_ERROR_NONE && next1 == IP6_MAP_NEXT_IP4_LOOKUP)) @@ -484,7 +491,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) if (d0) { /* MAP inbound security check */ - ip6_map_security_check (d0, ip40, ip60, &next0, &error0); + ip6_map_security_check (d0, p0, ip40, ip60, &next0, &error0); if (PREDICT_TRUE (error0 == 
MAP_ERROR_NONE && next0 == IP6_MAP_NEXT_IP4_LOOKUP)) @@ -657,15 +664,6 @@ map_ip6_drop_pi (u32 pi) vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi); } -void -map_ip4_drop_pi (u32 pi) -{ - vlib_main_t *vm = vlib_get_main (); - vlib_node_runtime_t *n = - vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index); - vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP4_REASS_NEXT_DROP, pi); -} - /* * ip6_reass * TODO: We should count the number of successfully @@ -811,20 +809,18 @@ ip6_map_ip6_reass (vlib_main_t * vm, } /* - * ip6_map_ip4_reass + * ip6_map_post_ip4_reass */ static uword -ip6_map_ip4_reass (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +ip6_map_post_ip4_reass (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_left_from, *from, next_index, *to_next, n_left_to_next; vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index); + vlib_node_get_runtime (vm, ip6_map_post_ip4_reass_node.index); map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = mm->domain_counters; u32 thread_index = vm->thread_index; - u32 *fragments_to_drop = NULL; - u32 *fragments_to_loopback = NULL; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -844,8 +840,7 @@ ip6_map_ip4_reass (vlib_main_t * vm, ip6_header_t *ip60; i32 port0 = 0; u32 map_domain_index0 = ~0; - u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP; - u8 cached = 0; + u32 next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP; pi0 = to_next[0] = from[0]; from += 1; @@ -861,65 +856,7 @@ ip6_map_ip4_reass (vlib_main_t * vm, ip4_map_get_domain ((ip4_address_t *) & ip40->src_address.as_u32, &map_domain_index0, &error0); - map_ip4_reass_lock (); - //This node only deals with fragmented ip4 - map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32, - ip40->dst_address.as_u32, - ip40->fragment_id, - ip40->protocol, - &fragments_to_drop); - if (PREDICT_FALSE (!r)) - { - // Could not create a caching entry 
- error0 = MAP_ERROR_FRAGMENT_MEMORY; - } - else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40))) - { - // This is a fragment - if (r->port >= 0) - { - // We know the port already - port0 = r->port; - } - else if (map_ip4_reass_add_fragment (r, pi0)) - { - // Not enough space for caching - error0 = MAP_ERROR_FRAGMENT_MEMORY; - map_ip4_reass_free (r, &fragments_to_drop); - } - else - { - cached = 1; - } - } - else if ((port0 = ip4_get_port (ip40, 1)) == 0) - { - // Could not find port from first fragment. Stop reassembling. - error0 = MAP_ERROR_BAD_PROTOCOL; - port0 = 0; - map_ip4_reass_free (r, &fragments_to_drop); - } - else - { - // Found port. Remember it and loopback saved fragments - r->port = port0; - map_ip4_reass_get_fragments (r, &fragments_to_loopback); - } - -#ifdef MAP_IP4_REASS_COUNT_BYTES - if (!cached && r) - { - r->forwarded += clib_host_to_net_u16 (ip40->length) - 20; - if (!ip4_get_fragment_more (ip40)) - r->expected_total = - ip4_get_fragment_offset (ip40) * 8 + - clib_host_to_net_u16 (ip40->length) - 20; - if (r->forwarded >= r->expected_total) - map_ip4_reass_free (r, &fragments_to_drop); - } -#endif - - map_ip4_reass_unlock (); + port0 = vnet_buffer (p0)->ip.reass.l4_src_port; if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) error0 = @@ -929,12 +866,12 @@ ip6_map_ip4_reass (vlib_main_t * vm, if (PREDICT_FALSE (d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu) - && error0 == MAP_ERROR_NONE && !cached)) + && error0 == MAP_ERROR_NONE)) { vnet_buffer (p0)->ip_frag.flags = 0; vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; vnet_buffer (p0)->ip_frag.mtu = d0->mtu; - next0 = IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT; + next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT; } if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) @@ -943,61 +880,24 @@ ip6_map_ip4_reass (vlib_main_t * vm, vlib_add_trace (vm, node, p0, sizeof (*tr)); tr->map_domain_index = map_domain_index0; tr->port = port0; - tr->cached = cached; } - if (cached) - { - 
//Dequeue the packet - n_left_to_next++; - to_next--; - } - else - { - if (error0 == MAP_ERROR_NONE) - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - thread_index, - map_domain_index0, 1, - clib_net_to_host_u16 - (ip40->length)); - next0 = - (error0 == - MAP_ERROR_NONE) ? next0 : IP6_MAP_IP4_REASS_NEXT_DROP; - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); - } + if (error0 == MAP_ERROR_NONE) + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, + thread_index, + map_domain_index0, 1, + clib_net_to_host_u16 + (ip40->length)); + next0 = + (error0 == + MAP_ERROR_NONE) ? next0 : IP6_MAP_POST_IP4_REASS_NEXT_DROP; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); - //Loopback when we reach the end of the inpu vector - if (n_left_from == 0 && vec_len (fragments_to_loopback)) - { - from = vlib_frame_vector_args (frame); - u32 len = vec_len (fragments_to_loopback); - if (len <= VLIB_FRAME_SIZE) - { - clib_memcpy_fast (from, fragments_to_loopback, - sizeof (u32) * len); - n_left_from = len; - vec_reset_length (fragments_to_loopback); - } - else - { - clib_memcpy_fast (from, fragments_to_loopback + - (len - VLIB_FRAME_SIZE), - sizeof (u32) * VLIB_FRAME_SIZE); - n_left_from = VLIB_FRAME_SIZE; - _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; - } - } } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } - map_send_all_to_node (vm, fragments_to_drop, node, - &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], - IP6_MAP_IP4_REASS_NEXT_DROP); - - vec_free (fragments_to_drop); - vec_free (fragments_to_loopback); return frame->n_vectors; } @@ -1195,7 +1095,7 @@ VLIB_REGISTER_NODE(ip6_map_node) = { [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-load-balance", #endif [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass", - [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass", + 
[IP6_MAP_NEXT_IP4_REASS] = "ip4-sv-reassembly-custom-next", [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag", [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay", [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local", @@ -1223,19 +1123,19 @@ VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = { /* *INDENT-ON* */ /* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip6_map_ip4_reass_node) = { - .function = ip6_map_ip4_reass, - .name = "ip6-map-ip4-reass", +VLIB_REGISTER_NODE(ip6_map_post_ip4_reass_node) = { + .function = ip6_map_post_ip4_reass, + .name = "ip6-map-post-ip4-reass", .vector_size = sizeof(u32), - .format_trace = format_ip6_map_ip4_reass_trace, + .format_trace = format_ip6_map_post_ip4_reass_trace, .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = MAP_N_ERROR, .error_strings = map_error_strings, - .n_next_nodes = IP6_MAP_IP4_REASS_N_NEXT, + .n_next_nodes = IP6_MAP_POST_IP4_REASS_N_NEXT, .next_nodes = { - [IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag", - [IP6_MAP_IP4_REASS_NEXT_DROP] = "error-drop", + [IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag", + [IP6_MAP_POST_IP4_REASS_NEXT_DROP] = "error-drop", }, }; /* *INDENT-ON* */ @@ -1257,6 +1157,19 @@ VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = { }; /* *INDENT-ON* */ +clib_error_t * +ip6_map_init (vlib_main_t * vm) +{ + map_main.ip4_sv_reass_custom_next_index = + ip4_sv_reass_custom_register_next_node + (ip6_map_post_ip4_reass_node.index); + return 0; +} + +VLIB_INIT_FUNCTION (ip6_map_init) = +{ +.runs_after = VLIB_INITS ("map_init"),}; + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/map/map.h b/src/plugins/map/map.h index a65a16a06d5..a4aced5952d 100644 --- a/src/plugins/map/map.h +++ b/src/plugins/map/map.h @@ -342,6 +342,8 @@ typedef struct { lpm_t *ip4_prefix_tbl; lpm_t *ip6_prefix_tbl; lpm_t *ip6_src_prefix_tbl; + + uword ip4_sv_reass_custom_next_index; } map_main_t; /* diff --git 
a/src/plugins/map/map_api.c b/src/plugins/map/map_api.c index 159c9d7569b..c29a2a12375 100644 --- a/src/plugins/map/map_api.c +++ b/src/plugins/map/map_api.c @@ -22,6 +22,8 @@ #include <map/map.api_enum.h> #include <map/map.api_types.h> #include <vnet/ip/ip.h> +#include <vnet/ip/reass/ip4_sv_reass.h> +#include <vnet/ip/reass/ip6_sv_reass.h> #include <vnet/fib/fib_table.h> #include <vlibmemory/api.h> @@ -614,6 +616,7 @@ map_if_enable_disable (bool is_enable, u32 sw_if_index, bool is_translation) if (is_translation == false) { + ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, is_enable); vnet_feature_enable_disable ("ip4-unicast", "ip4-map", sw_if_index, is_enable ? 1 : 0, 0, 0); vnet_feature_enable_disable ("ip6-unicast", "ip6-map", sw_if_index, diff --git a/src/plugins/map/test/test_map.py b/src/plugins/map/test/test_map.py index f1388b39c65..cf1e6f89616 100644 --- a/src/plugins/map/test/test_map.py +++ b/src/plugins/map/test/test_map.py @@ -7,6 +7,7 @@ from ipaddress import IPv6Network, IPv4Network from framework import VppTestCase, VppTestRunner from vpp_ip import DpoProto from vpp_ip_route import VppIpRoute, VppRoutePath +from util import fragment_rfc791 import scapy.compat from scapy.layers.l2 import Ether, Raw @@ -49,22 +50,25 @@ class TestMAP(VppTestCase): i.unconfig_ip6() i.admin_down() - def send_and_assert_encapped(self, tx, ip6_src, ip6_dst, dmac=None): + def send_and_assert_encapped(self, packets, ip6_src, ip6_dst, dmac=None): if not dmac: dmac = self.pg1.remote_mac - self.pg0.add_stream(tx) + self.pg0.add_stream(packets) self.pg_enable_capture(self.pg_interfaces) self.pg_start() - rx = self.pg1.get_capture(1) - rx = rx[0] + capture = self.pg1.get_capture(len(packets)) + for rx, tx in zip(capture, packets): + self.assertEqual(rx[Ether].dst, dmac) + self.assertEqual(rx[IP].src, tx[IP].src) + self.assertEqual(rx[IPv6].src, ip6_src) + self.assertEqual(rx[IPv6].dst, ip6_dst) - self.assertEqual(rx[Ether].dst, dmac) - self.assertEqual(rx[IP].src, 
tx[IP].src) - self.assertEqual(rx[IPv6].src, ip6_src) - self.assertEqual(rx[IPv6].dst, ip6_dst) + def send_and_assert_encapped_one(self, packet, ip6_src, ip6_dst, + dmac=None): + return self.send_and_assert_encapped([packet], ip6_src, ip6_dst, dmac) def test_api_map_domain_dump(self): map_dst = '2001::/64' @@ -75,7 +79,6 @@ class TestMAP(VppTestCase): ip6_prefix=map_dst, ip6_src=map_src, tag=tag).index - rv = self.vapi.map_domain_dump() # restore the state early so as to not impact subsequent tests. @@ -101,7 +104,7 @@ class TestMAP(VppTestCase): # Add a route to the MAP-BR # map_br_pfx = "2001::" - map_br_pfx_len = 64 + map_br_pfx_len = 32 map_route = VppIpRoute(self, map_br_pfx, map_br_pfx_len, @@ -112,15 +115,21 @@ class TestMAP(VppTestCase): # # Add a domain that maps from pg0 to pg1 # - map_dst = '2001::/64' + map_dst = '2001::/32' map_src = '3000::1/128' client_pfx = '192.168.0.0/16' + map_translated_addr = '2001:0:101:7000:0:c0a8:101:7' tag = 'MAP-E tag.' self.vapi.map_add_domain(ip4_prefix=client_pfx, ip6_prefix=map_dst, ip6_src=map_src, + ea_bits_len=20, + psid_offset=4, + psid_length=4, tag=tag) + self.vapi.map_param_set_security_check(enable=1, fragments=1) + # Enable MAP on interface. 
self.vapi.map_if_enable_disable(is_enable=1, sw_if_index=self.pg0.sw_if_index, @@ -137,6 +146,8 @@ class TestMAP(VppTestCase): for p in rx: self.validate(p[1], v4_reply) + self.logger.debug("show trace") + # # Fire in a v4 packet that will be encapped to the BR # @@ -145,7 +156,23 @@ class TestMAP(VppTestCase): UDP(sport=20000, dport=10000) / Raw('\xa5' * 100)) - self.send_and_assert_encapped(v4, "3000::1", "2001::c0a8:0:0") + self.send_and_assert_encapped_one(v4, "3000::1", map_translated_addr) + + self.logger.debug("show trace") + # + # Verify reordered fragments are able to pass as well + # + v4 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IP(id=1, src=self.pg0.remote_ip4, dst='192.168.1.1') / + UDP(sport=20000, dport=10000) / + Raw('\xa5' * 1000)) + + frags = fragment_rfc791(v4, 400) + frags.reverse() + + self.send_and_assert_encapped(frags, "3000::1", map_translated_addr) + + self.logger.debug("show trace") # Enable MAP on interface. self.vapi.map_if_enable_disable(is_enable=1, @@ -165,12 +192,12 @@ class TestMAP(VppTestCase): # # Fire in a V6 encapped packet. 
- # expect a decapped packet on the inside ip4 link + # expect a decapped packet on the inside ip4 link # p = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / - IPv6(dst='3000::1', src="2001::1") / + IPv6(dst='3000::1', src=map_translated_addr) / IP(dst=self.pg0.remote_ip4, src='192.168.1.1') / - UDP(sport=20000, dport=10000) / + UDP(sport=10000, dport=20000) / Raw('\xa5' * 100)) self.pg1.add_stream(p) @@ -186,6 +213,33 @@ class TestMAP(VppTestCase): self.assertEqual(rx[IP].dst, p[IP].dst) # + # Verify encapped reordered fragments pass as well + # + p = (IP(id=1, dst=self.pg0.remote_ip4, src='192.168.1.1') / + UDP(sport=10000, dport=20000) / + Raw('\xa5' * 1500)) + frags = fragment_rfc791(p, 400) + frags.reverse() + + stream = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / + IPv6(dst='3000::1', src=map_translated_addr) / + x for x in frags) + + self.pg1.add_stream(stream) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg0.get_capture(len(frags)) + + for r in rx: + self.assertFalse(r.haslayer(IPv6)) + self.assertEqual(r[IP].src, p[IP].src) + self.assertEqual(r[IP].dst, p[IP].dst) + + return + + # # Pre-resolve. No API for this!! # self.vapi.ppcli("map params pre-resolve ip6-nh 4001::1") @@ -202,9 +256,9 @@ class TestMAP(VppTestCase): self.pg1.sw_if_index)]) pre_res_route.add_vpp_config() - self.send_and_assert_encapped(v4, "3000::1", - "2001::c0a8:0:0", - dmac=self.pg1.remote_hosts[2].mac) + self.send_and_assert_encapped_one(v4, "3000::1", + "2001::c0a8:0:0", + dmac=self.pg1.remote_hosts[2].mac) # # change the route to the pre-solved next-hop @@ -213,9 +267,9 @@ class TestMAP(VppTestCase): self.pg1.sw_if_index)]) pre_res_route.add_vpp_config() - self.send_and_assert_encapped(v4, "3000::1", - "2001::c0a8:0:0", - dmac=self.pg1.remote_hosts[3].mac) + self.send_and_assert_encapped_one(v4, "3000::1", + "2001::c0a8:0:0", + dmac=self.pg1.remote_hosts[3].mac) # # cleanup. The test infra's object registry will ensure |