From 640edcd9016f2381ea1efeaab78e834068c0e862 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Mon, 23 Sep 2019 09:00:30 +0000 Subject: map: use SVR for MAP-T This change is part of an effort to unify reassembly code. By removing shallow virtual reassembly functionality in MAP and using the common vnet provided shallow virtual reassembly, code size and complexity is reduced. Type: refactor Change-Id: Iec8edd039f7b967b53e17bb9bca228a8b452ac0c Signed-off-by: Klement Sekera --- src/plugins/map/ip4_map.c | 12 +- src/plugins/map/ip4_map_t.c | 79 +++------- src/plugins/map/ip6_map.c | 12 +- src/plugins/map/ip6_map_t.c | 101 +++---------- src/plugins/map/map.api | 4 - src/plugins/map/map.c | 304 +-------------------------------------- src/plugins/map/map.h | 121 ++-------------- src/plugins/map/map_api.c | 48 +------ src/plugins/map/test/test_map.py | 12 +- 9 files changed, 69 insertions(+), 624 deletions(-) (limited to 'src/plugins') diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c index 2466f533f37..ad94907e499 100644 --- a/src/plugins/map/ip4_map.c +++ b/src/plugins/map/ip4_map.c @@ -333,15 +333,11 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) { - map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->map_domain_index = map_domain_index0; - tr->port = port0; + map_add_trace (vm, node, p0, map_domain_index0, port0); } if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED)) { - map_trace_t *tr = vlib_add_trace (vm, node, p1, sizeof (*tr)); - tr->map_domain_index = map_domain_index1; - tr->port = port1; + map_add_trace (vm, node, p1, map_domain_index1, port1); } p0->error = error_node->errors[error0]; @@ -449,9 +445,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) { - map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->map_domain_index = 
map_domain_index0; - tr->port = port0; + map_add_trace (vm, node, p0, map_domain_index0, port0); } p0->error = error_node->errors[error0]; diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c index 21d17d77910..2ab1af95922 100644 --- a/src/plugins/map/ip4_map_t.c +++ b/src/plugins/map/ip4_map_t.c @@ -62,41 +62,6 @@ typedef CLIB_PACKED (struct { }) ip4_mapt_pseudo_header_t; /* *INDENT-ON* */ - -static_always_inline int -ip4_map_fragment_cache (ip4_header_t * ip4, u16 port) -{ - u32 *ignore = NULL; - map_ip4_reass_lock (); - map_ip4_reass_t *r = - map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32, - ip4->fragment_id, - (ip4->protocol == - IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol, - &ignore); - if (r) - r->port = port; - - map_ip4_reass_unlock (); - return !r; -} - -static_always_inline i32 -ip4_map_fragment_get_port (ip4_header_t * ip4) -{ - u32 *ignore = NULL; - map_ip4_reass_lock (); - map_ip4_reass_t *r = - map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32, - ip4->fragment_id, - (ip4->protocol == - IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol, - &ignore); - i32 ret = r ? r->port : -1; - map_ip4_reass_unlock (); - return ret; -} - typedef struct { map_domain_t *d; @@ -505,7 +470,7 @@ ip4_map_t_tcp_udp (vlib_main_t * vm, static_always_inline void ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0, ip4_header_t * ip40, u16 ip4_len0, i32 * dst_port0, - u8 * error0, ip4_mapt_next_t * next0) + u8 * error0, ip4_mapt_next_t * next0, u16 l4_dst_port) { if (PREDICT_FALSE (ip4_get_fragment_offset (ip40))) { @@ -516,7 +481,7 @@ ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0, } else { - *dst_port0 = ip4_map_fragment_get_port (ip40); + *dst_port0 = l4_dst_port; *error0 = (*dst_port0 == -1) ? 
MAP_ERROR_FRAGMENT_MEMORY : *error0; } } @@ -525,14 +490,14 @@ ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0, vnet_buffer (p0)->map_t.checksum_offset = 36; *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP; *error0 = ip4_len0 < 40 ? MAP_ERROR_MALFORMED : *error0; - *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 2)); + *dst_port0 = l4_dst_port; } else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_UDP)) { vnet_buffer (p0)->map_t.checksum_offset = 26; *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP; *error0 = ip4_len0 < 28 ? MAP_ERROR_MALFORMED : *error0; - *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 2)); + *dst_port0 = l4_dst_port; } else if (ip40->protocol == IP_PROTOCOL_ICMP) { @@ -544,7 +509,7 @@ ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0, || ((icmp46_header_t *) u8_ptr_add (ip40, sizeof (*ip40)))->code == ICMP4_echo_request) - *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 6)); + *dst_port0 = l4_dst_port; } else { @@ -588,6 +553,9 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) error0 = MAP_ERROR_NONE; p0 = vlib_get_buffer (vm, pi0); + + u16 l4_dst_port = vnet_buffer (p0)->ip.reass.l4_dst_port; + ip40 = vlib_buffer_get_current (p0); ip4_len0 = clib_host_to_net_u16 (ip40->length); if (PREDICT_FALSE (p0->current_length < ip4_len0 || @@ -610,7 +578,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) dst_port0 = -1; ip4_map_t_classify (p0, d0, ip40, ip4_len0, &dst_port0, &error0, - &next0); + &next0, l4_dst_port); /* Verify that port is not among the well-known ports */ if ((d0->psid_length > 0 && d0->psid_offset > 0) @@ -632,18 +600,6 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) pheader0->daddr.as_u64[1] = map_get_sfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0); - // It is important to cache at this stage because the result - // might be necessary for packets within the same vector. 
- // Actually, this approach even provides some limited - // out-of-order fragments support - if (PREDICT_FALSE - (ip4_is_first_fragment (ip40) && (dst_port0 != -1) - && (d0->ea_bits_len != 0 || !d0->rules) - && ip4_map_fragment_cache (ip40, dst_port0))) - { - error0 = MAP_ERROR_UNKNOWN; - } - if (PREDICT_TRUE (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) { @@ -657,6 +613,11 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0; p0->error = error_node->errors[error0]; + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + map_add_trace (vm, node, p0, d0 - map_main.domains, dst_port0); + } exit: vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, pi0, @@ -673,12 +634,14 @@ static char *map_t_error_strings[] = { #undef _ }; -VNET_FEATURE_INIT (ip4_map_t_feature, static) = -{ -.arc_name = "ip4-unicast",.node_name = "ip4-map-t",.runs_before = - VNET_FEATURES ("ip4-flow-classify"),}; - /* *INDENT-OFF* */ +VNET_FEATURE_INIT (ip4_map_t_feature, static) = { + .arc_name = "ip4-unicast", + .node_name = "ip4-map-t", + .runs_before = VNET_FEATURES ("ip4-flow-classify"), + .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"), +}; + VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = { .function = ip4_map_t_fragmented, .name = "ip4-map-t-fragmented", diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c index 492d5f83927..f14b880de3f 100644 --- a/src/plugins/map/ip6_map.c +++ b/src/plugins/map/ip6_map.c @@ -368,16 +368,12 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) { - map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->map_domain_index = map_domain_index0; - tr->port = port0; + map_add_trace (vm, node, p0, map_domain_index0, port0); } if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED)) { - map_trace_t *tr = 
vlib_add_trace (vm, node, p1, sizeof (*tr)); - tr->map_domain_index = map_domain_index1; - tr->port = port1; + map_add_trace (vm, node, p1, map_domain_index1, port1); } if (error0 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled) @@ -523,9 +519,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) { - map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->map_domain_index = map_domain_index0; - tr->port = (u16) port0; + map_add_trace (vm, node, p0, map_domain_index0, port0); } if (mm->icmp6_enabled && diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c index 21d6e10c5ae..6e9c0d7640c 100644 --- a/src/plugins/map/ip6_map_t.c +++ b/src/plugins/map/ip6_map_t.c @@ -51,45 +51,6 @@ typedef enum IP6_MAPT_FRAGMENTED_N_NEXT } ip6_mapt_fragmented_next_t; -static_always_inline int -ip6_map_fragment_cache (ip6_header_t * ip6, ip6_frag_hdr_t * frag, - map_domain_t * d, u16 port) -{ - u32 *ignore = NULL; - map_ip4_reass_lock (); - map_ip4_reass_t *r = - map_ip4_reass_get (map_get_ip4 (&ip6->src_address, d->ip6_src_len), - ip6_map_t_embedded_address (d, &ip6->dst_address), - frag_id_6to4 (frag->identification), - (ip6->protocol == - IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : ip6->protocol, - &ignore); - if (r) - r->port = port; - - map_ip4_reass_unlock (); - return !r; -} - -/* Returns the associated port or -1 */ -static_always_inline i32 -ip6_map_fragment_get (ip6_header_t * ip6, ip6_frag_hdr_t * frag, - map_domain_t * d) -{ - u32 *ignore = NULL; - map_ip4_reass_lock (); - map_ip4_reass_t *r = - map_ip4_reass_get (map_get_ip4 (&ip6->src_address, d->ip6_src_len), - ip6_map_t_embedded_address (d, &ip6->dst_address), - frag_id_6to4 (frag->identification), - (ip6->protocol == - IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : ip6->protocol, - &ignore); - i32 ret = r ? 
r->port : -1; - map_ip4_reass_unlock (); - return ret; -} - typedef struct { map_domain_t *d; @@ -529,8 +490,9 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) to_next += 1; n_left_to_next -= 1; error0 = MAP_ERROR_NONE; - p0 = vlib_get_buffer (vm, pi0); + u16 l4_dst_port = vnet_buffer (p0)->ip.reass.l4_dst_port; + ip60 = vlib_buffer_get_current (p0); d0 = @@ -572,13 +534,8 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (vnet_buffer (p0)->map_t.v6.frag_offset && ip6_frag_hdr_offset (frag0))) { - map_port0 = ip6_map_fragment_get (ip60, frag0, d0); - if (map_port0 == -1) - error0 = - error0 == - MAP_ERROR_NONE ? MAP_ERROR_FRAGMENT_MEMORY : error0; - else - next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED; + map_port0 = l4_dst_port; + next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED; } else if (PREDICT_TRUE @@ -590,10 +547,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_buffer (p0)->map_t.checksum_offset = vnet_buffer (p0)->map_t.v6.l4_offset + 16; next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; - map_port0 = - (i32) * - ((u16 *) - u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset)); + map_port0 = l4_dst_port; } else if (PREDICT_TRUE @@ -605,10 +559,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_buffer (p0)->map_t.checksum_offset = vnet_buffer (p0)->map_t.v6.l4_offset + 6; next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; - map_port0 = - (i32) * - ((u16 *) - u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset)); + map_port0 = l4_dst_port; } else if (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6) @@ -625,11 +576,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset))-> code == ICMP6_echo_request) - map_port0 = - (i32) * - ((u16 *) - u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.v6.l4_offset + 6)); + map_port0 = l4_dst_port; } else { @@ -654,25 +601,6 @@ ip6_map_t 
(vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) error0 == MAP_ERROR_NONE ? MAP_ERROR_SEC_CHECK : error0; } - // Fragmented first packet needs to be cached for following packets - if (PREDICT_FALSE - (vnet_buffer (p0)->map_t.v6.frag_offset - && !ip6_frag_hdr_offset ((ip6_frag_hdr_t *) - u8_ptr_add (ip60, - vnet_buffer (p0)->map_t. - v6.frag_offset))) - && (map_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) - && (error0 == MAP_ERROR_NONE)) - { - ip6_map_fragment_cache (ip60, - (ip6_frag_hdr_t *) u8_ptr_add (ip60, - vnet_buffer - (p0)-> - map_t.v6. - frag_offset), - d0, map_port0); - } - if (PREDICT_TRUE (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) { @@ -686,6 +614,12 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0; p0->error = error_node->errors[error0]; + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + map_add_trace (vm, node, p0, + vnet_buffer (p0)->map_t.map_domain_index, + map_port0); + } exit: vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, pi0, @@ -766,10 +700,11 @@ VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = { /* *INDENT-ON* */ /* *INDENT-OFF* */ -VNET_FEATURE_INIT(ip4_map_t_feature, static) = { - .arc_name = "ip6-unicast", - .node_name = "ip6-map-t", - .runs_before = VNET_FEATURES("ip6-flow-classify"), +VNET_FEATURE_INIT (ip6_map_t_feature, static) = { + .arc_name = "ip6-unicast", + .node_name = "ip6-map-t", + .runs_before = VNET_FEATURES ("ip6-flow-classify"), + .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"), }; VLIB_REGISTER_NODE(ip6_map_t_node) = { diff --git a/src/plugins/map/map.api b/src/plugins/map/map.api index b1f78124c2c..f2a7f84de98 100644 --- a/src/plugins/map/map.api +++ b/src/plugins/map/map.api @@ -326,10 +326,6 @@ define map_param_get @param icmp6_enable_unreachable - 1 = send ICMP unreachable err msgs @param ip4_nh_address - direct IP4 next-hop 
address @param ip6_nh_address - direct IP6 next-hop address - @param ip4_lifetime_ms - reassembly valid lifetime, or ~0 - @param ip4_pool_size - max number of reassemblies, or ~0 - @param ip4_buffers - max number of reassembly buffers, or ~0 - @param ip4_ht_ratio - reassembly valid lifetime, or ~0 @param ip6_lifetime_ms - max number of reassemblies, or ~0 @param ip6_pool_size - max number of reassemblies, or ~0 @param ip6_buffers - max number of reassembly buffers, or ~0 diff --git a/src/plugins/map/map.c b/src/plugins/map/map.c index 6ec9e72b413..904f0e9fd5e 100644 --- a/src/plugins/map/map.c +++ b/src/plugins/map/map.c @@ -966,24 +966,6 @@ format_map_domain (u8 * s, va_list * args) return s; } -static u8 * -format_map_ip4_reass (u8 * s, va_list * args) -{ - map_main_t *mm = &map_main; - map_ip4_reass_t *r = va_arg (*args, map_ip4_reass_t *); - map_ip4_reass_key_t *k = &r->key; - f64 now = vlib_time_now (mm->vlib_main); - f64 lifetime = (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000); - f64 dt = (r->ts + lifetime > now) ? (r->ts + lifetime - now) : -1; - s = format (s, - "ip4-reass src=%U dst=%U protocol=%d identifier=%d port=%d lifetime=%.3lf\n", - format_ip4_address, &k->src.as_u8, format_ip4_address, - &k->dst.as_u8, k->protocol, - clib_net_to_host_u16 (k->fragment_id), - (r->port >= 0) ? 
clib_net_to_host_u16 (r->port) : -1, dt); - return s; -} - static u8 * format_map_ip6_reass (u8 * s, va_list * args) { @@ -1064,12 +1046,8 @@ show_map_fragments_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { map_main_t *mm = &map_main; - map_ip4_reass_t *f4; map_ip6_reass_t *f6; - /* *INDENT-OFF* */ - pool_foreach(f4, mm->ip4_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip4_reass, f4);})); - /* *INDENT-ON* */ /* *INDENT-OFF* */ pool_foreach(f6, mm->ip6_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip6_reass, f6);})); /* *INDENT-ON* */ @@ -1197,7 +1175,7 @@ map_params_reass_command_fn (vlib_main_t * vm, unformat_input_t * input, { unformat_input_t _line_input, *line_input = &_line_input; u32 lifetime = ~0; - f64 ht_ratio = (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1); + f64 ht_ratio = (MAP_IP6_REASS_CONF_HT_RATIO_MAX + 1); u32 pool_size = ~0; u64 buffers = ~(0ull); u8 ip4 = 0, ip6 = 0; @@ -1232,19 +1210,8 @@ map_params_reass_command_fn (vlib_main_t * vm, unformat_input_t * input, if (ip4) { - if (pool_size != ~0 && pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX) - return clib_error_return (0, "invalid ip4-reass pool-size ( > %d)", - MAP_IP4_REASS_CONF_POOL_SIZE_MAX); - if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1) - && ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX) - return clib_error_return (0, "invalid ip4-reass ht-ratio ( > %d)", - MAP_IP4_REASS_CONF_HT_RATIO_MAX); - if (lifetime != ~0 && lifetime > MAP_IP4_REASS_CONF_LIFETIME_MAX) - return clib_error_return (0, "invalid ip4-reass lifetime ( > %d)", - MAP_IP4_REASS_CONF_LIFETIME_MAX); - if (buffers != ~(0ull) && buffers > MAP_IP4_REASS_CONF_BUFFERS_MAX) - return clib_error_return (0, "invalid ip4-reass buffers ( > %ld)", - MAP_IP4_REASS_CONF_BUFFERS_MAX); + return clib_error_return (0, + "ip4 reassembly no longer supported in map"); } if (ip6) @@ -1252,8 +1219,7 @@ map_params_reass_command_fn (vlib_main_t * vm, unformat_input_t * input, if (pool_size != ~0 && 
pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX) return clib_error_return (0, "invalid ip6-reass pool-size ( > %d)", MAP_IP6_REASS_CONF_POOL_SIZE_MAX); - if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1) - && ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX) + if (ht_ratio != (MAP_IP6_REASS_CONF_HT_RATIO_MAX + 1) && ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX) return clib_error_return (0, "invalid ip6-reass ht-log2len ( > %d)", MAP_IP6_REASS_CONF_HT_RATIO_MAX); if (lifetime != ~0 && lifetime > MAP_IP6_REASS_CONF_LIFETIME_MAX) @@ -1367,168 +1333,6 @@ format_map_trace (u8 * s, va_list * args) return s; } -static_always_inline map_ip4_reass_t * -map_ip4_reass_lookup (map_ip4_reass_key_t * k, u32 bucket, f64 now) -{ - map_main_t *mm = &map_main; - u32 ri = mm->ip4_reass_hash_table[bucket]; - while (ri != MAP_REASS_INDEX_NONE) - { - map_ip4_reass_t *r = pool_elt_at_index (mm->ip4_reass_pool, ri); - if (r->key.as_u64[0] == k->as_u64[0] && - r->key.as_u64[1] == k->as_u64[1] && - now < r->ts + (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000)) - { - return r; - } - ri = r->bucket_next; - } - return NULL; -} - -#define map_ip4_reass_pool_index(r) (r - map_main.ip4_reass_pool) - -void -map_ip4_reass_free (map_ip4_reass_t * r, u32 ** pi_to_drop) -{ - map_main_t *mm = &map_main; - map_ip4_reass_get_fragments (r, pi_to_drop); - - // Unlink in hash bucket - map_ip4_reass_t *r2 = NULL; - u32 r2i = mm->ip4_reass_hash_table[r->bucket]; - while (r2i != map_ip4_reass_pool_index (r)) - { - ASSERT (r2i != MAP_REASS_INDEX_NONE); - r2 = pool_elt_at_index (mm->ip4_reass_pool, r2i); - r2i = r2->bucket_next; - } - if (r2) - { - r2->bucket_next = r->bucket_next; - } - else - { - mm->ip4_reass_hash_table[r->bucket] = r->bucket_next; - } - - // Unlink in list - if (r->fifo_next == map_ip4_reass_pool_index (r)) - { - mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; - } - else - { - if (mm->ip4_reass_fifo_last == map_ip4_reass_pool_index (r)) - mm->ip4_reass_fifo_last = r->fifo_prev; - pool_elt_at_index (mm->ip4_reass_pool, r->fifo_prev)->fifo_next = 
r->fifo_next; - pool_elt_at_index (mm->ip4_reass_pool, r->fifo_next)->fifo_prev = - r->fifo_prev; - } - - pool_put (mm->ip4_reass_pool, r); - mm->ip4_reass_allocated--; -} - -map_ip4_reass_t * -map_ip4_reass_get (u32 src, u32 dst, u16 fragment_id, - u8 protocol, u32 ** pi_to_drop) -{ - map_ip4_reass_t *r; - map_main_t *mm = &map_main; - map_ip4_reass_key_t k = {.src.data_u32 = src, - .dst.data_u32 = dst, - .fragment_id = fragment_id, - .protocol = protocol - }; - - u32 h = 0; -#ifdef clib_crc32c_uses_intrinsics - h = clib_crc32c ((u8 *) k.as_u32, 16); -#else - u64 tmp = k.as_u32[0] ^ k.as_u32[1] ^ k.as_u32[2] ^ k.as_u32[3]; - h = clib_xxhash (tmp); -#endif - h = h >> (32 - mm->ip4_reass_ht_log2len); - - f64 now = vlib_time_now (mm->vlib_main); - - //Cache garbage collection - while (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) - { - map_ip4_reass_t *last = - pool_elt_at_index (mm->ip4_reass_pool, mm->ip4_reass_fifo_last); - if (last->ts + (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000) < now) - map_ip4_reass_free (last, pi_to_drop); - else - break; - } - - if ((r = map_ip4_reass_lookup (&k, h, now))) - return r; - - if (mm->ip4_reass_allocated >= mm->ip4_reass_conf_pool_size) - return NULL; - - pool_get (mm->ip4_reass_pool, r); - mm->ip4_reass_allocated++; - int i; - for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) - r->fragments[i] = ~0; - - u32 ri = map_ip4_reass_pool_index (r); - - //Link in new bucket - r->bucket = h; - r->bucket_next = mm->ip4_reass_hash_table[h]; - mm->ip4_reass_hash_table[h] = ri; - - //Link in fifo - if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) - { - r->fifo_next = - pool_elt_at_index (mm->ip4_reass_pool, - mm->ip4_reass_fifo_last)->fifo_next; - r->fifo_prev = mm->ip4_reass_fifo_last; - pool_elt_at_index (mm->ip4_reass_pool, r->fifo_prev)->fifo_next = ri; - pool_elt_at_index (mm->ip4_reass_pool, r->fifo_next)->fifo_prev = ri; - } - else - { - r->fifo_next = r->fifo_prev = ri; - mm->ip4_reass_fifo_last = ri; - } - 
- //Set other fields - r->ts = now; - r->key = k; - r->port = -1; -#ifdef MAP_IP4_REASS_COUNT_BYTES - r->expected_total = 0xffff; - r->forwarded = 0; -#endif - - return r; -} - -int -map_ip4_reass_add_fragment (map_ip4_reass_t * r, u32 pi) -{ - if (map_main.ip4_reass_buffered_counter >= map_main.ip4_reass_conf_buffers) - return -1; - - int i; - for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) - if (r->fragments[i] == ~0) - { - r->fragments[i] = pi; - map_main.ip4_reass_buffered_counter++; - return 0; - } - return -1; -} - static_always_inline map_ip6_reass_t * map_ip6_reass_lookup (map_ip6_reass_key_t * k, u32 bucket, f64 now) { @@ -1754,44 +1558,6 @@ map_ip6_reass_add_fragment (map_ip6_reass_t * r, u32 pi, return 0; } -void -map_ip4_reass_reinit (u32 * trashed_reass, u32 * dropped_packets) -{ - map_main_t *mm = &map_main; - int i; - - if (dropped_packets) - *dropped_packets = mm->ip4_reass_buffered_counter; - if (trashed_reass) - *trashed_reass = mm->ip4_reass_allocated; - if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) - { - u16 ri = mm->ip4_reass_fifo_last; - do - { - map_ip4_reass_t *r = pool_elt_at_index (mm->ip4_reass_pool, ri); - for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) - if (r->fragments[i] != ~0) - map_ip4_drop_pi (r->fragments[i]); - - ri = r->fifo_next; - pool_put (mm->ip4_reass_pool, r); - } - while (ri != mm->ip4_reass_fifo_last); - } - - vec_free (mm->ip4_reass_hash_table); - vec_resize (mm->ip4_reass_hash_table, 1 << mm->ip4_reass_ht_log2len); - for (i = 0; i < (1 << mm->ip4_reass_ht_log2len); i++) - mm->ip4_reass_hash_table[i] = MAP_REASS_INDEX_NONE; - pool_free (mm->ip4_reass_pool); - pool_alloc (mm->ip4_reass_pool, mm->ip4_reass_conf_pool_size); - - mm->ip4_reass_allocated = 0; - mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; - mm->ip4_reass_buffered_counter = 0; -} - u8 map_get_ht_log2len (f32 ht_ratio, u16 pool_size) { @@ -1803,52 +1569,6 @@ map_get_ht_log2len (f32 ht_ratio, u16 pool_size) return 4; 
} -int -map_ip4_reass_conf_ht_ratio (f32 ht_ratio, u32 * trashed_reass, - u32 * dropped_packets) -{ - map_main_t *mm = &map_main; - if (ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX) - return -1; - - map_ip4_reass_lock (); - mm->ip4_reass_conf_ht_ratio = ht_ratio; - mm->ip4_reass_ht_log2len = - map_get_ht_log2len (ht_ratio, mm->ip4_reass_conf_pool_size); - map_ip4_reass_reinit (trashed_reass, dropped_packets); - map_ip4_reass_unlock (); - return 0; -} - -int -map_ip4_reass_conf_pool_size (u16 pool_size, u32 * trashed_reass, - u32 * dropped_packets) -{ - map_main_t *mm = &map_main; - if (pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX) - return -1; - - map_ip4_reass_lock (); - mm->ip4_reass_conf_pool_size = pool_size; - map_ip4_reass_reinit (trashed_reass, dropped_packets); - map_ip4_reass_unlock (); - return 0; -} - -int -map_ip4_reass_conf_lifetime (u16 lifetime_ms) -{ - map_main.ip4_reass_conf_lifetime_ms = lifetime_ms; - return 0; -} - -int -map_ip4_reass_conf_buffers (u32 buffers) -{ - map_main.ip4_reass_conf_buffers = buffers; - return 0; -} - void map_ip6_reass_reinit (u32 * trashed_reass, u32 * dropped_packets) { @@ -1880,7 +1600,7 @@ map_ip6_reass_reinit (u32 * trashed_reass, u32 * dropped_packets) for (i = 0; i < (1 << mm->ip6_reass_ht_log2len); i++) mm->ip6_reass_hash_table[i] = MAP_REASS_INDEX_NONE; pool_free (mm->ip6_reass_pool); - pool_alloc (mm->ip6_reass_pool, mm->ip4_reass_conf_pool_size); + pool_alloc (mm->ip6_reass_pool, mm->ip6_reass_conf_pool_size); mm->ip6_reass_allocated = 0; mm->ip6_reass_buffered_counter = 0; @@ -2262,20 +1982,6 @@ map_init (vlib_main_t * vm) vlib_zero_simple_counter (&mm->icmp_relayed, 0); mm->icmp_relayed.stat_segment_name = "/map/icmp-relayed"; - /* IP4 virtual reassembly */ - mm->ip4_reass_hash_table = 0; - mm->ip4_reass_pool = 0; - clib_spinlock_init (&mm->ip4_reass_lock); - mm->ip4_reass_conf_ht_ratio = MAP_IP4_REASS_HT_RATIO_DEFAULT; - mm->ip4_reass_conf_lifetime_ms = MAP_IP4_REASS_LIFETIME_DEFAULT; - 
mm->ip4_reass_conf_pool_size = MAP_IP4_REASS_POOL_SIZE_DEFAULT; - mm->ip4_reass_conf_buffers = MAP_IP4_REASS_BUFFERS_DEFAULT; - mm->ip4_reass_ht_log2len = - map_get_ht_log2len (mm->ip4_reass_conf_ht_ratio, - mm->ip4_reass_conf_pool_size); - mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; - map_ip4_reass_reinit (NULL, NULL); - /* IP6 virtual reassembly */ mm->ip6_reass_hash_table = 0; mm->ip6_reass_pool = 0; diff --git a/src/plugins/map/map.h b/src/plugins/map/map.h index a4aced5952d..57ca1363c1b 100644 --- a/src/plugins/map/map.h +++ b/src/plugins/map/map.h @@ -32,6 +32,7 @@ #define MAP_ERR_BAD_LIFETIME -3 #define MAP_ERR_BAD_BUFFERS -4 #define MAP_ERR_BAD_BUFFERS_TOO_LARGE -5 +#define MAP_ERR_UNSUPPORTED -6 int map_create_domain (ip4_address_t * ip4_prefix, u8 ip4_prefix_len, ip6_address_t * ip6_prefix, u8 ip6_prefix_len, @@ -49,9 +50,9 @@ int map_param_set_fragmentation (bool inner, bool ignore_df); int map_param_set_icmp (ip4_address_t * ip4_err_relay_src); int map_param_set_icmp6 (u8 enable_unreachable); void map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6, bool is_del); -int map_param_set_reassembly (bool is_ipv6, u16 lifetime_ms, - u16 pool_size, u32 buffers, f64 ht_ratio, - u32 * reass, u32 * packets); +int map_param_set_reassembly (bool is_ipv6, u16 lifetime_ms, u16 pool_size, + u32 buffers, f64 ht_ratio, u32 * reass, + u32 * packets); int map_param_set_security_check (bool enable, bool fragments); int map_param_set_traffic_class (bool copy, u8 tc); int map_param_set_tcp (u16 tcp_mss); @@ -64,26 +65,6 @@ typedef enum MAP_DOMAIN_RFC6052 = 1 << 2, } __attribute__ ((__packed__)) map_domain_flags_e; -/** - * IP4 reassembly logic: - * One virtually reassembled flow requires a map_ip4_reass_t structure in order - * to keep the first-fragment port number and, optionally, cache out of sequence - * packets. - * There are up to MAP_IP4_REASS_MAX_REASSEMBLY such structures. 
- * When in use, those structures are stored in a hash table of MAP_IP4_REASS_BUCKETS buckets. - * When a new structure needs to be used, it is allocated from available ones. - * If there is no structure available, the oldest in use is selected and used if and - * only if it was first allocated more than MAP_IP4_REASS_LIFETIME seconds ago. - * In case no structure can be allocated, the fragment is dropped. - */ - -#define MAP_IP4_REASS_LIFETIME_DEFAULT (100) /* ms */ -#define MAP_IP4_REASS_HT_RATIO_DEFAULT (1.0) -#define MAP_IP4_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures -#define MAP_IP4_REASS_BUFFERS_DEFAULT 2048 - -#define MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 // Number of fragment per reassembly - #define MAP_IP6_REASS_LIFETIME_DEFAULT (100) /* ms */ #define MAP_IP6_REASS_HT_RATIO_DEFAULT (1.0) #define MAP_IP6_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures @@ -92,7 +73,6 @@ typedef enum #define MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 #define MAP_IP6_REASS_COUNT_BYTES -#define MAP_IP4_REASS_COUNT_BYTES //#define IP6_MAP_T_OVERRIDE_TOS 0 @@ -143,38 +123,6 @@ typedef struct #define MAP_REASS_INDEX_NONE ((u16)0xffff) -/* - * Hash key, padded out to 16 bytes for fast compare - */ -/* *INDENT-OFF* */ -typedef union { - CLIB_PACKED (struct { - ip4_address_t src; - ip4_address_t dst; - u16 fragment_id; - u8 protocol; - }); - u64 as_u64[2]; - u32 as_u32[4]; -} map_ip4_reass_key_t; -/* *INDENT-ON* */ - -typedef struct -{ - map_ip4_reass_key_t key; - f64 ts; -#ifdef MAP_IP4_REASS_COUNT_BYTES - u16 expected_total; - u16 forwarded; -#endif - i32 port; - u16 bucket; - u16 bucket_next; - u16 fifo_prev; - u16 fifo_next; - u32 fragments[MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY]; -} map_ip4_reass_t; - /* * MAP domain counters */ @@ -291,26 +239,6 @@ typedef struct { vlib_main_t *vlib_main; vnet_main_t *vnet_main; - /* - * IPv4 encap and decap reassembly - */ - /* Configuration */ - f32 ip4_reass_conf_ht_ratio; //Size of ht is 
2^ceil(log2(ratio*pool_size)) - u16 ip4_reass_conf_pool_size; //Max number of allocated reass structures - u16 ip4_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms - u32 ip4_reass_conf_buffers; //Maximum number of buffers used by ip4 reassembly - - /* Runtime */ - map_ip4_reass_t *ip4_reass_pool; - u8 ip4_reass_ht_log2len; //Hash table size is 2^log2len - u16 ip4_reass_allocated; - u16 *ip4_reass_hash_table; - u16 ip4_reass_fifo_last; - clib_spinlock_t ip4_reass_lock; - - /* Counters */ - u32 ip4_reass_buffered_counter; - bool frag_inner; /* Inner or outer fragmentation */ bool frag_ignore_df; /* Fragment (outer) packet even if DF is set */ @@ -382,6 +310,15 @@ typedef struct { u16 port; } map_trace_t; +always_inline void +map_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b, + u32 map_domain_index, u16 port) +{ + map_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr)); + tr->map_domain_index = map_domain_index; + tr->port = port; +} + extern map_main_t map_main; extern vlib_node_registration_t ip4_map_node; @@ -498,31 +435,8 @@ ip6_map_get_domain (ip6_address_t *addr, return pool_elt_at_index(mm->domains, mdi); } -map_ip4_reass_t * -map_ip4_reass_get(u32 src, u32 dst, u16 fragment_id, - u8 protocol, u32 **pi_to_drop); -void -map_ip4_reass_free(map_ip4_reass_t *r, u32 **pi_to_drop); - -#define map_ip4_reass_lock() clib_spinlock_lock (&map_main.ip4_reass_lock) -#define map_ip4_reass_unlock() clib_spinlock_unlock (&map_main.ip4_reass_lock) - -static_always_inline void -map_ip4_reass_get_fragments(map_ip4_reass_t *r, u32 **pi) -{ - int i; - for (i=0; ifragments[i] != ~0) { - vec_add1(*pi, r->fragments[i]); - r->fragments[i] = ~0; - map_main.ip4_reass_buffered_counter--; - } -} - clib_error_t * map_plugin_api_hookup (vlib_main_t * vm); -int map_ip4_reass_add_fragment(map_ip4_reass_t *r, u32 pi); - map_ip6_reass_t * map_ip6_reass_get(ip6_address_t *src, ip6_address_t *dst, u32 fragment_id, u8 protocol, u32 
**pi_to_drop); @@ -539,15 +453,6 @@ map_ip6_reass_add_fragment(map_ip6_reass_t *r, u32 pi, void map_ip4_drop_pi(u32 pi); -int map_ip4_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets); -#define MAP_IP4_REASS_CONF_HT_RATIO_MAX 100 -int map_ip4_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets); -#define MAP_IP4_REASS_CONF_POOL_SIZE_MAX (0xfeff) -int map_ip4_reass_conf_lifetime(u16 lifetime_ms); -#define MAP_IP4_REASS_CONF_LIFETIME_MAX 0xffff -int map_ip4_reass_conf_buffers(u32 buffers); -#define MAP_IP4_REASS_CONF_BUFFERS_MAX (0xffffffff) - void map_ip6_drop_pi(u32 pi); diff --git a/src/plugins/map/map_api.c b/src/plugins/map/map_api.c index c29a2a12375..ea3212a4960 100644 --- a/src/plugins/map/map_api.c +++ b/src/plugins/map/map_api.c @@ -385,45 +385,7 @@ map_param_set_reassembly (bool is_ipv6, } else { - if (pool_size != (u16) ~ 0) - { - if (pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX) - return MAP_ERR_BAD_POOL_SIZE; - if (map_ip4_reass_conf_pool_size - (pool_size, &ps_reass, &ps_packets)) - return MAP_ERR_BAD_POOL_SIZE; - } - - if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1)) - { - if (ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX) - return MAP_ERR_BAD_HT_RATIO; - if (map_ip4_reass_conf_ht_ratio (ht_ratio, &ht_reass, &ht_packets)) - return MAP_ERR_BAD_HT_RATIO; - } - - if (lifetime_ms != (u16) ~ 0) - { - if (lifetime_ms > MAP_IP4_REASS_CONF_LIFETIME_MAX) - return MAP_ERR_BAD_LIFETIME; - if (map_ip4_reass_conf_lifetime (lifetime_ms)) - return MAP_ERR_BAD_LIFETIME; - } - - if (buffers != ~0) - { - if (buffers > MAP_IP4_REASS_CONF_BUFFERS_MAX) - return MAP_ERR_BAD_BUFFERS; - if (map_ip4_reass_conf_buffers (buffers)) - return MAP_ERR_BAD_BUFFERS; - } - - if (map_main.ip4_reass_conf_buffers > - map_main.ip4_reass_conf_pool_size * - MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) - { - return MAP_ERR_BAD_BUFFERS_TOO_LARGE; - } + return MAP_ERR_UNSUPPORTED; } if (reass) @@ -568,12 +530,6 @@ 
vl_api_map_param_get_t_handler (vl_api_map_param_get_t * mp) clib_memset (&rmp->ip4_nh_address, 0, sizeof (rmp->ip4_nh_address)); clib_memset (&rmp->ip6_nh_address, 0, sizeof (rmp->ip6_nh_address)); - rmp->ip4_lifetime_ms = - clib_net_to_host_u16 (mm->ip4_reass_conf_lifetime_ms); - rmp->ip4_pool_size = clib_net_to_host_u16 (mm->ip4_reass_conf_pool_size); - rmp->ip4_buffers = clib_net_to_host_u32 (mm->ip4_reass_conf_buffers); - rmp->ip4_ht_ratio = clib_net_to_host_f64 (mm->ip4_reass_conf_ht_ratio); - rmp->ip6_lifetime_ms = clib_net_to_host_u16 (mm->ip6_reass_conf_lifetime_ms); rmp->ip6_pool_size = clib_net_to_host_u16 (mm->ip6_reass_conf_pool_size); @@ -627,6 +583,8 @@ map_if_enable_disable (bool is_enable, u32 sw_if_index, bool is_translation) } else { + ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, is_enable); + ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, is_enable); vnet_feature_enable_disable ("ip4-unicast", "ip4-map-t", sw_if_index, is_enable ? 1 : 0, 0, 0); vnet_feature_enable_disable ("ip6-unicast", "ip6-map-t", sw_if_index, diff --git a/src/plugins/map/test/test_map.py b/src/plugins/map/test/test_map.py index cf1e6f89616..368cf51092d 100644 --- a/src/plugins/map/test/test_map.py +++ b/src/plugins/map/test/test_map.py @@ -146,8 +146,6 @@ class TestMAP(VppTestCase): for p in rx: self.validate(p[1], v4_reply) - self.logger.debug("show trace") - # # Fire in a v4 packet that will be encapped to the BR # @@ -158,7 +156,6 @@ class TestMAP(VppTestCase): self.send_and_assert_encapped_one(v4, "3000::1", map_translated_addr) - self.logger.debug("show trace") # # Verify reordered fragments are able to pass as well # @@ -172,8 +169,6 @@ class TestMAP(VppTestCase): self.send_and_assert_encapped(frags, "3000::1", map_translated_addr) - self.logger.debug("show trace") - # Enable MAP on interface. 
self.vapi.map_if_enable_disable(is_enable=1, sw_if_index=self.pg1.sw_if_index, @@ -237,8 +232,6 @@ class TestMAP(VppTestCase): self.assertEqual(r[IP].src, p[IP].src) self.assertEqual(r[IP].dst, p[IP].dst) - return - # # Pre-resolve. No API for this!! # @@ -257,7 +250,7 @@ class TestMAP(VppTestCase): pre_res_route.add_vpp_config() self.send_and_assert_encapped_one(v4, "3000::1", - "2001::c0a8:0:0", + map_translated_addr, dmac=self.pg1.remote_hosts[2].mac) # @@ -268,7 +261,7 @@ class TestMAP(VppTestCase): pre_res_route.add_vpp_config() self.send_and_assert_encapped_one(v4, "3000::1", - "2001::c0a8:0:0", + map_translated_addr, dmac=self.pg1.remote_hosts[3].mac) # @@ -447,6 +440,7 @@ class TestMAP(VppTestCase): for p in rx: pass # p.show2() + # reass_pkt = reassemble(rx) # p4_reply.ttl -= 1 # p4_reply.id = 256 -- cgit 1.2.3-korg