author    | Klement Sekera <ksekera@cisco.com> | 2019-02-13 11:01:32 +0100
committer | Damjan Marion <dmarion@me.com>     | 2019-02-25 19:30:18 +0000
commit    | f883f6a1132ad4bb7aa9d9a79d420274fbcf3b64 (patch)
tree      | cf6867cd2eb1867a75cf2e5b4f27ab4f29e23b98 /src/vnet/ip
parent    | b0789106cb4f4c7ac529c02a5ff1ac346f0913dd (diff)
buffer chain linearization
Rewrite the vlib_buffer_chain_linearize function so that it works as intended.
Linearize buffer chains coming out of reassembly to work around some
dpdk-tx issues. Note that this is not a complete workaround, as a
sufficiently large packet will still cause the resulting chain to be
too long.
Drop the reassembly features that relied on knowing which and how many
buffers were freed during linearization: the buffer counts and the
tracing capabilities for these cases.
Change-Id: Ic65de53ecb5c78cd96b178033f6a576ab4060ed1
Signed-off-by: Klement Sekera <ksekera@cisco.com>
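
For context, the sketch below models what linearization means for a segmented
packet: the payloads of all segments in a chain are copied into one contiguous
buffer and the spare segments are released, so the chain collapses to a single
buffer. This is a minimal, self-contained illustration with a hypothetical
seg_t type; it is not the in-place vlib_buffer_chain_linearize implementation
this patch rewrites, which can fail when buffers are unavailable. That failure
mode is why the reassembly code gains the new IP4_REASS_RC_NO_BUF and
IP6_REASS_RC_NO_BUF return codes in the hunks below.

    /*
     * Minimal sketch of chain linearization, assuming a hypothetical seg_t
     * segment type.  Not the VPP vlib_buffer_chain_linearize implementation;
     * it only illustrates collapsing a buffer chain into one contiguous
     * buffer and freeing the now-unused segments.
     */
    #include <stdlib.h>
    #include <string.h>

    typedef struct seg
    {
      struct seg *next;           /* next segment in the chain, or NULL */
      size_t len;                 /* bytes used in data[] */
      unsigned char data[2048];
    } seg_t;

    /* Copy every segment's payload into one contiguous buffer and free the
     * chain.  Assumes a non-empty chain.  Returns NULL if the allocation
     * fails; the caller must then drop the packet, analogous to the new
     * IP4_REASS_RC_NO_BUF path in the patch. */
    static unsigned char *
    linearize (seg_t * head, size_t * total_len)
    {
      size_t total = 0;
      for (seg_t * s = head; s; s = s->next)
        total += s->len;

      unsigned char *flat = malloc (total);
      if (!flat)
        return NULL;

      size_t off = 0;
      for (seg_t * s = head; s;)
        {
          memcpy (flat + off, s->data, s->len);
          off += s->len;
          seg_t *next = s->next;
          free (s);               /* segment no longer referenced once copied */
          s = next;
        }
      *total_len = total;
      return flat;
    }

In the patch itself, a failed linearization is handled like an internal error:
the IP4_REASS_RC_NO_BUF / IP6_REASS_RC_NO_BUF cases fall through to the
"drop everything and start with a clean slate" handling and processing
continues with the next packet.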
Diffstat (limited to 'src/vnet/ip')
-rw-r--r-- | src/vnet/ip/ip4_error.h      |   1
-rw-r--r-- | src/vnet/ip/ip4_reassembly.c | 198
-rw-r--r-- | src/vnet/ip/ip6_reassembly.c | 219
3 files changed, 82 insertions, 336 deletions
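
Much of the deleted code below is the vec_drop_timeout / vec_drop_overlap /
vec_drop_compress machinery: instead of collecting buffer indices so a later
stage could count, trace and enqueue them to the drop node, the new code frees
them on the spot with vlib_buffer_free or vlib_buffer_free_one, and
ip6_reass_finalize funnels every error exit through a single
free_buffers_and_return label. The standalone sketch below shows that
collect-then-batch-free shape with one exit label; the grow-able buf_list_t
is a hypothetical stand-in for VPP's vec_add1/vec_len/vec_free macros, and the
conditions that trigger each return code are invented for illustration.

    /*
     * Standalone sketch of the cleanup pattern used by ip6_reass_finalize
     * after this patch.  Every exit path goes through one label so locally
     * collected buffers are freed exactly once.
     */
    #include <stdlib.h>

    typedef struct
    {
      void **items;
      size_t n, cap;
    } buf_list_t;

    static int
    buf_list_add (buf_list_t * l, void *p)
    {
      if (l->n == l->cap)
        {
          size_t cap = l->cap ? 2 * l->cap : 8;
          void **tmp = realloc (l->items, cap * sizeof (*tmp));
          if (!tmp)
            return -1;
          l->items = tmp;
          l->cap = cap;
        }
      l->items[l->n++] = p;
      return 0;
    }

    typedef enum
    {
      RC_OK,
      RC_INTERNAL_ERROR,
      RC_NO_BUF,
    } rc_t;

    /* Walk a set of buffers; buffers that turn out to be redundant are
     * queued on to_free and released in one place, whatever the outcome. */
    static rc_t
    finalize (void **bufs, size_t n_bufs)
    {
      buf_list_t to_free = { 0 };
      rc_t rv = RC_OK;

      for (size_t i = 0; i < n_bufs; i++)
        {
          if (!bufs[i])
            {
              rv = RC_INTERNAL_ERROR;   /* malformed input */
              goto free_buffers_and_return;
            }
          if (i % 2)                    /* pretend odd buffers are redundant */
            {
              if (buf_list_add (&to_free, bufs[i]) < 0)
                {
                  rv = RC_NO_BUF;
                  goto free_buffers_and_return;
                }
            }
        }

    free_buffers_and_return:
      for (size_t i = 0; i < to_free.n; i++)
        free (to_free.items[i]);
      free (to_free.items);
      return rv;
    }

Freeing buffers directly rather than routing them through the drop node is
also why the IP4_ERROR_REASS_TIMEOUT counter and the per-thread buffers_n
statistic disappear in this change: there is no longer a point at which a
per-reason drop counter or a buffers-in-use count can be maintained for
these cases.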
diff --git a/src/vnet/ip/ip4_error.h b/src/vnet/ip/ip4_error.h index 338d91ec0ae..badcc6609e9 100644 --- a/src/vnet/ip/ip4_error.h +++ b/src/vnet/ip/ip4_error.h @@ -86,7 +86,6 @@ /* Errors signalled by ip4-reassembly */ \ _ (REASS_DUPLICATE_FRAGMENT, "duplicate/overlapping fragments") \ _ (REASS_LIMIT_REACHED, "drops due to concurrent reassemblies limit") \ - _ (REASS_TIMEOUT, "fragments dropped due to reassembly timeout") \ _ (REASS_MALFORMED_PACKET, "malformed packets") \ _ (REASS_INTERNAL_ERROR, "drops due to internal reassembly error") diff --git a/src/vnet/ip/ip4_reassembly.c b/src/vnet/ip/ip4_reassembly.c index 4bf39144ddb..b54279c7ab7 100644 --- a/src/vnet/ip/ip4_reassembly.c +++ b/src/vnet/ip/ip4_reassembly.c @@ -58,6 +58,7 @@ typedef enum { IP4_REASS_RC_OK, IP4_REASS_RC_INTERNAL_ERROR, + IP4_REASS_RC_NO_BUF, } ip4_reass_rc_t; typedef struct @@ -118,7 +119,6 @@ typedef struct { ip4_reass_t *pool; u32 reass_n; - u32 buffers_n; u32 id_counter; clib_spinlock_t lock; } ip4_reass_per_thread_t; @@ -292,11 +292,12 @@ ip4_reass_free (ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt, always_inline void ip4_reass_on_timeout (vlib_main_t * vm, ip4_reass_main_t * rm, - ip4_reass_t * reass, u32 ** vec_drop_timeout) + ip4_reass_t * reass) { u32 range_bi = reass->first_bi; vlib_buffer_t *range_b; vnet_buffer_opaque_t *range_vnb; + u32 *to_free = NULL; while (~0 != range_bi) { range_b = vlib_get_buffer (vm, range_bi); @@ -304,7 +305,7 @@ ip4_reass_on_timeout (vlib_main_t * vm, ip4_reass_main_t * rm, u32 bi = range_bi; while (~0 != bi) { - vec_add1 (*vec_drop_timeout, bi); + vec_add1 (to_free, bi); vlib_buffer_t *b = vlib_get_buffer (vm, bi); if (b->flags & VLIB_BUFFER_NEXT_PRESENT) { @@ -318,12 +319,13 @@ ip4_reass_on_timeout (vlib_main_t * vm, ip4_reass_main_t * rm, } range_bi = range_vnb->ip.reass.next_range_bi; } + vlib_buffer_free (vm, to_free, vec_len (to_free)); + vec_free (to_free); } ip4_reass_t * ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm, - ip4_reass_per_thread_t * rt, - ip4_reass_key_t * k, u32 ** vec_drop_timeout) + ip4_reass_per_thread_t * rt, ip4_reass_key_t * k) { ip4_reass_t *reass = NULL; f64 now = vlib_time_now (rm->vlib_main); @@ -336,7 +338,7 @@ ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm, reass = pool_elt_at_index (rt->pool, value.value); if (now > reass->last_heard + rm->timeout) { - ip4_reass_on_timeout (vm, rm, reass, vec_drop_timeout); + ip4_reass_on_timeout (vm, rm, reass); ip4_reass_free (rm, rt, reass); reass = NULL; } @@ -383,16 +385,14 @@ ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm, always_inline ip4_reass_rc_t ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt, - ip4_reass_t * reass, u32 * bi0, u32 * next0, - u32 * error0, u32 ** vec_drop_compress, - u32 ** vec_drop_overlap, bool is_feature) + ip4_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0, + bool is_feature) { vlib_buffer_t *first_b = vlib_get_buffer (vm, reass->first_bi); vlib_buffer_t *last_b = NULL; u32 sub_chain_bi = reass->first_bi; u32 total_length = 0; u32 buf_cnt = 0; - u32 dropped_cnt = 0; do { u32 tmp_bi = sub_chain_bi; @@ -435,8 +435,7 @@ ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, if (trim_front > tmp->current_length) { /* drop whole buffer */ - vec_add1 (*vec_drop_compress, tmp_bi); - ++dropped_cnt; + vlib_buffer_free_one (vm, tmp_bi); trim_front -= tmp->current_length; if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT)) { @@ -478,12 +477,11 @@ 
ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - vec_add1 (*vec_drop_overlap, tmp_bi); + vlib_buffer_free_one (vm, tmp_bi); if (reass->first_bi == tmp_bi) { return IP4_REASS_RC_INTERNAL_ERROR; } - ++dropped_cnt; } if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT) { @@ -506,7 +504,6 @@ ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, return IP4_REASS_RC_INTERNAL_ERROR; } last_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT; - rt->buffers_n -= buf_cnt - dropped_cnt; if (total_length < first_b->current_length) { return IP4_REASS_RC_INTERNAL_ERROR; @@ -518,9 +515,10 @@ ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, ip->flags_and_fragment_offset = 0; ip->length = clib_host_to_net_u16 (first_b->current_length + total_length); ip->checksum = ip4_header_checksum (ip); - u32 before = vec_len (*vec_drop_compress); - vlib_buffer_chain_compress (vm, first_b, vec_drop_compress); - rt->buffers_n += vec_len (*vec_drop_compress) - before; + if (!vlib_buffer_chain_linearize (vm, first_b)) + { + return IP4_REASS_RC_NO_BUF; + } if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED)) { @@ -568,25 +566,6 @@ ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, return IP4_REASS_RC_OK; } -always_inline u32 -ip4_reass_get_buffer_chain_length (vlib_main_t * vm, vlib_buffer_t * b) -{ - u32 len = 0; - while (b) - { - ++len; - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - b = vlib_get_buffer (vm, b->next_buffer); - } - else - { - break; - } - } - return len; -} - always_inline ip4_reass_rc_t ip4_reass_insert_range_in_chain (vlib_main_t * vm, ip4_reass_main_t * rm, @@ -618,7 +597,6 @@ ip4_reass_insert_range_in_chain (vlib_main_t * vm, return IP4_REASS_RC_INTERNAL_ERROR; } reass->data_len += ip4_reass_buffer_get_data_len (new_next_b); - rt->buffers_n += ip4_reass_get_buffer_chain_length (vm, new_next_b); return IP4_REASS_RC_OK; } @@ -626,7 +604,6 @@ always_inline ip4_reass_rc_t ip4_reass_remove_range_from_chain (vlib_main_t * vm, vlib_node_runtime_t * node, ip4_reass_main_t * rm, - u32 ** vec_drop_overlap, ip4_reass_t * reass, u32 prev_range_bi, u32 discard_bi) { @@ -655,7 +632,7 @@ ip4_reass_remove_range_from_chain (vlib_main_t * vm, reass->data_len -= ip4_reass_buffer_get_data_len (discard_b); while (1) { - vec_add1 (*vec_drop_overlap, discard_bi); + vlib_buffer_free_one (vm, discard_bi); if (PREDICT_FALSE (discard_b->flags & VLIB_BUFFER_IS_TRACED)) { ip4_reass_add_trace (vm, node, rm, reass, discard_bi, RANGE_DISCARD, @@ -678,9 +655,8 @@ ip4_reass_remove_range_from_chain (vlib_main_t * vm, always_inline ip4_reass_rc_t ip4_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt, - ip4_reass_t * reass, u32 * bi0, u32 * next0, - u32 * error0, u32 ** vec_drop_overlap, - u32 ** vec_drop_compress, bool is_feature) + ip4_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0, + bool is_feature) { ip4_reass_rc_t rc = IP4_REASS_RC_OK; int consumed = 0; @@ -849,8 +825,7 @@ ip4_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, u32 next_range_bi = candidate_vnb->ip.reass.next_range_bi; // discard candidate range, probe next range rc = - ip4_reass_remove_range_from_chain (vm, node, rm, - vec_drop_overlap, reass, + ip4_reass_remove_range_from_chain (vm, node, rm, reass, prev_range_bi, candidate_range_bi); if (IP4_REASS_RC_OK != rc) @@ -889,7 +864,6 @@ ip4_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, reass->data_len == reass->last_packet_octet + 1) { return 
ip4_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0, - vec_drop_compress, vec_drop_overlap, is_feature); } else @@ -920,74 +894,10 @@ ip4_reassembly_inline (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; - static u32 *vec_drop_timeout = NULL; // indexes of buffers which timed out - static u32 *vec_drop_overlap = NULL; // indexes of buffers which were discarded due to overlap - static u32 *vec_drop_internal_error = NULL; // indexes of buffers which were discarded due to internal errors - static u32 *vec_drop_compress = NULL; // indexes of buffers dicarded due to buffer compression - while (n_left_from > 0 || vec_len (vec_drop_timeout) > 0 - || vec_len (vec_drop_overlap) > 0 || vec_len (vec_drop_compress) > 0 - || vec_len (vec_drop_internal_error) > 0) + while (n_left_from > 0) { vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (vec_len (vec_drop_timeout) > 0 && n_left_to_next > 0) - { - u32 bi = vec_pop (vec_drop_timeout); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->error = node->errors[IP4_ERROR_REASS_TIMEOUT]; - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi, - IP4_REASSEMBLY_NEXT_DROP); - IP4_REASS_DEBUG_BUFFER (bi, enqueue_drop_timeout); - --rt->buffers_n; - } - - while (vec_len (vec_drop_overlap) > 0 && n_left_to_next > 0) - { - u32 bi = vec_pop (vec_drop_overlap); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->error = node->errors[IP4_ERROR_REASS_DUPLICATE_FRAGMENT]; - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi, - IP4_REASSEMBLY_NEXT_DROP); - IP4_REASS_DEBUG_BUFFER (bi, enqueue_drop_duplicate_fragment); - --rt->buffers_n; - } - - while (vec_len (vec_drop_compress) > 0 && n_left_to_next > 0) - { - u32 bi = vec_pop (vec_drop_compress); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->error = node->errors[IP4_ERROR_NONE]; - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi, - IP4_REASSEMBLY_NEXT_DROP); - IP4_REASS_DEBUG_BUFFER (bi, enqueue_drop_compress); - --rt->buffers_n; - } - while (vec_len (vec_drop_internal_error) > 0 && n_left_to_next > 0) - { - u32 bi = vec_pop (vec_drop_internal_error); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->error = node->errors[IP4_ERROR_REASS_INTERNAL_ERROR]; - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi, - IP4_REASSEMBLY_NEXT_DROP); - IP4_REASS_DEBUG_BUFFER (bi, enqueue_drop_internal_error); - --rt->buffers_n; - } while (n_left_from > 0 && n_left_to_next > 0) { u32 bi0; @@ -1033,23 +943,22 @@ ip4_reassembly_inline (vlib_main_t * vm, (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48; ip4_reass_t *reass = - ip4_reass_find_or_create (vm, rm, rt, &k, - &vec_drop_timeout); + ip4_reass_find_or_create (vm, rm, rt, &k); if (reass) { switch (ip4_reass_update (vm, node, rm, rt, reass, &bi0, &next0, &error0, - &vec_drop_overlap, &vec_drop_compress, is_feature)) { case IP4_REASS_RC_OK: /* nothing to do here */ break; + case IP4_REASS_RC_NO_BUF: + /* fallthrough */ case IP4_REASS_RC_INTERNAL_ERROR: /* drop everything and start with a clean slate */ - ip4_reass_on_timeout (vm, rm, reass, - &vec_drop_internal_error); + ip4_reass_on_timeout (vm, rm, reass); ip4_reass_free (rm, 
rt, reass); goto next_packet; break; @@ -1321,7 +1230,6 @@ ip4_reass_walk_expired (vlib_main_t * vm, f64 now = vlib_time_now (vm); ip4_reass_t *reass; - u32 *vec_drop_timeout = NULL; int *pool_indexes_to_free = NULL; uword thread_index = 0; @@ -1347,20 +1255,7 @@ ip4_reass_walk_expired (vlib_main_t * vm, vec_foreach (i, pool_indexes_to_free) { ip4_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); - u32 before = vec_len (vec_drop_timeout); - vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi); - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) - { - if (pool_is_free_index (vm->trace_main.trace_buffer_pool, - b->trace_index)) - { - /* the trace is gone, don't trace this buffer anymore */ - b->flags &= ~VLIB_BUFFER_IS_TRACED; - } - } - ip4_reass_on_timeout (vm, rm, reass, &vec_drop_timeout); - u32 after = vec_len (vec_drop_timeout); - rt->buffers_n -= (after - before); + ip4_reass_on_timeout (vm, rm, reass); ip4_reass_free (rm, rt, reass); } /* *INDENT-ON* */ @@ -1368,42 +1263,7 @@ ip4_reass_walk_expired (vlib_main_t * vm, clib_spinlock_unlock (&rt->lock); } - while (vec_len (vec_drop_timeout) > 0) - { - vlib_frame_t *f = vlib_get_frame_to_node (vm, rm->ip4_drop_idx); - u32 *to_next = vlib_frame_vector_args (f); - u32 n_left_to_next = VLIB_FRAME_SIZE - f->n_vectors; - int trace_frame = 0; - while (vec_len (vec_drop_timeout) > 0 && n_left_to_next > 0) - { - u32 bi = vec_pop (vec_drop_timeout); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) - { - if (pool_is_free_index (vm->trace_main.trace_buffer_pool, - b->trace_index)) - { - /* the trace is gone, don't trace this buffer anymore */ - b->flags &= ~VLIB_BUFFER_IS_TRACED; - } - else - { - trace_frame = 1; - } - } - b->error = node->errors[IP4_ERROR_REASS_TIMEOUT]; - to_next[0] = bi; - ++f->n_vectors; - to_next += 1; - n_left_to_next -= 1; - IP4_REASS_DEBUG_BUFFER (bi, enqueue_drop_timeout_walk); - } - f->frame_flags |= (trace_frame * VLIB_FRAME_TRACE); - vlib_put_frame_to_node (vm, rm->ip4_drop_idx, f); - } - vec_free (pool_indexes_to_free); - vec_free (vec_drop_timeout); if (event_data) { _vec_len (event_data) = 0; @@ -1413,8 +1273,6 @@ ip4_reass_walk_expired (vlib_main_t * vm, return 0; } -static vlib_node_registration_t ip4_reass_expire_node; - /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_reass_expire_node, static) = { .function = ip4_reass_walk_expired, @@ -1489,7 +1347,6 @@ show_ip4_reass (vlib_main_t * vm, unformat_input_t * input, } u32 sum_reass_n = 0; - u64 sum_buffers_n = 0; ip4_reass_t *reass; uword thread_index; const uword nthreads = vlib_num_workers () + 1; @@ -1506,7 +1363,6 @@ show_ip4_reass (vlib_main_t * vm, unformat_input_t * input, /* *INDENT-ON* */ } sum_reass_n += rt->reass_n; - sum_buffers_n += rt->buffers_n; clib_spinlock_unlock (&rt->lock); } vlib_cli_output (vm, "---------------------"); @@ -1515,8 +1371,6 @@ show_ip4_reass (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "Maximum configured concurrent IP4 reassemblies per worker-thread: %lu\n", (long unsigned) rm->max_reass_n); - vlib_cli_output (vm, "Buffers in use: %lu\n", - (long unsigned) sum_buffers_n); return 0; } diff --git a/src/vnet/ip/ip6_reassembly.c b/src/vnet/ip/ip6_reassembly.c index 012f3d26639..658fe25eeb1 100644 --- a/src/vnet/ip/ip6_reassembly.c +++ b/src/vnet/ip/ip6_reassembly.c @@ -36,6 +36,7 @@ typedef enum { IP6_REASS_RC_OK, IP6_REASS_RC_INTERNAL_ERROR, + IP6_REASS_RC_NO_BUF, } ip6_reass_rc_t; typedef struct @@ -96,7 +97,6 @@ typedef struct { ip6_reass_t *pool; u32 
reass_n; - u32 buffers_n; u32 id_counter; clib_spinlock_t lock; } ip6_reass_per_thread_t; @@ -285,11 +285,12 @@ ip6_reass_free (ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt, always_inline void ip6_reass_drop_all (vlib_main_t * vm, ip6_reass_main_t * rm, - ip6_reass_t * reass, u32 ** vec_drop_bi) + ip6_reass_t * reass) { u32 range_bi = reass->first_bi; vlib_buffer_t *range_b; vnet_buffer_opaque_t *range_vnb; + u32 *to_free = NULL; while (~0 != range_bi) { range_b = vlib_get_buffer (vm, range_bi); @@ -297,7 +298,7 @@ ip6_reass_drop_all (vlib_main_t * vm, ip6_reass_main_t * rm, u32 bi = range_bi; while (~0 != bi) { - vec_add1 (*vec_drop_bi, bi); + vec_add1 (to_free, bi); vlib_buffer_t *b = vlib_get_buffer (vm, bi); if (b->flags & VLIB_BUFFER_NEXT_PRESENT) { @@ -311,12 +312,14 @@ ip6_reass_drop_all (vlib_main_t * vm, ip6_reass_main_t * rm, } range_bi = range_vnb->ip.reass.next_range_bi; } + vlib_buffer_free (vm, to_free, vec_len (to_free)); + vec_free (to_free); } always_inline void ip6_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node, - ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt, - ip6_reass_t * reass, u32 * icmp_bi, u32 ** vec_timeout) + ip6_reass_main_t * rm, ip6_reass_t * reass, + u32 * icmp_bi) { if (~0 == reass->first_bi) { @@ -342,19 +345,17 @@ ip6_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node, { reass->first_bi = vnet_buffer (b)->ip.reass.next_range_bi; } - --rt->buffers_n; icmp6_error_set_vnet_buffer (b, ICMP6_time_exceeded, ICMP6_time_exceeded_fragment_reassembly_time_exceeded, 0); } - ip6_reass_drop_all (vm, rm, reass, vec_timeout); + ip6_reass_drop_all (vm, rm, reass); } always_inline ip6_reass_t * ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt, - ip6_reass_key_t * k, u32 * icmp_bi, - u32 ** vec_timeout) + ip6_reass_key_t * k, u32 * icmp_bi) { ip6_reass_t *reass = NULL; f64 now = vlib_time_now (rm->vlib_main); @@ -371,8 +372,7 @@ ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node, reass = pool_elt_at_index (rt->pool, value.value); if (now > reass->last_heard + rm->timeout) { - ip6_reass_on_timeout (vm, node, rm, rt, reass, icmp_bi, - vec_timeout); + ip6_reass_on_timeout (vm, node, rm, reass, icmp_bi); ip6_reass_free (rm, rt, reass); reass = NULL; } @@ -423,8 +423,8 @@ ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node, always_inline ip6_reass_rc_t ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt, - ip6_reass_t * reass, u32 * bi0, u32 * next0, - u32 * error0, u32 ** vec_drop_compress, bool is_feature) + ip6_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0, + bool is_feature) { *bi0 = reass->first_bi; *error0 = IP6_ERROR_NONE; @@ -434,6 +434,8 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, u32 total_length = 0; u32 buf_cnt = 0; u32 dropped_cnt = 0; + u32 *vec_drop_compress = NULL; + ip6_reass_rc_t rv = IP6_REASS_RC_OK; do { u32 tmp_bi = sub_chain_bi; @@ -442,7 +444,8 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) && !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first)) { - return IP6_REASS_RC_INTERNAL_ERROR; + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; } u32 data_len = ip6_reass_buffer_get_data_len (tmp); @@ -455,7 +458,8 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, /* first buffer - keep ip6 
header */ if (0 != ip6_reass_buffer_get_data_offset (tmp)) { - return IP6_REASS_RC_INTERNAL_ERROR; + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; } trim_front = 0; trim_end = vlib_buffer_length_in_chain (vm, tmp) - data_len - @@ -463,7 +467,8 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, sizeof (*frag_hdr)); if (!(vlib_buffer_length_in_chain (vm, tmp) - trim_end > 0)) { - return IP6_REASS_RC_INTERNAL_ERROR; + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; } } u32 keep_data = @@ -476,12 +481,12 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, if (trim_front > tmp->current_length) { /* drop whole buffer */ - vec_add1 (*vec_drop_compress, tmp_bi); - ++dropped_cnt; + vec_add1 (vec_drop_compress, tmp_bi); trim_front -= tmp->current_length; if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT)) { - return IP6_REASS_RC_INTERNAL_ERROR; + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; } tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT; tmp_bi = tmp->next_buffer; @@ -512,17 +517,19 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, keep_data -= tmp->current_length; if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT)) { - return IP6_REASS_RC_INTERNAL_ERROR; + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; } } total_length += tmp->current_length; } else { - vec_add1 (*vec_drop_compress, tmp_bi); + vec_add1 (vec_drop_compress, tmp_bi); if (reass->first_bi == tmp_bi) { - return IP6_REASS_RC_INTERNAL_ERROR; + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; } ++dropped_cnt; } @@ -544,13 +551,15 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, if (!last_b) { - return IP6_REASS_RC_INTERNAL_ERROR; + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; } last_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT; vlib_buffer_t *first_b = vlib_get_buffer (vm, reass->first_bi); if (total_length < first_b->current_length) { - return IP6_REASS_RC_INTERNAL_ERROR; + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; } total_length -= first_b->current_length; first_b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; @@ -572,7 +581,8 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, } if (!((u8 *) frag_hdr - (u8 *) ip == ip6_frag_hdr_offset)) { - return IP6_REASS_RC_INTERNAL_ERROR; + rv = IP6_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; } memmove (frag_hdr, (u8 *) frag_hdr + sizeof (*frag_hdr), first_b->current_length - ip6_frag_hdr_offset - @@ -581,8 +591,11 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, ip->payload_length = clib_host_to_net_u16 (total_length + first_b->current_length - sizeof (*ip)); - vlib_buffer_chain_compress (vm, first_b, vec_drop_compress); - rt->buffers_n -= buf_cnt - vec_len (*vec_drop_compress); + if (!vlib_buffer_chain_linearize (vm, first_b)) + { + rv = IP6_REASS_RC_NO_BUF; + goto free_buffers_and_return; + } if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED)) { ip6_reass_add_trace (vm, node, rm, reass, reass->first_bi, FINALIZE, 0); @@ -624,26 +637,10 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length; ip6_reass_free (rm, rt, reass); reass = NULL; - return IP6_REASS_RC_OK; -} - -always_inline u32 -ip6_reass_get_buffer_chain_length (vlib_main_t * vm, vlib_buffer_t * b) -{ - u32 len = 0; - while (b) - { - ++len; - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - b = 
vlib_get_buffer (vm, b->next_buffer); - } - else - { - break; - } - } - return len; +free_buffers_and_return: + vlib_buffer_free (vm, vec_drop_compress, vec_len (vec_drop_compress)); + vec_free (vec_drop_compress); + return rv; } always_inline void @@ -671,16 +668,13 @@ ip6_reass_insert_range_in_chain (vlib_main_t * vm, ip6_reass_main_t * rm, reass->first_bi = new_next_bi; } reass->data_len += ip6_reass_buffer_get_data_len (new_next_b); - rt->buffers_n += ip6_reass_get_buffer_chain_length (vm, new_next_b); } always_inline ip6_reass_rc_t ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt, - ip6_reass_t * reass, u32 * bi0, u32 * next0, - u32 * error0, ip6_frag_hdr_t * frag_hdr, - u32 ** vec_drop_overlap, u32 ** vec_drop_compress, - bool is_feature) + ip6_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0, + ip6_frag_hdr_t * frag_hdr, bool is_feature) { int consumed = 0; vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0); @@ -760,7 +754,7 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, else { // overlapping fragment - not allowed by RFC 8200 - ip6_reass_drop_all (vm, rm, reass, vec_drop_overlap); + ip6_reass_drop_all (vm, rm, reass); ip6_reass_free (rm, rt, reass); if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED)) { @@ -785,7 +779,7 @@ check_if_done_maybe: reass->data_len == reass->last_packet_octet + 1) { return ip6_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0, - vec_drop_compress, is_feature); + is_feature); } else { @@ -882,76 +876,15 @@ ip6_reassembly_inline (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; - static u32 *vec_timeout = NULL; // indexes of buffers which timed out - static u32 *vec_drop_overlap = NULL; // indexes of buffers dropped due to overlap - static u32 *vec_drop_internal_error = NULL; // indexes of buffers dropped due to internal errors - static u32 *vec_drop_compress = NULL; // indexes of buffers dropped due to buffer compression - while (n_left_from > 0 || vec_len (vec_timeout) > 0 - || vec_len (vec_drop_overlap) > 0 || vec_len (vec_drop_compress) > 0 - || vec_len (vec_drop_internal_error) > 0) + while (n_left_from > 0) { vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (vec_len (vec_timeout) > 0 && n_left_to_next > 0) - { - u32 bi = vec_pop (vec_timeout); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->error = node->errors[IP6_ERROR_REASS_TIMEOUT]; - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi, - IP6_REASSEMBLY_NEXT_DROP); - --rt->buffers_n; - } - - while (vec_len (vec_drop_overlap) > 0 && n_left_to_next > 0) - { - u32 bi = vec_pop (vec_drop_overlap); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->error = node->errors[IP6_ERROR_REASS_OVERLAPPING_FRAGMENT]; - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi, - IP6_REASSEMBLY_NEXT_DROP); - --rt->buffers_n; - } - - while (vec_len (vec_drop_compress) > 0 && n_left_to_next > 0) - { - u32 bi = vec_pop (vec_drop_compress); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->error = node->errors[IP6_ERROR_NONE]; - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi, - IP6_REASSEMBLY_NEXT_DROP); - --rt->buffers_n; - } - while (vec_len (vec_drop_internal_error) > 0 && 
n_left_to_next > 0) - { - u32 bi = vec_pop (vec_drop_internal_error); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->error = node->errors[IP6_ERROR_REASS_INTERNAL_ERROR]; - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi, - IP6_REASSEMBLY_NEXT_DROP); - --rt->buffers_n; - } - while (n_left_from > 0 && n_left_to_next > 0) { u32 bi0; vlib_buffer_t *b0; - u32 next0; + u32 next0 = IP6_REASSEMBLY_NEXT_DROP; u32 error0 = IP6_ERROR_NONE; u32 icmp_bi = ~0; @@ -1001,22 +934,21 @@ ip6_reassembly_inline (vlib_main_t * vm, sw_if_index[VLIB_RX] << 32 | frag_hdr->identification; k.as_u64[5] = ip0->protocol; ip6_reass_t *reass = - ip6_reass_find_or_create (vm, node, rm, rt, &k, &icmp_bi, - &vec_timeout); + ip6_reass_find_or_create (vm, node, rm, rt, &k, &icmp_bi); if (reass) { switch (ip6_reass_update (vm, node, rm, rt, reass, &bi0, &next0, - &error0, frag_hdr, &vec_drop_overlap, - &vec_drop_compress, is_feature)) + &error0, frag_hdr, is_feature)) { case IP6_REASS_RC_OK: /* nothing to do here */ break; + case IP6_REASS_RC_NO_BUF: + /* fallthrough */ case IP6_REASS_RC_INTERNAL_ERROR: /* drop everything and start with a clean slate */ - ip6_reass_drop_all (vm, rm, reass, - &vec_drop_internal_error); + ip6_reass_drop_all (vm, rm, reass); ip6_reass_free (rm, rt, reass); goto next_packet; break; @@ -1306,7 +1238,6 @@ ip6_reass_walk_expired (vlib_main_t * vm, f64 now = vlib_time_now (vm); ip6_reass_t *reass; - u32 *vec_timeout = NULL; int *pool_indexes_to_free = NULL; uword thread_index = 0; @@ -1334,7 +1265,6 @@ ip6_reass_walk_expired (vlib_main_t * vm, { ip6_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); u32 icmp_bi = ~0; - u32 before = vec_len (vec_timeout); vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi); if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) { @@ -1345,9 +1275,7 @@ ip6_reass_walk_expired (vlib_main_t * vm, b->flags &= ~VLIB_BUFFER_IS_TRACED; } } - ip6_reass_on_timeout (vm, node, rm, rt, reass, &icmp_bi, &vec_timeout); - u32 after = vec_len (vec_timeout); - rt->buffers_n -= (after - before); + ip6_reass_on_timeout (vm, node, rm, reass, &icmp_bi); if (~0 != icmp_bi) { vec_add1 (vec_icmp_bi, icmp_bi); @@ -1359,39 +1287,6 @@ ip6_reass_walk_expired (vlib_main_t * vm, clib_spinlock_unlock (&rt->lock); } - while (vec_len (vec_timeout) > 0) - { - vlib_frame_t *f = vlib_get_frame_to_node (vm, rm->ip6_drop_idx); - u32 *to_next = vlib_frame_vector_args (f); - u32 n_left_to_next = VLIB_FRAME_SIZE - f->n_vectors; - int trace_frame = 0; - while (vec_len (vec_timeout) > 0 && n_left_to_next > 0) - { - u32 bi = vec_pop (vec_timeout); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) - { - if (pool_is_free_index (vm->trace_main.trace_buffer_pool, - b->trace_index)) - { - /* the trace is gone, don't trace this buffer anymore */ - b->flags &= ~VLIB_BUFFER_IS_TRACED; - } - else - { - trace_frame = 1; - } - } - b->error = node->errors[IP6_ERROR_REASS_TIMEOUT]; - to_next[0] = bi; - ++f->n_vectors; - to_next += 1; - n_left_to_next -= 1; - } - f->frame_flags |= (trace_frame * VLIB_FRAME_TRACE); - vlib_put_frame_to_node (vm, rm->ip6_drop_idx, f); - } - while (vec_len (vec_icmp_bi) > 0) { vlib_frame_t *f = @@ -1427,7 +1322,6 @@ ip6_reass_walk_expired (vlib_main_t * vm, } vec_free (pool_indexes_to_free); - vec_free (vec_timeout); vec_free (vec_icmp_bi); if (event_data) { @@ -1532,7 +1426,6 @@ show_ip6_reass (vlib_main_t * vm, unformat_input_t * input, 
/* *INDENT-ON* */ } sum_reass_n += rt->reass_n; - sum_buffers_n += rt->buffers_n; clib_spinlock_unlock (&rt->lock); } vlib_cli_output (vm, "---------------------"); |