From e4fa1d2f6b8721318c0f104f2615588b5d4e0441 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 11 Apr 2022 18:41:49 +0200 Subject: vppinfra: vector perf improvements Type: improvement Change-Id: I37c187af80c21b8fb1ab15af112527a837e0df9e Signed-off-by: Damjan Marion --- src/vppinfra/elf.h | 4 +- src/vppinfra/fifo.c | 7 +- src/vppinfra/hash.c | 5 +- src/vppinfra/heap.c | 6 +- src/vppinfra/heap.h | 12 ++- src/vppinfra/pool.c | 5 +- src/vppinfra/pool.h | 18 ++-- src/vppinfra/ring.h | 9 +- src/vppinfra/serialize.c | 12 ++- src/vppinfra/sparse_vec.h | 7 +- src/vppinfra/test_vec.c | 4 +- src/vppinfra/vec.c | 151 ++++++++++++++++++++------------ src/vppinfra/vec.h | 204 ++++++++++++++++++++++++++++++++----------- src/vppinfra/vec_bootstrap.h | 16 +++- 14 files changed, 324 insertions(+), 136 deletions(-) (limited to 'src/vppinfra') diff --git a/src/vppinfra/elf.h b/src/vppinfra/elf.h index 8d9893120f5..56869f1b9c7 100644 --- a/src/vppinfra/elf.h +++ b/src/vppinfra/elf.h @@ -966,10 +966,10 @@ elf_get_section_contents (elf_main_t * em, result = 0; if (vec_len (s->contents) > 0) { + vec_attr_t va = { .elt_sz = elt_size }; /* Make vector copy of contents with given element size. 
*/ result = - _vec_realloc (result, vec_len (s->contents) / elt_size, elt_size, - /* header_bytes */ 0, /* align */ 0, 0); + _vec_realloc_internal (result, vec_len (s->contents) / elt_size, &va); clib_memcpy (result, s->contents, vec_len (s->contents)); } diff --git a/src/vppinfra/fifo.c b/src/vppinfra/fifo.c index af68765f7da..2b1cfea6fe0 100644 --- a/src/vppinfra/fifo.c +++ b/src/vppinfra/fifo.c @@ -84,6 +84,9 @@ _clib_fifo_resize (void *v_old, uword n_new_elts, uword align, uword elt_bytes) uword n_old_elts; uword n_copy_bytes, n_zero_bytes; clib_fifo_header_t *f_new, *f_old; + vec_attr_t va = { .elt_sz = elt_bytes, + .hdr_sz = sizeof (clib_fifo_header_t), + .align = align }; n_old_elts = clib_fifo_elts (v_old); n_new_elts += n_old_elts; @@ -92,9 +95,7 @@ _clib_fifo_resize (void *v_old, uword n_new_elts, uword align, uword elt_bytes) else n_new_elts = max_pow2 (n_new_elts); - v_new = _vec_realloc (0, n_new_elts, elt_bytes, sizeof (clib_fifo_header_t), - align, 0); - + v_new = _vec_alloc_internal (n_new_elts, &va); f_new = clib_fifo_header (v_new); f_new->head_index = 0; f_new->tail_index = n_old_elts; diff --git a/src/vppinfra/hash.c b/src/vppinfra/hash.c index 7deff4a5bee..76f71d37d19 100644 --- a/src/vppinfra/hash.c +++ b/src/vppinfra/hash.c @@ -548,6 +548,7 @@ _hash_create (uword elts, hash_t * h_user) hash_t *h; uword log2_pair_size; void *v; + vec_attr_t va = { .hdr_sz = sizeof (h[0]), .align = sizeof (hash_pair_t) }; /* Size of hash is power of 2 >= ELTS and larger than number of bits in is_user bitmap elements. 
*/ @@ -558,8 +559,8 @@ _hash_create (uword elts, hash_t * h_user) if (h_user) log2_pair_size = h_user->log2_pair_size; - v = _vec_realloc (0, elts, (1 << log2_pair_size) * sizeof (hash_pair_t), - sizeof (h[0]), sizeof (hash_pair_t), 0); + va.elt_sz = (1 << log2_pair_size) * sizeof (hash_pair_t), + v = _vec_alloc_internal (elts, &va); h = hash_header (v); if (h_user) diff --git a/src/vppinfra/heap.c b/src/vppinfra/heap.c index 066756b4db9..7db814200f8 100644 --- a/src/vppinfra/heap.c +++ b/src/vppinfra/heap.c @@ -413,6 +413,9 @@ _heap_alloc (void *v, if (!e) { uword max_len; + vec_attr_t va = { .elt_sz = elt_bytes, + .hdr_sz = sizeof (h[0]), + .align = HEAP_DATA_ALIGN }; offset = vec_len (v); max_len = heap_get_max_len (v); @@ -422,8 +425,7 @@ _heap_alloc (void *v, h = heap_header (v); if (!v || !(h->flags & HEAP_IS_STATIC)) - v = _vec_realloc (v, offset + align_size, elt_bytes, sizeof (h[0]), - HEAP_DATA_ALIGN, 0); + v = _vec_realloc_internal (v, offset + align_size, &va); else vec_inc_len (v, align_size); diff --git a/src/vppinfra/heap.h b/src/vppinfra/heap.h index f496fe07b2e..45f3131a45b 100644 --- a/src/vppinfra/heap.h +++ b/src/vppinfra/heap.h @@ -185,6 +185,9 @@ always_inline void * _heap_dup (void *v_old, uword v_bytes) { heap_header_t *h_old, *h_new; + vec_attr_t va = { .align = HEAP_DATA_ALIGN, + .hdr_sz = sizeof (heap_header_t), + .elt_sz = 1 }; void *v_new; h_old = heap_header (v_old); @@ -192,8 +195,7 @@ _heap_dup (void *v_old, uword v_bytes) if (!v_old) return v_old; - v_new = _vec_realloc (0, _vec_len (v_old), 1, sizeof (heap_header_t), - HEAP_DATA_ALIGN, 0); + v_new = _vec_alloc_internal (_vec_len (v_old), &va); h_new = heap_header (v_new); heap_dup_header (h_old, h_new); clib_memcpy_fast (v_new, v_old, v_bytes); @@ -212,8 +214,10 @@ uword heap_bytes (void *v); always_inline void * _heap_new (u32 len, u32 n_elt_bytes) { - void *v = _vec_realloc ((void *) 0, len, n_elt_bytes, sizeof (heap_header_t), - HEAP_DATA_ALIGN, 0); + vec_attr_t va = { .align = 
HEAP_DATA_ALIGN, + .hdr_sz = sizeof (heap_header_t), + .elt_sz = n_elt_bytes }; + void *v = _vec_alloc_internal (len, &va); heap_header (v)->elt_bytes = n_elt_bytes; return v; } diff --git a/src/vppinfra/pool.c b/src/vppinfra/pool.c index 2bbfe60d320..1f3b96f0f0a 100644 --- a/src/vppinfra/pool.c +++ b/src/vppinfra/pool.c @@ -44,11 +44,14 @@ _pool_init_fixed (void **pool_ptr, uword elt_size, uword max_elts, uword align) pool_header_t *ph; u8 *v; u32 i; + vec_attr_t va = { .elt_sz = elt_size, + .align = align, + .hdr_sz = sizeof (pool_header_t) }; ASSERT (elt_size); ASSERT (max_elts); - v = _vec_realloc (0, max_elts, elt_size, sizeof (pool_header_t), align, 0); + v = _vec_alloc_internal (max_elts, &va); ph = pool_header (v); ph->max_elts = max_elts; diff --git a/src/vppinfra/pool.h b/src/vppinfra/pool.h index 8330d64b0bd..2a356291051 100644 --- a/src/vppinfra/pool.h +++ b/src/vppinfra/pool.h @@ -172,6 +172,9 @@ _pool_get (void **pp, void **ep, uword align, int zero, uword elt_sz) uword len = 0; void *p = pp[0]; void *e; + vec_attr_t va = { .hdr_sz = sizeof (pool_header_t), + .elt_sz = elt_sz, + .align = align }; if (p) { @@ -199,8 +202,7 @@ _pool_get (void **pp, void **ep, uword align, int zero, uword elt_sz) len = vec_len (p); /* Nothing on free list, make a new element and return it. 
*/ - p = - _vec_realloc_inline (p, len + 1, elt_sz, sizeof (pool_header_t), align, 0); + p = _vec_realloc_internal (p, len + 1, &va); e = p + len * elt_sz; _vec_update_pointer (pp, p); @@ -312,6 +314,10 @@ _pool_alloc (void **pp, uword n_elts, uword align, void *heap, uword elt_sz) { pool_header_t *ph = pool_header (pp[0]); uword len = vec_len (pp[0]); + const vec_attr_t va = { .hdr_sz = sizeof (pool_header_t), + .elt_sz = elt_sz, + .align = align, + .heap = heap }; if (ph && ph->max_elts) { @@ -319,8 +325,7 @@ _pool_alloc (void **pp, uword n_elts, uword align, void *heap, uword elt_sz) os_out_of_memory (); } - pp[0] = _vec_realloc_inline (pp[0], len + n_elts, elt_sz, - sizeof (pool_header_t), align, heap); + pp[0] = _vec_resize_internal (pp[0], len + n_elts, &va); _vec_set_len (pp[0], len, elt_sz); clib_mem_poison (pp[0] + len * elt_sz, n_elts * elt_sz); @@ -342,6 +347,9 @@ _pool_dup (void *p, uword align, uword elt_sz) { pool_header_t *nph, *ph = pool_header (p); uword len = vec_len (p); + const vec_attr_t va = { .hdr_sz = sizeof (pool_header_t), + .elt_sz = elt_sz, + .align = align }; void *n; if (ph && ph->max_elts) @@ -350,7 +358,7 @@ _pool_dup (void *p, uword align, uword elt_sz) os_out_of_memory (); } - n = _vec_realloc_inline (0, len, elt_sz, sizeof (pool_header_t), align, 0); + n = _vec_alloc_internal (len, &va); nph = pool_header (n); clib_memset_u8 (nph, 0, sizeof (vec_header_t)); diff --git a/src/vppinfra/ring.h b/src/vppinfra/ring.h index d7e19156482..8527fdb5978 100644 --- a/src/vppinfra/ring.h +++ b/src/vppinfra/ring.h @@ -37,12 +37,11 @@ clib_ring_new_inline (void **p, u32 elt_bytes, u32 size, u32 align) { void *v; clib_ring_header_t *h; + vec_attr_t va = { .elt_sz = elt_bytes, + .hdr_sz = sizeof (clib_ring_header_t), + .align = align }; - v = _vec_realloc (0, - /* length increment */ size, - /* data bytes */ elt_bytes, - /* header bytes */ sizeof (h[0]), - /* data align */ align, 0); + v = _vec_alloc_internal (size, &va); h = clib_ring_header (v); 
h->next = 0; diff --git a/src/vppinfra/serialize.c b/src/vppinfra/serialize.c index d84d7ca06c5..f5c00649627 100644 --- a/src/vppinfra/serialize.c +++ b/src/vppinfra/serialize.c @@ -308,13 +308,16 @@ unserialize_vector_ha (serialize_main_t * m, { void *v, *p; u32 l; + vec_attr_t va = { .align = align, + .elt_sz = elt_bytes, + .hdr_sz = header_bytes }; unserialize_integer (m, &l, sizeof (l)); if (l > max_length) serialize_error (&m->header, clib_error_create ("bad vector length %d", l)); - p = v = _vec_realloc ((void *) 0, l, elt_bytes, header_bytes, - /* align */ align, 0); + + p = v = _vec_alloc_internal (l, &va); while (l != 0) { @@ -437,6 +440,9 @@ unserialize_pool_helper (serialize_main_t * m, void *v; u32 i, l, lo, hi; pool_header_t *p; + vec_attr_t va = { .align = align, + .elt_sz = elt_bytes, + .hdr_sz = sizeof (pool_header_t) }; unserialize_integer (m, &l, sizeof (l)); if (l == 0) @@ -444,7 +450,7 @@ unserialize_pool_helper (serialize_main_t * m, return 0; } - v = _vec_realloc ((void *) 0, l, elt_bytes, sizeof (p[0]), align, 0); + v = _vec_alloc_internal (l, &va); p = pool_header (v); vec_unserialize (m, &p->free_indices, unserialize_vec_32); diff --git a/src/vppinfra/sparse_vec.h b/src/vppinfra/sparse_vec.h index dc9cb00380a..1f57d304e95 100644 --- a/src/vppinfra/sparse_vec.h +++ b/src/vppinfra/sparse_vec.h @@ -73,15 +73,14 @@ sparse_vec_new (uword elt_bytes, uword sparse_index_bits) void *v; sparse_vec_header_t *h; word n; + vec_attr_t va = { .elt_sz = elt_bytes, .hdr_sz = sizeof (h[0]) }; ASSERT (sparse_index_bits <= 16); - v = _vec_realloc (0, /* data bytes */ 8, elt_bytes, - /* header bytes */ sizeof (h[0]), /* data align */ 0, - /* heap */ 0); + v = _vec_alloc_internal (/* data bytes */ 8, &va); /* Make space for invalid entry (entry 0). 
*/ - _vec_find (v)->len = 1; + _vec_set_len (v, 1, elt_bytes); h = sparse_vec_header (v); diff --git a/src/vppinfra/test_vec.c b/src/vppinfra/test_vec.c index f32cd7ffb82..9f336a0a095 100644 --- a/src/vppinfra/test_vec.c +++ b/src/vppinfra/test_vec.c @@ -211,6 +211,8 @@ dump_call_stats (uword * stats) ({ \ elt_type *_v (v) = NULL; \ uword _v (l) = (len); \ + vec_attr_t _v (attr) = { .hdr_sz = (hdr_bytes), \ + .elt_sz = sizeof (elt_type) }; \ uword _v (h) = (hdr_bytes); \ u8 *_v (hdr); \ \ @@ -221,7 +223,7 @@ dump_call_stats (uword * stats) if (_v (l) == ~0) \ _v (l) = bounded_random_u32 (&(seed), 0, MAX_VEC_LEN); \ \ - _v (v) = _vec_realloc (NULL, _v (l), sizeof (elt_type), _v (h), 0, 0); \ + _v (v) = _vec_alloc_internal (_v (l), &_v (attr)); \ fill_with_random_data (_v (v), vec_bytes (_v (v)), (seed)); \ \ /* Fill header with random data as well. */ \ diff --git a/src/vppinfra/vec.c b/src/vppinfra/vec.c index 4dc8f18ce24..dbaadad2dd5 100644 --- a/src/vppinfra/vec.c +++ b/src/vppinfra/vec.c @@ -16,75 +16,118 @@ vec_mem_size (void *v) } __clib_export void * -_vec_realloc (void *v, uword n_elts, uword elt_sz, uword hdr_sz, uword align, - void *heap) +_vec_alloc_internal (uword n_elts, const vec_attr_t *const attr) { - uword n_data_bytes, alloc_size, new_data_size; - void *p; + uword req_size, alloc_size, data_offset, align; + uword elt_sz = attr->elt_sz; + void *p, *v, *heap = attr->heap; /* alignment must be power of 2 */ - align = clib_max (align, VEC_MIN_ALIGN); + align = clib_max (attr->align, VEC_MIN_ALIGN); ASSERT (count_set_bits (align) == 1); - /* mumber of bytes needed to store vector data */ - n_data_bytes = n_elts * elt_sz; + /* calc offset where vector data starts */ + data_offset = attr->hdr_sz + sizeof (vec_header_t); + data_offset += heap ? 
sizeof (void *) : 0; + data_offset = round_pow2 (data_offset, align); - if (v) - { - uword data_offset = vec_get_header_size (v); - uword old_data_size = data_offset + _vec_len (v) * elt_sz; - new_data_size = data_offset + n_data_bytes; - heap = _vec_find (v)->default_heap ? 0 : _vec_heap (v); - p = vec_header (v); - alloc_size = clib_mem_size (p); - - /* check that we are still dealing with the same vector type */ - ASSERT (_vec_find (v)->hdr_size * VEC_MIN_ALIGN == data_offset); - ASSERT (_vec_find (v)->log2_align == min_log2 (align)); - - /* realloc if new size cannot fit into existing allocation */ - if (alloc_size < new_data_size) - { - if (CLIB_VECTOR_GROW_BY_ONE) - alloc_size = n_data_bytes + data_offset; - else - alloc_size = (n_data_bytes * 3) / 2 + data_offset; - - p = clib_mem_heap_realloc_aligned (heap, p, alloc_size, align); - alloc_size = clib_mem_size (p); - v = p + data_offset; - } + req_size = data_offset + n_elts * elt_sz; + p = clib_mem_heap_alloc_aligned (heap, req_size, align); + + /* zero out whole allocation */ + alloc_size = clib_mem_size (p); + clib_mem_unpoison (p, alloc_size); + clib_memset_u8 (p, 0, alloc_size); - clib_mem_unpoison (p, alloc_size); - clib_memset_u8 (p + old_data_size, 0, alloc_size - old_data_size); + /* fill vector header */ + v = p + data_offset; + _vec_find (v)->len = n_elts; + _vec_find (v)->hdr_size = data_offset / VEC_MIN_ALIGN; + _vec_find (v)->log2_align = min_log2 (align); + if (heap) + { + _vec_find (v)->default_heap = 0; + _vec_heap (v) = heap; } else + _vec_find (v)->default_heap = 1; + + /* poison extra space given by allocator */ + clib_mem_poison (p + req_size, alloc_size - req_size); + _vec_set_grow_elts (v, (alloc_size - req_size) / elt_sz); + return v; +} + +static inline void +_vec_update_len (void *v, uword n_elts, uword elt_sz, uword n_data_bytes, + uword unused_bytes) +{ + _vec_find (v)->len = n_elts; + _vec_set_grow_elts (v, unused_bytes / elt_sz); + clib_mem_unpoison (v, n_data_bytes); + 
clib_mem_poison (v + n_data_bytes, unused_bytes); +} + +__clib_export void * +_vec_realloc_internal (void *v, uword n_elts, const vec_attr_t *const attr) +{ + uword old_alloc_sz, new_alloc_sz, new_data_size, n_data_bytes, data_offset; + uword elt_sz; + + if (PREDICT_FALSE (v == 0)) + return _vec_alloc_internal (n_elts, attr); + + elt_sz = attr->elt_sz; + n_data_bytes = n_elts * elt_sz; + data_offset = vec_get_header_size (v); + new_data_size = data_offset + n_data_bytes; + new_alloc_sz = old_alloc_sz = clib_mem_size (vec_header (v)); + + /* realloc if new size cannot fit into existing allocation */ + if (old_alloc_sz < new_data_size) { - /* new allocation */ - uword data_offset = hdr_sz + sizeof (vec_header_t); - data_offset += heap ? sizeof (void *) : 0; - data_offset = round_pow2 (data_offset, align); - - new_data_size = data_offset + n_data_bytes; - p = clib_mem_heap_alloc_aligned (heap, new_data_size, align); - alloc_size = clib_mem_size (p); - clib_mem_unpoison (p, alloc_size); - clib_memset_u8 (p, 0, alloc_size); + uword n_bytes, req_size = new_data_size; + void *p = v - data_offset; + + req_size += CLIB_VECTOR_GROW_BY_ONE ? 
0 : n_data_bytes / 2; + + p = clib_mem_heap_realloc_aligned (vec_get_heap (v), p, req_size, + vec_get_align (v)); + new_alloc_sz = clib_mem_size (p); v = p + data_offset; - _vec_find (v)->hdr_size = data_offset / VEC_MIN_ALIGN; - _vec_find (v)->log2_align = min_log2 (align); - if (heap) + + /* zero out new allocation */ + n_bytes = new_alloc_sz - old_alloc_sz; + clib_mem_unpoison (p + old_alloc_sz, n_bytes); + clib_memset_u8 (p + old_alloc_sz, 0, n_bytes); + } + + _vec_update_len (v, n_elts, elt_sz, n_data_bytes, + new_alloc_sz - new_data_size); + return v; +} + +__clib_export void * +_vec_resize_internal (void *v, uword n_elts, const vec_attr_t *const attr) +{ + uword elt_sz = attr->elt_sz; + if (PREDICT_TRUE (v != 0)) + { + uword hs = _vec_find (v)->hdr_size * VEC_MIN_ALIGN; + uword alloc_sz = clib_mem_size (v - hs); + uword n_data_bytes = elt_sz * n_elts; + word unused_bytes = alloc_sz - (n_data_bytes + hs); + + if (PREDICT_TRUE (unused_bytes >= 0)) { - _vec_find (v)->default_heap = 0; - _vec_heap (v) = heap; + _vec_update_len (v, n_elts, elt_sz, n_data_bytes, unused_bytes); + return v; } - else - _vec_find (v)->default_heap = 1; } - clib_mem_poison (p + new_data_size, alloc_size - new_data_size); - _vec_find (v)->len = n_elts; - return v; + /* this should emit tail jump and likely avoid stack usage inside this + * function */ + return _vec_realloc_internal (v, n_elts, attr); } __clib_export u32 diff --git a/src/vppinfra/vec.h b/src/vppinfra/vec.h index bfb7b1b7fc6..f45f45e3388 100644 --- a/src/vppinfra/vec.h +++ b/src/vppinfra/vec.h @@ -101,8 +101,20 @@ @param align alignment (may be zero) @return v_prime pointer to resized vector, may or may not equal v */ -void *_vec_realloc (void *v, uword n_elts, uword elt_sz, uword hdr_sz, - uword align, void *heap); + +typedef struct +{ + void *heap; + u32 elt_sz; + u16 hdr_sz; + u16 align; +} vec_attr_t; + +void *_vec_alloc_internal (uword n_elts, const vec_attr_t *const attr); +void *_vec_realloc_internal (void *v, 
uword n_elts, + const vec_attr_t *const attr); +void *_vec_resize_internal (void *v, uword n_elts, + const vec_attr_t *const attr); /* calculate minimum alignment out of data natural alignment and provided * value, should not be < VEC_MIN_ALIGN */ @@ -139,36 +151,24 @@ vec_get_heap (void *v) return _vec_heap (v); } -static_always_inline void * -_vec_realloc_inline (void *v, uword n_elts, uword elt_sz, uword hdr_sz, - uword align, void *heap) +static_always_inline uword +vec_get_align (void *v) { - if (PREDICT_TRUE (v != 0)) - { - /* Vector header must start heap object. */ - ASSERT (clib_mem_heap_is_heap_object (vec_get_heap (v), vec_header (v))); - - /* Typically we'll not need to resize. */ - if ((n_elts * elt_sz) <= vec_max_bytes (v)) - { - _vec_set_len (v, n_elts, elt_sz); - return v; - } - } - - /* Slow path: call helper function. */ - return _vec_realloc (v, n_elts, elt_sz, hdr_sz, align, heap); + return 1ULL << _vec_find (v)->log2_align; } static_always_inline void _vec_prealloc (void **vp, uword n_elts, uword hdr_sz, uword align, void *heap, uword elt_sz) { + const vec_attr_t va = { + .elt_sz = elt_sz, .hdr_sz = hdr_sz, .align = align, .heap = heap + }; void *v; ASSERT (vp[0] == 0); - v = _vec_realloc (0, n_elts, elt_sz, hdr_sz, align, heap); + v = _vec_alloc_internal (n_elts, &va); _vec_set_len (v, 0, elt_sz); _vec_update_pointer (vp, v); } @@ -247,9 +247,26 @@ _vec_resize_will_expand (void *v, uword n_elts, uword elt_sz) static_always_inline void _vec_resize (void **vp, uword n_add, uword hdr_sz, uword align, uword elt_sz) { - void *v = vp[0]; - v = _vec_realloc_inline (v, vec_len (v) + n_add, elt_sz, hdr_sz, align, 0); - _vec_update_pointer (vp, v); + void *v = *vp; + if (PREDICT_FALSE (v == 0)) + { + const vec_attr_t va = { .elt_sz = elt_sz, + .align = align, + .hdr_sz = hdr_sz }; + *vp = _vec_alloc_internal (n_add, &va); + return; + } + + if (PREDICT_FALSE (_vec_find (v)->grow_elts < n_add)) + { + const vec_attr_t va = { .elt_sz = elt_sz, + .align = 
align, + .hdr_sz = hdr_sz }; + v = _vec_resize_internal (v, _vec_len (v) + n_add, &va); + _vec_update_pointer (vp, v); + } + else + _vec_set_len (v, _vec_len (v) + n_add, elt_sz); } #define vec_resize_ha(V, N, H, A) \ @@ -324,7 +341,10 @@ _vec_resize (void **vp, uword n_add, uword hdr_sz, uword align, uword elt_sz) @return V new vector */ #define vec_new_generic(T, N, H, A, P) \ - _vec_realloc (0, N, sizeof (T), H, _vec_align ((T *) 0, A), P) + _vec_alloc_internal (N, &((vec_attr_t){ .align = _vec_align ((T *) 0, A), \ + .hdr_sz = (H), \ + .heap = (P), \ + .elt_sz = sizeof (T) })) /** \brief Create new vector of given type and length (unspecified alignment, no header). @@ -390,11 +410,12 @@ static_always_inline void * _vec_dup (void *v, uword hdr_size, uword align, uword elt_sz) { uword len = vec_len (v); + const vec_attr_t va = { .elt_sz = elt_sz, .align = align }; void *n = 0; if (len) { - n = _vec_realloc (0, len, elt_sz, hdr_size, align, 0); + n = _vec_alloc_internal (len, &va); clib_memcpy_fast (n, v, len * elt_sz); } return n; @@ -438,7 +459,8 @@ _vec_dup (void *v, uword hdr_size, uword align, uword elt_sz) static_always_inline void _vec_clone (void **v1p, void *v2, uword align, uword elt_sz) { - v1p[0] = _vec_realloc (0, vec_len (v2), elt_sz, 0, align, 0); + const vec_attr_t va = { .elt_sz = elt_sz, .align = align }; + v1p[0] = _vec_alloc_internal (vec_len (v2), &va); } #define vec_clone(NEW_V, OLD_V) \ _vec_clone ((void **) &(NEW_V), OLD_V, _vec_align (NEW_V, 0), \ @@ -464,14 +486,35 @@ static_always_inline void _vec_validate (void **vp, uword index, uword header_size, uword align, void *heap, uword elt_sz) { - void *v = vp[0]; - uword vl = vec_len (v); - if (index >= vl) + void *v = *vp; + uword vl, n_elts = index + 1; + + if (PREDICT_FALSE (v == 0)) + { + const vec_attr_t va = { .elt_sz = elt_sz, + .align = align, + .hdr_sz = header_size }; + *vp = _vec_alloc_internal (n_elts, &va); + return; + } + + vl = _vec_len (v); + + if (PREDICT_FALSE (index < vl)) 
+ return; + + if (PREDICT_FALSE (index >= _vec_find (v)->grow_elts + vl)) { - v = _vec_realloc_inline (v, index + 1, elt_sz, header_size, align, heap); - _vec_zero_elts (v, vl, index - vl + 1, elt_sz); + const vec_attr_t va = { .elt_sz = elt_sz, + .align = align, + .hdr_sz = header_size }; + v = _vec_resize_internal (v, n_elts, &va); _vec_update_pointer (vp, v); } + else + _vec_set_len (v, n_elts, elt_sz); + + _vec_zero_elts (v, vl, n_elts - vl, elt_sz); } #define vec_validate_hap(V, I, H, A, P) \ @@ -572,10 +615,28 @@ static_always_inline void * _vec_add1 (void **vp, uword hdr_sz, uword align, uword elt_sz) { void *v = vp[0]; - uword len = vec_len (v); - v = _vec_realloc_inline (v, len + 1, elt_sz, hdr_sz, align, 0); + uword len; - _vec_update_pointer (vp, v); + if (PREDICT_FALSE (v == 0)) + { + const vec_attr_t va = { .elt_sz = elt_sz, + .align = align, + .hdr_sz = hdr_sz }; + return *vp = _vec_alloc_internal (1, &va); + } + + len = _vec_len (v); + + if (PREDICT_FALSE (_vec_find (v)->grow_elts == 0)) + { + const vec_attr_t va = { .elt_sz = elt_sz, + .align = align, + .hdr_sz = hdr_sz }; + v = _vec_resize_internal (v, len + 1, &va); + _vec_update_pointer (vp, v); + } + else + _vec_set_len (v, len + 1, elt_sz); return v + len * elt_sz; } @@ -616,11 +677,31 @@ static_always_inline void _vec_add2 (void **vp, void **pp, uword n_add, uword hdr_sz, uword align, uword elt_sz) { - void *v = vp[0]; - uword len = vec_len (vp[0]); - v = _vec_realloc_inline (v, len + n_add, elt_sz, hdr_sz, align, 0); - _vec_update_pointer (vp, v); - pp[0] = v + len * elt_sz; + void *v = *vp; + uword len; + + if (PREDICT_FALSE (v == 0)) + { + const vec_attr_t va = { .elt_sz = elt_sz, + .align = align, + .hdr_sz = hdr_sz }; + *vp = *pp = _vec_alloc_internal (n_add, &va); + return; + } + + len = _vec_len (v); + if (PREDICT_FALSE (_vec_find (v)->grow_elts < n_add)) + { + const vec_attr_t va = { .elt_sz = elt_sz, + .align = align, + .hdr_sz = hdr_sz }; + v = _vec_resize_internal (v, len + n_add, 
&va); + _vec_update_pointer (vp, v); + } + else + _vec_set_len (v, len + n_add, elt_sz); + + *pp = v + len * elt_sz; } #define vec_add2_ha(V, P, N, H, A) \ @@ -663,17 +744,38 @@ static_always_inline void _vec_add (void **vp, void *e, word n_add, uword hdr_sz, uword align, uword elt_sz) { - void *v = vp[0]; - uword len = vec_len (v); + void *v = *vp; + uword len; ASSERT (n_add >= 0); if (n_add < 1) return; - v = _vec_realloc_inline (v, len + n_add, elt_sz, hdr_sz, align, 0); + if (PREDICT_FALSE (v == 0)) + { + const vec_attr_t va = { .elt_sz = elt_sz, + .align = align, + .hdr_sz = hdr_sz }; + *vp = v = _vec_alloc_internal (n_add, &va); + clib_memcpy_fast (v, e, n_add * elt_sz); + return; + } + + len = _vec_len (v); + + if (PREDICT_FALSE (_vec_find (v)->grow_elts < n_add)) + { + const vec_attr_t va = { .elt_sz = elt_sz, + .align = align, + .hdr_sz = hdr_sz }; + v = _vec_resize_internal (v, len + n_add, &va); + _vec_update_pointer (vp, v); + } + else + _vec_set_len (v, len + n_add, elt_sz); + clib_memcpy_fast (v + len * elt_sz, e, n_add * elt_sz); - _vec_update_pointer (vp, v); } #define vec_add_ha(V, E, N, H, A) \ @@ -747,11 +849,12 @@ _vec_insert (void **vp, uword n_insert, uword ins_pt, u8 init, uword hdr_sz, { void *v = vp[0]; uword len = vec_len (v); + const vec_attr_t va = { .elt_sz = elt_sz, .align = align, .hdr_sz = hdr_sz }; ASSERT (ins_pt <= len); - v = _vec_realloc_inline (v, len + n_insert, elt_sz, hdr_sz, align, 0); - clib_memmove (v + elt_sz * (ins_pt + n_insert), v + ins_pt * elt_sz, + v = _vec_resize_internal (v, len + n_insert, &va); + clib_memmove (v + va.elt_sz * (ins_pt + n_insert), v + ins_pt * elt_sz, (len - ins_pt) * elt_sz); _vec_zero_elts (v, ins_pt, n_insert, elt_sz); _vec_update_pointer (vp, v); @@ -839,10 +942,11 @@ _vec_insert_elts (void **vp, void *e, uword n_insert, uword ins_pt, { void *v = vp[0]; uword len = vec_len (v); + const vec_attr_t va = { .elt_sz = elt_sz, .align = align, .hdr_sz = hdr_sz }; ASSERT (ins_pt <= len); - v = 
_vec_realloc_inline (v, len + n_insert, elt_sz, hdr_sz, align, 0); + v = _vec_resize_internal (v, len + n_insert, &va); clib_memmove (v + elt_sz * (ins_pt + n_insert), v + ins_pt * elt_sz, (len - ins_pt) * elt_sz); _vec_zero_elts (v, ins_pt, n_insert, elt_sz); @@ -938,7 +1042,8 @@ _vec_append (void **v1p, void *v2, uword v1_elt_sz, uword v2_elt_sz, if (PREDICT_TRUE (len2 > 0)) { - v1 = _vec_realloc_inline (v1, len1 + len2, v2_elt_sz, 0, align, 0); + const vec_attr_t va = { .elt_sz = v2_elt_sz, .align = align }; + v1 = _vec_resize_internal (v1, len1 + len2, &va); clib_memcpy_fast (v1 + len1 * v1_elt_sz, v2, len2 * v2_elt_sz); _vec_update_pointer (v1p, v1); } @@ -971,7 +1076,8 @@ _vec_prepend (void **v1p, void *v2, uword v1_elt_sz, uword v2_elt_sz, if (PREDICT_TRUE (len2 > 0)) { - v1 = _vec_realloc_inline (v1, len1 + len2, v2_elt_sz, 0, align, 0); + const vec_attr_t va = { .elt_sz = v2_elt_sz, .align = align }; + v1 = _vec_resize_internal (v1, len1 + len2, &va); clib_memmove (v1 + len2 * v2_elt_sz, v1p[0], len1 * v1_elt_sz); clib_memcpy_fast (v1, v2, len2 * v2_elt_sz); _vec_update_pointer (v1p, v1); diff --git a/src/vppinfra/vec_bootstrap.h b/src/vppinfra/vec_bootstrap.h index a94c1a19ba7..567041550a4 100644 --- a/src/vppinfra/vec_bootstrap.h +++ b/src/vppinfra/vec_bootstrap.h @@ -58,7 +58,8 @@ typedef struct u8 hdr_size; /**< header size divided by VEC_MIN_ALIGN */ u8 log2_align : 7; /**< data alignment */ u8 default_heap : 1; /**< vector uses default heap */ - u8 vpad[2]; /**< pad to 8 bytes */ + u8 grow_elts; /**< number of elts vector can grow without realloc */ + u8 vpad[1]; /**< pad to 8 bytes */ u8 vector_data[0]; /**< Vector data . 
*/ } vec_header_t; @@ -168,18 +169,31 @@ _vec_max_len (void *v, uword elt_sz) #define vec_max_len(v) _vec_max_len (v, _vec_elt_sz (v)) +static_always_inline void +_vec_set_grow_elts (void *v, uword n_elts) +{ + uword max = pow2_mask (BITS (_vec_find (0)->grow_elts)); + + if (PREDICT_FALSE (n_elts > max)) + n_elts = max; + + _vec_find (v)->grow_elts = n_elts; +} + always_inline void _vec_set_len (void *v, uword len, uword elt_sz) { ASSERT (v); ASSERT (len <= _vec_max_len (v, elt_sz)); uword old_len = _vec_len (v); + uword grow_elts = _vec_find (v)->grow_elts; if (len > old_len) clib_mem_unpoison (v + old_len * elt_sz, (len - old_len) * elt_sz); else if (len > old_len) clib_mem_poison (v + len * elt_sz, (old_len - len) * elt_sz); + _vec_set_grow_elts (v, old_len + grow_elts - len); _vec_find (v)->len = len; } -- cgit 1.2.3-korg