diff options
-rw-r--r-- | src/plugins/dpdk/buffer.c | 18 | ||||
-rw-r--r-- | src/vlib/buffer.c | 22 | ||||
-rw-r--r-- | src/vlib/buffer.h | 8 | ||||
-rw-r--r-- | src/vlib/buffer_funcs.h | 62 |
4 files changed, 64 insertions, 46 deletions
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c index a1c1ea1ca46..d7a79161386 100644 --- a/src/plugins/dpdk/buffer.c +++ b/src/plugins/dpdk/buffer.c @@ -40,7 +40,7 @@ dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp) struct rte_mempool *mp, *nmp; struct rte_pktmbuf_pool_private priv; enum rte_iova_mode iova_mode; - u32 *bi; + u32 i; u8 *name = 0; u32 elt_size = @@ -54,7 +54,7 @@ dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp) /* normal mempool */ name = format (name, "vpp pool %u%c", bp->index, 0); - mp = rte_mempool_create_empty ((char *) name, vec_len (bp->buffers), + mp = rte_mempool_create_empty ((char *) name, bp->n_buffers, elt_size, 512, sizeof (priv), bp->numa_node, 0); if (!mp) @@ -68,7 +68,7 @@ dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp) /* non-cached mempool */ name = format (name, "vpp pool %u (no cache)%c", bp->index, 0); - nmp = rte_mempool_create_empty ((char *) name, vec_len (bp->buffers), + nmp = rte_mempool_create_empty ((char *) name, bp->n_buffers, elt_size, 0, sizeof (priv), bp->numa_node, 0); if (!nmp) @@ -99,11 +99,10 @@ dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp) iova_mode = rte_eal_iova_mode (); /* populate mempool object buffer header */ - /* *INDENT-OFF* */ - vec_foreach (bi, bp->buffers) + for (i = 0; i < bp->n_buffers; i++) { struct rte_mempool_objhdr *hdr; - vlib_buffer_t *b = vlib_get_buffer (vm, *bi); + vlib_buffer_t *b = vlib_get_buffer (vm, bp->buffers[i]); struct rte_mbuf *mb = rte_mbuf_from_vlib_buffer (b); hdr = (struct rte_mempool_objhdr *) RTE_PTR_SUB (mb, sizeof (*hdr)); hdr->mp = mp; @@ -114,7 +113,6 @@ dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp) mp->populated_size++; nmp->populated_size++; } - /* *INDENT-ON* */ /* call the object initializers */ rte_mempool_obj_iter (mp, rte_pktmbuf_init, 0); @@ -127,14 +125,12 @@ dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp) (buffer_mem_start, *bp->buffers, 0)), sizeof (struct rte_mbuf)); - /* *INDENT-OFF* */ - vec_foreach (bi, bp->buffers) + for (i = 0; i < bp->n_buffers; i++) { vlib_buffer_t *b; - b = vlib_buffer_ptr_from_index (buffer_mem_start, *bi, 0); + b = vlib_buffer_ptr_from_index (buffer_mem_start, bp->buffers[i], 0); vlib_buffer_copy_template (b, &bp->buffer_template); } - /* *INDENT-ON* */ /* map DMA pages if at least one physical device exists */ if (rte_eth_dev_count_avail ()) diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index c4c05bbbcaa..9838e23e0d5 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -552,9 +552,9 @@ vlib_buffer_pool_create (vlib_main_t * vm, char *name, u32 data_size, n_alloc_per_page = (1ULL << m->log2_page_size) / alloc_size; /* preallocate buffer indices memory */ - vec_validate_aligned (bp->buffers, m->n_pages * n_alloc_per_page, - CLIB_CACHE_LINE_BYTES); - vec_reset_length (bp->buffers); + bp->n_buffers = m->n_pages * n_alloc_per_page; + bp->buffers = clib_mem_alloc_aligned (bp->n_buffers * sizeof (u32), + CLIB_CACHE_LINE_BYTES); clib_spinlock_init (&bp->lock); @@ -571,11 +571,11 @@ vlib_buffer_pool_create (vlib_main_t * vm, char *name, u32 data_size, bi = vlib_get_buffer_index (vm, (vlib_buffer_t *) p); - vec_add1_aligned (bp->buffers, bi, CLIB_CACHE_LINE_BYTES); + bp->buffers[bp->n_avail++] = bi; + vlib_get_buffer (vm, bi); } - bp->n_buffers = vec_len (bp->buffers); return bp->index; } @@ -594,14 +594,14 @@ format_vlib_buffer_pool (u8 * s, va_list * va) /* *INDENT-OFF* */ vec_foreach (bpt, bp->threads) - cached += vec_len (bpt->cached_buffers); + cached += bpt->n_cached; /* *INDENT-ON* */ s = format (s, "%-20s%=6d%=6d%=6u%=11u%=6u%=8u%=8u%=8u", bp->name, bp->index, bp->numa_node, bp->data_size + sizeof (vlib_buffer_t) + vm->buffer_main->ext_hdr_size, - bp->data_size, bp->n_buffers, vec_len (bp->buffers), cached, - bp->n_buffers - vec_len (bp->buffers) - cached); + bp->data_size, bp->n_buffers, bp->n_avail, cached, + bp->n_buffers - bp->n_avail - cached); return s; } @@ -736,7 +736,7 @@ buffer_get_cached (vlib_buffer_pool_t * bp) /* *INDENT-OFF* */ vec_foreach (bpt, bp->threads) - cached += vec_len (bpt->cached_buffers); + cached += bpt->n_cached; /* *INDENT-ON* */ clib_spinlock_unlock (&bp->lock); @@ -763,7 +763,7 @@ buffer_gauges_update_used_fn (stat_segment_directory_entry_t * e, u32 index) if (!bp) return; - e->value = bp->n_buffers - vec_len (bp->buffers) - buffer_get_cached (bp); + e->value = bp->n_buffers - bp->n_avail - buffer_get_cached (bp); } static void @@ -775,7 +775,7 @@ buffer_gauges_update_available_fn (stat_segment_directory_entry_t * e, if (!bp) return; - e->value = vec_len (bp->buffers); + e->value = bp->n_avail; } static void diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index c8761af4d20..c2ca8214162 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -411,12 +411,15 @@ vlib_buffer_pull (vlib_buffer_t * b, u8 size) /* Forward declaration. */ struct vlib_main_t; +#define VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ 512 + typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u32 *cached_buffers; - u32 n_alloc; + u32 cached_buffers[VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ]; + u32 n_cached; } vlib_buffer_pool_thread_t; + typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -428,6 +431,7 @@ typedef struct u32 physmem_map_index; u32 data_size; u32 n_buffers; + u32 n_avail; u32 *buffers; u8 *name; clib_spinlock_t lock; diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index 7480326ee3d..2ba9f1cb894 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -491,19 +491,19 @@ vlib_buffer_pool_get (vlib_main_t * vm, u8 buffer_pool_index, u32 * buffers, ASSERT (bp->buffers); clib_spinlock_lock (&bp->lock); - len = vec_len (bp->buffers); + len = bp->n_avail; if (PREDICT_TRUE (n_buffers < len)) { len -= n_buffers; vlib_buffer_copy_indices (buffers, bp->buffers + len, n_buffers); - _vec_len (bp->buffers) = len; + bp->n_avail = len; clib_spinlock_unlock (&bp->lock); return n_buffers; } else { vlib_buffer_copy_indices (buffers, bp->buffers, len); - _vec_len (bp->buffers) = 0; + bp->n_avail = 0; clib_spinlock_unlock (&bp->lock); return len; } @@ -533,14 +533,26 @@ vlib_buffer_alloc_from_pool (vlib_main_t * vm, u32 * buffers, u32 n_buffers, dst = buffers; n_left = n_buffers; - len = vec_len (bpt->cached_buffers); + len = bpt->n_cached; /* per-thread cache contains enough buffers */ if (len >= n_buffers) { src = bpt->cached_buffers + len - n_buffers; vlib_buffer_copy_indices (dst, src, n_buffers); - _vec_len (bpt->cached_buffers) -= n_buffers; + bpt->n_cached -= n_buffers; + + if (CLIB_DEBUG > 0) + vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, + VLIB_BUFFER_KNOWN_FREE); + return n_buffers; + } + + /* alloc bigger than cache - take buffers directly from main pool */ + if (n_buffers >= VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ) + { + n_buffers = vlib_buffer_pool_get (vm, buffer_pool_index, buffers, + n_buffers); if (CLIB_DEBUG > 0) vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, @@ -552,23 +564,22 @@ vlib_buffer_alloc_from_pool (vlib_main_t * vm, u32 * buffers, u32 n_buffers, if (len) { vlib_buffer_copy_indices (dst, bpt->cached_buffers, len); - _vec_len (bpt->cached_buffers) = 0; + bpt->n_cached = 0; dst += len; n_left -= len; } len = round_pow2 (n_left, 32); - vec_validate_aligned (bpt->cached_buffers, len - 1, CLIB_CACHE_LINE_BYTES); len = vlib_buffer_pool_get (vm, buffer_pool_index, bpt->cached_buffers, len); - _vec_len (bpt->cached_buffers) = len; + bpt->n_cached = len; if (len) { u32 n_copy = clib_min (len, n_left); src = bpt->cached_buffers + len - n_copy; vlib_buffer_copy_indices (dst, src, n_copy); - _vec_len (bpt->cached_buffers) -= n_copy; + bpt->n_cached -= n_copy; n_left -= n_copy; } @@ -681,26 +692,33 @@ vlib_buffer_pool_put (vlib_main_t * vm, u8 buffer_pool_index, u32 * buffers, u32 n_buffers) { vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index); - vlib_buffer_pool_thread_t *bpt = - vec_elt_at_index (bp->threads, vm->thread_index); + vlib_buffer_pool_thread_t *bpt = vec_elt_at_index (bp->threads, + vm->thread_index); + u32 n_cached, n_empty; if (CLIB_DEBUG > 0) vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, VLIB_BUFFER_KNOWN_ALLOCATED); - vec_add_aligned (bpt->cached_buffers, buffers, n_buffers, - CLIB_CACHE_LINE_BYTES); - - if (vec_len (bpt->cached_buffers) > 4 * VLIB_FRAME_SIZE) + n_cached = bpt->n_cached; + n_empty = VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ - n_cached; + if (n_buffers <= n_empty) { - clib_spinlock_lock (&bp->lock); - /* keep last stored buffers, as they are more likely hot in the cache */ - vec_add_aligned (bp->buffers, bpt->cached_buffers, VLIB_FRAME_SIZE, - CLIB_CACHE_LINE_BYTES); - vec_delete (bpt->cached_buffers, VLIB_FRAME_SIZE, 0); - bpt->n_alloc -= VLIB_FRAME_SIZE; - clib_spinlock_unlock (&bp->lock); + vlib_buffer_copy_indices (bpt->cached_buffers + n_cached, + buffers, n_buffers); + bpt->n_cached = n_cached + n_buffers; + return; } + + vlib_buffer_copy_indices (bpt->cached_buffers + n_cached, + buffers + n_buffers - n_empty, n_empty); + bpt->n_cached = VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ; + + clib_spinlock_lock (&bp->lock); + vlib_buffer_copy_indices (bp->buffers + bp->n_avail, buffers, + n_buffers - n_empty); + bp->n_avail += n_buffers - n_empty; + clib_spinlock_unlock (&bp->lock); } static_always_inline void |