From 97f5af01808b1987df66d0f1c7a48bb413a4ef48 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Thu, 22 Feb 2018 09:48:45 -0500 Subject: bihash table size perf/scale improvements Directly allocate and carve cache-line-aligned chunks of virtual memory. To a first approximation, bihash wasn't using clib_mem_free(...). We eliminate mheap object header/trailers, which improves space efficiency. We also eliminate the 4gb bihash table size limit. An 8_8 bihash w/ 100 million random entries uses 3.8 Gbytes. Change-Id: Icf925fdf99bce7d6ac407ac4edd30560b8f04808 Signed-off-by: Dave Barach --- src/vppinfra/bihash_template.c | 76 ++++++++++++++++++++++--------------- src/vppinfra/bihash_template.h | 14 +++++-- src/vppinfra/test_bihash_template.c | 24 ++++++++---- 3 files changed, 73 insertions(+), 41 deletions(-) (limited to 'src/vppinfra') diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c index 2b40af31d6f..89ae847c036 100644 --- a/src/vppinfra/bihash_template.c +++ b/src/vppinfra/bihash_template.c @@ -15,10 +15,28 @@ /** @cond DOCUMENTATION_IS_IN_BIHASH_DOC_H */ +static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes) +{ + uword rv; + + /* Round to an even number of cache lines */ + nbytes += CLIB_CACHE_LINE_BYTES - 1; + nbytes &= ~(CLIB_CACHE_LINE_BYTES - 1); + + rv = h->alloc_arena_next; + h->alloc_arena_next += nbytes; + + if (rv >= (h->alloc_arena + h->alloc_arena_size)) + os_out_of_memory (); + + return (void *) rv; +} + + void BV (clib_bihash_init) (BVT (clib_bihash) * h, char *name, u32 nbuckets, uword memory_size) { - void *oldheap; + uword bucket_size; int i; nbuckets = 1 << (max_log2 (nbuckets)); @@ -29,19 +47,19 @@ void BV (clib_bihash_init) h->cache_hits = 0; h->cache_misses = 0; - h->mheap = mheap_alloc (0 /* use VM */ , memory_size); + h->alloc_arena = (uword) clib_mem_vm_alloc (memory_size); + h->alloc_arena_next = h->alloc_arena; + h->alloc_arena_size = memory_size; - oldheap = clib_mem_set_heap (h->mheap); - 
vec_validate_aligned (h->buckets, nbuckets - 1, CLIB_CACHE_LINE_BYTES); - h->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); + bucket_size = nbuckets * sizeof (h->buckets[0]); + h->buckets = BV (alloc_aligned) (h, bucket_size); + + h->writer_lock = BV (alloc_aligned) (h, CLIB_CACHE_LINE_BYTES); h->writer_lock[0] = 0; for (i = 0; i < nbuckets; i++) BV (clib_bihash_reset_cache) (h->buckets + i); - clib_mem_set_heap (oldheap); - h->fmt_fn = NULL; } @@ -53,7 +71,9 @@ void BV (clib_bihash_set_kvp_format_fn) (BVT (clib_bihash) * h, void BV (clib_bihash_free) (BVT (clib_bihash) * h) { - mheap_free (h->mheap); + vec_free (h->working_copies); + vec_free (h->freelists); + clib_mem_vm_free ((void *) (h->alloc_arena), h->alloc_arena_size); memset (h, 0, sizeof (*h)); } @@ -62,17 +82,12 @@ BVT (clib_bihash_value) * BV (value_alloc) (BVT (clib_bihash) * h, u32 log2_pages) { BVT (clib_bihash_value) * rv = 0; - void *oldheap; ASSERT (h->writer_lock[0]); if (log2_pages >= vec_len (h->freelists) || h->freelists[log2_pages] == 0) { - oldheap = clib_mem_set_heap (h->mheap); - - vec_validate (h->freelists, log2_pages); - rv = clib_mem_alloc_aligned ((sizeof (*rv) * (1 << log2_pages)), - CLIB_CACHE_LINE_BYTES); - clib_mem_set_heap (oldheap); + vec_validate_init_empty (h->freelists, log2_pages, 0); + rv = BV (alloc_aligned) (h, (sizeof (*rv) * (1 << log2_pages))); goto initialize; } rv = h->freelists[log2_pages]; @@ -106,17 +121,14 @@ BV (make_working_copy) (BVT (clib_bihash) * h, BVT (clib_bihash_bucket) * b) { BVT (clib_bihash_value) * v; BVT (clib_bihash_bucket) working_bucket __attribute__ ((aligned (8))); - void *oldheap; BVT (clib_bihash_value) * working_copy; u32 thread_index = os_get_thread_index (); int log2_working_copy_length; if (thread_index >= vec_len (h->working_copies)) { - oldheap = clib_mem_set_heap (h->mheap); vec_validate (h->working_copies, thread_index); vec_validate_init_empty (h->working_copy_lengths, thread_index, ~0); - 
clib_mem_set_heap (oldheap); } /* @@ -128,22 +140,20 @@ BV (make_working_copy) (BVT (clib_bihash) * h, BVT (clib_bihash_bucket) * b) log2_working_copy_length = h->working_copy_lengths[thread_index]; h->saved_bucket.as_u64 = b->as_u64; - oldheap = clib_mem_set_heap (h->mheap); if (b->log2_pages > log2_working_copy_length) { - if (working_copy) - clib_mem_free (working_copy); - - working_copy = clib_mem_alloc_aligned - (sizeof (working_copy[0]) * (1 << b->log2_pages), - CLIB_CACHE_LINE_BYTES); + /* + * It's not worth the bookkeeping to free working copies + * if (working_copy) + * clib_mem_free (working_copy); + */ + working_copy = BV (alloc_aligned) + (h, sizeof (working_copy[0]) * (1 << b->log2_pages)); h->working_copy_lengths[thread_index] = b->log2_pages; h->working_copies[thread_index] = working_copy; } - clib_mem_set_heap (oldheap); - /* Lock the bucket... */ while (BV (clib_bihash_lock_bucket) (b) == 0) ; @@ -554,6 +564,7 @@ u8 *BV (format_bihash) (u8 * s, va_list * args) u64 active_elements = 0; u64 active_buckets = 0; u64 linear_buckets = 0; + u64 used_bytes; s = format (s, "Hash table %s\n", h->name ? 
h->name : (u8 *) "(unnamed)"); @@ -633,8 +644,13 @@ u8 *BV (format_bihash) (u8 * s, va_list * args) s = format (s, " %lld linear search buckets\n", linear_buckets); s = format (s, " %lld cache hits, %lld cache misses\n", h->cache_hits, h->cache_misses); - if (h->mheap) - s = format (s, " mheap: %U", format_mheap, h->mheap, 0 /* verbose */ ); + used_bytes = h->alloc_arena_next - h->alloc_arena; + s = format (s, + " arena: base %llx, next %llx\n" + " used %lld b (%lld Mbytes) of %lld b (%lld Mbytes)\n", + h->alloc_arena, h->alloc_arena_next, + used_bytes, used_bytes >> 20, + h->alloc_arena_size, h->alloc_arena_size >> 20); return s; } diff --git a/src/vppinfra/bihash_template.h b/src/vppinfra/bihash_template.h index 4e5d995cd9f..81d9ffad41e 100644 --- a/src/vppinfra/bihash_template.h +++ b/src/vppinfra/bihash_template.h @@ -89,7 +89,14 @@ typedef struct u64 cache_misses; BVT (clib_bihash_value) ** freelists; - void *mheap; + + /* + * Backing store allocation. Since bihash manages its own + * freelists, we simply dole out memory at alloc_arena_next. 
+ */ + uword alloc_arena; + uword alloc_arena_next; + uword alloc_arena_size; /** * A custom format function to print the Key and Value of bihash_key instead of default hexdump @@ -224,7 +231,7 @@ static inline void BV (clib_bihash_unlock_bucket) static inline void *BV (clib_bihash_get_value) (BVT (clib_bihash) * h, uword offset) { - u8 *hp = h->mheap; + u8 *hp = (u8 *) h->alloc_arena; u8 *vp = hp + offset; return (void *) vp; @@ -235,10 +242,9 @@ static inline uword BV (clib_bihash_get_offset) (BVT (clib_bihash) * h, { u8 *hp, *vp; - hp = (u8 *) h->mheap; + hp = (u8 *) h->alloc_arena; vp = (u8 *) v; - ASSERT ((vp - hp) < 0x100000000ULL); return vp - hp; } diff --git a/src/vppinfra/test_bihash_template.c b/src/vppinfra/test_bihash_template.c index 2d4b553d259..bdcf2cd6a81 100644 --- a/src/vppinfra/test_bihash_template.c +++ b/src/vppinfra/test_bihash_template.c @@ -36,6 +36,7 @@ typedef struct int non_random_keys; uword *key_hash; u64 *keys; + uword hash_memory_size; BVT (clib_bihash) hash; clib_time_t clib_time; @@ -101,8 +102,7 @@ test_bihash (test_main_t * tm) h = &tm->hash; - BV (clib_bihash_init) (h, "test", tm->nbuckets, 3ULL << 30); - + BV (clib_bihash_init) (h, "test", tm->nbuckets, tm->hash_memory_size); for (acycle = 0; acycle < tm->ncycles; acycle++) { @@ -269,10 +269,11 @@ test_bihash (test_main_t * tm) } /* Clean up side-bet hash table and random key vector */ - for (i = 0; i < tm->nitems; i++) - hash_unset (tm->key_hash, tm->keys[i]); - + hash_free (tm->key_hash); vec_reset_length (tm->keys); + /* Recreate hash table if we're going to need it again */ + if (acycle != (tm->ncycles - 1)) + tm->key_hash = hash_create (tm->nitems, sizeof (uword)); } fformat (stdout, "End of run, should be empty...\n"); @@ -322,6 +323,7 @@ test_bihash_main (test_main_t * tm) int which = 0; tm->report_every_n = 1; + tm->hash_memory_size = 4095ULL << 20; while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { @@ -344,6 +346,9 @@ test_bihash_main (test_main_t * tm) ; else 
if (unformat (i, "report-every %d", &tm->report_every_n)) ; + else if (unformat (i, "memory-size %U", + unformat_memory_size, &tm->hash_memory_size)) + ; else if (unformat (i, "vec64")) which = 1; else if (unformat (i, "cache")) @@ -356,6 +361,12 @@ test_bihash_main (test_main_t * tm) format_unformat_error, i); } + /* Preallocate hash table, key vector */ + tm->key_hash = hash_create (tm->nitems, sizeof (uword)); + vec_validate (tm->keys, tm->nitems - 1); + _vec_len (tm->keys) = 0; + + switch (which) { case 0: @@ -385,7 +396,7 @@ main (int argc, char *argv[]) clib_error_t *error; test_main_t *tm = &test_main; - clib_mem_init (0, 3ULL << 30); + clib_mem_init (0, 4095ULL << 20); tm->input = &i; tm->seed = 0xdeaddabe; @@ -396,7 +407,6 @@ main (int argc, char *argv[]) tm->verbose = 1; tm->search_iter = 1; tm->careful_delete_tests = 0; - tm->key_hash = hash_create (0, sizeof (uword)); clib_time_init (&tm->clib_time); unformat_init_command_line (&i, argv); -- cgit 1.2.3-korg