diff options
author | Dave Barach <dave@barachs.net> | 2020-04-16 12:00:14 -0400 |
---|---|---|
committer | Damjan Marion <dmarion@me.com> | 2020-04-21 10:26:14 +0000 |
commit | 16e4a4a0ae39ebc1ded1b6dba2799b176aee1828 (patch) | |
tree | 14e21d5be2bb77b9301b5cb56118e3e9d8293811 | |
parent | b9753540d2a69bbab807653fc3d0c1b43ec4d6d5 (diff) |
vppinfra: bihash improvements
Template instances can allocate BIHASH_KVP_PER_PAGE data records
tangent to the bucket, to remove a dependent read / prefetch.
Template instances can ask for immediate memory allocation, to avoid
several branches in the lookup path.
Clean up l2 fib, gbp plugin code: use clib_bihash_get_bucket(...)
Use hugepages for bihash allocation arenas
Type: improvement
Signed-off-by: Dave Barach <dave@barachs.net>
Signed-off-by: Damjan Marion <damarion@cisco.com>
Change-Id: I92fc11bc58e48d84e2d61f44580916dd1c56361c
-rw-r--r-- | src/plugins/gbp/gbp_endpoint.c | 12 | ||||
-rw-r--r-- | src/vnet/l2/l2_fib.c | 13 | ||||
-rw-r--r-- | src/vppinfra/bihash_16_8.h | 6 | ||||
-rw-r--r-- | src/vppinfra/bihash_16_8_32.h | 7 | ||||
-rw-r--r-- | src/vppinfra/bihash_24_8.h | 6 | ||||
-rw-r--r-- | src/vppinfra/bihash_40_8.h | 6 | ||||
-rw-r--r-- | src/vppinfra/bihash_48_8.h | 6 | ||||
-rw-r--r-- | src/vppinfra/bihash_8_8.h | 8 | ||||
-rw-r--r-- | src/vppinfra/bihash_8_8_stats.h | 6 | ||||
-rw-r--r-- | src/vppinfra/bihash_doc.h | 1 | ||||
-rw-r--r-- | src/vppinfra/bihash_template.c | 129 | ||||
-rw-r--r-- | src/vppinfra/bihash_template.h | 53 | ||||
-rw-r--r-- | src/vppinfra/bihash_vec8_8.h | 6 | ||||
-rw-r--r-- | src/vppinfra/linux/mem.c | 32 | ||||
-rw-r--r-- | src/vppinfra/mem.h | 1 | ||||
-rw-r--r-- | src/vppinfra/pmalloc.c | 29 | ||||
-rw-r--r-- | src/vppinfra/test_bihash_template.c | 26 |
17 files changed, 285 insertions, 62 deletions
diff --git a/src/plugins/gbp/gbp_endpoint.c b/src/plugins/gbp/gbp_endpoint.c index 9ef08904041..e1a810cf222 100644 --- a/src/plugins/gbp/gbp_endpoint.c +++ b/src/plugins/gbp/gbp_endpoint.c @@ -1398,8 +1398,8 @@ gbp_endpoint_scan_l2 (vlib_main_t * vm) last_start = vlib_time_now (vm); } - b = &gte_table->buckets[i]; - if (b->offset == 0) + b = clib_bihash_get_bucket_16_8 (gte_table, i); + if (clib_bihash_bucket_is_empty_16_8 (b)) continue; v = clib_bihash_get_value_16_8 (gte_table, b->offset); @@ -1416,7 +1416,7 @@ gbp_endpoint_scan_l2 (vlib_main_t * vm) * Note: we may have just freed the bucket's backing * storage, so check right here... */ - if (b->offset == 0) + if (clib_bihash_bucket_is_empty_16_8 (b)) goto doublebreak; } v++; @@ -1453,8 +1453,8 @@ gbp_endpoint_scan_l3 (vlib_main_t * vm) last_start = vlib_time_now (vm); } - b = &gte_table->buckets[i]; - if (b->offset == 0) + b = clib_bihash_get_bucket_24_8 (gte_table, i); + if (clib_bihash_bucket_is_empty_24_8 (b)) continue; v = clib_bihash_get_value_24_8 (gte_table, b->offset); @@ -1471,7 +1471,7 @@ gbp_endpoint_scan_l3 (vlib_main_t * vm) * Note: we may have just freed the bucket's backing * storage, so check right here... 
*/ - if (b->offset == 0) + if (clib_bihash_bucket_is_empty_24_8 (b)) goto doublebreak; } v++; diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index 160e4e64048..983e0218bfc 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -1028,10 +1028,11 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) if (i < (h->nbuckets - 3)) { - BVT (clib_bihash_bucket) * b = &h->buckets[i + 3]; + BVT (clib_bihash_bucket) * b = + BV (clib_bihash_get_bucket) (h, i + 3); CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); - b = &h->buckets[i + 1]; - if (b->offset) + b = BV (clib_bihash_get_bucket) (h, i + 1); + if (!BV (clib_bihash_bucket_is_empty) (b)) { BVT (clib_bihash_value) * v = BV (clib_bihash_get_value) (h, b->offset); @@ -1039,8 +1040,8 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) } } - BVT (clib_bihash_bucket) * b = &h->buckets[i]; - if (b->offset == 0) + BVT (clib_bihash_bucket) * b = BV (clib_bihash_get_bucket) (h, i); + if (BV (clib_bihash_bucket_is_empty) (b)) continue; BVT (clib_bihash_value) * v = BV (clib_bihash_get_value) (h, b->offset); for (j = 0; j < (1 << b->log2_pages); j++) @@ -1146,7 +1147,7 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) * Note: we may have just freed the bucket's backing * storage, so check right here... 
*/ - if (b->offset == 0) + if (BV (clib_bihash_bucket_is_empty) (b)) goto doublebreak; } v++; diff --git a/src/vppinfra/bihash_16_8.h b/src/vppinfra/bihash_16_8.h index b6b0766b8ee..1815a526ce6 100644 --- a/src/vppinfra/bihash_16_8.h +++ b/src/vppinfra/bihash_16_8.h @@ -16,9 +16,15 @@ #undef BIHASH_KVP_PER_PAGE #undef BIHASH_32_64_SVM #undef BIHASH_ENABLE_STATS +#undef BIHASH_KVP_AT_BUCKET_LEVEL +#undef BIHASH_LAZY_INSTANTIATE +#undef BIHASH_BUCKET_PREFETCH_CACHE_LINES #define BIHASH_TYPE _16_8 #define BIHASH_KVP_PER_PAGE 4 +#define BIHASH_KVP_AT_BUCKET_LEVEL 1 +#define BIHASH_LAZY_INSTANTIATE 0 +#define BIHASH_BUCKET_PREFETCH_CACHE_LINES 2 #ifndef __included_bihash_16_8_h__ #define __included_bihash_16_8_h__ diff --git a/src/vppinfra/bihash_16_8_32.h b/src/vppinfra/bihash_16_8_32.h index e66954f4838..9453f88ace7 100644 --- a/src/vppinfra/bihash_16_8_32.h +++ b/src/vppinfra/bihash_16_8_32.h @@ -16,10 +16,15 @@ #undef BIHASH_KVP_PER_PAGE #undef BIHASH_32_64_SVM #undef BIHASH_ENABLE_STATS - +#undef BIHASH_KVP_AT_BUCKET_LEVEL +#undef BIHASH_LAZY_INSTANTIATE +#undef BIHASH_BUCKET_PREFETCH_CACHE_LINES #define BIHASH_TYPE _16_8_32 #define BIHASH_KVP_PER_PAGE 4 +#define BIHASH_KVP_AT_BUCKET_LEVEL 0 +#define BIHASH_LAZY_INSTANTIATE 1 +#define BIHASH_BUCKET_PREFETCH_CACHE_LINES 1 #define BIHASH_32_64_SVM 1 diff --git a/src/vppinfra/bihash_24_8.h b/src/vppinfra/bihash_24_8.h index 463521d8126..33199bfcef4 100644 --- a/src/vppinfra/bihash_24_8.h +++ b/src/vppinfra/bihash_24_8.h @@ -16,9 +16,15 @@ #undef BIHASH_KVP_PER_PAGE #undef BIHASH_32_64_SVM #undef BIHASH_ENABLE_STATS +#undef BIHASH_KVP_AT_BUCKET_LEVEL +#undef BIHASH_LAZY_INSTANTIATE +#undef BIHASH_BUCKET_PREFETCH_CACHE_LINES #define BIHASH_TYPE _24_8 #define BIHASH_KVP_PER_PAGE 4 +#define BIHASH_KVP_AT_BUCKET_LEVEL 0 +#define BIHASH_LAZY_INSTANTIATE 1 +#define BIHASH_BUCKET_PREFETCH_CACHE_LINES 1 #ifndef __included_bihash_24_8_h__ #define __included_bihash_24_8_h__ diff --git a/src/vppinfra/bihash_40_8.h 
b/src/vppinfra/bihash_40_8.h index b50e5eb5ac4..9cd2371afa7 100644 --- a/src/vppinfra/bihash_40_8.h +++ b/src/vppinfra/bihash_40_8.h @@ -17,9 +17,15 @@ #undef BIHASH_KVP_PER_PAGE #undef BIHASH_32_64_SVM #undef BIHASH_ENABLE_STATS +#undef BIHASH_KVP_AT_BUCKET_LEVEL +#undef BIHASH_LAZY_INSTANTIATE +#undef BIHASH_BUCKET_PREFETCH_CACHE_LINES #define BIHASH_TYPE _40_8 #define BIHASH_KVP_PER_PAGE 4 +#define BIHASH_KVP_AT_BUCKET_LEVEL 0 +#define BIHASH_LAZY_INSTANTIATE 1 +#define BIHASH_BUCKET_PREFETCH_CACHE_LINES 1 #ifndef __included_bihash_40_8_h__ #define __included_bihash_40_8_h__ diff --git a/src/vppinfra/bihash_48_8.h b/src/vppinfra/bihash_48_8.h index 2a6381faa61..54fd7090e81 100644 --- a/src/vppinfra/bihash_48_8.h +++ b/src/vppinfra/bihash_48_8.h @@ -17,9 +17,15 @@ #undef BIHASH_KVP_PER_PAGE #undef BIHASH_32_64_SVM #undef BIHASH_ENABLE_STATS +#undef BIHASH_KVP_AT_BUCKET_LEVEL +#undef BIHASH_LAZY_INSTANTIATE +#undef BIHASH_BUCKET_PREFETCH_CACHE_LINES #define BIHASH_TYPE _48_8 #define BIHASH_KVP_PER_PAGE 4 +#define BIHASH_KVP_AT_BUCKET_LEVEL 0 +#define BIHASH_LAZY_INSTANTIATE 1 +#define BIHASH_BUCKET_PREFETCH_CACHE_LINES 1 #ifndef __included_bihash_48_8_h__ #define __included_bihash_48_8_h__ diff --git a/src/vppinfra/bihash_8_8.h b/src/vppinfra/bihash_8_8.h index a4a18a1d7eb..234890752a5 100644 --- a/src/vppinfra/bihash_8_8.h +++ b/src/vppinfra/bihash_8_8.h @@ -16,9 +16,15 @@ #undef BIHASH_KVP_PER_PAGE #undef BIHASH_32_64_SVM #undef BIHASH_ENABLE_STATS +#undef BIHASH_KVP_AT_BUCKET_LEVEL +#undef BIHASH_LAZY_INSTANTIATE +#undef BIHASH_BUCKET_PREFETCH_CACHE_LINES #define BIHASH_TYPE _8_8 -#define BIHASH_KVP_PER_PAGE 4 +#define BIHASH_KVP_PER_PAGE 7 +#define BIHASH_KVP_AT_BUCKET_LEVEL 1 +#define BIHASH_LAZY_INSTANTIATE 0 +#define BIHASH_BUCKET_PREFETCH_CACHE_LINES 2 #ifndef __included_bihash_8_8_h__ #define __included_bihash_8_8_h__ diff --git a/src/vppinfra/bihash_8_8_stats.h b/src/vppinfra/bihash_8_8_stats.h index a6c947ab5c4..5aceb1bdc81 100644 --- 
a/src/vppinfra/bihash_8_8_stats.h +++ b/src/vppinfra/bihash_8_8_stats.h @@ -16,10 +16,16 @@ #undef BIHASH_KVP_PER_PAGE #undef BIHASH_32_64_SVM #undef BIHASH_ENABLE_STATS +#undef BIHASH_KVP_AT_BUCKET_LEVEL +#undef BIHASH_LAZY_INSTANTIATE +#undef BIHASH_BUCKET_PREFETCH_CACHE_LINES #define BIHASH_TYPE _8_8_stats #define BIHASH_KVP_PER_PAGE 4 #define BIHASH_ENABLE_STATS 1 +#define BIHASH_KVP_AT_BUCKET_LEVEL 0 +#define BIHASH_LAZY_INSTANTIATE 1 +#define BIHASH_BUCKET_PREFETCH_CACHE_LINES 1 #ifndef __included_bihash_8_8_stats_h__ #define __included_bihash_8_8__stats_h__ diff --git a/src/vppinfra/bihash_doc.h b/src/vppinfra/bihash_doc.h index da8c832f160..b4b6a4a969b 100644 --- a/src/vppinfra/bihash_doc.h +++ b/src/vppinfra/bihash_doc.h @@ -81,6 +81,7 @@ typedef struct uword alloc_arena; /**< memory allocation arena */ uword alloc_arena_next; /**< first available mem chunk */ uword alloc_arena_size; /**< size of the arena */ + uword alloc_arena_mapped; /**< size of mapped memory in the arena */ } clib_bihash_t; /** Get pointer to value page given its clib mheap offset */ diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c index 471251d04d1..89bfc8b6b56 100644 --- a/src/vppinfra/bihash_template.c +++ b/src/vppinfra/bihash_template.c @@ -15,6 +15,10 @@ /** @cond DOCUMENTATION_IS_IN_BIHASH_DOC_H */ +#ifndef MAP_HUGE_SHIFT +#define MAP_HUGE_SHIFT 26 +#endif + static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes) { uword rv; @@ -29,6 +33,35 @@ static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes) if (alloc_arena_next (h) > alloc_arena_size (h)) os_out_of_memory (); + if (alloc_arena_next (h) > alloc_arena_mapped (h)) + { + void *base, *rv; + uword alloc = alloc_arena_next (h) - alloc_arena_mapped (h); + int mmap_flags = MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS; + int mmap_flags_huge = (mmap_flags | MAP_HUGETLB | + BIHASH_LOG2_HUGEPAGE_SIZE << MAP_HUGE_SHIFT); + + /* new allocation is 25% of existing one */ + 
if (alloc_arena_mapped (h) >> 2 > alloc) + alloc = alloc_arena_mapped (h) >> 2; + + /* round allocation to page size */ + alloc = round_pow2 (alloc, 1 << BIHASH_LOG2_HUGEPAGE_SIZE); + + base = (void *) (uword) (alloc_arena (h) + alloc_arena_mapped (h)); + + rv = mmap (base, alloc, PROT_READ | PROT_WRITE, mmap_flags_huge, -1, 0); + + /* fallback - maybe we are still able to allocate normal pages */ + if (rv == MAP_FAILED) + rv = mmap (base, alloc, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + + if (rv == MAP_FAILED) + os_out_of_memory (); + + alloc_arena_mapped (h) += alloc; + } + return (void *) (uword) (rv + alloc_arena (h)); } @@ -36,12 +69,43 @@ static void BV (clib_bihash_instantiate) (BVT (clib_bihash) * h) { uword bucket_size; - alloc_arena (h) = (uword) clib_mem_vm_alloc (h->memory_size); + alloc_arena (h) = clib_mem_vm_reserve (0, h->memory_size, + BIHASH_LOG2_HUGEPAGE_SIZE); + if (alloc_arena (h) == ~0) + os_out_of_memory (); alloc_arena_next (h) = 0; alloc_arena_size (h) = h->memory_size; + alloc_arena_mapped (h) = 0; bucket_size = h->nbuckets * sizeof (h->buckets[0]); + + if (BIHASH_KVP_AT_BUCKET_LEVEL) + bucket_size += + h->nbuckets * BIHASH_KVP_PER_PAGE * sizeof (BVT (clib_bihash_kv)); + h->buckets = BV (alloc_aligned) (h, bucket_size); + + if (BIHASH_KVP_AT_BUCKET_LEVEL) + { + int i; + BVT (clib_bihash_bucket) * b; + + b = h->buckets; + + for (i = 0; i < h->nbuckets; i++) + { + b->offset = BV (clib_bihash_get_offset) (h, (void *) (b + 1)); + b->refcnt = 1; + /* Mark all elements free */ + clib_memset ((b + 1), 0xff, + BIHASH_KVP_PER_PAGE * sizeof (BVT (clib_bihash_kv))); + + /* Compute next bucket start address */ + b = (void *) (((uword) b) + sizeof (*b) + + (BIHASH_KVP_PER_PAGE * + sizeof (BVT (clib_bihash_kv)))); + } + } CLIB_MEMORY_BARRIER (); h->instantiated = 1; } @@ -94,7 +158,9 @@ do_lock: CLIB_CACHE_LINE_BYTES); h->alloc_lock[0] = 0; +#if BIHASH_LAZY_INSTANTIATE if (a->instantiate_immediately) +#endif BV (clib_bihash_instantiate) (h); } @@ 
-505,7 +571,7 @@ static inline int BV (clib_bihash_add_del_inline) BV (clib_bihash_lock_bucket) (b); /* First elt in the bucket? */ - if (BV (clib_bihash_bucket_is_empty) (b)) + if (BIHASH_KVP_AT_BUCKET_LEVEL == 0 && BV (clib_bihash_bucket_is_empty) (b)) { if (is_add == 0) { @@ -620,6 +686,24 @@ static inline int BV (clib_bihash_add_del_inline) if (PREDICT_TRUE (b->refcnt > 1)) { b->refcnt--; + /* Switch back to the bucket-level kvp array? */ + if (BIHASH_KVP_AT_BUCKET_LEVEL && b->refcnt == 1 + && b->log2_pages > 0) + { + tmp_b.as_u64 = b->as_u64; + b->offset = BV (clib_bihash_get_offset) + (h, (void *) (b + 1)); + b->linear_search = 0; + b->log2_pages = 0; + /* Clean up the bucket-level kvp array */ + clib_memset + ((b + 1), 0xff, + BIHASH_KVP_PER_PAGE * sizeof (BVT (clib_bihash_kv))); + BV (clib_bihash_unlock_bucket) (b); + BV (clib_bihash_increment_stat) (h, BIHASH_STAT_del, 1); + goto free_backing_store; + } + BV (clib_bihash_unlock_bucket) (b); BV (clib_bihash_increment_stat) (h, BIHASH_STAT_del, 1); return (0); @@ -633,6 +717,7 @@ static inline int BV (clib_bihash_add_del_inline) /* Kill and unlock the bucket */ b->as_u64 = 0; + free_backing_store: /* And free the backing storage */ BV (clib_bihash_alloc_lock) (h); /* Note: v currently points into the middle of the bucket */ @@ -726,14 +811,30 @@ expand_ok: tmp_b.log2_pages = new_log2_pages; tmp_b.offset = BV (clib_bihash_get_offset) (h, save_new_v); tmp_b.linear_search = mark_bucket_linear; - tmp_b.refcnt = h->saved_bucket.refcnt + 1; +#if BIHASH_KVP_AT_BUCKET_LEVEL + /* Compensate for permanent refcount bump at the bucket level */ + if (new_log2_pages > 0) +#endif + tmp_b.refcnt = h->saved_bucket.refcnt + 1; ASSERT (tmp_b.refcnt > 0); tmp_b.lock = 0; CLIB_MEMORY_BARRIER (); b->as_u64 = tmp_b.as_u64; - /* free the old bucket */ - v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset); - BV (value_free) (h, v, h->saved_bucket.log2_pages); + +#if BIHASH_KVP_AT_BUCKET_LEVEL + if (h->saved_bucket.log2_pages 
> 0) + { +#endif + + /* free the old bucket, except at the bucket level if so configured */ + v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset); + BV (value_free) (h, v, h->saved_bucket.log2_pages); + +#if BIHASH_KVP_AT_BUCKET_LEVEL + } +#endif + + BV (clib_bihash_alloc_unlock) (h); return (0); } @@ -762,8 +863,10 @@ int BV (clib_bihash_search) ASSERT (valuep); +#if BIHASH_LAZY_INSTANTIATE if (PREDICT_FALSE (alloc_arena (h) == 0)) return -1; +#endif hash = BV (clib_bihash_hash) (search_key); @@ -812,12 +915,14 @@ u8 *BV (format_bihash) (u8 * s, va_list * args) s = format (s, "Hash table %s\n", h->name ? h->name : (u8 *) "(unnamed)"); +#if BIHASH_LAZY_INSTANTIATE if (PREDICT_FALSE (alloc_arena (h) == 0)) return format (s, "[empty, uninitialized]"); +#endif for (i = 0; i < h->nbuckets; i++) { - b = &h->buckets[i]; + b = BV (clib_bihash_get_bucket) (h, i); if (BV (clib_bihash_bucket_is_empty) (b)) { if (verbose > 1) @@ -832,8 +937,9 @@ u8 *BV (format_bihash) (u8 * s, va_list * args) if (verbose) { - s = format (s, "[%d]: heap offset %lld, len %d, linear %d\n", i, - b->offset, (1 << b->log2_pages), b->linear_search); + s = format + (s, "[%d]: heap offset %lld, len %d, refcnt %d, linear %d\n", i, + b->offset, (1 << b->log2_pages), b->refcnt, b->linear_search); } v = BV (clib_bihash_get_value) (h, b->offset); @@ -909,12 +1015,15 @@ void BV (clib_bihash_foreach_key_value_pair) BVT (clib_bihash_bucket) * b; BVT (clib_bihash_value) * v; + +#if BIHASH_LAZY_INSTANTIATE if (PREDICT_FALSE (alloc_arena (h) == 0)) return; +#endif for (i = 0; i < h->nbuckets; i++) { - b = &h->buckets[i]; + b = BV (clib_bihash_get_bucket) (h, i); if (BV (clib_bihash_bucket_is_empty) (b)) continue; diff --git a/src/vppinfra/bihash_template.h b/src/vppinfra/bihash_template.h index 6abe7a36b27..13a348fbcf4 100644 --- a/src/vppinfra/bihash_template.h +++ b/src/vppinfra/bihash_template.h @@ -44,6 +44,11 @@ #define BIHASH_FREELIST_LENGTH 17 #endif +/* default is 2MB, use 30 for 1GB */ +#ifndef 
BIHASH_LOG2_HUGEPAGE_SIZE +#define BIHASH_LOG2_HUGEPAGE_SIZE 21 +#endif + #define _bv(a,b) a##b #define __bv(a,b) _bv(a,b) #define BV(a) __bv(a,BIHASH_TYPE) @@ -103,6 +108,7 @@ typedef CLIB_PACKED (struct { */ u64 alloc_arena_next; /* Next offset from alloc_arena to allocate, definitely NOT a constant */ u64 alloc_arena_size; /* Size of the arena */ + u64 alloc_arena_mapped; /* Size of the mapped memory in the arena */ /* Two SVM pointers stored as 8-byte integers */ u64 alloc_lock_as_u64; u64 buckets_as_u64; @@ -111,7 +117,7 @@ typedef CLIB_PACKED (struct { u32 nbuckets; /* Number of buckets */ /* Set when header valid */ volatile u32 ready; - u64 pad[2]; + u64 pad[1]; }) BVT (clib_bihash_shared_header); /* *INDENT-ON* */ @@ -175,19 +181,23 @@ extern void **clib_all_bihashes; #if BIHASH_32_64_SVM #undef alloc_arena_next #undef alloc_arena_size +#undef alloc_arena_mapped #undef alloc_arena #undef CLIB_BIHASH_READY_MAGIC #define alloc_arena_next(h) (((h)->sh)->alloc_arena_next) #define alloc_arena_size(h) (((h)->sh)->alloc_arena_size) +#define alloc_arena_mapped(h) (((h)->sh)->alloc_arena_mapped) #define alloc_arena(h) ((h)->alloc_arena) #define CLIB_BIHASH_READY_MAGIC 0xFEEDFACE #else #undef alloc_arena_next #undef alloc_arena_size +#undef alloc_arena_mapped #undef alloc_arena #undef CLIB_BIHASH_READY_MAGIC #define alloc_arena_next(h) ((h)->sh.alloc_arena_next) #define alloc_arena_size(h) ((h)->sh.alloc_arena_size) +#define alloc_arena_mapped(h) ((h)->sh.alloc_arena_mapped) #define alloc_arena(h) ((h)->alloc_arena) #define CLIB_BIHASH_READY_MAGIC 0 #endif @@ -285,7 +295,10 @@ static inline int BV (clib_bihash_bucket_is_empty) (BVT (clib_bihash_bucket) * b) { /* Note: applied to locked buckets, test offset */ - return b->offset == 0; + if (BIHASH_KVP_AT_BUCKET_LEVEL == 0) + return b->offset == 0; + else + return (b->log2_pages == 0 && b->refcnt == 1); } static inline uword BV (clib_bihash_get_offset) (BVT (clib_bihash) * h, @@ -345,19 +358,34 @@ format_function_t BV 
(format_bihash); format_function_t BV (format_bihash_kvp); format_function_t BV (format_bihash_lru); +static inline +BVT (clib_bihash_bucket) * +BV (clib_bihash_get_bucket) (BVT (clib_bihash) * h, u64 hash) +{ +#if BIHASH_KVP_AT_BUCKET_LEVEL + uword offset; + offset = (hash & (h->nbuckets - 1)); + offset = offset * (sizeof (BVT (clib_bihash_bucket)) + + (BIHASH_KVP_PER_PAGE * sizeof (BVT (clib_bihash_kv)))); + return ((BVT (clib_bihash_bucket) *) (((u8 *) h->buckets) + offset)); +#endif + + return h->buckets + (hash & (h->nbuckets - 1)); +} + static inline int BV (clib_bihash_search_inline_with_hash) (BVT (clib_bihash) * h, u64 hash, BVT (clib_bihash_kv) * key_result) { - u32 bucket_index; BVT (clib_bihash_value) * v; BVT (clib_bihash_bucket) * b; int i, limit; +#if BIHASH_LAZY_INSTANTIATE if (PREDICT_FALSE (alloc_arena (h) == 0)) return -1; +#endif - bucket_index = hash & (h->nbuckets - 1); - b = &h->buckets[bucket_index]; + b = BV (clib_bihash_get_bucket) (h, hash); if (PREDICT_FALSE (BV (clib_bihash_bucket_is_empty) (b))) return -1; @@ -400,17 +428,12 @@ static inline int BV (clib_bihash_search_inline) return BV (clib_bihash_search_inline_with_hash) (h, hash, key_result); } -static inline -BVT (clib_bihash_bucket) * -BV (clib_bihash_get_bucket) (BVT (clib_bihash) * h, u64 hash) -{ - return h->buckets + (hash & (h->nbuckets - 1)); -} - static inline void BV (clib_bihash_prefetch_bucket) (BVT (clib_bihash) * h, u64 hash) { - clib_prefetch_load (BV (clib_bihash_get_bucket) (h, hash)); + CLIB_PREFETCH (BV (clib_bihash_get_bucket) (h, hash), + BIHASH_BUCKET_PREFETCH_CACHE_LINES * CLIB_CACHE_LINE_BYTES, + LOAD); } static inline void BV (clib_bihash_prefetch_data) @@ -419,8 +442,10 @@ static inline void BV (clib_bihash_prefetch_data) BVT (clib_bihash_value) * v; BVT (clib_bihash_bucket) * b; +#if BIHASH_LAZY_INSTANTIATE if (PREDICT_FALSE (alloc_arena (h) == 0)) return; +#endif b = BV (clib_bihash_get_bucket) (h, hash); @@ -445,8 +470,10 @@ static inline int BV 
(clib_bihash_search_inline_2_with_hash) ASSERT (valuep); +#if BIHASH_LAZY_INSTANTIATE if (PREDICT_FALSE (alloc_arena (h) == 0)) return -1; +#endif b = BV (clib_bihash_get_bucket) (h, hash); diff --git a/src/vppinfra/bihash_vec8_8.h b/src/vppinfra/bihash_vec8_8.h index f50234e5d97..15c6d8cebff 100644 --- a/src/vppinfra/bihash_vec8_8.h +++ b/src/vppinfra/bihash_vec8_8.h @@ -16,9 +16,15 @@ #undef BIHASH_KVP_PER_PAGE #undef BIHASH_32_64_SVM #undef BIHASH_ENABLE_STATS +#undef BIHASH_KVP_AT_BUCKET_LEVEL +#undef BIHASH_LAZY_INSTANTIATE +#undef BIHASH_BUCKET_PREFETCH_CACHE_LINES #define BIHASH_TYPE _vec8_8 #define BIHASH_KVP_PER_PAGE 4 +#define BIHASH_KVP_AT_BUCKET_LEVEL 0 +#define BIHASH_LAZY_INSTANTIATE 1 +#define BIHASH_BUCKET_PREFETCH_CACHE_LINES 1 #ifndef __included_bihash_vec8_8_h__ #define __included_bihash_vec8_8_h__ diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c index 3b7294f038a..d86402a9b16 100644 --- a/src/vppinfra/linux/mem.c +++ b/src/vppinfra/linux/mem.c @@ -344,6 +344,38 @@ clib_mem_vm_ext_free (clib_mem_vm_alloc_t * a) } } +uword +clib_mem_vm_reserve (uword start, uword size, u32 log2_page_sz) +{ + uword off, pagesize = 1 << log2_page_sz; + int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS; + u8 *p; + + if (start) + mmap_flags |= MAP_FIXED; + + size = round_pow2 (size, pagesize); + + p = uword_to_pointer (start, void *); + p = mmap (p, size + pagesize, PROT_NONE, mmap_flags, -1, 0); + + if (p == MAP_FAILED) + return ~0; + + off = round_pow2 ((uword) p, pagesize) - (uword) p; + + /* trim start and end of reservation to be page aligned */ + if (off) + { + munmap (p, off); + p += off; + } + + munmap (p + size, pagesize - off); + + return (uword) p; +} + u64 * clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages) { diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h index 4fedd107754..f254601043c 100644 --- a/src/vppinfra/mem.h +++ b/src/vppinfra/mem.h @@ -411,6 +411,7 @@ void clib_mem_vm_ext_free (clib_mem_vm_alloc_t * a); u64 
clib_mem_get_fd_page_size (int fd); uword clib_mem_get_default_hugepage_size (void); int clib_mem_get_fd_log2_page_size (int fd); +uword clib_mem_vm_reserve (uword start, uword size, u32 log2_page_sz); u64 *clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages); typedef struct diff --git a/src/vppinfra/pmalloc.c b/src/vppinfra/pmalloc.c index cca8039433d..e0f3f3a6585 100644 --- a/src/vppinfra/pmalloc.c +++ b/src/vppinfra/pmalloc.c @@ -63,9 +63,8 @@ pmalloc_validate_numa_node (u32 * numa_node) int clib_pmalloc_init (clib_pmalloc_main_t * pm, uword base_addr, uword size) { - uword off, pagesize; + uword base, pagesize; u64 *pt = 0; - int mmap_flags; ASSERT (pm->error == 0); @@ -84,32 +83,16 @@ clib_pmalloc_init (clib_pmalloc_main_t * pm, uword base_addr, uword size) pm->max_pages = size >> pm->def_log2_page_sz; - /* reserve VA space for future growth */ - mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS; + base = clib_mem_vm_reserve (base_addr, size, pm->def_log2_page_sz); - if (base_addr) - mmap_flags |= MAP_FIXED; - - pm->base = mmap (uword_to_pointer (base_addr, void *), size + pagesize, - PROT_NONE, mmap_flags, -1, 0); - - if (pm->base == MAP_FAILED) + if (base == ~0) { - pm->error = clib_error_return_unix (0, "failed to reserve %u pages"); + pm->error = clib_error_return (0, "failed to reserve %u pages", + pm->max_pages); return -1; } - off = round_pow2 (pointer_to_uword (pm->base), pagesize) - - pointer_to_uword (pm->base); - - /* trim start and end of reservation to be page aligned */ - if (off) - { - munmap (pm->base, off); - pm->base += off; - } - - munmap (pm->base + ((uword) pm->max_pages * pagesize), pagesize - off); + pm->base = uword_to_pointer (base, void *); return 0; } diff --git a/src/vppinfra/test_bihash_template.c b/src/vppinfra/test_bihash_template.c index c1a44691966..86039d8408a 100644 --- a/src/vppinfra/test_bihash_template.c +++ b/src/vppinfra/test_bihash_template.c @@ -337,6 +337,16 @@ test_bihash (test_main_t * tm) { for (i = 0; i < 
tm->nitems; i++) { + /* Prefetch buckets 8 iterations ahead */ + if (1 && (i < (tm->nitems - 8))) + { + BVT (clib_bihash_kv) pref_kv; + u64 pref_hash; + pref_kv.key = tm->keys[i + 8]; + pref_hash = BV (clib_bihash_hash) (&pref_kv); + BV (clib_bihash_prefetch_bucket) (h, pref_hash); + } + kv.key = tm->keys[i]; if (BV (clib_bihash_search) (h, &kv, &kv) < 0) if (BV (clib_bihash_search) (h, &kv, &kv) < 0) @@ -356,8 +366,10 @@ test_bihash (test_main_t * tm) total_searches = (uword) tm->search_iter * (uword) tm->nitems; if (delta > 0) - fformat (stdout, "%.f searches per second\n", - ((f64) total_searches) / delta); + fformat (stdout, + "%.f searches per second, %.2f nsec per search\n", + ((f64) total_searches) / delta, + 1e9 * (delta / ((f64) total_searches))); fformat (stdout, "%lld searches in %.6f seconds\n", total_searches, delta); @@ -409,6 +421,16 @@ test_bihash (test_main_t * tm) { for (j = 0; j < tm->nitems; j++) { + /* Prefetch buckets 8 iterations ahead */ + if (1 && (j < (tm->nitems - 8))) + { + BVT (clib_bihash_kv) pref_kv; + u64 pref_hash; + pref_kv.key = tm->keys[j + 8]; + pref_hash = BV (clib_bihash_hash) (&pref_kv); + BV (clib_bihash_prefetch_bucket) (h, pref_hash); + } + kv.key = tm->keys[j]; rv = BV (clib_bihash_search) (h, &kv, &kv); if (j <= i && rv >= 0) |