diff options
author | Christian Ehrhardt <christian.ehrhardt@canonical.com> | 2016-12-08 14:07:29 +0100 |
---|---|---|
committer | Christian Ehrhardt <christian.ehrhardt@canonical.com> | 2016-12-08 14:10:05 +0100 |
commit | 6b3e017e5d25f15da73f7700f7f2ac553ef1a2e9 (patch) | |
tree | 1b1fb3f903b2282e261ade69e3c17952b3fd3464 /lib/librte_hash | |
parent | 32e04ea00cd159613e04acef75e52bfca6eeff2f (diff) |
Imported Upstream version 16.11
Change-Id: I1944c65ddc88a9ad70f8c0eb6731552b84fbcb77
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
Diffstat (limited to 'lib/librte_hash')
-rw-r--r-- | lib/librte_hash/rte_cuckoo_hash.c | 472 | ||||
-rw-r--r-- | lib/librte_hash/rte_cuckoo_hash.h | 70 | ||||
-rw-r--r-- | lib/librte_hash/rte_cuckoo_hash_x86.h | 25 | ||||
-rw-r--r-- | lib/librte_hash/rte_fbk_hash.h | 2 | ||||
-rw-r--r-- | lib/librte_hash/rte_thash.h | 3 |
5 files changed, 252 insertions, 320 deletions
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index d6e68c68..51db006a 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -98,6 +98,7 @@ rte_hash_find_existing(const char *name) void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) { + h->cmp_jump_table_idx = KEY_CUSTOM; h->rte_hash_custom_cmp_eq = func; } @@ -283,6 +284,15 @@ rte_hash_create(const struct rte_hash_parameters *params) h->free_slots = r; h->hw_trans_mem_support = hw_trans_mem_support; +#if defined(RTE_ARCH_X86) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) + h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2; + else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) + h->sig_cmp_fn = RTE_HASH_COMPARE_SSE; + else +#endif + h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR; + /* Turn on multi-writer only with explicit flat from user and TM * support. */ @@ -421,10 +431,10 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Search for space in alternative locations */ - next_bucket_idx = bkt->signatures[i].alt & h->bucket_bitmask; + next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask; next_bkt[i] = &h->buckets[next_bucket_idx]; for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) { - if (next_bkt[i]->signatures[j].sig == NULL_SIGNATURE) + if (next_bkt[i]->key_idx[j] == EMPTY_SLOT) break; } @@ -434,8 +444,8 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) /* Alternative location has spare room (end of recursive function) */ if (i != RTE_HASH_BUCKET_ENTRIES) { - next_bkt[i]->signatures[j].alt = bkt->signatures[i].current; - next_bkt[i]->signatures[j].current = bkt->signatures[i].alt; + next_bkt[i]->sig_alt[j] = bkt->sig_current[i]; + next_bkt[i]->sig_current[j] = bkt->sig_alt[i]; next_bkt[i]->key_idx[j] = bkt->key_idx[i]; return i; } @@ -464,8 +474,8 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) bkt->flag[i] = 0; nr_pushes = 0; if (ret >= 0) { - next_bkt[i]->signatures[ret].alt = bkt->signatures[i].current; - next_bkt[i]->signatures[ret].current = bkt->signatures[i].alt; + next_bkt[i]->sig_alt[ret] = bkt->sig_current[i]; + next_bkt[i]->sig_current[ret] = bkt->sig_alt[i]; next_bkt[i]->key_idx[ret] = bkt->key_idx[i]; return i; } else @@ -547,8 +557,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is already inserted in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (prim_bkt->signatures[i].current == sig && - prim_bkt->signatures[i].alt == alt_hash) { + if (prim_bkt->sig_current[i] == sig && + prim_bkt->sig_alt[i] == alt_hash) { k = (struct rte_hash_key *) ((char *)keys + prim_bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -567,8 +577,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is already inserted in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (sec_bkt->signatures[i].alt == sig && - sec_bkt->signatures[i].current == alt_hash) { + if (sec_bkt->sig_alt[i] == sig && + sec_bkt->sig_current[i] == alt_hash) { k = (struct rte_hash_key *) ((char *)keys + sec_bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -613,9 +623,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, #endif for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Check if slot is available */ - if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) { - prim_bkt->signatures[i].current = sig; - prim_bkt->signatures[i].alt = alt_hash; + if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) { + prim_bkt->sig_current[i] = sig; + prim_bkt->sig_alt[i] = alt_hash; prim_bkt->key_idx[i] = new_idx; break; } @@ -635,8 +645,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, */ ret = make_space_bucket(h, prim_bkt); if (ret >= 0) { - prim_bkt->signatures[ret].current = sig; - prim_bkt->signatures[ret].alt = alt_hash; + prim_bkt->sig_current[ret] = sig; + prim_bkt->sig_alt[ret] = alt_hash; prim_bkt->key_idx[ret] = new_idx; if (h->add_key == ADD_KEY_MULTIWRITER) rte_spinlock_unlock(h->multiwriter_lock); @@ -710,8 +720,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == sig && - bkt->signatures[i].sig != NULL_SIGNATURE) { + if (bkt->sig_current[i] == sig && + bkt->key_idx[i] != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -733,8 +743,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == alt_hash && - bkt->signatures[i].alt == sig) { + if (bkt->sig_current[i] == alt_hash && + bkt->sig_alt[i] == sig) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -788,7 +798,8 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) unsigned lcore_id, n_slots; struct lcore_cache *cached_free_slots; - bkt->signatures[i].sig = NULL_SIGNATURE; + bkt->sig_current[i] = NULL_SIGNATURE; + bkt->sig_alt[i] = NULL_SIGNATURE; if (h->hw_trans_mem_support) { lcore_id = rte_lcore_id(); cached_free_slots = &h->local_free_slots[lcore_id]; @@ -826,8 +837,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == sig && - bkt->signatures[i].sig != NULL_SIGNATURE) { + if (bkt->sig_current[i] == sig && + bkt->key_idx[i] != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -838,7 +849,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, * substracting the first dummy index */ ret = bkt->key_idx[i] - 1; - bkt->key_idx[i] = 0; + bkt->key_idx[i] = EMPTY_SLOT; return ret; } } @@ -851,8 +862,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == alt_hash && - bkt->signatures[i].sig != NULL_SIGNATURE) { + if (bkt->sig_current[i] == alt_hash && + bkt->key_idx[i] != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -863,7 +874,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, * substracting the first dummy index */ ret = bkt->key_idx[i] - 1; - bkt->key_idx[i] = 0; + bkt->key_idx[i] = EMPTY_SLOT; return ret; } } @@ -907,280 +918,189 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position, return 0; } -/* Lookup bulk stage 0: Prefetch input key */ static inline void -lookup_stage0(unsigned *idx, uint64_t *lookup_mask, - const void * const *keys) +compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, + const struct rte_hash_bucket *prim_bkt, + const struct rte_hash_bucket *sec_bkt, + hash_sig_t prim_hash, hash_sig_t sec_hash, + enum rte_hash_sig_compare_function sig_cmp_fn) { - *idx = __builtin_ctzl(*lookup_mask); - if (*lookup_mask == 0) - *idx = 0; + unsigned int i; + + switch (sig_cmp_fn) { +#ifdef RTE_MACHINE_CPUFLAG_AVX2 + case RTE_HASH_COMPARE_AVX2: + *prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( + _mm256_load_si256( + (__m256i const *)prim_bkt->sig_current), + _mm256_set1_epi32(prim_hash))); + *sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( + _mm256_load_si256( + (__m256i const *)sec_bkt->sig_current), + _mm256_set1_epi32(sec_hash))); + break; +#endif +#ifdef RTE_MACHINE_CPUFLAG_SSE2 + case RTE_HASH_COMPARE_SSE: + /* Compare the first 4 signatures in the bucket */ + *prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)prim_bkt->sig_current), + _mm_set1_epi32(prim_hash))); + *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)&prim_bkt->sig_current[4]), + _mm_set1_epi32(prim_hash)))) << 4; + /* Compare the first 4 signatures in the bucket */ + *sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)sec_bkt->sig_current), + _mm_set1_epi32(sec_hash))); + *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)&sec_bkt->sig_current[4]), + _mm_set1_epi32(sec_hash)))) << 4; + break; +#endif + default: + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + *prim_hash_matches |= + ((prim_hash == prim_bkt->sig_current[i]) << i); + *sec_hash_matches |= + ((sec_hash == sec_bkt->sig_current[i]) << i); + } + } - rte_prefetch0(keys[*idx]); - *lookup_mask &= ~(1llu << *idx); } -/* - * Lookup bulk stage 1: Calculate primary/secondary hashes - * and prefetch primary/secondary buckets - */ +#define PREFETCH_OFFSET 4 static inline void -lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash, - const struct rte_hash_bucket **primary_bkt, - const struct rte_hash_bucket **secondary_bkt, - hash_sig_t *hash_vals, const void * const *keys, - const struct rte_hash *h) +__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, + int32_t num_keys, int32_t *positions, + uint64_t *hit_mask, void *data[]) { - *prim_hash = rte_hash_hash(h, keys[idx]); - hash_vals[idx] = *prim_hash; - *sec_hash = rte_hash_secondary_hash(*prim_hash); + uint64_t hits = 0; + int32_t i; + uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX]; + const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; + const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; + uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; + + /* Prefetch first keys */ + for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++) + rte_prefetch0(keys[i]); - *primary_bkt = &h->buckets[*prim_hash & h->bucket_bitmask]; - *secondary_bkt = &h->buckets[*sec_hash & h->bucket_bitmask]; + /* + * Prefetch rest of the keys, calculate primary and + * secondary bucket and prefetch them + */ + for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) { + rte_prefetch0(keys[i + PREFETCH_OFFSET]); - rte_prefetch0(*primary_bkt); - rte_prefetch0(*secondary_bkt); -} + prim_hash[i] = rte_hash_hash(h, keys[i]); + sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]); -/* - * Lookup bulk stage 2: Search for match hashes in primary/secondary locations - * and prefetch first key slot - */ -static inline void -lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash, - const struct rte_hash_bucket *prim_bkt, - const struct rte_hash_bucket *sec_bkt, - const struct rte_hash_key **key_slot, int32_t *positions, - uint64_t *extra_hits_mask, const void *keys, - const struct rte_hash *h) -{ - unsigned prim_hash_matches, sec_hash_matches, key_idx, i; - unsigned total_hash_matches; + primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask]; + secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask]; - prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; - sec_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - prim_hash_matches |= ((prim_hash == prim_bkt->signatures[i].current) << i); - sec_hash_matches |= ((sec_hash == sec_bkt->signatures[i].current) << i); + rte_prefetch0(primary_bkt[i]); + rte_prefetch0(secondary_bkt[i]); } - key_idx = prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)]; - if (key_idx == 0) - key_idx = sec_bkt->key_idx[__builtin_ctzl(sec_hash_matches)]; + /* Calculate and prefetch rest of the buckets */ + for (; i < num_keys; i++) { + prim_hash[i] = rte_hash_hash(h, keys[i]); + sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]); - total_hash_matches = (prim_hash_matches | - (sec_hash_matches << (RTE_HASH_BUCKET_ENTRIES + 1))); - *key_slot = (const struct rte_hash_key *) ((const char *)keys + - key_idx * h->key_entry_size); + primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask]; + secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask]; - rte_prefetch0(*key_slot); - /* - * Return index where key is stored, - * substracting the first dummy index - */ - positions[idx] = (key_idx - 1); + rte_prefetch0(primary_bkt[i]); + rte_prefetch0(secondary_bkt[i]); + } - *extra_hits_mask |= (uint64_t)(__builtin_popcount(total_hash_matches) > 3) << idx; + /* Compare signatures and prefetch key slot of first hit */ + for (i = 0; i < num_keys; i++) { + compare_signatures(&prim_hitmask[i], &sec_hitmask[i], + primary_bkt[i], secondary_bkt[i], + prim_hash[i], sec_hash[i], h->sig_cmp_fn); + + if (prim_hitmask[i]) { + uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]); + uint32_t key_idx = primary_bkt[i]->key_idx[first_hit]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + rte_prefetch0(key_slot); + continue; + } -} + if (sec_hitmask[i]) { + uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]); + uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + rte_prefetch0(key_slot); + } + } + /* Compare keys, first hits in primary first */ + for (i = 0; i < num_keys; i++) { + positions[i] = -ENOENT; + while (prim_hitmask[i]) { + uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]); + + uint32_t key_idx = primary_bkt[i]->key_idx[hit_index]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + /* + * If key index is 0, do not compare key, + * as it is checking the dummy slot + */ + if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) { + if (data != NULL) + data[i] = key_slot->pdata; -/* Lookup bulk stage 3: Check if key matches, update hit mask and return data */ -static inline void -lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * const *keys, - const int32_t *positions, void *data[], uint64_t *hits, - const struct rte_hash *h) -{ - unsigned hit; - unsigned key_idx; + hits |= 1ULL << i; + positions[i] = key_idx - 1; + goto next_key; + } + prim_hitmask[i] &= ~(1 << (hit_index)); + } - hit = !rte_hash_cmp_eq(key_slot->key, keys[idx], h); - if (data != NULL) - data[idx] = key_slot->pdata; + while (sec_hitmask[i]) { + uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]); - key_idx = positions[idx] + 1; - /* - * If key index is 0, force hit to be 0, in case key to be looked up - * is all zero (as in the dummy slot), which would result in a wrong hit - */ - *hits |= (uint64_t)(hit && !!key_idx) << idx; -} + uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + /* + * If key index is 0, do not compare key, + * as it is checking the dummy slot + */ -static inline void -__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, - uint32_t num_keys, int32_t *positions, - uint64_t *hit_mask, void *data[]) -{ - uint64_t hits = 0; - uint64_t extra_hits_mask = 0; - uint64_t lookup_mask, miss_mask; - unsigned idx; - const void *key_store = h->key_store; - int ret; - hash_sig_t hash_vals[RTE_HASH_LOOKUP_BULK_MAX]; - - unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31; - const struct rte_hash_bucket *primary_bkt10, *primary_bkt11; - const struct rte_hash_bucket *secondary_bkt10, *secondary_bkt11; - const struct rte_hash_bucket *primary_bkt20, *primary_bkt21; - const struct rte_hash_bucket *secondary_bkt20, *secondary_bkt21; - const struct rte_hash_key *k_slot20, *k_slot21, *k_slot30, *k_slot31; - hash_sig_t primary_hash10, primary_hash11; - hash_sig_t secondary_hash10, secondary_hash11; - hash_sig_t primary_hash20, primary_hash21; - hash_sig_t secondary_hash20, secondary_hash21; - - lookup_mask = (uint64_t) -1 >> (64 - num_keys); - miss_mask = lookup_mask; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - - idx10 = idx00, idx11 = idx01; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - idx10 = idx00, idx11 = idx01; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, - secondary_bkt20, &k_slot20, positions, &extra_hits_mask, - key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, - secondary_bkt21, &k_slot21, positions, &extra_hits_mask, - key_store, h); - - while (lookup_mask) { - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - idx10 = idx00, idx11 = idx01; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - lookup_stage2(idx20, primary_hash20, secondary_hash20, - primary_bkt20, secondary_bkt20, &k_slot20, positions, - &extra_hits_mask, key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, - primary_bkt21, secondary_bkt21, &k_slot21, positions, - &extra_hits_mask, key_store, h); - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - } + if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) { + if (data != NULL) + data[i] = key_slot->pdata; - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - idx10 = idx00, idx11 = idx01; - - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, - secondary_bkt20, &k_slot20, positions, &extra_hits_mask, - key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, - secondary_bkt21, &k_slot21, positions, &extra_hits_mask, - key_store, h); - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - - lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, - secondary_bkt20, &k_slot20, positions, &extra_hits_mask, - key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, - secondary_bkt21, &k_slot21, positions, &extra_hits_mask, - key_store, h); - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - - /* ignore any items we have already found */ - extra_hits_mask &= ~hits; - - if (unlikely(extra_hits_mask)) { - /* run a single search for each remaining item */ - do { - idx = __builtin_ctzl(extra_hits_mask); - if (data != NULL) { - ret = rte_hash_lookup_with_hash_data(h, - keys[idx], hash_vals[idx], &data[idx]); - if (ret >= 0) - hits |= 1ULL << idx; - } else { - positions[idx] = rte_hash_lookup_with_hash(h, - keys[idx], hash_vals[idx]); - if (positions[idx] >= 0) - hits |= 1llu << idx; + hits |= 1ULL << i; + positions[i] = key_idx - 1; + goto next_key; } - extra_hits_mask &= ~(1llu << idx); - } while (extra_hits_mask); - } + sec_hitmask[i] &= ~(1 << (hit_index)); + } - miss_mask &= ~hits; - if (unlikely(miss_mask)) { - do { - idx = __builtin_ctzl(miss_mask); - positions[idx] = -ENOENT; - miss_mask &= ~(1llu << idx); - } while (miss_mask); +next_key: + continue; } if (hit_mask != NULL) @@ -1233,7 +1153,7 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32 idx = *next % RTE_HASH_BUCKET_ENTRIES; /* If current position is empty, go to the next one */ - while (h->buckets[bucket_idx].signatures[idx].sig == NULL_SIGNATURE) { + while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) { (*next)++; /* End of table */ if (*next == total_entries) diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h index 9625fffe..1b8ffed8 100644 --- a/lib/librte_hash/rte_cuckoo_hash.h +++ b/lib/librte_hash/rte_cuckoo_hash.h @@ -130,10 +130,12 @@ enum add_key_case { }; /** Number of items per bucket. */ -#define RTE_HASH_BUCKET_ENTRIES 4 +#define RTE_HASH_BUCKET_ENTRIES 8 #define NULL_SIGNATURE 0 +#define EMPTY_SLOT 0 + #define KEY_ALIGNMENT 16 #define LCORE_CACHE_SIZE 64 @@ -151,17 +153,6 @@ struct lcore_cache { void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */ } __rte_cache_aligned; -/* Structure storing both primary and secondary hashes */ -struct rte_hash_signatures { - union { - struct { - hash_sig_t current; - hash_sig_t alt; - }; - uint64_t sig; - }; -}; - /* Structure that stores key-value pair */ struct rte_hash_key { union { @@ -172,11 +163,22 @@ struct rte_hash_key { char key[0]; } __attribute__((aligned(KEY_ALIGNMENT))); +/* All different signature compare functions */ +enum rte_hash_sig_compare_function { + RTE_HASH_COMPARE_SCALAR = 0, + RTE_HASH_COMPARE_SSE, + RTE_HASH_COMPARE_AVX2, + RTE_HASH_COMPARE_NUM +}; + /** Bucket structure */ struct rte_hash_bucket { - struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES]; - /* Includes dummy key index that always contains index 0 */ - uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1]; + hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES]; + + uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES]; + + hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES]; + uint8_t flag[RTE_HASH_BUCKET_ENTRIES]; } __rte_cache_aligned; @@ -185,30 +187,38 @@ struct rte_hash { char name[RTE_HASH_NAMESIZE]; /**< Name of the hash. */ uint32_t entries; /**< Total table entries. */ uint32_t num_buckets; /**< Number of buckets in table. */ - uint32_t key_len; /**< Length of hash key. */ + + struct rte_ring *free_slots; + /**< Ring that stores all indexes of the free slots in the key table */ + uint8_t hw_trans_mem_support; + /**< Hardware transactional memory support */ + struct lcore_cache *local_free_slots; + /**< Local cache per lcore, storing some indexes of the free slots */ + enum add_key_case add_key; /**< Multi-writer hash add behavior */ + + rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */ + + /* Fields used in lookup */ + + uint32_t key_len __rte_cache_aligned; + /**< Length of hash key. */ rte_hash_function hash_func; /**< Function used to calculate hash. */ uint32_t hash_func_init_val; /**< Init value used by hash_func. */ rte_hash_cmp_eq_t rte_hash_custom_cmp_eq; /**< Custom function used to compare keys. */ enum cmp_jump_table_case cmp_jump_table_idx; /**< Indicates which compare function to use. */ - uint32_t bucket_bitmask; /**< Bitmask for getting bucket index - from hash signature. */ + enum rte_hash_sig_compare_function sig_cmp_fn; + /**< Indicates which signature compare function to use. */ + uint32_t bucket_bitmask; + /**< Bitmask for getting bucket index from hash signature. */ uint32_t key_entry_size; /**< Size of each key entry. */ - struct rte_ring *free_slots; /**< Ring that stores all indexes - of the free slots in the key table */ void *key_store; /**< Table storing all keys and data */ - struct rte_hash_bucket *buckets; /**< Table with buckets storing all the - hash values and key indexes - to the key table*/ - uint8_t hw_trans_mem_support; /**< Hardware transactional - memory support */ - struct lcore_cache *local_free_slots; - /**< Local cache per lcore, storing some indexes of the free slots */ - enum add_key_case add_key; /**< Multi-writer hash add behavior */ - - rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */ + struct rte_hash_bucket *buckets; + /**< Table with buckets storing all the hash values and key indexes + * to the key table. + */ } __rte_cache_aligned; struct queue_node { diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h index ace1bd2e..0c94244a 100644 --- a/lib/librte_hash/rte_cuckoo_hash_x86.h +++ b/lib/librte_hash/rte_cuckoo_hash_x86.h @@ -53,10 +53,9 @@ rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt, */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Check if slot is available */ - if (likely(prim_bkt->signatures[i].sig == - NULL_SIGNATURE)) { - prim_bkt->signatures[i].current = sig; - prim_bkt->signatures[i].alt = alt_hash; + if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) { + prim_bkt->sig_current[i] = sig; + prim_bkt->sig_alt[i] = alt_hash; prim_bkt->key_idx[i] = new_idx; break; } @@ -102,7 +101,7 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, prev_slot = curr_node->prev_slot; prev_alt_bkt_idx - = prev_bkt->signatures[prev_slot].alt + = prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask; if (unlikely(&h->buckets[prev_alt_bkt_idx] @@ -114,10 +113,10 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, * Cuckoo insert to move elements back to its * primary bucket if available */ - curr_bkt->signatures[curr_slot].alt = - prev_bkt->signatures[prev_slot].current; - curr_bkt->signatures[curr_slot].current = - prev_bkt->signatures[prev_slot].alt; + curr_bkt->sig_alt[curr_slot] = + prev_bkt->sig_current[prev_slot]; + curr_bkt->sig_current[curr_slot] = + prev_bkt->sig_alt[prev_slot]; curr_bkt->key_idx[curr_slot] = prev_bkt->key_idx[prev_slot]; @@ -126,8 +125,8 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, curr_bkt = curr_node->bkt; } - curr_bkt->signatures[curr_slot].current = sig; - curr_bkt->signatures[curr_slot].alt = alt_hash; + curr_bkt->sig_current[curr_slot] = sig; + curr_bkt->sig_alt[curr_slot] = alt_hash; curr_bkt->key_idx[curr_slot] = new_idx; rte_xend(); @@ -172,7 +171,7 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h, RTE_HASH_BUCKET_ENTRIES)) { curr_bkt = tail->bkt; for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (curr_bkt->signatures[i].sig == NULL_SIGNATURE) { + if (curr_bkt->key_idx[i] == EMPTY_SLOT) { if (likely(rte_hash_cuckoo_move_insert_mw_tm(h, tail, i, sig, alt_hash, new_idx) == 0)) @@ -180,7 +179,7 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h, } /* Enqueue new node and keep prev node info */ - alt_bkt = &(h->buckets[curr_bkt->signatures[i].alt + alt_bkt = &(h->buckets[curr_bkt->sig_alt[i] & h->bucket_bitmask]); head->bkt = alt_bkt; head->prev = tail; diff --git a/lib/librte_hash/rte_fbk_hash.h b/lib/librte_hash/rte_fbk_hash.h index a430961d..bd46048f 100644 --- a/lib/librte_hash/rte_fbk_hash.h +++ b/lib/librte_hash/rte_fbk_hash.h @@ -115,7 +115,7 @@ struct rte_fbk_hash_table { uint32_t init_val; /**< For initialising hash function. */ /** A flat table of all buckets. */ - union rte_fbk_hash_entry t[0]; + union rte_fbk_hash_entry t[]; }; /** diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index d98e98e7..a4886a8c 100644 --- a/lib/librte_hash/rte_thash.h +++ b/lib/librte_hash/rte_thash.h @@ -54,6 +54,7 @@ extern "C" { #include <stdint.h> #include <rte_byteorder.h> #include <rte_ip.h> +#include <rte_common.h> #ifdef __SSE3__ #include <rte_vect.h> @@ -102,6 +103,7 @@ static const __m128i rte_thash_ipv6_bswap_mask = { struct rte_ipv4_tuple { uint32_t src_addr; uint32_t dst_addr; + RTE_STD_C11 union { struct { uint16_t dport; @@ -119,6 +121,7 @@ struct rte_ipv4_tuple { struct rte_ipv6_tuple { uint8_t src_addr[16]; uint8_t dst_addr[16]; + RTE_STD_C11 union { struct { uint16_t dport; |