Diffstat (limited to 'lib/librte_member/rte_member_ht.c')
-rw-r--r--  lib/librte_member/rte_member_ht.c | 586
1 file changed, 586 insertions(+), 0 deletions(-)
diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c
new file mode 100644
index 00000000..59332d56
--- /dev/null
+++ b/lib/librte_member/rte_member_ht.c
@@ -0,0 +1,586 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_prefetch.h>
+#include <rte_random.h>
+#include <rte_log.h>
+
+#include "rte_member.h"
+#include "rte_member_ht.h"
+
+#if defined(RTE_ARCH_X86)
+#include "rte_member_x86.h"
+#endif
+
+/* Search the bucket for an entry with a matching signature and update its set_id */
+static inline int
+update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig,
+ struct member_ht_bucket *buckets,
+ member_set_t set_id)
+{
+ uint32_t i;
+
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
+ if (buckets[bucket_id].sigs[i] == tmp_sig) {
+ buckets[bucket_id].sets[i] = set_id;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static inline int
+search_bucket_single(uint32_t bucket_id, member_sig_t tmp_sig,
+ struct member_ht_bucket *buckets,
+ member_set_t *set_id)
+{
+ uint32_t iter;
+
+ for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
+ if (tmp_sig == buckets[bucket_id].sigs[iter] &&
+ buckets[bucket_id].sets[iter] !=
+ RTE_MEMBER_NO_MATCH) {
+ *set_id = buckets[bucket_id].sets[iter];
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static inline void
+search_bucket_multi(uint32_t bucket_id, member_sig_t tmp_sig,
+ struct member_ht_bucket *buckets,
+ uint32_t *counter,
+ uint32_t matches_per_key,
+ member_set_t *set_id)
+{
+ uint32_t iter;
+
+ for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
+ if (tmp_sig == buckets[bucket_id].sigs[iter] &&
+ buckets[bucket_id].sets[iter] !=
+ RTE_MEMBER_NO_MATCH) {
+ set_id[*counter] = buckets[bucket_id].sets[iter];
+ (*counter)++;
+ if (*counter >= matches_per_key)
+ return;
+ }
+ }
+}
+
+int
+rte_member_create_ht(struct rte_member_setsum *ss,
+ const struct rte_member_parameters *params)
+{
+ uint32_t i, j;
+ uint32_t size_bucket_t;
+ uint32_t num_entries = rte_align32pow2(params->num_keys);
+
+ if ((num_entries > RTE_MEMBER_ENTRIES_MAX) ||
+ !rte_is_power_of_2(RTE_MEMBER_BUCKET_ENTRIES) ||
+ num_entries < RTE_MEMBER_BUCKET_ENTRIES) {
+ rte_errno = EINVAL;
+ RTE_MEMBER_LOG(ERR,
+ "Membership HT create with invalid parameters\n");
+ return -EINVAL;
+ }
+
+ uint32_t num_buckets = num_entries / RTE_MEMBER_BUCKET_ENTRIES;
+
+ size_bucket_t = sizeof(struct member_ht_bucket);
+
+ struct member_ht_bucket *buckets = rte_zmalloc_socket(NULL,
+ num_buckets * size_bucket_t,
+ RTE_CACHE_LINE_SIZE, ss->socket_id);
+
+ if (buckets == NULL) {
+ RTE_MEMBER_LOG(ERR, "memory allocation failed for HT "
+ "setsummary\n");
+ return -ENOMEM;
+ }
+
+ ss->table = buckets;
+ ss->bucket_cnt = num_buckets;
+ ss->bucket_mask = num_buckets - 1;
+ ss->cache = params->is_cache;
+
+ for (i = 0; i < num_buckets; i++) {
+ for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
+ buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
+ }
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+ RTE_MEMBER_BUCKET_ENTRIES == 16)
+ ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
+ else
+#endif
+ ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR;
+
+ RTE_MEMBER_LOG(DEBUG, "Hash table based filter created, "
+ "the table has %u entries, %u buckets\n",
+ num_entries, num_buckets);
+ return 0;
+}
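+
+/*
+ * Sizing sketch (illustrative, not part of the library): assuming
+ * RTE_MEMBER_BUCKET_ENTRIES is 16 (the AVX2 bucket layout checked above),
+ * a request of params->num_keys = 1000000 gives
+ *	num_entries = rte_align32pow2(1000000) = 1048576 (2^20),
+ *	num_buckets = 1048576 / 16 = 65536,
+ *	bucket_mask = 65536 - 1 = 0xffff.
+ */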
+
+static inline void
+get_buckets_index(const struct rte_member_setsum *ss, const void *key,
+ uint32_t *prim_bkt, uint32_t *sec_bkt, member_sig_t *sig)
+{
+ uint32_t first_hash = MEMBER_HASH_FUNC(key, ss->key_len,
+ ss->prim_hash_seed);
+ uint32_t sec_hash = MEMBER_HASH_FUNC(&first_hash, sizeof(uint32_t),
+ ss->sec_hash_seed);
+	/*
+	 * We use the first hash value as the signature, and the second hash
+	 * value to derive the primary and secondary bucket locations.
+	 *
+	 * In non-cache mode, the lower bits give the primary bucket
+	 * location, and the secondary bucket location is the xor of the
+	 * primary bucket location and the signature. This is the
+	 * "partial-key cuckoo hashing" scheme proposed in B. Fan et al.,
+	 * "Cuckoo Filter: Practically Better Than Bloom". The benefit of
+	 * using xor is that the alternative bucket location can be derived
+	 * from just the current bucket location and the signature, which is
+	 * exactly what non-cache mode's eviction and deletion process needs,
+	 * without storing the alternative hash value or the full key.
+	 *
+	 * In cache mode, the lower bits give the primary bucket location and
+	 * the higher bits the secondary bucket location. Keys are simply
+	 * overwritten when a bucket is full, so no eviction chain is needed.
+	 * We do not use xor here because the lower and higher bits are more
+	 * independent hash values and thus give a slightly better table
+	 * load.
+	 */
+ *sig = first_hash;
+ if (ss->cache) {
+ *prim_bkt = sec_hash & ss->bucket_mask;
+ *sec_bkt = (sec_hash >> 16) & ss->bucket_mask;
+ } else {
+ *prim_bkt = sec_hash & ss->bucket_mask;
+ *sec_bkt = (*prim_bkt ^ *sig) & ss->bucket_mask;
+ }
+}
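+
+/*
+ * Illustrative sketch (not used by the library): with the xor scheme above,
+ * either bucket location can be recovered from the other one plus the
+ * signature, since ((prim ^ sig) ^ sig) & bucket_mask == prim. A
+ * hypothetical helper for non-cache mode:
+ */
+#ifdef RTE_MEMBER_HT_EXAMPLES /* hypothetical guard, example only */
+static inline uint32_t
+alt_bucket_index(const struct rte_member_setsum *ss, uint32_t cur_bkt,
+		member_sig_t sig)
+{
+	/* Works in both directions: prim -> sec and sec -> prim */
+	return (cur_bkt ^ sig) & ss->bucket_mask;
+}
+#endif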
+
+int
+rte_member_lookup_ht(const struct rte_member_setsum *ss,
+ const void *key, member_set_t *set_id)
+{
+ uint32_t prim_bucket, sec_bucket;
+ member_sig_t tmp_sig;
+ struct member_ht_bucket *buckets = ss->table;
+
+ *set_id = RTE_MEMBER_NO_MATCH;
+ get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
+
+ switch (ss->sig_cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+ case RTE_MEMBER_COMPARE_AVX2:
+ if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets,
+ set_id) ||
+ search_bucket_single_avx(sec_bucket, tmp_sig,
+ buckets, set_id))
+ return 1;
+ break;
+#endif
+ default:
+ if (search_bucket_single(prim_bucket, tmp_sig, buckets,
+ set_id) ||
+ search_bucket_single(sec_bucket, tmp_sig,
+ buckets, set_id))
+ return 1;
+ }
+
+ return 0;
+}
+
+uint32_t
+rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
+ const void **keys, uint32_t num_keys, member_set_t *set_id)
+{
+ uint32_t i;
+ uint32_t num_matches = 0;
+ struct member_ht_bucket *buckets = ss->table;
+ member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
+ uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
+ uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
+
+ for (i = 0; i < num_keys; i++) {
+ get_buckets_index(ss, keys[i], &prim_buckets[i],
+ &sec_buckets[i], &tmp_sig[i]);
+ rte_prefetch0(&buckets[prim_buckets[i]]);
+ rte_prefetch0(&buckets[sec_buckets[i]]);
+ }
+
+ for (i = 0; i < num_keys; i++) {
+ switch (ss->sig_cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+ case RTE_MEMBER_COMPARE_AVX2:
+ if (search_bucket_single_avx(prim_buckets[i],
+ tmp_sig[i], buckets, &set_id[i]) ||
+ search_bucket_single_avx(sec_buckets[i],
+ tmp_sig[i], buckets, &set_id[i]))
+ num_matches++;
+ else
+ set_id[i] = RTE_MEMBER_NO_MATCH;
+ break;
+#endif
+ default:
+ if (search_bucket_single(prim_buckets[i], tmp_sig[i],
+ buckets, &set_id[i]) ||
+ search_bucket_single(sec_buckets[i],
+ tmp_sig[i], buckets, &set_id[i]))
+ num_matches++;
+ else
+ set_id[i] = RTE_MEMBER_NO_MATCH;
+ }
+ }
+ return num_matches;
+}
+
+uint32_t
+rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
+ const void *key, uint32_t match_per_key,
+ member_set_t *set_id)
+{
+ uint32_t num_matches = 0;
+ uint32_t prim_bucket, sec_bucket;
+ member_sig_t tmp_sig;
+ struct member_ht_bucket *buckets = ss->table;
+
+ get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
+
+ switch (ss->sig_cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+ case RTE_MEMBER_COMPARE_AVX2:
+ search_bucket_multi_avx(prim_bucket, tmp_sig, buckets,
+ &num_matches, match_per_key, set_id);
+ if (num_matches < match_per_key)
+ search_bucket_multi_avx(sec_bucket, tmp_sig,
+ buckets, &num_matches, match_per_key, set_id);
+ return num_matches;
+#endif
+ default:
+ search_bucket_multi(prim_bucket, tmp_sig, buckets, &num_matches,
+ match_per_key, set_id);
+ if (num_matches < match_per_key)
+ search_bucket_multi(sec_bucket, tmp_sig,
+ buckets, &num_matches, match_per_key, set_id);
+ return num_matches;
+ }
+}
+
+uint32_t
+rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,
+ const void **keys, uint32_t num_keys, uint32_t match_per_key,
+ uint32_t *match_count,
+ member_set_t *set_ids)
+{
+ uint32_t i;
+ uint32_t num_matches = 0;
+ struct member_ht_bucket *buckets = ss->table;
+ uint32_t match_cnt_tmp;
+ member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
+ uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
+ uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
+
+ for (i = 0; i < num_keys; i++) {
+ get_buckets_index(ss, keys[i], &prim_buckets[i],
+ &sec_buckets[i], &tmp_sig[i]);
+ rte_prefetch0(&buckets[prim_buckets[i]]);
+ rte_prefetch0(&buckets[sec_buckets[i]]);
+ }
+ for (i = 0; i < num_keys; i++) {
+ match_cnt_tmp = 0;
+
+ switch (ss->sig_cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+ case RTE_MEMBER_COMPARE_AVX2:
+ search_bucket_multi_avx(prim_buckets[i], tmp_sig[i],
+ buckets, &match_cnt_tmp, match_per_key,
+ &set_ids[i*match_per_key]);
+ if (match_cnt_tmp < match_per_key)
+ search_bucket_multi_avx(sec_buckets[i],
+ tmp_sig[i], buckets, &match_cnt_tmp,
+ match_per_key,
+ &set_ids[i*match_per_key]);
+ match_count[i] = match_cnt_tmp;
+ if (match_cnt_tmp != 0)
+ num_matches++;
+ break;
+#endif
+ default:
+ search_bucket_multi(prim_buckets[i], tmp_sig[i],
+ buckets, &match_cnt_tmp, match_per_key,
+ &set_ids[i*match_per_key]);
+ if (match_cnt_tmp < match_per_key)
+ search_bucket_multi(sec_buckets[i], tmp_sig[i],
+ buckets, &match_cnt_tmp, match_per_key,
+ &set_ids[i*match_per_key]);
+ match_count[i] = match_cnt_tmp;
+ if (match_cnt_tmp != 0)
+ num_matches++;
+ }
+ }
+ return num_matches;
+}
+
+static inline int
+try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
+ member_sig_t sig, member_set_t set_id)
+{
+ int i;
+	/* If the primary bucket has a free slot, insert there */
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
+ if (buckets[prim].sets[i] == RTE_MEMBER_NO_MATCH) {
+ buckets[prim].sigs[i] = sig;
+ buckets[prim].sets[i] = set_id;
+ return 0;
+ }
+ }
+	/* Primary bucket full, try the secondary bucket */
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
+ if (buckets[sec].sets[i] == RTE_MEMBER_NO_MATCH) {
+ buckets[sec].sigs[i] = sig;
+ buckets[sec].sets[i] = set_id;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+static inline int
+try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
+ member_sig_t sig, member_set_t set_id,
+ enum rte_member_sig_compare_function cmp_fn)
+{
+ switch (cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+ case RTE_MEMBER_COMPARE_AVX2:
+ if (update_entry_search_avx(prim, sig, buckets, set_id) ||
+ update_entry_search_avx(sec, sig, buckets,
+ set_id))
+ return 0;
+ break;
+#endif
+ default:
+ if (update_entry_search(prim, sig, buckets, set_id) ||
+ update_entry_search(sec, sig, buckets,
+ set_id))
+ return 0;
+ }
+ return -1;
+}
+
+static inline int
+evict_from_bucket(void)
+{
+	/*
+	 * For now, we randomly pick one entry to evict. The mask trick below
+	 * relies on RTE_MEMBER_BUCKET_ENTRIES being a power of two, which
+	 * rte_member_create_ht() validates.
+	 */
+ return rte_rand() & (RTE_MEMBER_BUCKET_ENTRIES - 1);
+}
+
+/*
+ * This function is similar to the cuckoo hash make_space_bucket() function
+ * in the rte_hash library.
+ */
+static inline int
+make_space_bucket(const struct rte_member_setsum *ss, uint32_t bkt_idx,
+ unsigned int *nr_pushes)
+{
+ unsigned int i, j;
+ int ret;
+ struct member_ht_bucket *buckets = ss->table;
+ uint32_t next_bucket_idx;
+ struct member_ht_bucket *next_bkt[RTE_MEMBER_BUCKET_ENTRIES];
+ struct member_ht_bucket *bkt = &buckets[bkt_idx];
+	/* The MSB of the set id is a flag marking an entry already pushed */
+ member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);
+
+	/*
+	 * Try to push an existing item to its alternative location: search
+	 * this bucket for an entry whose alternative bucket has a free slot.
+	 */
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
+ /* Search for space in alternative locations */
+ next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
+ next_bkt[i] = &buckets[next_bucket_idx];
+ for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) {
+ if (next_bkt[i]->sets[j] == RTE_MEMBER_NO_MATCH)
+ break;
+ }
+
+ if (j != RTE_MEMBER_BUCKET_ENTRIES)
+ break;
+ }
+
+	/* An alternative location has a spare slot (ends the recursion) */
+ if (i != RTE_MEMBER_BUCKET_ENTRIES) {
+ next_bkt[i]->sigs[j] = bkt->sigs[i];
+ next_bkt[i]->sets[j] = bkt->sets[i];
+ return i;
+ }
+
+ /* Pick entry that has not been pushed yet */
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++)
+ if ((bkt->sets[i] & flag_mask) == 0)
+ break;
+
+	/* Give up if every entry was already pushed or the push limit is hit */
+ if (i == RTE_MEMBER_BUCKET_ENTRIES ||
+ ++(*nr_pushes) > RTE_MEMBER_MAX_PUSHES)
+ return -ENOSPC;
+
+ next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
+ /* Set flag to indicate that this entry is going to be pushed */
+ bkt->sets[i] |= flag_mask;
+
+ /* Need room in alternative bucket to insert the pushed entry */
+ ret = make_space_bucket(ss, next_bucket_idx, nr_pushes);
+	/*
+	 * After the recursive call: clear the flag, then insert the pushed
+	 * entry into its alternative location on success, or return the
+	 * error.
+	 */
+ bkt->sets[i] &= ~flag_mask;
+ if (ret >= 0) {
+ next_bkt[i]->sigs[ret] = bkt->sigs[i];
+ next_bkt[i]->sets[ret] = bkt->sets[i];
+ return i;
+ } else
+ return ret;
+}
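+
+/*
+ * Worked displacement example (illustrative): suppose an insert targets a
+ * full bucket B0. make_space_bucket(B0) first checks each entry of B0 for
+ * an alternative bucket ((sig ^ B0) & bucket_mask) with a free slot and, if
+ * entry i qualifies, copies it there and hands slot i back to the caller.
+ * Otherwise one not-yet-pushed entry is flagged with flag_mask and the
+ * function recurses on that entry's alternative bucket, up to
+ * RTE_MEMBER_MAX_PUSHES levels, unwinding by moving each flagged entry into
+ * the slot freed one level deeper.
+ */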
+
+int
+rte_member_add_ht(const struct rte_member_setsum *ss,
+ const void *key, member_set_t set_id)
+{
+ int ret;
+ unsigned int nr_pushes = 0;
+ uint32_t prim_bucket, sec_bucket;
+ member_sig_t tmp_sig;
+ struct member_ht_bucket *buckets = ss->table;
+ member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);
+
+ if (set_id == RTE_MEMBER_NO_MATCH || (set_id & flag_mask) != 0)
+ return -EINVAL;
+
+ get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
+
+	/*
+	 * If it is a cache-based setsummary, we first try overwriting
+	 * (updating) an existing entry with the same signature. In cache
+	 * mode, false negatives are allowed and only the most recent keys
+	 * are kept.
+	 *
+	 * In non-cache mode, we do not update an existing entry with the
+	 * same signature, because if two keys with the same signature kept
+	 * updating each other, false negatives could occur, which is not
+	 * the expected behavior for a non-cache setsummary.
+	 */
+ if (ss->cache) {
+ ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig,
+ set_id, ss->sig_cmp_fn);
+ if (ret != -1)
+ return ret;
+ }
+	/* Insert into a free slot if either bucket has one */
+ ret = try_insert(buckets, prim_bucket, sec_bucket, tmp_sig, set_id);
+ if (ret != -1)
+ return ret;
+
+	/* Randomly pick the primary or secondary bucket for displacement */
+	uint32_t select_bucket = (tmp_sig & 1U) ? prim_bucket : sec_bucket;
+ if (ss->cache) {
+ ret = evict_from_bucket();
+ buckets[select_bucket].sigs[ret] = tmp_sig;
+ buckets[select_bucket].sets[ret] = set_id;
+ return 1;
+ }
+
+ ret = make_space_bucket(ss, select_bucket, &nr_pushes);
+ if (ret >= 0) {
+ buckets[select_bucket].sigs[ret] = tmp_sig;
+ buckets[select_bucket].sets[ret] = set_id;
+ ret = 1;
+ }
+
+ return ret;
+}
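+
+/*
+ * Usage sketch (illustrative only): applications normally use the public
+ * rte_member_add()/rte_member_lookup() wrappers rather than these internal
+ * *_ht functions. Assumes a setsummary created with
+ * key_len == sizeof(uint32_t).
+ */
+#ifdef RTE_MEMBER_HT_EXAMPLES /* hypothetical guard, example only */
+static int
+member_ht_add_lookup_example(const struct rte_member_setsum *ss)
+{
+	uint32_t key = 42;
+	member_set_t set;
+
+	if (rte_member_add_ht(ss, &key, 1) < 0)
+		return -1; /* table full (non-cache mode) or invalid set id */
+	if (rte_member_lookup_ht(ss, &key, &set))
+		return set; /* expect 1, unless evicted in cache mode */
+	return 0; /* not found */
+}
+#endif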
+
+void
+rte_member_free_ht(struct rte_member_setsum *ss)
+{
+ rte_free(ss->table);
+}
+
+int
+rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key,
+ member_set_t set_id)
+{
+ int i;
+ uint32_t prim_bucket, sec_bucket;
+ member_sig_t tmp_sig;
+ struct member_ht_bucket *buckets = ss->table;
+
+ get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
+
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
+ if (tmp_sig == buckets[prim_bucket].sigs[i] &&
+ set_id == buckets[prim_bucket].sets[i]) {
+ buckets[prim_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
+ return 0;
+ }
+ }
+
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
+ if (tmp_sig == buckets[sec_bucket].sigs[i] &&
+ set_id == buckets[sec_bucket].sets[i]) {
+ buckets[sec_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+void
+rte_member_reset_ht(const struct rte_member_setsum *ss)
+{
+ uint32_t i, j;
+ struct member_ht_bucket *buckets = ss->table;
+
+ for (i = 0; i < ss->bucket_cnt; i++) {
+ for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
+ buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
+ }
+}