Diffstat (limited to 'cicn-plugin/cicn/cicn_hashtb.c')
-rw-r--r--  cicn-plugin/cicn/cicn_hashtb.c | 1567
1 file changed, 1567 insertions(+), 0 deletions(-)
diff --git a/cicn-plugin/cicn/cicn_hashtb.c b/cicn-plugin/cicn/cicn_hashtb.c
new file mode 100644
index 00000000..e8b344fe
--- /dev/null
+++ b/cicn-plugin/cicn/cicn_hashtb.c
@@ -0,0 +1,1567 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cicn_hashtb.c: Fast-path, vpp-aware hashtable, the base for the PIT/CS and FIB
+ * used in the cicn forwarder.
+ *
+ * - As is the case in other areas, we can't share headers between the vpp code
+ * and the cicn/cndn code: there are conflicting definitions.
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <assert.h>
+#include <inttypes.h>
+
+#include <vlib/vlib.h>
+#include <vppinfra/pool.h>
+
+#include "cicn_hashtb.h"
+#include "cicn_siphash.h" /* Inline implementation of siphash-2-4 */
+#include "cicn_parser.h"
+
+/* return dvd/dvr, rounded up (intended for integer values) */
+#define CEIL(dvd, dvr) \
+({ \
+ __typeof__ (dvd) _dvd = (dvd); \
+ __typeof__ (dvr) _dvr = (dvr); \
+ (_dvd + _dvr - 1)/_dvr; \
+})
+
+#ifndef ALIGN8
+#define ALIGN8(p) (((p) + 0x7) & ~(0x7))
+#endif
+
+#ifndef ALIGNPTR8
+#define ALIGNPTR8(p) ((void *)(((uintptr_t)(p) + 0x7) & ~(uintptr_t)0x7))
+#endif
+
+#ifndef ALIGN64
+#define ALIGN64(p) (((p) + 0x3f) & ~(0x3f))
+#endif
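+
+/* For example, CEIL (10, 8) == 2, ALIGN8 (10) == 16, and ALIGN64 (100) == 128;
+ * these are used below to round table sizes up to convenient multiples.
+ */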
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/* Default hash seed for now; TODO: needs to be random/per-box eventually */
+unsigned char cicn_default_sip_seed[16] = {
+ 0x12, 0x34, 0x56, 0x78, 0x98, 0x76, 0x54, 0x32,
+ 0x12, 0x34, 0x56, 0x78, 0x98, 0x76, 0x54, 0x32,
+};
+
+/* Offset to aligned start of additional data (PIT/CS, FIB)
+ * embedded in each node.
+ */
+uint32_t ht_node_data_offset_aligned;
+
+/* Some support for posix vs vpp mem management */
+#define MEM_ALLOC(x) clib_mem_alloc_aligned((x), 8)
+#define MEM_FREE(p) clib_mem_free((p))
+
+/*
+ * Internal utilities
+ */
+
+/* Allocate an overflow bucket */
+static cicn_hash_bucket_t *
+alloc_overflow_bucket (cicn_hashtb_h h)
+{
+ cicn_hash_bucket_t *newbkt = NULL;
+
+ if (h->ht_overflow_buckets_used < h->ht_overflow_bucket_count)
+ {
+ pool_get_aligned (h->ht_overflow_buckets, newbkt, 8);
+
+ if (newbkt)
+ {
+ h->ht_overflow_buckets_used++;
+ }
+ }
+
+ return (newbkt);
+}
+
+/* Free an overflow bucket; clear caller's pointer */
+static void
+free_overflow_bucket (cicn_hashtb_h h, cicn_hash_bucket_t ** pb)
+{
+ cicn_hash_bucket_t *bkt = *pb;
+
+ ASSERT (h->ht_overflow_buckets_used > 0);
+
+ pool_put (h->ht_overflow_buckets, bkt);
+ h->ht_overflow_buckets_used--;
+ *pb = NULL;
+}
+
+/* Allocate an overflow key buffer */
+static cicn_hash_key_t *
+alloc_key_buf (cicn_hashtb_h h)
+{
+ cicn_hash_key_t *hk = NULL;
+
+ if (h->ht_keys_used < h->ht_key_count)
+ {
+ pool_get_aligned (h->ht_extra_keys, hk, 8);
+ if (hk)
+ {
+ h->ht_keys_used++;
+ }
+ }
+
+ return (hk);
+}
+
+/* for iterating over key chunks, get next chunk given current chunk */
+static inline cicn_hash_key_t *
+next_key_buf (cicn_hashtb_h h, const cicn_hash_key_t * hk_cur)
+{
+ cicn_hash_key_t *hk_next;
+
+ if (hk_cur == NULL)
+ {
+ return (NULL);
+ }
+ hk_next = (hk_cur->kl.idx_next == CICN_HASH_INVALID_IDX) ? NULL :
+ pool_elt_at_index (h->ht_extra_keys, hk_cur->kl.idx_next);
+
+ return (hk_next);
+}
+
+/* Free an overflow key buffer; clear caller's pointer. */
+static void
+free_key_buf (cicn_hashtb_h h, cicn_hash_key_t ** pkey)
+{
+ cicn_hash_key_t *k = *pkey;
+
+ ASSERT (h->ht_keys_used > 0);
+
+ pool_put (h->ht_extra_keys, k);
+ h->ht_keys_used--;
+
+ *pkey = NULL;
+}
+
+/*
+ * Init, allocate a new hashtable
+ */
+int
+cicn_hashtb_alloc (cicn_hashtb_h * ph, uint32_t max_elems,
+ size_t app_data_size)
+{
+ int ret = EINVAL;
+ cicn_hashtb_h h = NULL;
+ uint32_t count;
+ size_t sz;
+ cicn_hash_node_t *nodep;
+ cicn_hash_bucket_t *bucket;
+ cicn_hash_key_t *hkey;
+
+ if (ph == NULL)
+ {
+ goto done;
+ }
+
+ if (max_elems < CICN_HASHTB_MIN_ENTRIES ||
+ max_elems > CICN_HASHTB_MAX_ENTRIES)
+ {
+ goto done;
+ }
+
+ /* Allocate and init main hashtable struct */
+ h = MEM_ALLOC (sizeof (cicn_hashtb_t));
+ if (h == NULL)
+ {
+ ret = ENOMEM;
+ goto done;
+ }
+
+ memset (h, 0, sizeof (cicn_hashtb_t));
+
+ /* Compute main table bucket (row) count and size, and allocate */
+ count = ALIGN8 (CEIL (max_elems, CICN_HASHTB_FILL_FACTOR));
+
+ h->ht_bucket_count = count;
+
+ /* We _really_ expect to have buckets aligned on cache lines ... */
+ sz = sizeof (cicn_hash_bucket_t);
+ assert (sz == ALIGN64 (sz));
+
+ h->ht_buckets = MEM_ALLOC (count * sz);
+ if (h->ht_buckets == NULL)
+ {
+ ret = ENOMEM;
+ goto done;
+ }
+
+ memset (h->ht_buckets, 0, count * sz);
+
+ /* First time through, compute offset to aligned extra data start in
+ * each node struct
+ * it's crucial that both the node struct (that the base hashtable uses)
+ * and the extra data area (that's also probably a struct) are aligned.
+ */
+ if (ht_node_data_offset_aligned == 0)
+ {
+ count = STRUCT_OFFSET_OF (cicn_hash_node_t, hn_data);
+ ht_node_data_offset_aligned = ALIGN8 (count);
+ }
+
+  // check that the app data fits into the space provided (CICN_HASH_NODE_APP_DATA_SIZE)
+ uint32_t ht_node_data_size;
+ ht_node_data_size = sizeof (cicn_hash_node_t) - ht_node_data_offset_aligned;
+ if (app_data_size > ht_node_data_size)
+ {
+ clib_error
+ ("cicn hashtable: fatal error: requested app data size(%u) > hashtb node's configured bytes available(%u)",
+ app_data_size, ht_node_data_size);
+ }
+
+  /*
+   * Compute entry node count and size, then allocate the node pool.
+   * Allocate and 'hide' the zero-th node so we can use index zero as
+   * an 'empty' value.
+   */
+ pool_alloc_aligned (h->ht_nodes, max_elems, 8);
+ if (h->ht_nodes == NULL)
+ {
+ ret = ENOMEM;
+ goto done;
+ }
+
+ pool_get_aligned (h->ht_nodes, nodep, 8); // alloc node 0
+  (void) nodep;			/* Silence 'not used' warning */
+
+ h->ht_node_count = max_elems;
+ h->ht_nodes_used = 1;
+
+ /*
+ * Compute overflow bucket count and size, allocate
+ */
+ count = ALIGN8 (CEIL (max_elems, CICN_HASHTB_OVERFLOW_FRACTION));
+
+ pool_alloc_aligned (h->ht_overflow_buckets, count, 8);
+ if (h->ht_overflow_buckets == NULL)
+ {
+ ret = ENOMEM;
+ goto done;
+ }
+
+ /* 'Hide' the zero-th node so we can use zero as an 'empty' value */
+ pool_get_aligned (h->ht_overflow_buckets, bucket, 8);
+  (void) bucket;		/* Silence 'not used' warning */
+
+ h->ht_overflow_bucket_count = count;
+ h->ht_overflow_buckets_used = 1;
+
+ /*
+ * Compute overflow key buffer count and size, allocate
+ */
+ count = ALIGN8 (CEIL (max_elems, CICN_HASHTB_KEY_RATIO));
+
+ pool_alloc_aligned (h->ht_extra_keys, count, 8);
+ if (h->ht_extra_keys == NULL)
+ {
+ ret = ENOMEM;
+ goto done;
+ }
+
+ /* 'Hide' the zero-th node so we can use zero as an 'empty' value */
+ pool_get_aligned (h->ht_extra_keys, hkey, 8);
+  (void) hkey;			/* Silence 'not used' warning */
+
+ h->ht_key_count = count;
+ h->ht_keys_used = 1;
+
+ /* Success */
+ ret = AOK;
+
+done:
+
+ if (h)
+ {
+ if ((ret == AOK) && ph)
+ {
+ *ph = h;
+ }
+ else
+ {
+ cicn_hashtb_free (&h);
+ }
+ }
+
+ return (ret);
+}
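+
+/*
+ * Illustrative usage (a sketch, not part of this file: the element count
+ * and the app data type 'my_app_data_t' below are hypothetical):
+ *
+ *    cicn_hashtb_h h = NULL;
+ *    if (cicn_hashtb_alloc (&h, 4096, sizeof (my_app_data_t)) == AOK)
+ *      {
+ *        ... use the table ...
+ *        cicn_hashtb_free (&h);   // also clears 'h'
+ *      }
+ */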
+
+/*
+ * Free, de-allocate a hashtable
+ */
+int
+cicn_hashtb_free (cicn_hashtb_h * ph)
+{
+ int ret = 0;
+
+  if (ph && *ph)
+ {
+ if ((*ph)->ht_extra_keys)
+ {
+ pool_free ((*ph)->ht_extra_keys);
+ (*ph)->ht_extra_keys = 0;
+ }
+ if ((*ph)->ht_nodes)
+ {
+ pool_free ((*ph)->ht_nodes);
+ (*ph)->ht_nodes = 0;
+ }
+ if ((*ph)->ht_overflow_buckets)
+ {
+ pool_free ((*ph)->ht_overflow_buckets);
+ (*ph)->ht_overflow_buckets = 0;
+ }
+ if ((*ph)->ht_buckets)
+ {
+ MEM_FREE ((*ph)->ht_buckets);
+ (*ph)->ht_buckets = 0;
+ }
+ MEM_FREE (*ph);
+
+ *ph = NULL;
+ }
+
+ return (ret);
+}
+
+/*
+ * Hash a bytestring, using siphash-2-4.
+ */
+uint64_t
+cicn_hashtb_hash_bytestring (const uint8_t * in, uint32_t inlen)
+{
+ return (cicn_siphash (in, inlen, cicn_default_sip_seed));
+}
+
+/*
+ * Hash a name, using siphash-2-4. TODO -- want a table handle here?
+ */
+uint64_t
+cicn_hashtb_hash_name (const uint8_t * key, uint32_t keylen)
+{
+ if (key == NULL || keylen < CICN_TLV_HDR_LEN)
+ {
+ return (-1LL);
+ }
+ return (cicn_siphash (&key[CICN_TLV_HDR_LEN], keylen - CICN_TLV_HDR_LEN,
+ cicn_default_sip_seed));
+}
+
+/*
+ * Hash a name, returning hash values of its prefixes (for LPM, e.g.) in
+ * addition to (or instead of) the hash of the full name.
+ * - The hash of each prefix (by necessity) and of the full name (for
+ *   consistency) skips the name header tlv and starts at the first name
+ *   component tlv.
+ * - This version uses incremental hashing, i.e. a single pass over the
+ *   string, reusing the intermediate result for each prefix when
+ *   calculating the hash of the following prefix (rather than re-hashing
+ *   from the beginning of the bytestring for each successive prefix, as
+ *   the non-incremental version does).
+ * Args:
+ * - is_full_name:
+ *   - if true:
+ *     - 'name' points to the beginning of the entire name TLV;
+ *     - calculate the hash of the entire name, as well as of the prefixes.
+ *   - if false:
+ *     - 'name' points to the first name-comp sub-tlv;
+ *     - computing the full-name hash is not required, though currently
+ *       this version does compute it.
+ *       TODO: is avoiding that full hash a worthwhile savings?
+ * - limit: if 'limit' > 0, return at most 'limit' prefix hashes (capped
+ *   at the array size, CICN_HASHTB_MAX_NAME_COMPS).
+ */
+static inline int
+cicn_hashtb_hash_prefixes_incr (const uint8_t * name, uint16_t namelen,
+ int is_full_name, cicn_prefix_hashinf_t * pfx,
+ int limit)
+{
+ int ret = AOK;
+
+ cicn_siphash_hi_t hi_state;
+ uint64_t cur_hash = 0;
+
+ int comp_offset; // offset (from name_val) of comp
+
+ /* Must point to something, and it must be at least as long
+ * as an empty name or name-comp
+ */
+ if ((name == NULL) || (namelen < CICN_TLV_HDR_LEN) || (pfx == NULL))
+ {
+ ret = EINVAL;
+ goto done;
+ }
+
+ /* Establish sane limit on number of comps */
+ if (limit == 0 || limit > CICN_HASHTB_MAX_NAME_COMPS)
+ {
+ limit = CICN_HASHTB_MAX_NAME_COMPS;
+ }
+ pfx->pfx_overflow = 0;
+
+ // Capture tlv pointer and len in the context struct
+ // If leading name tlv (packet vs. fib prefix), skip it
+ if (is_full_name)
+ {
+ pfx->pfx_ptr = name + CICN_TLV_HDR_LEN;
+ pfx->pfx_len = namelen - CICN_TLV_HDR_LEN;
+ }
+ else
+ {
+ /* Capture tlv pointer and len in the context struct */
+ pfx->pfx_ptr = name;
+ pfx->pfx_len = namelen;
+ }
+
+ cicn_siphash_hi_initialize (&hi_state, cicn_default_sip_seed);
+
+ int comp_flen; // len of comp incl. hdr
+ int pfx_idx; // index into returned prefix arrays
+ for (comp_offset = 0, pfx_idx = 0;
+ comp_offset < pfx->pfx_len; comp_offset += comp_flen, pfx_idx++)
+ {
+
+ const unsigned char *comp; // pointer to component record (hdr)
+ int comp_type; // type of component record (from component (sub)-TLV)
+ uint16_t comp_vlen; // len of comp excl. hdr (component name/value len)
+ int pfx_len; // len of pfx (equal to offset of following comp)
+
+ comp = &pfx->pfx_ptr[comp_offset];
+ C_GETINT16 (comp_type, &comp[0]);
+ C_GETINT16 (comp_vlen, &comp[CICN_TLV_TYPE_LEN]);
+ comp_flen = CICN_TLV_HDR_LEN + comp_vlen;
+
+ pfx_len = comp_offset + comp_flen;
+ if (pfx_len > pfx->pfx_len)
+ {
+ ret = EINVAL;
+ goto done;
+ }
+
+ if (comp_type == CICN_NAME_COMP_CHUNK)
+ {
+	  /* assume FIB entries don't include chunk#: terminate partial hashes
+ * and proceed to full name hash
+ *
+ * for now, only chunk# (above) ends partial hashing, i.e. do not
+ * rule out creating and matching on FIB entries that include
+ * non-NameComponent components (that precede chunk#).
+ */
+ comp_flen = -1; /* end indicator */
+ pfx_len = pfx->pfx_len;
+ }
+ else if (pfx_idx >= limit)
+ { /* Are we out of partial hash slots? */
+	  /*
+	   * - no room in the arrays to save remaining hashes or, in the
+	   *   fib lookup case, no reason to calculate the remaining
+	   *   partial hashes
+	   * - do one last hash covering the full string, taking advantage
+	   *   of v_whole rather than starting from scratch; save it in
+	   *   name_hash as always (even though it will not be saved in
+	   *   the array in the overflow case).
+	   * - re-check for overflow below, i.e. after the efficient
+	   *   whole-string hash calculation, and break out of the loop there
+	   */
+ pfx->pfx_overflow = 1;
+ comp_flen = -1; /* end indicator */
+ pfx_len = pfx->pfx_len;
+ }
+
+ cur_hash =
+ cicn_siphash_hi_calculate (&hi_state, pfx->pfx_ptr, pfx_len,
+ comp_offset);
+
+ if (comp_flen < 0)
+ {
+ /*
+ * - No benefit to calculating more partial hashes.
+ * - In overflow case, no room to store results.
+ * - In chunk component case, partial hash not useful
+ * - Due to similar check above, full string's hash has been
+ * calculated, break out of loop with cur_hash and
+ * pfx_idx having right values for return arguments.
+ * Return appropriate rc:
+ * - if actually out of room, return overflow (callers may not
+ * need to know this)
+ * - otherwise, max requested depth (less than array size)
+ * reached, but that's okay (not currently used).
+ */
+ if (pfx_idx >= ARRAY_LEN (pfx->pfx_hashes))
+ {
+ ret = ENOSPC;
+ }
+ break;
+ }
+
+ pfx->pfx_lens[pfx_idx] = pfx_len;
+ pfx->pfx_hashes[pfx_idx] = cur_hash;
+ } /* for */
+
+  // pfx_idx is now the count (idx+1): normal loop exit happens after the
+  // for-loop increment, and loop breaks happen before the next array
+  // entry is filled in.
+ pfx->pfx_count = pfx_idx;
+
+ if (pfx_idx == 0)
+ { // case of empty name still has hash
+ cur_hash =
+ cicn_siphash_hi_calculate (&hi_state, pfx->pfx_ptr, pfx->pfx_len,
+ comp_offset);
+ }
+
+ pfx->pfx_full_hash = cur_hash;
+
+done:
+ return (ret);
+}
+
+int
+cicn_hashtb_hash_prefixes (const uint8_t * name, uint16_t namelen,
+ int is_full_name, cicn_prefix_hashinf_t * pfx,
+ int limit)
+{
+ int ret;
+
+ ret =
+ cicn_hashtb_hash_prefixes_incr (name, namelen, is_full_name, pfx, limit);
+
+ return (ret);
+}
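+
+/*
+ * Illustrative LPM-style usage (a sketch; 'pkt_name'/'pkt_name_len' are
+ * assumed to describe a full name TLV taken from a packet):
+ *
+ *    cicn_prefix_hashinf_t pfx;
+ *    if (cicn_hashtb_hash_prefixes (pkt_name, pkt_name_len,
+ *                                   TRUE, &pfx, 0) == AOK)
+ *      {
+ *        // pfx.pfx_count prefixes were hashed: pfx.pfx_hashes[i] and
+ *        // pfx.pfx_lens[i] give each prefix's hash and length, and
+ *        // pfx.pfx_full_hash covers the whole name; probe the FIB from
+ *        // the longest prefix down to the shortest.
+ *      }
+ */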
+
+/*
+ * Basic api to lookup a specific hash+key tuple. This does the entire
+ * lookup operation, retrieving node structs and comparing keys,
+ * so it's not optimized for prefetching or high performance.
+ *
+ * Returns zero and hands back a node via 'nodep' on success, errno otherwise.
+ */
+int
+cicn_hashtb_lookup_node (cicn_hashtb_h h, const uint8_t * key,
+ uint32_t keylen, uint64_t hashval,
+ cicn_hash_node_t ** nodep)
+{
+ return (cicn_hashtb_lookup_node_ex
+ (h, key, keylen, hashval, FALSE /*deleted nodes */ , nodep));
+}
+
+/*
+ * Extended api to lookup a specific hash+key tuple. The implementation
+ * allows the caller to locate nodes that are marked for deletion, which
+ * is part of some hashtable applications, such as the FIB.
+ *
+ * This does the entire lookup operation, retrieving node structs
+ * and comparing keys, so it's not optimized for prefetching or high performance.
+ *
+ * Returns zero and hands back a node via 'nodep' on success, errno otherwise.
+ */
+int
+cicn_hashtb_lookup_node_ex (cicn_hashtb_h h, const uint8_t * key,
+ uint32_t keylen, uint64_t hashval,
+ int include_deleted_p, cicn_hash_node_t ** nodep)
+{
+ int i, ret = EINVAL;
+ int found_p = FALSE;
+ uint32_t bidx;
+ cicn_hash_bucket_t *bucket;
+ cicn_hash_node_t *node;
+
+ /* Check args */
+ if ((h == NULL) || (key == NULL) || (keylen == 0))
+ {
+ goto done;
+ }
+
+ /* Use some bits of the low half of the hash
+ * to locate a row/bucket in the table
+ */
+ bidx = (hashval & (h->ht_bucket_count - 1));
+
+ bucket = h->ht_buckets + bidx;
+
+ /* Check the entries in the bucket for matching hash value */
+
+loop_buckets:
+
+ for (i = 0; i < CICN_HASHTB_BUCKET_ENTRIES; i++)
+ {
+
+ /* If an entry is marked for deletion, ignore it unless the caller
+ * explicitly wants these nodes.
+ */
+ if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_DELETED)
+ {
+ if (!include_deleted_p)
+ {
+ continue;
+ }
+ }
+
+ /* Be prepared to continue to an overflow bucket if necessary.
+ * We only expect the last entry in a bucket to refer to an overflow
+ * bucket...
+ */
+ if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1) &&
+ bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW)
+ {
+ bucket = pool_elt_at_index (h->ht_overflow_buckets,
+ bucket->hb_entries[i].he_node);
+ goto loop_buckets;
+ }
+
+ if (bucket->hb_entries[i].he_msb64 == hashval)
+ {
+ /* Found a candidate - must retrieve the actual node and
+ * check the key.
+ */
+ node = pool_elt_at_index (h->ht_nodes,
+ bucket->hb_entries[i].he_node);
+
+ /* Check the key itself; we've already checked the hash value */
+ ASSERT (node->hn_hash == hashval);
+
+ if (key && (keylen > 0))
+ {
+ if (keylen != node->hn_keysize)
+ {
+ continue;
+ }
+
+ if (keylen <= CICN_HASH_KEY_BYTES)
+ {
+ if (memcmp (key, node->hn_key.ks.key, keylen) != 0)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ int key_bseen; // bytes processed/compared equal
+ int kc_bytes;
+ const cicn_hash_key_t *kc; // key chunks
+ for (kc = &node->hn_key, key_bseen = 0; kc != NULL;
+ kc = next_key_buf (h, kc), key_bseen += kc_bytes)
+ {
+ int key_bleft = keylen - key_bseen;
+ kc_bytes = (key_bleft <= CICN_HASH_KEY_LIST_BYTES) ?
+ key_bleft : CICN_HASH_KEY_LIST_BYTES;
+ if (memcmp (&key[key_bseen], kc->kl.key, kc_bytes))
+ {
+ break; // key chunk didn't match
+ }
+ }
+ if (kc != NULL)
+ { // found a mismatch before end of key
+ continue;
+ }
+ }
+ }
+
+ /* Found a match */
+ if (nodep)
+ {
+ *nodep = node;
+ }
+
+ found_p = TRUE;
+ break;
+ }
+ }
+
+ if (found_p)
+ {
+ ret = AOK;
+ }
+ else
+ {
+ ret = ENOENT;
+ }
+
+done:
+
+ return (ret);
+}
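+
+/*
+ * Illustrative lookup flow (a sketch; 'name'/'namelen' are assumed to
+ * describe a name TLV):
+ *
+ *    uint64_t hv = cicn_hashtb_hash_name (name, namelen);
+ *    cicn_hash_node_t *node = NULL;
+ *    if (cicn_hashtb_lookup_node (h, name, namelen, hv, &node) == AOK)
+ *      {
+ *        // 'node' is the matching entry; its app data area starts
+ *        // ht_node_data_offset_aligned bytes into the node
+ *      }
+ */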
+
+/* Compute hash node index from node pointer */
+#define NODE_IDX_FROM_NODE(p, h) \
+ ((p) - ((h)->ht_nodes))
+
+/*
+ * Utility to init a new entry in a hashtable bucket/row. We use this
+ * to add a new node+hash, and to clear out an entry during removal.
+ */
+void
+cicn_hashtb_init_entry (cicn_hash_entry_t * entry, uint32_t nodeidx,
+ uint64_t hashval)
+{
+ entry->he_msb64 = hashval;
+ entry->he_node = nodeidx;
+
+ /* Clear out some other fields in the entry */
+ entry->he_flags = 0;
+ entry->he_timeout = 0;
+}
+
+/*
+ * Insert a node into the hashtable. We expect the caller has a) computed
+ * the hash value to use, b) initialized the node with the hash and key info,
+ * and c) filled in its app-specific data portion of the node.
+ */
+int
+cicn_hashtb_insert (cicn_hashtb_h h, cicn_hash_node_t * node)
+{
+ int i, ret = EINVAL;
+ uint32_t bidx;
+ cicn_hash_bucket_t *bucket, *newbkt;
+ int use_seven;
+
+ if (h == NULL)
+ {
+ goto done;
+ }
+
+ /* Use some bits of the low half of the hash
+ * to locate a row/bucket in the table
+ */
+ bidx = (node->hn_hash & (h->ht_bucket_count - 1));
+
+ bucket = h->ht_buckets + bidx;
+
+ use_seven = (h->ht_flags & CICN_HASHTB_FLAG_USE_SEVEN);
+
+ /* Locate a free entry slot in the bucket */
+
+loop_buckets:
+
+ for (i = 0; i < CICN_HASHTB_BUCKET_ENTRIES; i++)
+ {
+
+ /*
+ * If an entry is marked for deletion, ignore it
+ */
+ if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_DELETED)
+ {
+ continue;
+ }
+
+ if ((bucket->hb_entries[i].he_msb64 == 0LL) &&
+ (bucket->hb_entries[i].he_node == 0))
+ {
+ /* Found a candidate -- fill it in */
+
+ /* Special case if the application asked not to use the last
+ * entry in each bucket.
+ */
+ if ((i != (CICN_HASHTB_BUCKET_ENTRIES - 1)) || !use_seven)
+ {
+
+ cicn_hashtb_init_entry (&(bucket->hb_entries[i]),
+ NODE_IDX_FROM_NODE (node, h),
+ node->hn_hash);
+
+ ret = AOK;
+ break;
+ }
+ }
+
+ /* Be prepared to continue to an overflow bucket if necessary,
+ * or to add a new overflow bucket.
+ * We only expect the last entry in a bucket to refer to an overflow
+ * bucket...
+ */
+ if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1))
+ {
+ if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW)
+ {
+
+ /* Existing overflow bucket - re-start the search loop */
+ bucket = pool_elt_at_index (h->ht_overflow_buckets,
+ bucket->hb_entries[i].he_node);
+ goto loop_buckets;
+
+ }
+ else
+ {
+ /* Overflow - reached the end of a bucket without finding a
+ * free entry slot. Need to allocate an overflow bucket, and
+ * connect it to this bucket.
+ */
+ newbkt = alloc_overflow_bucket (h);
+ if (newbkt == NULL)
+ {
+ ret = ENOMEM;
+ goto done;
+ }
+
+ /* We're touching some more bytes than we absolutely have to
+ * here, but ... that seems ok.
+ */
+ memset (newbkt, 0, sizeof (cicn_hash_bucket_t));
+
+ if (!use_seven)
+ {
+ /* Copy existing entry into new bucket - we really expect
+ * these to be properly aligned so they can be treated as
+ * ints. TODO -- do in 8-byte assignments?
+ */
+ memcpy (&(newbkt->hb_entries[0]),
+ &(bucket->hb_entries[i]),
+ sizeof (cicn_hash_entry_t));
+ }
+
+ /* Connect original bucket to the index of the
+ * new overflow bucket
+ */
+ bucket->hb_entries[i].he_flags |= CICN_HASH_ENTRY_FLAG_OVERFLOW;
+ bucket->hb_entries[i].he_node =
+ (newbkt - h->ht_overflow_buckets);
+
+ /* Add new entry to new overflow bucket */
+ bucket = newbkt;
+
+ /* Use entry [1] in the new bucket _if_ we just copied into
+ * entry [zero] above.
+ */
+ if (!use_seven)
+ {
+
+ cicn_hashtb_init_entry (&(bucket->hb_entries[1]),
+ NODE_IDX_FROM_NODE (node, h),
+ node->hn_hash);
+ }
+ else
+ {
+
+ cicn_hashtb_init_entry (&(bucket->hb_entries[0]),
+ NODE_IDX_FROM_NODE (node, h),
+ node->hn_hash);
+ }
+
+ /* And we're done with the overflow bucket */
+ ret = AOK;
+ break;
+ }
+ }
+ }
+
+done:
+
+ return (ret);
+}
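+
+/*
+ * Illustrative insert flow (a sketch: the node is taken straight from the
+ * table's node pool here; real callers may wrap this differently):
+ *
+ *    cicn_hash_node_t *node;
+ *    pool_get_aligned (h->ht_nodes, node, 8);
+ *    h->ht_nodes_used++;
+ *    uint64_t hv = cicn_hashtb_hash_name (name, namelen);
+ *    if (cicn_hashtb_init_node (h, node, hv, name, namelen) == AOK &&
+ *        cicn_hashtb_insert (h, node) == AOK)
+ *      {
+ *        // entry is live; fill in the app data portion of 'node'
+ *      }
+ */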
+
+/*
+ * Delete a node from a hashtable using the node itself, and delete/free
+ * the node. Caller's pointer is cleared on success.
+ */
+int
+cicn_hashtb_delete (cicn_hashtb_h h, cicn_hash_node_t ** pnode)
+{
+ int ret = EINVAL;
+
+ if ((h == NULL) || (pnode == NULL) || (*pnode == NULL))
+ {
+ goto done;
+ }
+
+ ret = cicn_hashtb_remove_node (h, *pnode);
+ if (ret == AOK)
+ {
+ cicn_hashtb_free_node (h, *pnode);
+ *pnode = NULL;
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Delete an entry from a hashtable using the node itself
+ */
+int
+cicn_hashtb_remove_node (cicn_hashtb_h h, cicn_hash_node_t * node)
+{
+ int i, count, ret = EINVAL;
+ uint32_t bidx, overflow_p, nodeidx;
+ cicn_hash_bucket_t *bucket, *parent;
+
+ if ((h == NULL) || (node == NULL))
+ {
+ goto done;
+ }
+
+ /* Use some bits of the low half of the hash
+ * to locate a row/bucket in the table
+ */
+ bidx = (node->hn_hash & (h->ht_bucket_count - 1));
+
+ nodeidx = NODE_IDX_FROM_NODE (node, h);
+
+ bucket = h->ht_buckets + bidx;
+
+ overflow_p = FALSE;
+
+loop_buckets:
+
+ for (i = 0; i < CICN_HASHTB_BUCKET_ENTRIES; i++)
+ {
+ /* Note that we do consider entries that are marked for
+ * delete here, unlike some other operations.
+ */
+
+ /* Be prepared to continue to an overflow bucket if necessary.
+ * We only expect the last entry in a bucket to refer to an overflow
+ * bucket...
+ */
+ if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1) &&
+ bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW)
+ {
+
+ bucket = pool_elt_at_index (h->ht_overflow_buckets,
+ bucket->hb_entries[i].he_node);
+
+ overflow_p = TRUE;
+
+ goto loop_buckets;
+ }
+
+ /* Look for an entry carrying this node index */
+ if (nodeidx == bucket->hb_entries[i].he_node)
+ {
+ ret = AOK;
+ break;
+ }
+ }
+
+ /* If we didn't find the matching entry, we're done */
+ if (ret != AOK)
+ {
+ ret = ENOENT;
+ goto done;
+ }
+
+ /* Clear out the entry. */
+ cicn_hashtb_init_entry (&(bucket->hb_entries[i]), 0, 0LL);
+
+ if (!overflow_p)
+ {
+ /* And we're done, in the easy case where we didn't change an
+ * overflow bucket
+ */
+ goto done;
+ }
+
+ /* The special case: if this is the last remaining entry in an
+ * overflow bucket, liberate the bucket. That in turn has a special
+ * case if this bucket is in the middle of a chain of overflow buckets.
+ *
+ * Note that we're not trying aggressively (yet) to condense buckets
+ * at every possible opportunity.
+ */
+
+ /* Reset this flag; we'll set it again if this bucket links to another */
+ overflow_p = FALSE;
+
+ for (i = 0, count = 0; i < CICN_HASHTB_BUCKET_ENTRIES; i++)
+ {
+ if (bucket->hb_entries[i].he_node != 0)
+ {
+ count++;
+ }
+
+ if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1) &&
+ (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW))
+ {
+ count--; /* Doesn't count as a 'real' entry */
+ overflow_p = TRUE;
+ }
+ }
+
+ if (count > 0)
+ {
+ /* Still a (real) entry in the row */
+ goto done;
+ }
+
+ /* Need to locate the predecessor of 'bucket': start at
+ * the beginning of the chain of buckets and move forward
+ */
+ bidx = (node->hn_hash & (h->ht_bucket_count - 1));
+
+ for (parent = h->ht_buckets + bidx; parent != NULL;)
+ {
+
+ if ((parent->hb_entries[(CICN_HASHTB_BUCKET_ENTRIES - 1)].he_flags &
+ CICN_HASH_ENTRY_FLAG_OVERFLOW) == 0)
+ {
+ parent = NULL;
+ break;
+ }
+
+ bidx = parent->hb_entries[(CICN_HASHTB_BUCKET_ENTRIES - 1)].he_node;
+
+ if (pool_elt_at_index (h->ht_overflow_buckets, bidx) == bucket)
+ {
+ /* Found the predecessor of 'bucket'. If 'bucket' has a successor,
+	   * connect 'parent' to it, and take 'bucket' out of the middle.
+ */
+ if (overflow_p)
+ {
+ parent->hb_entries[(CICN_HASHTB_BUCKET_ENTRIES - 1)].he_node =
+ bucket->hb_entries[(CICN_HASHTB_BUCKET_ENTRIES - 1)].he_node;
+ }
+ else
+ {
+ /* Just clear the predecessor entry pointing at 'bucket' */
+ cicn_hashtb_init_entry (&parent->hb_entries
+ [(CICN_HASHTB_BUCKET_ENTRIES - 1)], 0,
+ 0LL);
+ }
+
+ break;
+ }
+
+ /* After the first iteration, 'parent' will be an overflow bucket too */
+ parent = pool_elt_at_index (h->ht_overflow_buckets, bidx);
+ }
+
+ /* We really expect to have found the predecessor */
+ ASSERT (parent != NULL);
+
+ /* And now, finally, we can put 'bucket' back on the free list */
+ free_overflow_bucket (h, &bucket);
+
+done:
+
+ return (ret);
+}
+
+/*
+ * Prepare a hashtable node, supplying the key, and computed hash info.
+ */
+int
+cicn_hashtb_init_node (cicn_hashtb_h h, cicn_hash_node_t * node,
+ uint64_t hashval, const uint8_t * key, uint32_t keylen)
+{
+ int ret = EINVAL;
+ uint32_t keyidx;
+ cicn_hash_key_t *hk, *prevhk;
+
+ assert (h != NULL);
+ assert (node != NULL);
+
+ if ((h == NULL) || (node == NULL))
+ {
+ goto done;
+ }
+
+ /* Init the node struct */
+ node->hn_hash = hashval;
+ node->hn_flags = CICN_HASH_NODE_FLAGS_DEFAULT;
+ node->hn_keysize = 0;
+
+ if (key && (keylen > 0))
+ {
+ if (keylen > CICN_PARAM_HASHTB_KEY_BYTES_MAX)
+ {
+ /* Whoops - key is too darn big */
+ ret = EINVAL;
+ goto done;
+ }
+
+ node->hn_keysize = keylen;
+
+ if (keylen <= CICN_HASH_KEY_BYTES)
+ {
+ /* Use the node's embedded key buffer */
+ memcpy (node->hn_key.ks.key, key, keylen);
+ }
+ else
+ {
+ /*
+ * Key is too large for the embedded buffer alone;
+ * must use a chain of key buffers; we capture the chain
+ * by index.
+ */
+ prevhk = NULL;
+ hk = &(node->hn_key);
+
+ do
+ {
+
+ /* Put new key buf index into previous buf */
+ if (prevhk)
+ {
+ /* Compute index of new key buf */
+ keyidx = hk - h->ht_extra_keys;
+ prevhk->kl.idx_next = keyidx;
+ }
+
+ /* Copy as much key material as we can */
+ if (keylen > CICN_HASH_KEY_LIST_BYTES)
+ {
+ memcpy (hk->kl.key, key, CICN_HASH_KEY_LIST_BYTES);
+ key += CICN_HASH_KEY_LIST_BYTES;
+ keylen -= CICN_HASH_KEY_LIST_BYTES;
+ }
+ else
+ {
+ /* Last piece of the key */
+ memcpy (hk->kl.key, key, keylen);
+ keylen = 0;
+
+ /* Terminate the chain of key buffers */
+ hk->kl.idx_next = CICN_HASH_INVALID_IDX;
+ break;
+ }
+
+ prevhk = hk;
+
+ hk = alloc_key_buf (h);
+
+ }
+ while (hk);
+
+ if (keylen > 0)
+ {
+ /* Whoops - failed to get enough key buffers */
+ ret = ENOMEM;
+ goto done;
+ }
+ }
+ }
+
+ ret = AOK;
+
+done:
+
+ return (ret);
+}
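+
+/*
+ * Key storage layout (a descriptive sketch of the scheme above):
+ *
+ *    keylen <= CICN_HASH_KEY_BYTES:
+ *        node->hn_key.ks.key holds the whole key.
+ *
+ *    keylen >  CICN_HASH_KEY_BYTES:
+ *        node->hn_key.kl.key holds the first CICN_HASH_KEY_LIST_BYTES;
+ *        node->hn_key.kl.idx_next indexes the next chunk in
+ *        h->ht_extra_keys, and so on, until a chunk's kl.idx_next is
+ *        CICN_HASH_INVALID_IDX.
+ */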
+
+/*
+ * Release a hashtable node back to the free list when an entry is cleared
+ */
+void
+cicn_hashtb_free_node (cicn_hashtb_h h, cicn_hash_node_t * node)
+{
+ uint32_t klen, keyidx;
+ cicn_hash_key_t *keyp;
+
+ ASSERT (h->ht_nodes_used > 0);
+
+ /* If there is a chain of key structs, need to free them also */
+ if (node->hn_keysize > CICN_HASH_KEY_BYTES)
+ {
+
+ /* Remaining key bytes (for consistency check) */
+ klen = node->hn_keysize - CICN_HASH_KEY_LIST_BYTES;
+
+ /* Walk along the chain of key buffers until we reach the end */
+ for (keyidx = node->hn_key.kl.idx_next; keyidx != CICN_HASH_INVALID_IDX;
+ /* can't step in iterator: keyp already freed */ )
+ {
+
+ keyp = pool_elt_at_index (h->ht_extra_keys, keyidx);
+ if (!keyp)
+ { // should not happen given valid keyidx
+ break;
+ }
+ keyidx = keyp->kl.idx_next;
+
+ /* Return the key buf to the free pool */
+ free_key_buf (h, &keyp);
+
+ /* Consistency checks on klen */
+ if (klen > CICN_HASH_KEY_LIST_BYTES)
+ {
+ klen -= CICN_HASH_KEY_LIST_BYTES;
+ ASSERT (keyidx != CICN_HASH_INVALID_IDX);
+ }
+ else
+ {
+ klen = 0;
+ ASSERT (keyidx == CICN_HASH_INVALID_IDX);
+ }
+ }
+ }
+
+ /* Return 'node' to the free list */
+ pool_put (h->ht_nodes, node);
+ h->ht_nodes_used--;
+
+}
+
+/*
+ * Walk a hashtable, iterating through the nodes, keeping context in 'ctx'.
+ */
+int
+cicn_hashtb_next_node (cicn_hashtb_h h, cicn_hash_node_t ** pnode,
+ uint64_t * ctx)
+{
+ int i, j, ret = EINVAL;
+ uint32_t bidx, entry;
+ cicn_hash_bucket_t *bucket;
+
+ if ((h == NULL) || (pnode == NULL) || (ctx == NULL))
+ {
+ goto done;
+ }
+
+ /* Special-case for new iteration */
+ if (*ctx == CICN_HASH_WALK_CTX_INITIAL)
+ {
+ bidx = 0;
+ bucket = &h->ht_buckets[0];
+ entry = 0;
+ j = 0;
+ i = 0;
+ goto search_table;
+ }
+
+ /* Convert context to bucket and entry indices */
+ bidx = *ctx & 0xffffffffLL;
+ entry = *ctx >> 32;
+
+ if (bidx >= h->ht_bucket_count)
+ {
+ ret = ENOENT;
+ goto done;
+ }
+
+ bucket = h->ht_buckets + bidx;
+
+ /* Init total index into entries (includes fixed bucket and overflow) */
+ j = 0;
+
+
+skip_processed_bucket_chunks:
+  /* Figure out where to resume the search for the next entry in
+   * the table, by trying to find the last entry returned, from the cookie.
+   * The loop walks one (regular or overflow) bucket chunk; the label is
+   * used for walking the chain of chunks.
+   * Note that if there was a deletion, or an addition that created an
+   * overflow, the iterator can skip entries or return duplicate entries,
+   * even entries that are present from before the walk starts until
+   * after it ends.
+   * TODO: Add a mechanism to break at bucket row boundaries, to avoid
+   * skipping/duplicating entries not changed during the walk.
+   */
+
+ for (i = 0; i < CICN_HASHTB_BUCKET_ENTRIES; i++, j++)
+ {
+ if (j > entry)
+ {
+ /*
+ * Start search for next here, use existing 'bucket' and 'i'
+ */
+ break;
+ }
+
+ /*
+ * If an entry is marked for deletion, ignore it
+ */
+ if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_DELETED)
+ {
+ continue;
+ }
+
+ /* Be prepared to continue to an overflow bucket if necessary.
+ * (We only expect the last entry in a bucket to refer to an overflow
+ * bucket...)
+ */
+ if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1))
+ {
+ if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW)
+ {
+ bucket = pool_elt_at_index (h->ht_overflow_buckets,
+ bucket->hb_entries[i].he_node);
+
+ /* Increment overall entry counter 'j' */
+ j++;
+
+ goto skip_processed_bucket_chunks;
+ }
+
+ /* end of row (end of fixed bucket plus any overflows) */
+ i = 0;
+ j = 0;
+
+ bidx++;
+
+ /* Special case - we're at the end */
+ if (bidx >= h->ht_bucket_count)
+ {
+ ret = ENOENT;
+ goto done;
+ }
+ bucket = h->ht_buckets + bidx;
+ break;
+ }
+ }
+
+search_table:
+
+ /* Now we're searching through the table for the next entry that's set */
+
+ for (; i < CICN_HASHTB_BUCKET_ENTRIES; i++, j++)
+ {
+ /*
+ * If an entry is marked for deletion, ignore it
+ */
+ if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_DELETED)
+ {
+ continue;
+ }
+
+ /* Is this entry set? */
+ if (bucket->hb_entries[i].he_node != 0)
+ {
+
+ /* Retrieve the node struct */
+ *pnode = pool_elt_at_index (h->ht_nodes,
+ bucket->hb_entries[i].he_node);
+
+ /* Set 'entry' as we exit, so we can update the cookie */
+ entry = j;
+ ret = AOK;
+ break;
+ }
+
+ /* Be prepared to continue to an overflow bucket if necessary.
+ * (We only expect the last entry in a bucket to refer to an overflow
+ * bucket...)
+ */
+ if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1))
+ {
+ if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW)
+ {
+ bucket = pool_elt_at_index (h->ht_overflow_buckets,
+ bucket->hb_entries[i].he_node);
+ /* Reset per-bucket index 'i', here (not done in iterator) */
+ i = 0;
+ /* Increment overall entry counter 'j' */
+ j++;
+
+ goto search_table;
+ }
+ else
+ {
+ /* Move to next bucket, resetting per-bucket and overall
+ * entry indexes
+ */
+ i = 0;
+ j = 0;
+
+ bidx++;
+
+ /* Special case - we're at the end */
+ if (bidx >= h->ht_bucket_count)
+ {
+ ret = ENOENT;
+ goto done;
+ }
+
+ bucket = h->ht_buckets + bidx;
+ goto search_table;
+ }
+ }
+ }
+
+done:
+
+ if (ret == AOK)
+ {
+ /* Update context */
+ *ctx = bidx;
+ *ctx |= ((uint64_t) entry << 32);
+ }
+
+ return (ret);
+}
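+
+/*
+ * Illustrative walk (a sketch): the cookie packs the bucket index in its
+ * low 32 bits and the entry index within the row in its high 32 bits,
+ * but callers can treat it as opaque:
+ *
+ *    uint64_t ctx = CICN_HASH_WALK_CTX_INITIAL;
+ *    cicn_hash_node_t *node;
+ *    while (cicn_hashtb_next_node (h, &node, &ctx) == AOK)
+ *      {
+ *        // visit 'node'; see the caveats above about concurrent
+ *        // adds/deletes causing skipped or duplicated entries
+ *      }
+ */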
+
+/*
+ * Update the per-entry compressed expiration value (and fast-timeout
+ * flag) for a hashtable node.
+ */
+int
+cicn_hashtb_entry_set_expiration (cicn_hashtb_h h,
+ cicn_hash_node_t * node,
+ uint16_t entry_timeout, uint8_t entry_flags)
+{
+ int i, ret = EINVAL;
+ uint32_t bidx, nodeidx;
+ cicn_hash_bucket_t *bucket;
+
+ if ((h == NULL) || (node == NULL))
+ {
+ goto done;
+ }
+
+ /* Use some bits of the low half of the hash
+ * to locate a row/bucket in the table
+ */
+ bidx = (node->hn_hash & (h->ht_bucket_count - 1));
+
+ nodeidx = NODE_IDX_FROM_NODE (node, h);
+
+ bucket = h->ht_buckets + bidx;
+
+loop_buckets:
+
+ for (i = 0; i < CICN_HASHTB_BUCKET_ENTRIES; i++)
+ {
+ /*
+ * If an entry is marked for deletion, ignore it
+ */
+ if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_DELETED)
+ {
+ continue;
+ }
+
+ /* Be prepared to continue to an overflow bucket if necessary.
+ * We only expect the last entry in a bucket to refer to an overflow
+ * bucket...
+ */
+ if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1) &&
+ bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW)
+ {
+
+ bucket = pool_elt_at_index (h->ht_overflow_buckets,
+ bucket->hb_entries[i].he_node);
+
+ goto loop_buckets;
+ }
+
+ /* Look for an entry carrying this node index */
+ if (nodeidx == bucket->hb_entries[i].he_node)
+ {
+ ret = AOK;
+ break;
+ }
+ }
+
+ /* If we didn't find the matching entry, we're done */
+ if (ret != AOK)
+ {
+ ret = ENOENT;
+ goto done;
+ }
+
+ /* Update the entry. */
+ bucket->hb_entries[i].he_timeout = entry_timeout;
+ if (entry_flags & CICN_HASH_ENTRY_FLAG_FAST_TIMEOUT)
+ {
+ bucket->hb_entries[i].he_flags |= CICN_HASH_ENTRY_FLAG_FAST_TIMEOUT;
+ }
+ else
+ {
+ bucket->hb_entries[i].he_flags &= ~CICN_HASH_ENTRY_FLAG_FAST_TIMEOUT;
+ }
+
+done:
+ return (ret);
+}
+
+int
+cicn_hashtb_key_to_buf (u8 ** vec_res, cicn_hashtb_h h,
+ const cicn_hash_node_t * node)
+{
+ int ret = AOK;
+ u8 *vec = *vec_res;
+
+ if (node->hn_keysize <= CICN_HASH_KEY_BYTES)
+ {
+ vec_add (vec, node->hn_key.ks.key, node->hn_keysize);
+ goto success;
+ }
+
+ const cicn_hash_key_t *kc;
+ for (kc = &node->hn_key; kc != NULL; kc = next_key_buf (h, kc))
+ {
+
+ int key_bleft = node->hn_keysize - vec_len (vec);
+ int kc_bytes = (key_bleft <= CICN_HASH_KEY_LIST_BYTES) ?
+ key_bleft : CICN_HASH_KEY_LIST_BYTES;
+ vec_add (vec, kc->kl.key, kc_bytes);
+ }
+
+success:
+
+ *vec_res = vec;
+ return (ret);
+}
+
+int
+cicn_hashtb_key_to_str (cicn_hashtb_h h, const cicn_hash_node_t * node,
+ char *buf, int bufsize, int must_fit)
+{
+ int ret = EINVAL;
+
+ u8 *kvec = NULL;
+ int bstr_len;
+
+ ret = cicn_hashtb_key_to_buf (&kvec, h, node);
+
+ if (h->ht_flags & CICN_HASHTB_FLAG_KEY_FMT_PFX)
+ {
+ ret =
+ cicn_parse_prefix_to_str (buf, bufsize, kvec, vec_len (kvec),
+ &bstr_len);
+ }
+ else if (h->ht_flags & CICN_HASHTB_FLAG_KEY_FMT_NAME)
+ {
+ ret =
+ cicn_parse_name_to_str (buf, bufsize, kvec, vec_len (kvec),
+ &bstr_len);
+ }
+ else
+ {
+ ret = EINVAL; // should never happen
+ }
+ if (ret != AOK)
+ {
+ goto err;
+ }
+
+ if (bstr_len >= bufsize)
+ {
+ if (must_fit)
+ {
+ ret = ENOSPC;
+ goto err;
+ }
+ if (bufsize < 4)
+ {
+ ret = ENOSPC;
+ goto err;
+ }
+ snprintf (&buf[bufsize - 4], 4, "...");
+ }
+
+ ret = AOK;
+
+err:
+ vec_free (kvec);
+  /* Belt-and-suspenders in case the to_str paths above aren't perfect:
+   * guarantee NUL termination. */
+ buf[bufsize - 1] = '\000';
+ return (ret);
+}