From dfd7ce27fea04c1a76844e21286c2b1d6653e153 Mon Sep 17 00:00:00 2001
From: Jim Gibson
Date: Mon, 20 Feb 2017 11:53:54 -0500
Subject: Initial Commit: VPP cicn VPP plugin

Change-Id: If1b965f0a4b7cfacda8f6caf6925072a9007ffb4
Signed-off-by: Jim Gibson
---
 cicn-plugin/cicn/cicn_hashtb.c | 1567 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1567 insertions(+)
 create mode 100644 cicn-plugin/cicn/cicn_hashtb.c

diff --git a/cicn-plugin/cicn/cicn_hashtb.c b/cicn-plugin/cicn/cicn_hashtb.c
new file mode 100644
index 00000000..e8b344fe
--- /dev/null
+++ b/cicn-plugin/cicn/cicn_hashtb.c
@@ -0,0 +1,1567 @@
/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * cicn_hashtb.c: fast-path, vpp-aware hashtable, the base for the PIT/CS
 * and FIB used in the cicn forwarder.
 *
 * - As is the case in other areas, we can't share headers between the vpp
 *   code and the cicn/cndn code: there are conflicting definitions.
 */

/* The #include targets below were lost in extraction (angle-bracket names
 * swallowed as HTML tags); standard C headers plus VPP's vlib/vppinfra
 * headers are assumed here. */
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
#include <string.h>

#include <vlib/vlib.h>
#include <vppinfra/pool.h>

#include "cicn_hashtb.h"
#include "cicn_siphash.h"	/* Inline implementation of siphash-2-4 */
#include "cicn_parser.h"

/* Return dvd/dvr, rounded up (intended for integer values) */
#define CEIL(dvd, dvr)                          \
({                                              \
  __typeof__ (dvd) _dvd = (dvd);                \
  __typeof__ (dvr) _dvr = (dvr);                \
  (_dvd + _dvr - 1)/_dvr;                       \
})

#ifndef ALIGN8
#define ALIGN8(p)    (((p) + 0x7) & ~(0x7))
#endif

#ifndef ALIGNPTR8
#define ALIGNPTR8(p) ((void *)(((uint8_t *)(p) + 0x7) & ~(0x7)))
#endif

#ifndef ALIGN64
#define ALIGN64(p)   (((p) + 0x3f) & ~(0x3f))
#endif

#ifndef TRUE
#define TRUE 1
#endif

#ifndef FALSE
#define FALSE 0
#endif

/* Default hash seed for now; TODO: needs to be random/per-box eventually */
unsigned char cicn_default_sip_seed[16] = {
  0x12, 0x34, 0x56, 0x78, 0x98, 0x76, 0x54, 0x32,
  0x12, 0x34, 0x56, 0x78, 0x98, 0x76, 0x54, 0x32,
};

/* Offset to aligned start of additional data (PIT/CS, FIB)
 * embedded in each node.
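 */

/*
 * Illustration (not from the original patch; the function name and values
 * are hypothetical): what the sizing macros above compute.
 */
static inline void
my_macro_examples (void)
{
  assert (CEIL (25, 10) == 3);	/* integer division, rounded up */
  assert (ALIGN8 (13) == 16);	/* round up to a multiple of 8 */
  assert (ALIGN8 (16) == 16);	/* already aligned: unchanged */
  assert (ALIGN64 (65) == 128);	/* cache-line (64-byte) granularity */
}

/*
 * (The global below caches that offset; it is computed once, by the first
 * call to cicn_hashtb_alloc.)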
+ */ +uint32_t ht_node_data_offset_aligned; + +/* Some support for posix vs vpp mem management */ +#define MEM_ALLOC(x) clib_mem_alloc_aligned((x), 8) +#define MEM_FREE(p) clib_mem_free((p)) + +/* + * Internal utilities + */ + +/* Allocate an overflow bucket */ +static cicn_hash_bucket_t * +alloc_overflow_bucket (cicn_hashtb_h h) +{ + cicn_hash_bucket_t *newbkt = NULL; + + if (h->ht_overflow_buckets_used < h->ht_overflow_bucket_count) + { + pool_get_aligned (h->ht_overflow_buckets, newbkt, 8); + + if (newbkt) + { + h->ht_overflow_buckets_used++; + } + } + + return (newbkt); +} + +/* Free an overflow bucket; clear caller's pointer */ +static void +free_overflow_bucket (cicn_hashtb_h h, cicn_hash_bucket_t ** pb) +{ + cicn_hash_bucket_t *bkt = *pb; + + ASSERT (h->ht_overflow_buckets_used > 0); + + pool_put (h->ht_overflow_buckets, bkt); + h->ht_overflow_buckets_used--; + *pb = NULL; +} + +/* Allocate an overflow key buffer */ +static cicn_hash_key_t * +alloc_key_buf (cicn_hashtb_h h) +{ + cicn_hash_key_t *hk = NULL; + + if (h->ht_keys_used < h->ht_key_count) + { + pool_get_aligned (h->ht_extra_keys, hk, 8); + if (hk) + { + h->ht_keys_used++; + } + } + + return (hk); +} + +/* for iterating over key chunks, get next chunk given current chunk */ +static inline cicn_hash_key_t * +next_key_buf (cicn_hashtb_h h, const cicn_hash_key_t * hk_cur) +{ + cicn_hash_key_t *hk_next; + + if (hk_cur == NULL) + { + return (NULL); + } + hk_next = (hk_cur->kl.idx_next == CICN_HASH_INVALID_IDX) ? NULL : + pool_elt_at_index (h->ht_extra_keys, hk_cur->kl.idx_next); + + return (hk_next); +} + +/* Free an overflow key buffer; clear caller's pointer. */ +static void +free_key_buf (cicn_hashtb_h h, cicn_hash_key_t ** pkey) +{ + cicn_hash_key_t *k = *pkey; + + ASSERT (h->ht_keys_used > 0); + + pool_put (h->ht_extra_keys, k); + h->ht_keys_used--; + + *pkey = NULL; +} + +/* + * Init, allocate a new hashtable + */ +int +cicn_hashtb_alloc (cicn_hashtb_h * ph, uint32_t max_elems, + size_t app_data_size) +{ + int ret = EINVAL; + cicn_hashtb_h h = NULL; + uint32_t count; + size_t sz; + cicn_hash_node_t *nodep; + cicn_hash_bucket_t *bucket; + cicn_hash_key_t *hkey; + + if (ph == NULL) + { + goto done; + } + + if (max_elems < CICN_HASHTB_MIN_ENTRIES || + max_elems > CICN_HASHTB_MAX_ENTRIES) + { + goto done; + } + + /* Allocate and init main hashtable struct */ + h = MEM_ALLOC (sizeof (cicn_hashtb_t)); + if (h == NULL) + { + ret = ENOMEM; + goto done; + } + + memset (h, 0, sizeof (cicn_hashtb_t)); + + /* Compute main table bucket (row) count and size, and allocate */ + count = ALIGN8 (CEIL (max_elems, CICN_HASHTB_FILL_FACTOR)); + + h->ht_bucket_count = count; + + /* We _really_ expect to have buckets aligned on cache lines ... */ + sz = sizeof (cicn_hash_bucket_t); + assert (sz == ALIGN64 (sz)); + + h->ht_buckets = MEM_ALLOC (count * sz); + if (h->ht_buckets == NULL) + { + ret = ENOMEM; + goto done; + } + + memset (h->ht_buckets, 0, count * sz); + + /* First time through, compute offset to aligned extra data start in + * each node struct + * it's crucial that both the node struct (that the base hashtable uses) + * and the extra data area (that's also probably a struct) are aligned. 
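 */

/*
 * A usage sketch (not part of the original patch; "my_app_entry_t" and
 * "my_table_init" are hypothetical names), assuming 64K lies within
 * CICN_HASHTB_MIN/MAX_ENTRIES: sizing and creating a table.
 */
typedef struct
{
  uint32_t faceid;
  uint16_t flags;
} my_app_entry_t;

static int
my_table_init (cicn_hashtb_h * ph)
{
  /* The app payload must fit each node's reserved data area; an oversized
   * payload is treated below as a fatal configuration error. */
  return (cicn_hashtb_alloc (ph, 64 * 1024, sizeof (my_app_entry_t)));
}

/*
 * (cicn_hashtb_alloc continues: on first use, compute the node struct's
 * aligned app-data offset.)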
+ */ + if (ht_node_data_offset_aligned == 0) + { + count = STRUCT_OFFSET_OF (cicn_hash_node_t, hn_data); + ht_node_data_offset_aligned = ALIGN8 (count); + } + + // check app struct fits into space provided (CICN_HASH_NODE_APP_DATA_SIZE) + uint32_t ht_node_data_size; + ht_node_data_size = sizeof (cicn_hash_node_t) - ht_node_data_offset_aligned; + if (app_data_size > ht_node_data_size) + { + clib_error + ("cicn hashtable: fatal error: requested app data size(%u) > hashtb node's configured bytes available(%u)", + app_data_size, ht_node_data_size); + } + + /* + * Compute entry node count and size, allocate + * Allocate/'Hide' the zero-th node so can use zero as an 'empty' value + */ + pool_alloc_aligned (h->ht_nodes, max_elems, 8); + if (h->ht_nodes == NULL) + { + ret = ENOMEM; + goto done; + } + + pool_get_aligned (h->ht_nodes, nodep, 8); // alloc node 0 + nodep = nodep; /* Silence 'not used' warning */ + + h->ht_node_count = max_elems; + h->ht_nodes_used = 1; + + /* + * Compute overflow bucket count and size, allocate + */ + count = ALIGN8 (CEIL (max_elems, CICN_HASHTB_OVERFLOW_FRACTION)); + + pool_alloc_aligned (h->ht_overflow_buckets, count, 8); + if (h->ht_overflow_buckets == NULL) + { + ret = ENOMEM; + goto done; + } + + /* 'Hide' the zero-th node so we can use zero as an 'empty' value */ + pool_get_aligned (h->ht_overflow_buckets, bucket, 8); + bucket = bucket; /* Silence 'not used' warning */ + + h->ht_overflow_bucket_count = count; + h->ht_overflow_buckets_used = 1; + + /* + * Compute overflow key buffer count and size, allocate + */ + count = ALIGN8 (CEIL (max_elems, CICN_HASHTB_KEY_RATIO)); + + pool_alloc_aligned (h->ht_extra_keys, count, 8); + if (h->ht_extra_keys == NULL) + { + ret = ENOMEM; + goto done; + } + + /* 'Hide' the zero-th node so we can use zero as an 'empty' value */ + pool_get_aligned (h->ht_extra_keys, hkey, 8); + hkey = hkey; /* Silence 'not used' warning */ + + h->ht_key_count = count; + h->ht_keys_used = 1; + + /* Success */ + ret = AOK; + +done: + + if (h) + { + if ((ret == AOK) && ph) + { + *ph = h; + } + else + { + cicn_hashtb_free (&h); + } + } + + return (ret); +} + +/* + * Free, de-allocate a hashtable + */ +int +cicn_hashtb_free (cicn_hashtb_h * ph) +{ + int ret = 0; + + if (ph) + { + if ((*ph)->ht_extra_keys) + { + pool_free ((*ph)->ht_extra_keys); + (*ph)->ht_extra_keys = 0; + } + if ((*ph)->ht_nodes) + { + pool_free ((*ph)->ht_nodes); + (*ph)->ht_nodes = 0; + } + if ((*ph)->ht_overflow_buckets) + { + pool_free ((*ph)->ht_overflow_buckets); + (*ph)->ht_overflow_buckets = 0; + } + if ((*ph)->ht_buckets) + { + MEM_FREE ((*ph)->ht_buckets); + (*ph)->ht_buckets = 0; + } + MEM_FREE (*ph); + + *ph = NULL; + } + + return (ret); +} + +/* + * Hash a bytestring, using siphash-2-4. + */ +uint64_t +cicn_hashtb_hash_bytestring (const uint8_t * in, uint32_t inlen) +{ + return (cicn_siphash (in, inlen, cicn_default_sip_seed)); +} + +/* + * Hash a name, using siphash-2-4. TODO -- want a table handle here? + */ +uint64_t +cicn_hashtb_hash_name (const uint8_t * key, uint32_t keylen) +{ + if (key == NULL || keylen < CICN_TLV_HDR_LEN) + { + return (-1LL); + } + return (cicn_siphash (&key[CICN_TLV_HDR_LEN], keylen - CICN_TLV_HDR_LEN, + cicn_default_sip_seed)); +} + +/* + * Hash a name, returning hash values of prefixes (for LPM, e.g.) in + * addition to (or instead of) hash of full name + * - Hash of prefixes (by necessity) and of full name (for consistency) + * skips the name header tlv and starts at the first name component tlv. 
+ * - version using incremental hashing, i.e. a single pass over string + * reusing the results for hashing each prefix in calculating the + * hash of the following prefix (rather than re-hashing from the + * beginning of the bytestring for each successive prefix as in the + * nonincr version). + * Args: + * - is_full_name: + * - if true + * - 'name' points to the beginning of the entire name TLV; + * - calculate hash of entire name, as well as prefixes + * - if false + * - name to the first name-comp sub-tlv + * - not required to compute the full-name hash, though currently + * this version does compute full-name hash. + * TODO: is avoiding that full hash a worthwhile savings? + * - limit: if 'limit' > 0, limit prefixes to less than array size (8) + */ +static inline int +cicn_hashtb_hash_prefixes_incr (const uint8_t * name, uint16_t namelen, + int is_full_name, cicn_prefix_hashinf_t * pfx, + int limit) +{ + int ret = AOK; + + cicn_siphash_hi_t hi_state; + uint64_t cur_hash = 0; + + int comp_offset; // offset (from name_val) of comp + + /* Must point to something, and it must be at least as long + * as an empty name or name-comp + */ + if ((name == NULL) || (namelen < CICN_TLV_HDR_LEN) || (pfx == NULL)) + { + ret = EINVAL; + goto done; + } + + /* Establish sane limit on number of comps */ + if (limit == 0 || limit > CICN_HASHTB_MAX_NAME_COMPS) + { + limit = CICN_HASHTB_MAX_NAME_COMPS; + } + pfx->pfx_overflow = 0; + + // Capture tlv pointer and len in the context struct + // If leading name tlv (packet vs. fib prefix), skip it + if (is_full_name) + { + pfx->pfx_ptr = name + CICN_TLV_HDR_LEN; + pfx->pfx_len = namelen - CICN_TLV_HDR_LEN; + } + else + { + /* Capture tlv pointer and len in the context struct */ + pfx->pfx_ptr = name; + pfx->pfx_len = namelen; + } + + cicn_siphash_hi_initialize (&hi_state, cicn_default_sip_seed); + + int comp_flen; // len of comp incl. hdr + int pfx_idx; // index into returned prefix arrays + for (comp_offset = 0, pfx_idx = 0; + comp_offset < pfx->pfx_len; comp_offset += comp_flen, pfx_idx++) + { + + const unsigned char *comp; // pointer to component record (hdr) + int comp_type; // type of component record (from component (sub)-TLV) + uint16_t comp_vlen; // len of comp excl. hdr (component name/value len) + int pfx_len; // len of pfx (equal to offset of following comp) + + comp = &pfx->pfx_ptr[comp_offset]; + C_GETINT16 (comp_type, &comp[0]); + C_GETINT16 (comp_vlen, &comp[CICN_TLV_TYPE_LEN]); + comp_flen = CICN_TLV_HDR_LEN + comp_vlen; + + pfx_len = comp_offset + comp_flen; + if (pfx_len > pfx->pfx_len) + { + ret = EINVAL; + goto done; + } + + if (comp_type == CICN_NAME_COMP_CHUNK) + { + /* assume FIB entries dont include chunk#: terminate partial hashes + * and proceed to full name hash + * + * for now, only chunk# (above) ends partial hashing, i.e. do not + * rule out creating and matching on FIB entries that include + * non-NameComponent components (that precede chunk#). + */ + comp_flen = -1; /* end indicator */ + pfx_len = pfx->pfx_len; + } + else if (pfx_idx >= limit) + { /* Are we out of partial hash slots? */ + /* + * - no room in arrays to save remaining hashes or, in + * fib lookup case, no reason to calculate remaining + * partial hashes + * - do one last hash covering full string, taking advantage + * of v_whole rather than starting from scratch, to save in + * name_hash as always (even though will not saved in array + * in overflow case). + * - re-check for overflow below, i.e. 
after efficient + * whole-string hash calculation, and break out of loop there + */ + pfx->pfx_overflow = 1; + comp_flen = -1; /* end indicator */ + pfx_len = pfx->pfx_len; + } + + cur_hash = + cicn_siphash_hi_calculate (&hi_state, pfx->pfx_ptr, pfx_len, + comp_offset); + + if (comp_flen < 0) + { + /* + * - No benefit to calculating more partial hashes. + * - In overflow case, no room to store results. + * - In chunk component case, partial hash not useful + * - Due to similar check above, full string's hash has been + * calculated, break out of loop with cur_hash and + * pfx_idx having right values for return arguments. + * Return appropriate rc: + * - if actually out of room, return overflow (callers may not + * need to know this) + * - otherwise, max requested depth (less than array size) + * reached, but that's okay (not currently used). + */ + if (pfx_idx >= ARRAY_LEN (pfx->pfx_hashes)) + { + ret = ENOSPC; + } + break; + } + + pfx->pfx_lens[pfx_idx] = pfx_len; + pfx->pfx_hashes[pfx_idx] = cur_hash; + } /* for */ + + // pfx_idx is now count (idx+1) (normal loop exit happens after + // for-loop increment and loop breaks happen before next array entry + // is filled in + pfx->pfx_count = pfx_idx; + + if (pfx_idx == 0) + { // case of empty name still has hash + cur_hash = + cicn_siphash_hi_calculate (&hi_state, pfx->pfx_ptr, pfx->pfx_len, + comp_offset); + } + + pfx->pfx_full_hash = cur_hash; + +done: + return (ret); +} + +int +cicn_hashtb_hash_prefixes (const uint8_t * name, uint16_t namelen, + int is_full_name, cicn_prefix_hashinf_t * pfx, + int limit) +{ + int ret; + + ret = + cicn_hashtb_hash_prefixes_incr (name, namelen, is_full_name, pfx, limit); + + return (ret); +} + +/* + * Basic api to lookup a specific hash+key tuple. This does the entire + * lookup operation, retrieving node structs and comparing keys, + * so it's not optimized for prefetching or high performance. + * + * Returns zero and mails back a node on success, errno otherwise. + */ +int +cicn_hashtb_lookup_node (cicn_hashtb_h h, const uint8_t * key, + uint32_t keylen, uint64_t hashval, + cicn_hash_node_t ** nodep) +{ + return (cicn_hashtb_lookup_node_ex + (h, key, keylen, hashval, FALSE /*deleted nodes */ , nodep)); +} + +/* + * Extended api to lookup a specific hash+key tuple. The implementation + * allows the caller to locate nodes that are marked for deletion, which + * is part of some hashtable applications, such as the FIB. + * + * This does the entire lookup operation, retrieving node structs + * and comparing keys, so it's not optimized for prefetching or high performance. + * + * Returns zero and mails back a node on success, errno otherwise. + */ +int +cicn_hashtb_lookup_node_ex (cicn_hashtb_h h, const uint8_t * key, + uint32_t keylen, uint64_t hashval, + int include_deleted_p, cicn_hash_node_t ** nodep) +{ + int i, ret = EINVAL; + int found_p = FALSE; + uint32_t bidx; + cicn_hash_bucket_t *bucket; + cicn_hash_node_t *node; + + /* Check args */ + if ((h == NULL) || (key == NULL) || (keylen == 0)) + { + goto done; + } + + /* Use some bits of the low half of the hash + * to locate a row/bucket in the table + */ + bidx = (hashval & (h->ht_bucket_count - 1)); + + bucket = h->ht_buckets + bidx; + + /* Check the entries in the bucket for matching hash value */ + +loop_buckets: + + for (i = 0; i < CICN_HASHTB_BUCKET_ENTRIES; i++) + { + + /* If an entry is marked for deletion, ignore it unless the caller + * explicitly wants these nodes. 
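 */

/*
 * A sketch (not from the original patch; "my_name_hashes" is hypothetical)
 * of driving the prefix hasher above for a full name TLV.
 */
static int
my_name_hashes (const uint8_t * name_tlv, uint16_t name_tlv_len,
		cicn_prefix_hashinf_t * pfx)
{
  /* limit 0 allows up to CICN_HASHTB_MAX_NAME_COMPS prefixes */
  int ret = cicn_hashtb_hash_prefixes (name_tlv, name_tlv_len,
				       TRUE /*is_full_name*/ , pfx,
				       0 /*limit*/ );
  if (ret != AOK && ret != ENOSPC)	/* ENOSPC: comps overflowed the arrays */
    {
      return (ret);
    }
  /* pfx->pfx_count prefix hashes are in pfx->pfx_hashes[] (prefix lengths
   * in pfx->pfx_lens[]); the whole name's hash is pfx->pfx_full_hash */
  return (AOK);
}

/*
 * (Back in the lookup loop: entries marked deleted are skipped unless the
 * caller asked to see them.)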
+ */ + if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_DELETED) + { + if (!include_deleted_p) + { + continue; + } + } + + /* Be prepared to continue to an overflow bucket if necessary. + * We only expect the last entry in a bucket to refer to an overflow + * bucket... + */ + if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1) && + bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW) + { + bucket = pool_elt_at_index (h->ht_overflow_buckets, + bucket->hb_entries[i].he_node); + goto loop_buckets; + } + + if (bucket->hb_entries[i].he_msb64 == hashval) + { + /* Found a candidate - must retrieve the actual node and + * check the key. + */ + node = pool_elt_at_index (h->ht_nodes, + bucket->hb_entries[i].he_node); + + /* Check the key itself; we've already checked the hash value */ + ASSERT (node->hn_hash == hashval); + + if (key && (keylen > 0)) + { + if (keylen != node->hn_keysize) + { + continue; + } + + if (keylen <= CICN_HASH_KEY_BYTES) + { + if (memcmp (key, node->hn_key.ks.key, keylen) != 0) + { + continue; + } + } + else + { + int key_bseen; // bytes processed/compared equal + int kc_bytes; + const cicn_hash_key_t *kc; // key chunks + for (kc = &node->hn_key, key_bseen = 0; kc != NULL; + kc = next_key_buf (h, kc), key_bseen += kc_bytes) + { + int key_bleft = keylen - key_bseen; + kc_bytes = (key_bleft <= CICN_HASH_KEY_LIST_BYTES) ? + key_bleft : CICN_HASH_KEY_LIST_BYTES; + if (memcmp (&key[key_bseen], kc->kl.key, kc_bytes)) + { + break; // key chunk didn't match + } + } + if (kc != NULL) + { // found a mismatch before end of key + continue; + } + } + } + + /* Found a match */ + if (nodep) + { + *nodep = node; + } + + found_p = TRUE; + break; + } + } + + if (found_p) + { + ret = AOK; + } + else + { + ret = ENOENT; + } + +done: + + return (ret); +} + +/* Compute hash node index from node pointer */ +#define NODE_IDX_FROM_NODE(p, h) \ + ((p) - ((h)->ht_nodes)) + +/* + * Utility to init a new entry in a hashtable bucket/row. We use this + * to add new a node+hash, and to clear out an entry during removal. + */ +void +cicn_hashtb_init_entry (cicn_hash_entry_t * entry, uint32_t nodeidx, + uint64_t hashval) +{ + entry->he_msb64 = hashval; + entry->he_node = nodeidx; + + /* Clear out some other fields in the entry */ + entry->he_flags = 0; + entry->he_timeout = 0; +} + +/* + * Insert a node into the hashtable. We expect the caller has a) computed + * the hash value to use, b) initialized the node with the hash and key info, + * and c) filled in its app-specific data portion of the node. + */ +int +cicn_hashtb_insert (cicn_hashtb_h h, cicn_hash_node_t * node) +{ + int i, ret = EINVAL; + uint32_t bidx; + cicn_hash_bucket_t *bucket, *newbkt; + int use_seven; + + if (h == NULL) + { + goto done; + } + + /* Use some bits of the low half of the hash + * to locate a row/bucket in the table + */ + bidx = (node->hn_hash & (h->ht_bucket_count - 1)); + + bucket = h->ht_buckets + bidx; + + use_seven = (h->ht_flags & CICN_HASHTB_FLAG_USE_SEVEN); + + /* Locate a free entry slot in the bucket */ + +loop_buckets: + + for (i = 0; i < CICN_HASHTB_BUCKET_ENTRIES; i++) + { + + /* + * If an entry is marked for deletion, ignore it + */ + if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_DELETED) + { + continue; + } + + if ((bucket->hb_entries[i].he_msb64 == 0LL) && + (bucket->hb_entries[i].he_node == 0)) + { + /* Found a candidate -- fill it in */ + + /* Special case if the application asked not to use the last + * entry in each bucket. 
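 */

/*
 * A lookup sketch (not from the original patch; "my_lookup_by_name" is
 * hypothetical), pairing the name hash with the full-key compare above.
 */
static int
my_lookup_by_name (cicn_hashtb_h h, const uint8_t * name, uint32_t namelen,
		   cicn_hash_node_t ** nodep)
{
  uint64_t hv = cicn_hashtb_hash_name (name, namelen);
  return (cicn_hashtb_lookup_node (h, name, namelen, hv, nodep));
}

/*
 * (With CICN_HASHTB_FLAG_USE_SEVEN, the last slot of each row is reserved
 * to link to an overflow bucket.)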
+ */ + if ((i != (CICN_HASHTB_BUCKET_ENTRIES - 1)) || !use_seven) + { + + cicn_hashtb_init_entry (&(bucket->hb_entries[i]), + NODE_IDX_FROM_NODE (node, h), + node->hn_hash); + + ret = AOK; + break; + } + } + + /* Be prepared to continue to an overflow bucket if necessary, + * or to add a new overflow bucket. + * We only expect the last entry in a bucket to refer to an overflow + * bucket... + */ + if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1)) + { + if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW) + { + + /* Existing overflow bucket - re-start the search loop */ + bucket = pool_elt_at_index (h->ht_overflow_buckets, + bucket->hb_entries[i].he_node); + goto loop_buckets; + + } + else + { + /* Overflow - reached the end of a bucket without finding a + * free entry slot. Need to allocate an overflow bucket, and + * connect it to this bucket. + */ + newbkt = alloc_overflow_bucket (h); + if (newbkt == NULL) + { + ret = ENOMEM; + goto done; + } + + /* We're touching some more bytes than we absolutely have to + * here, but ... that seems ok. + */ + memset (newbkt, 0, sizeof (cicn_hash_bucket_t)); + + if (!use_seven) + { + /* Copy existing entry into new bucket - we really expect + * these to be properly aligned so they can be treated as + * ints. TODO -- do in 8-byte assignments? + */ + memcpy (&(newbkt->hb_entries[0]), + &(bucket->hb_entries[i]), + sizeof (cicn_hash_entry_t)); + } + + /* Connect original bucket to the index of the + * new overflow bucket + */ + bucket->hb_entries[i].he_flags |= CICN_HASH_ENTRY_FLAG_OVERFLOW; + bucket->hb_entries[i].he_node = + (newbkt - h->ht_overflow_buckets); + + /* Add new entry to new overflow bucket */ + bucket = newbkt; + + /* Use entry [1] in the new bucket _if_ we just copied into + * entry [zero] above. + */ + if (!use_seven) + { + + cicn_hashtb_init_entry (&(bucket->hb_entries[1]), + NODE_IDX_FROM_NODE (node, h), + node->hn_hash); + } + else + { + + cicn_hashtb_init_entry (&(bucket->hb_entries[0]), + NODE_IDX_FROM_NODE (node, h), + node->hn_hash); + } + + /* And we're done with the overflow bucket */ + ret = AOK; + break; + } + } + } + +done: + + return (ret); +} + +/* + * Delete a node from a hashtable using the node itself, and delete/free + * the node. Caller's pointer is cleared on success. + */ +int +cicn_hashtb_delete (cicn_hashtb_h h, cicn_hash_node_t ** pnode) +{ + int ret = EINVAL; + + if ((h == NULL) || (pnode == NULL) || (*pnode == NULL)) + { + goto done; + } + + ret = cicn_hashtb_remove_node (h, *pnode); + if (ret == AOK) + { + cicn_hashtb_free_node (h, *pnode); + *pnode = NULL; + } + +done: + return (ret); +} + +/* + * Delete an entry from a hashtable using the node itself + */ +int +cicn_hashtb_remove_node (cicn_hashtb_h h, cicn_hash_node_t * node) +{ + int i, count, ret = EINVAL; + uint32_t bidx, overflow_p, nodeidx; + cicn_hash_bucket_t *bucket, *parent; + + if ((h == NULL) || (node == NULL)) + { + goto done; + } + + /* Use some bits of the low half of the hash + * to locate a row/bucket in the table + */ + bidx = (node->hn_hash & (h->ht_bucket_count - 1)); + + nodeidx = NODE_IDX_FROM_NODE (node, h); + + bucket = h->ht_buckets + bidx; + + overflow_p = FALSE; + +loop_buckets: + + for (i = 0; i < CICN_HASHTB_BUCKET_ENTRIES; i++) + { + /* Note that we do consider entries that are marked for + * delete here, unlike some other operations. + */ + + /* Be prepared to continue to an overflow bucket if necessary. + * We only expect the last entry in a bucket to refer to an overflow + * bucket... 
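 */

/*
 * An insertion sketch (not from the original patch; "my_store" is
 * hypothetical, and the node-pool bookkeeping mirrors what
 * cicn_hashtb_alloc does rather than any helper defined in this file).
 */
static int
my_store (cicn_hashtb_h h, const uint8_t * key, uint32_t keylen)
{
  cicn_hash_node_t *node;
  uint64_t hv;
  int ret;

  if (h->ht_nodes_used >= h->ht_node_count)
    {
      return (ENOSPC);		/* node pool exhausted */
    }
  pool_get_aligned (h->ht_nodes, node, 8);
  h->ht_nodes_used++;

  hv = cicn_hashtb_hash_bytestring (key, keylen);
  ret = cicn_hashtb_init_node (h, node, hv, key, keylen);
  if (ret != AOK)
    {
      return (ret);		/* error unwind elided in this sketch */
    }

  /* app-specific data would be filled in here (node->hn_data) */
  return (cicn_hashtb_insert (h, node));
}

/*
 * (cicn_hashtb_remove_node resumes: a row's last entry may chain onward to
 * an overflow bucket.)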
+ */ + if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1) && + bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW) + { + + bucket = pool_elt_at_index (h->ht_overflow_buckets, + bucket->hb_entries[i].he_node); + + overflow_p = TRUE; + + goto loop_buckets; + } + + /* Look for an entry carrying this node index */ + if (nodeidx == bucket->hb_entries[i].he_node) + { + ret = AOK; + break; + } + } + + /* If we didn't find the matching entry, we're done */ + if (ret != AOK) + { + ret = ENOENT; + goto done; + } + + /* Clear out the entry. */ + cicn_hashtb_init_entry (&(bucket->hb_entries[i]), 0, 0LL); + + if (!overflow_p) + { + /* And we're done, in the easy case where we didn't change an + * overflow bucket + */ + goto done; + } + + /* The special case: if this is the last remaining entry in an + * overflow bucket, liberate the bucket. That in turn has a special + * case if this bucket is in the middle of a chain of overflow buckets. + * + * Note that we're not trying aggressively (yet) to condense buckets + * at every possible opportunity. + */ + + /* Reset this flag; we'll set it again if this bucket links to another */ + overflow_p = FALSE; + + for (i = 0, count = 0; i < CICN_HASHTB_BUCKET_ENTRIES; i++) + { + if (bucket->hb_entries[i].he_node != 0) + { + count++; + } + + if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1) && + (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW)) + { + count--; /* Doesn't count as a 'real' entry */ + overflow_p = TRUE; + } + } + + if (count > 0) + { + /* Still a (real) entry in the row */ + goto done; + } + + /* Need to locate the predecessor of 'bucket': start at + * the beginning of the chain of buckets and move forward + */ + bidx = (node->hn_hash & (h->ht_bucket_count - 1)); + + for (parent = h->ht_buckets + bidx; parent != NULL;) + { + + if ((parent->hb_entries[(CICN_HASHTB_BUCKET_ENTRIES - 1)].he_flags & + CICN_HASH_ENTRY_FLAG_OVERFLOW) == 0) + { + parent = NULL; + break; + } + + bidx = parent->hb_entries[(CICN_HASHTB_BUCKET_ENTRIES - 1)].he_node; + + if (pool_elt_at_index (h->ht_overflow_buckets, bidx) == bucket) + { + /* Found the predecessor of 'bucket'. If 'bucket' has a successor, + * connect 'parent' to it, and take 'bucket out of the middle. + */ + if (overflow_p) + { + parent->hb_entries[(CICN_HASHTB_BUCKET_ENTRIES - 1)].he_node = + bucket->hb_entries[(CICN_HASHTB_BUCKET_ENTRIES - 1)].he_node; + } + else + { + /* Just clear the predecessor entry pointing at 'bucket' */ + cicn_hashtb_init_entry (&parent->hb_entries + [(CICN_HASHTB_BUCKET_ENTRIES - 1)], 0, + 0LL); + } + + break; + } + + /* After the first iteration, 'parent' will be an overflow bucket too */ + parent = pool_elt_at_index (h->ht_overflow_buckets, bidx); + } + + /* We really expect to have found the predecessor */ + ASSERT (parent != NULL); + + /* And now, finally, we can put 'bucket' back on the free list */ + free_overflow_bucket (h, &bucket); + +done: + + return (ret); +} + +/* + * Prepare a hashtable node, supplying the key, and computed hash info. 
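 */

/*
 * A removal sketch (not from the original patch; "my_remove" is
 * hypothetical): locate by hash+key, then unlink and free in one call.
 */
static int
my_remove (cicn_hashtb_h h, const uint8_t * key, uint32_t keylen)
{
  cicn_hash_node_t *node = NULL;
  uint64_t hv = cicn_hashtb_hash_bytestring (key, keylen);
  int ret = cicn_hashtb_lookup_node (h, key, keylen, hv, &node);

  if (ret == AOK)
    {
      ret = cicn_hashtb_delete (h, &node);	/* clears 'node' on success */
    }
  return (ret);
}

/*
 * (The node-preparation routine follows.)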
+ */ +int +cicn_hashtb_init_node (cicn_hashtb_h h, cicn_hash_node_t * node, + uint64_t hashval, const uint8_t * key, uint32_t keylen) +{ + int ret = EINVAL; + uint32_t keyidx; + cicn_hash_key_t *hk, *prevhk; + + assert (h != NULL); + assert (node != NULL); + + if ((h == NULL) || (node == NULL)) + { + goto done; + } + + /* Init the node struct */ + node->hn_hash = hashval; + node->hn_flags = CICN_HASH_NODE_FLAGS_DEFAULT; + node->hn_keysize = 0; + + if (key && (keylen > 0)) + { + if (keylen > CICN_PARAM_HASHTB_KEY_BYTES_MAX) + { + /* Whoops - key is too darn big */ + ret = EINVAL; + goto done; + } + + node->hn_keysize = keylen; + + if (keylen <= CICN_HASH_KEY_BYTES) + { + /* Use the node's embedded key buffer */ + memcpy (node->hn_key.ks.key, key, keylen); + } + else + { + /* + * Key is too large for the embedded buffer alone; + * must use a chain of key buffers; we capture the chain + * by index. + */ + prevhk = NULL; + hk = &(node->hn_key); + + do + { + + /* Put new key buf index into previous buf */ + if (prevhk) + { + /* Compute index of new key buf */ + keyidx = hk - h->ht_extra_keys; + prevhk->kl.idx_next = keyidx; + } + + /* Copy as much key material as we can */ + if (keylen > CICN_HASH_KEY_LIST_BYTES) + { + memcpy (hk->kl.key, key, CICN_HASH_KEY_LIST_BYTES); + key += CICN_HASH_KEY_LIST_BYTES; + keylen -= CICN_HASH_KEY_LIST_BYTES; + } + else + { + /* Last piece of the key */ + memcpy (hk->kl.key, key, keylen); + keylen = 0; + + /* Terminate the chain of key buffers */ + hk->kl.idx_next = CICN_HASH_INVALID_IDX; + break; + } + + prevhk = hk; + + hk = alloc_key_buf (h); + + } + while (hk); + + if (keylen > 0) + { + /* Whoops - failed to get enough key buffers */ + ret = ENOMEM; + goto done; + } + } + } + + ret = AOK; + +done: + + return (ret); +} + +/* + * Release a hashtable node back to the free list when an entry is cleared + */ +void +cicn_hashtb_free_node (cicn_hashtb_h h, cicn_hash_node_t * node) +{ + uint32_t klen, keyidx; + cicn_hash_key_t *keyp; + + ASSERT (h->ht_nodes_used > 0); + + /* If there is a chain of key structs, need to free them also */ + if (node->hn_keysize > CICN_HASH_KEY_BYTES) + { + + /* Remaining key bytes (for consistency check) */ + klen = node->hn_keysize - CICN_HASH_KEY_LIST_BYTES; + + /* Walk along the chain of key buffers until we reach the end */ + for (keyidx = node->hn_key.kl.idx_next; keyidx != CICN_HASH_INVALID_IDX; + /* can't step in iterator: keyp already freed */ ) + { + + keyp = pool_elt_at_index (h->ht_extra_keys, keyidx); + if (!keyp) + { // should not happen given valid keyidx + break; + } + keyidx = keyp->kl.idx_next; + + /* Return the key buf to the free pool */ + free_key_buf (h, &keyp); + + /* Consistency checks on klen */ + if (klen > CICN_HASH_KEY_LIST_BYTES) + { + klen -= CICN_HASH_KEY_LIST_BYTES; + ASSERT (keyidx != CICN_HASH_INVALID_IDX); + } + else + { + klen = 0; + ASSERT (keyidx == CICN_HASH_INVALID_IDX); + } + } + } + + /* Return 'node' to the free list */ + pool_put (h->ht_nodes, node); + h->ht_nodes_used--; + +} + +/* + * Walk a hashtable, iterating through the nodes, keeping context in 'ctx'. 
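 */

/*
 * Illustration (an assumption-laden sketch, not from the original patch):
 * how many chained key buffers a long key consumes.  Once a key overflows
 * the node's embedded CICN_HASH_KEY_BYTES buffer, the embedded buffer
 * holds CICN_HASH_KEY_LIST_BYTES and each extra buffer holds the same.
 */
static inline uint32_t
my_extra_key_bufs (uint32_t keylen)
{
  if (keylen <= CICN_HASH_KEY_BYTES)
    {
      return (0);		/* fits in the node itself */
    }
  return (CEIL (keylen, CICN_HASH_KEY_LIST_BYTES) - 1);
}

/*
 * (The iterator follows; 'ctx' packs the bucket index in the low half and
 * the entry position in the high half.)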
+ */ +int +cicn_hashtb_next_node (cicn_hashtb_h h, cicn_hash_node_t ** pnode, + uint64_t * ctx) +{ + int i, j, ret = EINVAL; + uint32_t bidx, entry; + cicn_hash_bucket_t *bucket; + + if ((h == NULL) || (pnode == NULL) || (ctx == NULL)) + { + goto done; + } + + /* Special-case for new iteration */ + if (*ctx == CICN_HASH_WALK_CTX_INITIAL) + { + bidx = 0; + bucket = &h->ht_buckets[0]; + entry = 0; + j = 0; + i = 0; + goto search_table; + } + + /* Convert context to bucket and entry indices */ + bidx = *ctx & 0xffffffffLL; + entry = *ctx >> 32; + + if (bidx >= h->ht_bucket_count) + { + ret = ENOENT; + goto done; + } + + bucket = h->ht_buckets + bidx; + + /* Init total index into entries (includes fixed bucket and overflow) */ + j = 0; + + +skip_processed_bucket_chunks: + /* Figure out where to resume the search for the next entry in + * the table, by trying to find the last entry returned, from the cookie. + * Loop walks one (regular or overflow) bucket chunk, label is used for + * walking chain of chunks. + * Note that if there was a deletion or an addition that created an + * overflow, iterator can skip entries or return duplicate entries, + * for entries that are present from before the walk starts until + * after it ends. + * TODO: Add mechanism to break at bucket row boundaries, to avoid + * skip/duplication of entries not changed during walk. + */ + + for (i = 0; i < CICN_HASHTB_BUCKET_ENTRIES; i++, j++) + { + if (j > entry) + { + /* + * Start search for next here, use existing 'bucket' and 'i' + */ + break; + } + + /* + * If an entry is marked for deletion, ignore it + */ + if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_DELETED) + { + continue; + } + + /* Be prepared to continue to an overflow bucket if necessary. + * (We only expect the last entry in a bucket to refer to an overflow + * bucket...) + */ + if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1)) + { + if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW) + { + bucket = pool_elt_at_index (h->ht_overflow_buckets, + bucket->hb_entries[i].he_node); + + /* Increment overall entry counter 'j' */ + j++; + + goto skip_processed_bucket_chunks; + } + + /* end of row (end of fixed bucket plus any overflows) */ + i = 0; + j = 0; + + bidx++; + + /* Special case - we're at the end */ + if (bidx >= h->ht_bucket_count) + { + ret = ENOENT; + goto done; + } + bucket = h->ht_buckets + bidx; + break; + } + } + +search_table: + + /* Now we're searching through the table for the next entry that's set */ + + for (; i < CICN_HASHTB_BUCKET_ENTRIES; i++, j++) + { + /* + * If an entry is marked for deletion, ignore it + */ + if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_DELETED) + { + continue; + } + + /* Is this entry set? */ + if (bucket->hb_entries[i].he_node != 0) + { + + /* Retrieve the node struct */ + *pnode = pool_elt_at_index (h->ht_nodes, + bucket->hb_entries[i].he_node); + + /* Set 'entry' as we exit, so we can update the cookie */ + entry = j; + ret = AOK; + break; + } + + /* Be prepared to continue to an overflow bucket if necessary. + * (We only expect the last entry in a bucket to refer to an overflow + * bucket...) 
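 */

/*
 * A walk sketch (not from the original patch; "my_walk" is hypothetical).
 * Per the comments above, a concurrent add/delete can make the walk skip
 * or repeat entries, so walk a quiescent table when exactness matters.
 */
static void
my_walk (cicn_hashtb_h h)
{
  uint64_t ctx = CICN_HASH_WALK_CTX_INITIAL;
  cicn_hash_node_t *node;

  while (cicn_hashtb_next_node (h, &node, &ctx) == AOK)
    {
      /* process 'node' here */
    }
}

/*
 * (search_table continues: a row's last slot may instead link onward to an
 * overflow bucket.)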
+ */ + if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1)) + { + if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW) + { + bucket = pool_elt_at_index (h->ht_overflow_buckets, + bucket->hb_entries[i].he_node); + /* Reset per-bucket index 'i', here (not done in iterator) */ + i = 0; + /* Increment overall entry counter 'j' */ + j++; + + goto search_table; + } + else + { + /* Move to next bucket, resetting per-bucket and overall + * entry indexes + */ + i = 0; + j = 0; + + bidx++; + + /* Special case - we're at the end */ + if (bidx >= h->ht_bucket_count) + { + ret = ENOENT; + goto done; + } + + bucket = h->ht_buckets + bidx; + goto search_table; + } + } + } + +done: + + if (ret == AOK) + { + /* Update context */ + *ctx = bidx; + *ctx |= ((uint64_t) entry << 32); + } + + return (ret); +} + +/* + * Update the per-entry compression expiration value for a hashtable node. + */ +int +cicn_hashtb_entry_set_expiration (cicn_hashtb_h h, + cicn_hash_node_t * node, + uint16_t entry_timeout, uint8_t entry_flags) +{ + int i, ret = EINVAL; + uint32_t bidx, nodeidx; + cicn_hash_bucket_t *bucket; + + if ((h == NULL) || (node == NULL)) + { + goto done; + } + + /* Use some bits of the low half of the hash + * to locate a row/bucket in the table + */ + bidx = (node->hn_hash & (h->ht_bucket_count - 1)); + + nodeidx = NODE_IDX_FROM_NODE (node, h); + + bucket = h->ht_buckets + bidx; + +loop_buckets: + + for (i = 0; i < CICN_HASHTB_BUCKET_ENTRIES; i++) + { + /* + * If an entry is marked for deletion, ignore it + */ + if (bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_DELETED) + { + continue; + } + + /* Be prepared to continue to an overflow bucket if necessary. + * We only expect the last entry in a bucket to refer to an overflow + * bucket... + */ + if (i == (CICN_HASHTB_BUCKET_ENTRIES - 1) && + bucket->hb_entries[i].he_flags & CICN_HASH_ENTRY_FLAG_OVERFLOW) + { + + bucket = pool_elt_at_index (h->ht_overflow_buckets, + bucket->hb_entries[i].he_node); + + goto loop_buckets; + } + + /* Look for an entry carrying this node index */ + if (nodeidx == bucket->hb_entries[i].he_node) + { + ret = AOK; + break; + } + } + + /* If we didn't find the matching entry, we're done */ + if (ret != AOK) + { + ret = ENOENT; + goto done; + } + + /* Update the entry. */ + bucket->hb_entries[i].he_timeout = entry_timeout; + if (entry_flags & CICN_HASH_ENTRY_FLAG_FAST_TIMEOUT) + { + bucket->hb_entries[i].he_flags |= CICN_HASH_ENTRY_FLAG_FAST_TIMEOUT; + } + else + { + bucket->hb_entries[i].he_flags &= ~CICN_HASH_ENTRY_FLAG_FAST_TIMEOUT; + } + +done: + return (ret); +} + +int +cicn_hashtb_key_to_buf (u8 ** vec_res, cicn_hashtb_h h, + const cicn_hash_node_t * node) +{ + int ret = AOK; + u8 *vec = *vec_res; + + if (node->hn_keysize <= CICN_HASH_KEY_BYTES) + { + vec_add (vec, node->hn_key.ks.key, node->hn_keysize); + goto success; + } + + const cicn_hash_key_t *kc; + for (kc = &node->hn_key; kc != NULL; kc = next_key_buf (h, kc)) + { + + int key_bleft = node->hn_keysize - vec_len (vec); + int kc_bytes = (key_bleft <= CICN_HASH_KEY_LIST_BYTES) ? 
	key_bleft : CICN_HASH_KEY_LIST_BYTES;
      vec_add (vec, kc->kl.key, kc_bytes);
    }

success:

  *vec_res = vec;
  return (ret);
}

int
cicn_hashtb_key_to_str (cicn_hashtb_h h, const cicn_hash_node_t * node,
			char *buf, int bufsize, int must_fit)
{
  int ret = EINVAL;

  u8 *kvec = NULL;
  int bstr_len;

  ret = cicn_hashtb_key_to_buf (&kvec, h, node);

  if (h->ht_flags & CICN_HASHTB_FLAG_KEY_FMT_PFX)
    {
      ret = cicn_parse_prefix_to_str (buf, bufsize, kvec, vec_len (kvec),
				      &bstr_len);
    }
  else if (h->ht_flags & CICN_HASHTB_FLAG_KEY_FMT_NAME)
    {
      ret = cicn_parse_name_to_str (buf, bufsize, kvec, vec_len (kvec),
				    &bstr_len);
    }
  else
    {
      ret = EINVAL;		/* should never happen: every table carries a key-format flag */
    }
  if (ret != AOK)
    {
      goto err;
    }

  if (bstr_len >= bufsize)
    {
      if (must_fit)
	{
	  ret = ENOSPC;
	  goto err;
	}
      if (bufsize < 4)
	{
	  ret = ENOSPC;
	  goto err;
	}
      snprintf (&buf[bufsize - 4], 4, "...");
    }

  ret = AOK;

err:
  vec_free (kvec);
  /* Belt-and-suspenders: guarantee NUL termination even if the conversion
   * above fell short */
  buf[bufsize - 1] = '\000';
  return (ret);
}
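/*
 * A usage sketch (not from the original patch; "my_log_key" is a
 * hypothetical caller): render a node's key for diagnostics.  With
 * must_fit == 0, an oversized key is truncated and suffixed with "...".
 */
static void
my_log_key (cicn_hashtb_h h, const cicn_hash_node_t * node)
{
  char buf[256];

  if (cicn_hashtb_key_to_str (h, node, buf, sizeof (buf),
			      0 /*must_fit*/ ) == AOK)
    {
      clib_warning ("key: %s", buf);	/* vppinfra logging macro */
    }
}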