diff options
author | Jim Gibson <gibson@cisco.com> | 2017-02-20 11:53:54 -0500 |
---|---|---|
committer | Jim Gibson <gibson@cisco.com> | 2017-02-20 12:21:12 -0500 |
commit | dfd7ce27fea04c1a76844e21286c2b1d6653e153 (patch) | |
tree | 0025f965ddb68599ea824b9d9edf61b7647dd4ec /cicn-plugin/cicn/cicn_hashtb.h | |
parent | 9b30fc10fb1cbebe651e5a107e8ca5b24de54675 (diff) |
Initial Commit: VPP cicn VPP plugin
Change-Id: If1b965f0a4b7cfacda8f6caf6925072a9007ffb4
Signed-off-by: Jim Gibson <gibson@cisco.com>
Diffstat (limited to 'cicn-plugin/cicn/cicn_hashtb.h')
-rw-r--r-- | cicn-plugin/cicn/cicn_hashtb.h | 526 |
1 files changed, 526 insertions, 0 deletions
diff --git a/cicn-plugin/cicn/cicn_hashtb.h b/cicn-plugin/cicn/cicn_hashtb.h new file mode 100644 index 00000000..c7522dd1 --- /dev/null +++ b/cicn-plugin/cicn/cicn_hashtb.h @@ -0,0 +1,526 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * cicn_hashtb.h: Fast-path, vpp-aware hashtable, the base for the PIT/CS and FIB + * used in the cicn forwarder. + * + * - As is the case in other areas, we can't share headers between the vpp code + * and the cicn/cndn code: there are conflicting definitions. + * + */ + +#ifndef _CICN_HASHTB_H_ +#define _CICN_HASHTB_H_ 1 + +#if !CICN_VPP_PLUGIN +#error "cicn-internal file included externally" +#endif + +#include "cicn_std.h" +#include "cicn_params.h" + +/* Handy abbreviations for success status, and for boolean values */ +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +/* + * Lookup is finding a hashtable record whose name matches the name + * being looked up. Most of the lookup work is based on the hash + * value of the two names. Note that the intel cache line size is 64 + * bytes, and some platforms load in 2 cache lines together. + * - first step is to match a record at the bucket/slot level + * (htab has an array of htbucket_t/htbc_elmt, where each bucket has + * 7 slots to hold indices for entries.) Matching at this level implies + * - the hashes of the lookup name and the record map to the + * same bucket + * - the high 32 bits of the hashes (slot bce_hash_msb32s) match. + * Read cost (on the hash table size, i.e. ignoring reading the + * name being looked up): + * - First step normally requires 1 cache line load to pull in + * the 64-byte htbucket_t with the 7 element slot table holding the + * hash_msb32s. + * - In the event (hopefully rare for a hash table with + * appropriate number of buckets) that more than 7 elements + * hash to the same bucket, lookup may well need to look not + * only at the static htbc_elmt_t but at the chain of dynamically + * allocated htbc_elmt_t's linked to the static htbc_elmt_t, where + * each of these holds slot entries for additional elements. + * - Before reaching that point, it is initially required is to read in + * the hash table record fields (ht_bucket_buf, htnode buf, etc) + * holding pointers to the arrays, but these cache lines are common + * to all lookups so will likely already be in the cache. + * - second step is to match at the record level (htnode/htkb level) + * once a slot-level match happens. Matching at this level implies + * the following match + * - the hash values (the full 64 bits vs. bucket+32 msb, above) + * With siphash, two names hashing to the same 64-bit value is + * quite rare. + * - the name which, on the hash table side, is stored as a list + * of htkb_t (key buffers). [In some cases, the full name is + * not compared, and a match is assumed based on hash value match. + * Read cost: + * - htnode_t, in one cache line, holds hash value and index for the + * htkb at the head of the key buffer list + * - each key buffer (htkb_t) is cache line aligned/sized, and holds + * 60 bytes of the name and requires a cache line read. + * Simplification is that a fib lookup requires 3 cache lines: + * - bucket + * - htnode + * - single key buffer (for cases where a name comparision is done) + * + * Some hashtables (for which rare false positives are tolerable) + * store hash values but no keys. (In ISM NDN forwarder, this was + * used for dcm_dpf: data cache manager's dataplane filter, where + * speed was critical and very rare false positives would be detected + * in the full dcm check.) + * - No key buffers are used (or even allocated at hash table creation). + */ + +#define CICN_HASH_INVALID_IDX ~0 +/* for cicn_hashtb_next_node() iterator, this otherwise illegal context value + * indicates first call of iteration. + * Note: must not be 0, which is a legal context value. + */ +#define CICN_HASH_WALK_CTX_INITIAL (~((uint64_t)0)) + +/* + * Key memory allocation scheme. + * + * The key is the bytestring that a hashtable entry is + * storing, e.g. a fib prefix or packet name. The hash of the + * name is used not just to pick the bucket, but also as a surrogate + * for the actual key value. + * + * Client calls pass key/name as contiguous memory for lookup/add/delete + * but hashable stores its copy of the key/name as a list of one or + * more hash_key structs. + * - key memory is managed as a list of keys (cache line + * sized/aligned buffers). + * - If (keysize < 128) then use key struct's full 128 bytes + * - If not, first key struct is head of a linked list of elements + * where the first bytes are used for the key and the last 4 bytes + * are the index of the next entry (or an end marker). + * - key memory is generally the single largest use of memory + * in the hash table, especially for PIT, as names are bigger + * than node structs (which is also per name/entry). + * + */ + +#define CICN_HASH_KEY_BYTES 128 +#define CICN_HASH_KEY_LIST_BYTES (CICN_HASH_KEY_BYTES - sizeof(uint32_t)) +typedef struct +{ + union + { + struct + { + uint8_t key[CICN_HASH_KEY_BYTES]; + } ks; /* Entire key in one block */ + struct + { + uint8_t key[CICN_HASH_KEY_LIST_BYTES]; + uint32_t idx_next; /* Next keybuf idx */ + } kl; /* Key in a list of blocks */ + }; +} cicn_hash_key_t; + +/* Ratio of extra key blocks to allocate, in case the embedded ones aren't + * sufficient. This is the fraction of the number of entries allocated. + */ +#define CICN_HASHTB_KEY_RATIO 8 + +/* + * hash node, used to store a hash table entry; indexed by an entry in a bucket. + * the node contains an embedded key; long keys are stored as chains of keys. + * + * The memory block for a node includes space for client data, + * additional memory located off the end of the htnode data structure. + * Size of client-supplied data is fixed, so we can use vpp pools. The PIT + * and FIB need to ensure that they fit within the available data area, + * or change the size to accomodate their needs. + * + * NOTE: app_data_size currently applies to all apps, i.e. bigger FIB + * nodes means (leads to, requires) bigger PCS nodes + */ + +/* Size this so that we can offer 64B aligned on 64-bits to the applications */ +#define CICN_HASH_NODE_APP_DATA_SIZE 72 /* TODO -- big enough? */ + +typedef struct cicn_hash_node_s +{ + + uint64_t hn_hash; /* Complete hash value */ + + /* Total size of the key (chained in several key structs if necessary) */ + uint16_t hn_keysize; + + /* 1 byte of flags for application use */ + uint8_t hn_flags; + + uint8_t _hn_reserved1; /* TBD, to align what follows back to 32 */ + + cicn_hash_key_t hn_key; /* Key value embedded in the node, may + * chain to more key buffers if necessary + */ + /* TODO -- keep array of 'next keys' so we can prefetch better? */ + + /* Followed by app-specific data (fib or pit or cs entry, e.g.) */ + uint8_t hn_data[CICN_HASH_NODE_APP_DATA_SIZE]; + +} cicn_hash_node_t; + +#define CICN_HASH_NODE_FLAGS_DEFAULT 0x00 + + +/* + * cicn_hash_entry_t + * Structure holding all or part of a hash value, a node index, and other + * key pieces of info. + * + * - 128 bytes/bucket with 16 bytes/entry gives 8 entries, + * or 7 entries plus next bucket ptr if overflow + */ +typedef struct +{ + + /* MSB of the hash value */ + uint64_t he_msb64; + + /* Index of node block */ + uint32_t he_node; + + /* Timeout value, units and scheme still TBD */ + uint16_t he_timeout; + + /* A few flags, including 'this points to a chain of buckets' */ + uint8_t he_flags; + + /* A byte for domain/application data (e.g. 'virtual fib entry' */ + uint8_t he_appval; + +} cicn_hash_entry_t; + +#define CICN_HASH_ENTRY_FLAGS_DEFAULT 0x00 + +/* This entry heads a chain of overflow buckets (we expect to see this + * only in the last entry in a bucket.) In this case, the index is + * to an overflow bucket rather than to a single node block. + */ +#define CICN_HASH_ENTRY_FLAG_OVERFLOW 0x01 + +/* This entry has been marked for deletion */ +#define CICN_HASH_ENTRY_FLAG_DELETED 0x02 + +/* Use fast he_timeout units for expiration, slow if not */ +#define CICN_HASH_ENTRY_FLAG_FAST_TIMEOUT 0x04 + +/* + * hash bucket: Contains an array of entries. + * Cache line sized/aligned, so no room for extra fields unless + * bucket size is increased to 2 cache lines or the entry struct + * shrinks. + */ + +/* + * Overflow bucket ratio as a fraction of the fixed/configured count; + * a pool of hash buckets used if a row in the fixed table overflows. + */ +#define CICN_HASHTB_OVERFLOW_FRACTION 8 + +#define CICN_HASHTB_BUCKET_ENTRIES 8 + +typedef struct +{ + cicn_hash_entry_t hb_entries[CICN_HASHTB_BUCKET_ENTRIES]; +} cicn_hash_bucket_t; + +/* Overall target fill-factor for the hashtable */ +#define CICN_HASHTB_FILL_FACTOR 4 + +#define CICN_HASHTB_MIN_ENTRIES (1 << 4) // includes dummy node 0 entry +#define CICN_HASHTB_MAX_ENTRIES (1 << 24) + +#define CICN_HASHTB_MIN_BUCKETS (1 << 10) + +/* + * htab_t + * + * Hash table main structure. + * + * Contains + * - pointers to dynamically allocated arrays of cache-line + * sized/aligned structures (buckets, nodes, keys). + * Put frequently accessed fields in the first cache line. + */ +typedef struct cicn_hashtb_s +{ + + /* 8B - main array of hash buckets */ + cicn_hash_bucket_t *ht_buckets; + + /* 8B - just-in-case block of overflow buckets */ + cicn_hash_bucket_t *ht_overflow_buckets; + + /* 8B - block of nodes associated with entries in buckets */ + cicn_hash_node_t *ht_nodes; + + /* + * 8B - just-in-case block of extra keys, used when a key is too + * large to fit in a node's embedded key area + */ + cicn_hash_key_t *ht_extra_keys; + + /* Flags */ + uint32_t ht_flags; + + /* Count of buckets allocated in the main array */ + uint32_t ht_bucket_count; + + /* Count of overflow buckets allocated */ + uint32_t ht_overflow_bucket_count; + uint32_t ht_overflow_buckets_used; + + /* Count of nodes allocated */ + uint32_t ht_node_count; + uint32_t ht_nodes_used; + + /* Count of overflow key structs allocated */ + uint32_t ht_key_count; + uint32_t ht_keys_used; + + /* TODO -- stats? */ + +} cicn_hashtb_t, *cicn_hashtb_h; + +/* Offset to aligned start of additional data (PIT/CS, FIB) + * embedded in each node. + */ +extern uint32_t ht_node_data_offset_aligned; + +/* Flags for hashtable */ + +#define CICN_HASHTB_FLAGS_DEFAULT 0x00 + +/* Don't use the last/eighth entry in each bucket - only use it for overflow. + * We use this for the FIB, currently, so that we can support in-place + * FIB changes that would be difficult if there were hash entry copies + * as part of overflow handling. + */ +#define CICN_HASHTB_FLAG_USE_SEVEN 0x01 +#define CICN_HASHTB_FLAG_KEY_FMT_PFX 0x02 +#define CICN_HASHTB_FLAG_KEY_FMT_NAME 0x04 + +/* + * Max prefix name components we'll support in our incremental hashing; + * currently used only for LPM in the FIB. + */ +#define CICN_HASHTB_MAX_NAME_COMPS CICN_PARAM_FIB_ENTRY_PFX_COMPS_MAX + +/* + * Info about an LPM hash computation on a prefix or name. + */ +typedef struct cicn_prefix_hashinf_s +{ + + const uint8_t *pfx_ptr; + uint16_t pfx_len; + + uint16_t pfx_count; /* Number of prefix entries used */ + uint8_t pfx_overflow; /* true if pfx has extra components (not hashed) */ + + uint16_t pfx_lens[CICN_HASHTB_MAX_NAME_COMPS]; + uint64_t pfx_hashes[CICN_HASHTB_MAX_NAME_COMPS]; + + uint64_t pfx_full_hash; + +} cicn_prefix_hashinf_t; + +/* + * APIs and inlines + */ + +/* Compute hash node index from node pointer */ +static inline uint32_t +cicn_hashtb_node_idx_from_node (cicn_hashtb_h h, cicn_hash_node_t * p) +{ + return (p - h->ht_nodes); +} + +/* Retrieve a hashtable node by node index */ +static inline cicn_hash_node_t * +cicn_hashtb_node_from_idx (cicn_hashtb_h h, uint32_t idx) +{ + return (pool_elt_at_index (h->ht_nodes, idx)); +} + +/* Allocate a brand-new hashtable */ +int cicn_hashtb_alloc (cicn_hashtb_h * ph, uint32_t max_elems, + size_t app_data_size); + +/* Free a hashtable, including its embedded arrays */ +int cicn_hashtb_free (cicn_hashtb_h * ph); + +/* Hash a bytestring, currently using siphash64 (for UT) */ +uint64_t cicn_hashtb_hash_bytestring (const uint8_t * key, uint32_t keylen); + +/* Hash a name, currently using siphash64 */ +uint64_t cicn_hashtb_hash_name (const uint8_t * key, uint32_t keylen); + +/* + * Hash a name, with incremental prefix hashing (for LPM, e.g.) If + * 'limit' > 0, limit computation of incrementals. + * if 'is_full_name', we expect that 'name' points to the beginning + * of the entire name TLV; otherwise, 'name' just points to the first + * name-comp sub-tlv, and we will _not_ compute the full-name hash. + * Note that 'name' and 'namelen' are captured in the prefix-hash + * context struct, to make the LPM/FIB apis a little cleaner. + */ +int cicn_hashtb_hash_prefixes (const uint8_t * name, uint16_t namelen, + int is_full_name, cicn_prefix_hashinf_t * pfx, + int limit); + +/* + * Prepare a hashtable node for insertion, supplying the key + * and computed hash info. This sets up the node->key relationship, possibly + * allocating overflow key buffers. + */ +int cicn_hashtb_init_node (cicn_hashtb_h h, cicn_hash_node_t * node, + uint64_t hashval, + const uint8_t * key, uint32_t keylen); + +/* + * Insert a node into the hashtable. We expect the caller has used the + * init api to set the node key and hash info, and populated the extra + * data area (if any) - or done the equivalent work itself. + */ +int cicn_hashtb_insert (cicn_hashtb_h h, cicn_hash_node_t * node); + +/* + * Basic api to lookup a specific hash+key tuple. This does the entire + * lookup operation, retrieving node structs and comparing keys, + * so it's not optimized for prefetching or high performance. + * + * Returns zero and mails back a node on success, errno otherwise. + */ +int cicn_hashtb_lookup_node (cicn_hashtb_h h, const uint8_t * key, + uint32_t keylen, uint64_t hashval, + cicn_hash_node_t ** nodep); + +/* + * Extended api to lookup a specific hash+key tuple. The implementation + * allows the caller to locate nodes that are marked for deletion; this + * is part of some hashtable applications, such as the FIB. + * + * This does the entire lookup operation, retrieving node structs + * and comparing keys, so it's not optimized for prefetching or high performance. + * + * Returns zero and mails back a node on success, errno otherwise. + */ +int +cicn_hashtb_lookup_node_ex (cicn_hashtb_h h, const uint8_t * key, + uint32_t keylen, uint64_t hashval, + int include_deleted_p, cicn_hash_node_t ** nodep); + +/* + * Remove a node from a hashtable using the node itself. The internal + * data structs are cleaned up, but the node struct itself is not: the caller + * must free the node itself. + */ +int cicn_hashtb_remove_node (cicn_hashtb_h h, cicn_hash_node_t * node); + +/* + * Delete a node from a hashtable using the node itself, and delete/free + * the node. Caller's pointer is cleared on success. + */ +int cicn_hashtb_delete (cicn_hashtb_h h, cicn_hash_node_t ** pnode); + +/* + * Utility to init a new entry in a hashtable bucket/row. We use this + * to add new a node+hash, and to clear out an entry during removal. + */ +void cicn_hashtb_init_entry (cicn_hash_entry_t * entry, + uint32_t nodeidx, uint64_t hashval); + +/* + * Return data area embedded in a hash node struct. We maintain an 'offset' + * value in case the common node body struct doesn't leave the data area + * aligned properly. + */ +static inline void * +cicn_hashtb_node_data (cicn_hash_node_t * node) +{ + return ((uint8_t *) (node) + ht_node_data_offset_aligned); +} + +/* + * Use some bits of the low half of the hash to locate a row/bucket in the table + */ +static inline uint32_t +cicn_hashtb_bucket_idx (cicn_hashtb_h h, uint64_t hashval) +{ + return ((uint32_t) (hashval & (h->ht_bucket_count - 1))); +} + +/* + * Return a hash node struct from the free list, or NULL. + * Note that the returned struct is _not_ cleared/zeroed - init + * is up to the caller. + */ +static inline cicn_hash_node_t * +cicn_hashtb_alloc_node (cicn_hashtb_h h) +{ + cicn_hash_node_t *p = NULL; + + if (h->ht_nodes_used < h->ht_node_count) + { + pool_get_aligned (h->ht_nodes, p, 8); + h->ht_nodes_used++; + } + + return (p); +} + +/* + * Release a hashtable node back to the free list when an entry is cleared + */ +void cicn_hashtb_free_node (cicn_hashtb_h h, cicn_hash_node_t * node); + +/* + * Walk a hashtable, iterating through the nodes, keeping context + * in 'ctx' between calls. + * + * Set the context value to CICN_HASH_WALK_CTX_INITIAL to start an iteration. + */ +int cicn_hashtb_next_node (cicn_hashtb_h h, cicn_hash_node_t ** pnode, + uint64_t * ctx); + +/* + * Update the per-entry compression expiration value and type + * for a hashtable node. + */ +int cicn_hashtb_entry_set_expiration (cicn_hashtb_h h, + cicn_hash_node_t * node, + uint16_t entry_timeout, + uint8_t entry_flags); + +int cicn_hashtb_key_to_str (cicn_hashtb_h h, const cicn_hash_node_t * node, + char *buf, int bufsize, int must_fit); + +#endif /* _CICN_HASHTB_H_ */ |