From bac3da61644515f05663789b122554dc77549286 Mon Sep 17 00:00:00 2001 From: Luca Muscariello Date: Thu, 17 Jan 2019 13:47:57 +0100 Subject: This is the first commit of the hicn project Change-Id: I6f2544ad9b9f8891c88cc4bcce3cf19bd3cc863f Signed-off-by: Luca Muscariello --- hicn-plugin/src/hashtb.h | 550 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 550 insertions(+) create mode 100755 hicn-plugin/src/hashtb.h (limited to 'hicn-plugin/src/hashtb.h') diff --git a/hicn-plugin/src/hashtb.h b/hicn-plugin/src/hashtb.h new file mode 100755 index 000000000..1690419a1 --- /dev/null +++ b/hicn-plugin/src/hashtb.h @@ -0,0 +1,550 @@ +/* + * Copyright (c) 2017-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __HICN_HASHTB_H__ +#define __HICN_HASHTB_H__ + +#include +#include +#include + +#include "params.h" +#include "parser.h" +#include "error.h" + +/* Handy abbreviations for success status, and for boolean values */ +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +/* + * Lookup is finding a hashtable record whose name matches the name being + * looked up. Most of the lookup work is based on the hash value of the two + * names. Note that the intel cache line size is 64 bytes, and some platforms + * load in 2 cache lines together. - first step is to match a record at the + * bucket/slot level (htab has an array of htbucket_t/htbc_elmt, where each + * bucket has 7 slots to hold indices for entries.) Matching at this level + * implies - the hashes of the lookup name and the record map to the same + * bucket - the high 32 bits of the hashes (slot bce_hash_msb32s) match. Read + * cost (on the hash table size, i.e. ignoring reading the name being looked + * up): - First step normally requires 1 cache line load to pull in the + * 64-byte htbucket_t with the 7 element slot table holding the hash_msb32s. + * - In the event (hopefully rare for a hash table with appropriate number of + * buckets) that more than 7 elements hash to the same bucket, lookup may + * well need to look not only at the static htbc_elmt_t but at the chain of + * dynamically allocated htbc_elmt_t's linked to the static htbc_elmt_t, + * where each of these holds slot entries for additional elements. - Before + * reaching that point, it is initially required is to read in the hash table + * record fields (ht_bucket_buf, htnode buf, etc) holding pointers to the + * arrays, but these cache lines are common to all lookups so will likely + * already be in the cache. - second step is to match at the record level + * (htnode/htkb level) once a slot-level match happens. Matching at this + * level implies the following match - the hash values (the full 64 bits vs. + * bucket+32 msb, above) With siphash, two names hashing to the same 64-bit + * value is quite rare. - the name which, on the hash table side, is stored + * as a list of htkb_t (key buffers). [In some cases, the full name is not + * compared, and a match is assumed based on hash value match. Read cost: - + * htnode_t, in one cache line, holds hash value and index for the htkb at + * the head of the key buffer list - each key buffer (htkb_t) is cache line + * aligned/sized, and holds 60 bytes of the name and requires a cache line + * read. Simplification is that a fib lookup requires 3 cache lines: - bucket + * - htnode - single key buffer (for cases where a name comparision is done) + * + * Some hashtables (for which rare false positives are tolerable) store hash + * values but no keys. (In ISM NDN forwarder, this was used for dcm_dpf: data + * cache manager's dataplane filter, where speed was critical and very rare + * false positives would be detected in the full dcm check.) - No key buffers + * are used (or even allocated at hash table creation). + */ + +#define HICN_HASH_INVALID_IDX ~0 +/* + * for hicn_hashtb_next_node() iterator, this otherwise illegal context value + * indicates first call of iteration. Note: must not be 0, which is a legal + * context value. + */ +#define HICN_HASH_WALK_CTX_INITIAL (~((u64)0)) + +/* + * Key memory allocation scheme. + * + * The key is the bytestring that a hashtable entry is storing, e.g. a fib + * prefix or packet name. The hash of the name is used not just to pick the + * bucket, but also as a surrogate for the actual key value. + * + * Client calls pass key/name as contiguous memory for lookup/add/delete but + * hashable stores its copy of the key/name as a list of one or more hash_key + * structs. - key memory is managed as a list of keys (cache line + * sized/aligned buffers). - If (keysize < 128) then use key struct's full + * 128 bytes - If not, first key struct is head of a linked list of elements + * where the first bytes are used for the key and the last 4 bytes are the + * index of the next entry (or an end marker). - key memory is generally the + * single largest use of memory in the hash table, especially for PIT, as + * names are bigger than node structs (which is also per name/entry). + * + */ + +/* Compute hash node index from node pointer */ +#define NODE_IDX_FROM_NODE(p, h) \ + (u32)((p) - ((h)->ht_nodes)) + +#define HICN_HASH_KEY_BYTES 20 + +typedef struct +{ + struct + { + u8 key[HICN_HASH_KEY_BYTES]; + } ks; /* Entire key in one block */ +} hicn_hash_key_t; + +/* + * Ratio of extra key blocks to allocate, in case the embedded ones aren't + * sufficient. This is the fraction of the number of entries allocated. + */ +#define HICN_HASHTB_KEY_RATIO 8 + +/* + * hash node, used to store a hash table entry; indexed by an entry in a + * bucket. the node contains an embedded key; long keys are stored as chains + * of keys. + * + * The memory block for a node includes space for client data, additional memory + * located off the end of the htnode data structure. Size of client-supplied + * data is fixed, so we can use vpp pools. The PIT and FIB need to ensure + * that they fit within the available data area, or change the size to + * accomodate their needs. + * + * NOTE: app_data_size currently applies to all apps, i.e. bigger FIB nodes + * means (leads to, requires) bigger PCS nodes + */ + +/* Size this so that we can offer 64B aligned on 64-bits to the applications */ +/* New PIT entry syze 62B */ +#define HICN_HASH_NODE_APP_DATA_SIZE 4184 //to support 512 entry //96 //190 to support 50 faces + +/* How to align in the right way */ +typedef struct __attribute__ ((packed)) hicn_hash_node_s +{ + /* Bucket id containing the corresponding hash entry. */ + u32 bucket_id; + + /* Hash entry index in the bucket */ + u32 entry_idx; + + /* Total size of the key */ + u16 hn_keysize; + + /* 1 byte of flags for application use */ + u8 hn_flags; + + u8 _hn_reserved1; /* TBD, to align what follows back to + * 32 */ + + hicn_hash_key_t hn_key; /* Key value embedded in the node, may chain + * to more key buffers if necessary */ + + /* 32B + HICN_HASH_NODE_APP_DATA_SIZE */ + /* Followed by app-specific data (fib or pit or cs entry, e.g.) */ + u8 hn_data[HICN_HASH_NODE_APP_DATA_SIZE]; + +} hicn_hash_node_t; + +#define HICN_HASH_NODE_FLAGS_DEFAULT 0x00 +#define HICN_HASH_NODE_CS_FLAGS 0x01 +#define HICN_HASH_NODE_OVERFLOW_BUCKET 0x02 + +/* + * hicn_hash_entry_t Structure holding all or part of a hash value, a node + * index, and other key pieces of info. + * + * - 128 bytes/bucket with 19 bytes/entry gives 6 entries, or 5 entries plus + * next bucket ptr if overflow Changes in this structure will affect + * hicn_hash_bucket_t + */ +typedef struct __attribute__ ((packed)) hicn_hash_entry_s +{ + + /* MSB of the hash value */ + u64 he_msb64; + + /* Index of node block */ + u32 he_node; + + /* + * Lock to prevent hash_node deletion while there are still interest + * or data referring to it + */ + u32 locks; + + /* A few flags, including 'this points to a chain of buckets' */ + u8 he_flags; + + /* + * Index of the virtual function table corresponding to the dpo_ctx + * strategy + */ + u8 vft_id; + + /* Index of dpo */ + u8 dpo_ctx_id; + +} hicn_hash_entry_t; + +#define HICN_HASH_ENTRY_FLAGS_DEFAULT 0x00 + +/* If entry is PIT this flag is 0 */ +#define HICN_HASH_ENTRY_FLAG_CS_ENTRY 0x01 + +/* + * This entry heads a chain of overflow buckets (we expect to see this only + * in the last entry in a bucket.) In this case, the index is to an overflow + * bucket rather than to a single node block. + */ +#define HICN_HASH_ENTRY_FLAG_OVERFLOW 0x04 + +/* This entry has been marked for deletion */ +#define HICN_HASH_ENTRY_FLAG_DELETED 0x08 + +/* Use fast he_timeout units for expiration, slow if not */ +#define HICN_HASH_ENTRY_FLAG_FAST_TIMEOUT 0x10 + +/* + * hash bucket: Contains an array of entries. Cache line sized/aligned, so no + * room for extra fields unless bucket size is increased to 2 cache lines or + * the entry struct shrinks. + */ + +/* + * Overflow bucket ratio as a fraction of the fixed/configured count; a pool + * of hash buckets used if a row in the fixed table overflows. + */ +#define HICN_HASHTB_BUCKET_ENTRIES 6 + +typedef struct __attribute__ ((packed)) +{ + hicn_hash_entry_t hb_entries[HICN_HASHTB_BUCKET_ENTRIES]; + u64 align1; + u32 align2; + u16 align3; +} hicn_hash_bucket_t; + +/* Overall target fill-factor for the hashtable */ +#define HICN_HASHTB_FILL_FACTOR 4 + +#define HICN_HASHTB_MIN_ENTRIES (1 << 4) // includes dummy node 0 entry +#define HICN_HASHTB_MAX_ENTRIES (1 << 24) + +#define HICN_HASHTB_MIN_BUCKETS (1 << 10) + +/* + * htab_t + * + * Hash table main structure. + * + * Contains - pointers to dynamically allocated arrays of cache-line + * sized/aligned structures (buckets, nodes, keys). Put frequently accessed + * fields in the first cache line. + */ +typedef struct hicn_hashtb_s +{ + + /* 8B - main array of hash buckets */ + hicn_hash_bucket_t *ht_buckets; + + /* 8B - just-in-case block of overflow buckets */ + hicn_hash_bucket_t *ht_overflow_buckets; + + /* 8B - block of nodes associated with entries in buckets */ + hicn_hash_node_t *ht_nodes; + + /* Flags */ + u32 ht_flags; + + /* Count of buckets allocated in the main array */ + u32 ht_bucket_count; + + /* Count of overflow buckets allocated */ + u32 ht_overflow_bucket_count; + u32 ht_overflow_buckets_used; + + /* Count of nodes allocated */ + u32 ht_node_count; + u32 ht_nodes_used; + + /* Count of overflow key structs allocated */ + u32 ht_key_count; + u32 ht_keys_used; + +} hicn_hashtb_t, *hicn_hashtb_h; + +/* + * Offset to aligned start of additional data (PIT/CS, FIB) embedded in each + * node. + */ +extern u32 ht_node_data_offset_aligned; + +/* Flags for hashtable */ + +#define HICN_HASHTB_FLAGS_DEFAULT 0x00 + +/* + * Don't use the last entry in each bucket - only use it for overflow. We use + * this for the FIB, currently, so that we can support in-place FIB changes + * that would be difficult if there were hash entry copies as part of + * overflow handling. + */ +#define HICN_HASHTB_FLAG_USE_SEVEN 0x04 +#define HICN_HASHTB_FLAG_KEY_FMT_PFX 0x08 +#define HICN_HASHTB_FLAG_KEY_FMT_NAME 0x10 + +/* + * Max prefix name components we'll support in our incremental hashing; + * currently used only for LPM in the FIB. + */ +#define HICN_HASHTB_MAX_NAME_COMPS HICN_PARAM_FIB_ENTRY_PFX_COMPS_MAX + +/* + * APIs and inlines + */ + +/* Compute hash node index from node pointer */ +static inline u32 +hicn_hashtb_node_idx_from_node (hicn_hashtb_h h, hicn_hash_node_t * p) +{ + return (p - h->ht_nodes); +} + +/* Retrieve a hashtable node by node index */ +static inline hicn_hash_node_t * +hicn_hashtb_node_from_idx (hicn_hashtb_h h, u32 idx) +{ + return (pool_elt_at_index (h->ht_nodes, idx)); +} + +/* Allocate a brand-new hashtable */ +int +hicn_hashtb_alloc (hicn_hashtb_h * ph, u32 max_elems, size_t app_data_size); + +/* Free a hashtable, including its embedded arrays */ +int hicn_hashtb_free (hicn_hashtb_h * ph); + +/* Hash a bytestring, currently using bihash */ +u64 hicn_hashtb_hash_bytestring (const u8 * key, u32 keylen); + +always_inline hicn_hash_entry_t * +hicn_hashtb_get_entry (hicn_hashtb_h h, u32 entry_idx, u32 bucket_id, + u8 bucket_overflow) +{ + hicn_hash_bucket_t *bucket; + if (bucket_overflow) + bucket = pool_elt_at_index (h->ht_overflow_buckets, bucket_id); + else + bucket = (hicn_hash_bucket_t *) (h->ht_buckets + bucket_id); + + return &(bucket->hb_entries[entry_idx]); +} + +/* Hash a name, currently using bihash */ +always_inline u64 +hicn_hashtb_hash_name (const u8 * key, u16 keylen) +{ + if (key != NULL && keylen == HICN_V4_NAME_LEN) + { + clib_bihash_kv_8_8_t kv; + kv.key = ((u64 *) key)[0]; + return clib_bihash_hash_8_8 (&kv); + } + else if (key != NULL && keylen == HICN_V6_NAME_LEN) + { + clib_bihash_kv_24_8_t kv; + kv.key[0] = ((u64 *) key)[0]; + kv.key[1] = ((u64 *) key)[1]; + kv.key[2] = ((u32 *) key)[4]; + return clib_bihash_hash_24_8 (&kv); + } + else + { + return (-1LL); + } +} + + +/* + * Prepare a hashtable node for insertion, supplying the key and computed + * hash info. This sets up the node->key relationship, possibly allocating + * overflow key buffers. + */ +void +hicn_hashtb_init_node (hicn_hashtb_h h, hicn_hash_node_t * node, + const u8 * key, u32 keylen); + +/* + * Insert a node into the hashtable. We expect the caller has used the init + * api to set the node key and hash info, and populated the extra data area + * (if any) - or done the equivalent work itself. + */ +int +hicn_hashtb_insert (hicn_hashtb_h h, hicn_hash_node_t * node, + hicn_hash_entry_t ** hash_entry, u64 hash, + u32 * node_id, + u8 * dpo_ctx_id, u8 * vft_id, u8 * is_cs, + u8 * hash_entry_id, u32 * bucket_id, + u8 * bucket_is_overflow); + +/* + * Basic api to lookup a specific hash+key tuple. This does the entire lookup + * operation, retrieving node structs and comparing keys, so it's not + * optimized for prefetching or high performance. + * + * Returns zero and mails back a node on success, errno otherwise. + */ +int +hicn_hashtb_lookup_node (hicn_hashtb_h h, const u8 * key, + u32 keylen, u64 hashval, u8 is_data, + u32 * node_id, u8 * dpo_ctx_id, u8 * vft_id, + u8 * is_cs, u8 * hash_entry_id, u32 * bucket_id, + u8 * bucket_is_overflow); + +/* + * Extended api to lookup a specific hash+key tuple. The implementation + * allows the caller to locate nodes that are marked for deletion; this is + * part of some hashtable applications, such as the FIB. + * + * This does the entire lookup operation, retrieving node structs and comparing + * keys, so it's not optimized for prefetching or high performance. + * + * Returns zero and mails back a node on success, errno otherwise. + */ +int +hicn_hashtb_lookup_node_ex (hicn_hashtb_h h, const u8 * key, + u32 keylen, u64 hashval, u8 is_data, + int include_deleted_p, u32 * node_id, + u8 * dpo_ctx_id, u8 * vft_id, u8 * is_cs, + u8 * hash_entry_id, u32 * bucket_id, + u8 * bucket_is_overflow); + +/** + * @brief Compares the key in the node with the given key + * + * This function allows to split the hash verification from the comparison of + * the entire key. Useful to exploit prefertching. + * @result 1 if equals, 0 otherwise + */ +int hicn_node_compare (const u8 * key, u32 keylen, hicn_hash_node_t * node); + +/* + * Remove a node from a hashtable using the node itself. The internal data + * structs are cleaned up, but the node struct itself is not: the caller must + * free the node itself. + */ +void hicn_hashtb_remove_node (hicn_hashtb_h h, hicn_hash_node_t * node, + u64 hashval); + +/* + * Delete a node from a hashtable using the node itself, and delete/free the + * node. Caller's pointer is cleared on success. + */ +void hicn_hashtb_delete (hicn_hashtb_h h, hicn_hash_node_t ** pnode, + u64 hashval); + +/* + * Utility to init a new entry in a hashtable bucket/row. We use this to add + * new a node+hash, and to clear out an entry during removal. + */ +void +hicn_hashtb_init_entry (hicn_hash_entry_t * entry, + u32 nodeidx, u64 hashval, u32 locks); + + +/* + * Return data area embedded in a hash node struct. We maintain an 'offset' + * value in case the common node body struct doesn't leave the data area + * aligned properly. + */ +static inline void * +hicn_hashtb_node_data (hicn_hash_node_t * node) +{ + return ((u8 *) (node) + ht_node_data_offset_aligned); +} + +/* + * Use some bits of the low half of the hash to locate a row/bucket in the + * table + */ +static inline u32 +hicn_hashtb_bucket_idx (hicn_hashtb_h h, u64 hashval) +{ + return ((u32) (hashval & (h->ht_bucket_count - 1))); +} + +/* + * Return a hash node struct from the free list, or NULL. Note that the + * returned struct is _not_ cleared/zeroed - init is up to the caller. + */ +static inline hicn_hash_node_t * +hicn_hashtb_alloc_node (hicn_hashtb_h h) +{ + hicn_hash_node_t *p = NULL; + + if (h->ht_nodes_used < h->ht_node_count) + { + pool_get_aligned (h->ht_nodes, p, 8); + h->ht_nodes_used++; + } + return (p); +} + +/* + * Release a hashtable node back to the free list when an entry is cleared + */ +void hicn_hashtb_free_node (hicn_hashtb_h h, hicn_hash_node_t * node); + +/* + * Walk a hashtable, iterating through the nodes, keeping context in 'ctx' + * between calls. + * + * Set the context value to HICN_HASH_WALK_CTX_INITIAL to start an iteration. + */ +int +hicn_hashtb_next_node (hicn_hashtb_h h, hicn_hash_node_t ** pnode, u64 * ctx); + + +int +hicn_hashtb_key_to_str (hicn_hashtb_h h, const hicn_hash_node_t * node, + char *buf, int bufsize, int must_fit); + +/* + * single hash full name can pass offset for two hashes calculation in case + * we use CS and PIT in a two steps hashes (prefix + seqno) + */ +always_inline int +hicn_hashtb_fullhash (const u8 * name, u16 namelen, u64 * name_hash) +{ + *name_hash = hicn_hashtb_hash_name (name, namelen); + return (*name_hash != (-1LL) ? HICN_ERROR_NONE : HICN_ERROR_HASHTB_INVAL); +} + +#endif /* // __HICN_HASHTB_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: eval: (c-set-style "gnu") End: + */ -- cgit 1.2.3-korg