1 files changed, 526 insertions, 0 deletions
diff --git a/cicn-plugin/cicn/cicn_hashtb.h b/cicn-plugin/cicn/cicn_hashtb.h
new file mode 100644
index 00000000..c7522dd1
--- /dev/null
+++ b/cicn-plugin/cicn/cicn_hashtb.h
@@ -0,0 +1,526 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cicn_hashtb.h: Fast-path, vpp-aware hashtable, the base for the PIT/CS and FIB
+ * used in the cicn forwarder.
+ *
+ * - As is the case in other areas, we can't share headers between the vpp code
+ *   and the cicn/cndn code: there are conflicting definitions.
+ *
+ */
+
+#ifndef _CICN_HASHTB_H_
+#define _CICN_HASHTB_H_ 1
+
+#if !CICN_VPP_PLUGIN
+#error "cicn-internal file included externally"
+#endif
+
+#include "cicn_std.h"
+#include "cicn_params.h"
+
+/* Handy abbreviations for success status, and for boolean values */
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/*
+ * Lookup is finding a hashtable record whose name matches the name
+ * being looked up.  Most of the lookup work is based on the hash
+ * value of the two names. Note that the intel cache line size is 64
+ * bytes, and some platforms load in 2 cache lines together.
+ * - first step is to match a record at the bucket/slot level
+ *   (htab has an array of htbucket_t/htbc_elmt, where each bucket has
+ *   7 slots to hold indices for entries.) Matching at this level implies
+ *   - the hashes of the lookup name and the record map to the
+ *     same bucket
+ *   - the high 32 bits of the hashes (slot bce_hash_msb32s) match.
+ *   Read cost (on the hash table size, i.e. ignoring reading the
+ *   name being looked up):
+ *   - First step normally requires 1 cache line load to pull in
+ *     the 64-byte htbucket_t with the 7 element slot table holding the
+ *     hash_msb32s.
+ *   - In the event (hopefully rare for a hash table with
+ *     appropriate number of buckets) that more than 7 elements
+ *     hash to the same bucket, lookup may well need to look not
+ *     only at the static htbc_elmt_t but at the chain of dynamically
+ *     allocated htbc_elmt_t's linked to the static htbc_elmt_t, where
+ *     each of these holds slot entries for additional elements.
+ *   - Before reaching that point, it is initially required is to read in
+ *     the hash table record fields (ht_bucket_buf, htnode buf, etc)
+ *     holding pointers to the arrays, but these cache lines are common
+ *     to all lookups so will likely already be in the cache.
+ * - second step is to match at the record level (htnode/htkb level)
+ *   once a slot-level match happens. Matching at this level implies
+ *   the following match
+ *   - the hash values (the full 64 bits vs. bucket+32 msb, above)
+ *     With siphash, two names hashing to the same 64-bit value is
+ *     quite rare.
+ *   - the name which, on the hash table side, is stored as a list
+ *     of htkb_t (key buffers). [In some cases, the full name is
+ *     not compared, and a match is assumed based on hash value match.
+ *   Read cost:
+ *   - htnode_t, in one cache line, holds hash value and index for the
+ *     htkb at the head of the key buffer list
+ *   - each key buffer (htkb_t) is cache line aligned/sized, and holds
+ *     60 bytes of the name and requires a cache line read.
+ * Simplification is that a fib lookup requires 3 cache lines:
+ * - bucket
+ * - htnode
+ * - single key buffer (for cases where a name comparision is done)
+ *
+ * Some hashtables (for which rare false positives are tolerable)
+ * store hash values but no keys. (In ISM NDN forwarder, this was
+ * used for dcm_dpf: data cache manager's dataplane filter, where
+ * speed was critical and very rare false positives would be detected
+ * in the full dcm check.)
+ * - No key buffers are used (or even allocated at hash table creation).
+ */
+
+#define CICN_HASH_INVALID_IDX  ~0
+/* for cicn_hashtb_next_node() iterator, this otherwise illegal context value
+ * indicates first call of iteration.
+ * Note: must not be 0, which is a legal context value.
+ */
+#define CICN_HASH_WALK_CTX_INITIAL (~((uint64_t)0))
+
+/*
+ * Key memory allocation scheme.
+ *
+ * The key is the bytestring that a hashtable entry is
+ * storing, e.g. a fib prefix or packet name. The hash of the
+ * name is used not just to pick the bucket, but also as a surrogate
+ * for the actual key value.
+ *
+ * Client calls pass key/name as contiguous memory for lookup/add/delete
+ * but hashable stores its copy of the key/name as a list of one or
+ * more hash_key structs.
+ * - key memory is managed as a list of keys (cache line
+ *   sized/aligned buffers).
+ *   - If (keysize < 128) then use key struct's full 128 bytes
+ *   - If not, first key struct is head of a linked list of elements
+ *     where the first bytes are used for the key and the last 4 bytes
+ *     are the index of the next entry (or an end marker).
+ * - key memory is generally the single largest use of memory
+ *   in the hash table, especially for PIT, as names are bigger
+ *   than node structs (which is also per name/entry).
+ *
+ */
+
+#define CICN_HASH_KEY_BYTES   128
+#define CICN_HASH_KEY_LIST_BYTES (CICN_HASH_KEY_BYTES - sizeof(uint32_t))
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      uint8_t key[CICN_HASH_KEY_BYTES];
+    } ks;			/* Entire key in one block */
+    struct
+    {
+      uint8_t key[CICN_HASH_KEY_LIST_BYTES];
+      uint32_t idx_next;	/* Next keybuf idx */
+    } kl;			/* Key in a list of blocks */
+  };
+} cicn_hash_key_t;
+
+/* Ratio of extra key blocks to allocate, in case the embedded ones aren't
+ * sufficient. This is the fraction of the number of entries allocated.
+ */
+#define CICN_HASHTB_KEY_RATIO 8
+
+/*
+ * hash node, used to store a hash table entry; indexed by an entry in a bucket.
+ * the node contains an embedded key; long keys are stored as chains of keys.
+ *
+ * The memory block for a node includes space for client data,
+ * additional memory located off the end of the htnode data structure.
+ * Size of client-supplied data is fixed, so we can use vpp pools. The PIT
+ * and FIB need to ensure that they fit within the available data area,
+ * or change the size to accomodate their needs.
+ *
+ * NOTE: app_data_size currently applies to all apps, i.e. bigger FIB
+ *       nodes means (leads to, requires) bigger PCS nodes
+ */
+
+/* Size this so that we can offer 64B aligned on 64-bits to the applications */
+#define CICN_HASH_NODE_APP_DATA_SIZE 72	/* TODO -- big enough? */
+
+typedef struct cicn_hash_node_s
+{
+
+  uint64_t hn_hash;		/* Complete hash value */
+
+  /* Total size of the key (chained in several key structs if necessary) */
+  uint16_t hn_keysize;
+
+  /* 1 byte of flags for application use */
+  uint8_t hn_flags;
+
+  uint8_t _hn_reserved1;	/* TBD, to align what follows back to 32 */
+
+  cicn_hash_key_t hn_key;	/* Key value embedded in the node, may
+				 * chain to more key buffers if necessary
+				 */
+  /* TODO -- keep array of 'next keys' so we can prefetch better? */
+
+  /* Followed by app-specific data (fib or pit or cs entry, e.g.) */
+  uint8_t hn_data[CICN_HASH_NODE_APP_DATA_SIZE];
+
+} cicn_hash_node_t;
+
+#define CICN_HASH_NODE_FLAGS_DEFAULT   0x00
+
+
+/*
+ * cicn_hash_entry_t
+ * Structure holding all or part of a hash value, a node index, and other
+ * key pieces of info.
+ *
+ * - 128 bytes/bucket with 16 bytes/entry gives 8 entries,
+ *    or 7 entries plus next bucket ptr if overflow
+ */
+typedef struct
+{
+
+  /* MSB of the hash value */
+  uint64_t he_msb64;
+
+  /* Index of node block */
+  uint32_t he_node;
+
+  /* Timeout value, units and scheme still TBD */
+  uint16_t he_timeout;
+
+  /* A few flags, including 'this points to a chain of buckets' */
+  uint8_t he_flags;
+
+  /* A byte for domain/application data (e.g. 'virtual fib entry' */
+  uint8_t he_appval;
+
+} cicn_hash_entry_t;
+
+#define CICN_HASH_ENTRY_FLAGS_DEFAULT  0x00
+
+/* This entry heads a chain of overflow buckets (we expect to see this
+ * only in the last entry in a bucket.) In this case, the index is
+ * to an overflow bucket rather than to a single node block.
+ */
+#define CICN_HASH_ENTRY_FLAG_OVERFLOW 0x01
+
+/* This entry has been marked for deletion */
+#define CICN_HASH_ENTRY_FLAG_DELETED  0x02
+
+/* Use fast he_timeout units for expiration, slow if not */
+#define CICN_HASH_ENTRY_FLAG_FAST_TIMEOUT 0x04
+
+/*
+ * hash bucket: Contains an array of entries.
+ * Cache line sized/aligned, so no room for extra fields unless
+ * bucket size is increased to 2 cache lines or the entry struct
+ * shrinks.
+ */
+
+/*
+ * Overflow bucket ratio as a fraction of the fixed/configured count;
+ * a pool of hash buckets used if a row in the fixed table overflows.
+ */
+#define CICN_HASHTB_OVERFLOW_FRACTION 8
+
+#define CICN_HASHTB_BUCKET_ENTRIES 8
+
+typedef struct
+{
+  cicn_hash_entry_t hb_entries[CICN_HASHTB_BUCKET_ENTRIES];
+} cicn_hash_bucket_t;
+
+/* Overall target fill-factor for the hashtable */
+#define CICN_HASHTB_FILL_FACTOR    4
+
+#define CICN_HASHTB_MIN_ENTRIES  (1 << 4)	// includes dummy node 0 entry
+#define CICN_HASHTB_MAX_ENTRIES  (1 << 24)
+
+#define CICN_HASHTB_MIN_BUCKETS (1 << 10)
+
+/*
+ * htab_t
+ *
+ * Hash table main structure.
+ *
+ * Contains
+ * - pointers to dynamically allocated arrays of cache-line
+ *   sized/aligned structures (buckets, nodes, keys).
+ * Put frequently accessed fields in the first cache line.
+ */
+typedef struct cicn_hashtb_s
+{
+
+  /* 8B - main array of hash buckets */
+  cicn_hash_bucket_t *ht_buckets;
+
+  /* 8B - just-in-case block of overflow buckets */
+  cicn_hash_bucket_t *ht_overflow_buckets;
+
+  /* 8B - block of nodes associated with entries in buckets */
+  cicn_hash_node_t *ht_nodes;
+
+  /*
+   * 8B - just-in-case block of extra keys, used when a key is too
+   *      large to fit in a node's embedded key area
+   */
+  cicn_hash_key_t *ht_extra_keys;
+
+  /* Flags */
+  uint32_t ht_flags;
+
+  /* Count of buckets allocated in the main array */
+  uint32_t ht_bucket_count;
+
+  /* Count of overflow buckets allocated */
+  uint32_t ht_overflow_bucket_count;
+  uint32_t ht_overflow_buckets_used;
+
+  /* Count of nodes allocated */
+  uint32_t ht_node_count;
+  uint32_t ht_nodes_used;
+
+  /* Count of overflow key structs allocated */
+  uint32_t ht_key_count;
+  uint32_t ht_keys_used;
+
+  /* TODO -- stats? */
+
+} cicn_hashtb_t, *cicn_hashtb_h;
+
+/* Offset to aligned start of additional data (PIT/CS, FIB)
+ * embedded in each node.
+ */
+extern uint32_t ht_node_data_offset_aligned;
+
+/* Flags for hashtable */
+
+#define CICN_HASHTB_FLAGS_DEFAULT    0x00
+
+/* Don't use the last/eighth entry in each bucket - only use it for overflow.
+ * We use this for the FIB, currently, so that we can support in-place
+ * FIB changes that would be difficult if there were hash entry copies
+ * as part of overflow handling.
+ */
+#define CICN_HASHTB_FLAG_USE_SEVEN      0x01
+#define CICN_HASHTB_FLAG_KEY_FMT_PFX    0x02
+#define CICN_HASHTB_FLAG_KEY_FMT_NAME   0x04
+
+/*
+ * Max prefix name components we'll support in our incremental hashing;
+ * currently used only for LPM in the FIB.
+ */
+#define CICN_HASHTB_MAX_NAME_COMPS CICN_PARAM_FIB_ENTRY_PFX_COMPS_MAX
+
+/*
+ * Info about an LPM hash computation on a prefix or name.
+ */
+typedef struct cicn_prefix_hashinf_s
+{
+
+  const uint8_t *pfx_ptr;
+  uint16_t pfx_len;
+
+  uint16_t pfx_count;		/* Number of prefix entries used */
+  uint8_t pfx_overflow;		/* true if pfx has extra components (not hashed) */
+
+  uint16_t pfx_lens[CICN_HASHTB_MAX_NAME_COMPS];
+  uint64_t pfx_hashes[CICN_HASHTB_MAX_NAME_COMPS];
+
+  uint64_t pfx_full_hash;
+
+} cicn_prefix_hashinf_t;
+
+/*
+ * APIs and inlines
+ */
+
+/* Compute hash node index from node pointer */
+static inline uint32_t
+cicn_hashtb_node_idx_from_node (cicn_hashtb_h h, cicn_hash_node_t * p)
+{
+  return (p - h->ht_nodes);
+}
+
+/* Retrieve a hashtable node by node index */
+static inline cicn_hash_node_t *
+cicn_hashtb_node_from_idx (cicn_hashtb_h h, uint32_t idx)
+{
+  return (pool_elt_at_index (h->ht_nodes, idx));
+}
+
+/* Allocate a brand-new hashtable */
+int cicn_hashtb_alloc (cicn_hashtb_h * ph, uint32_t max_elems,
+		       size_t app_data_size);
+
+/* Free a hashtable, including its embedded arrays */
+int cicn_hashtb_free (cicn_hashtb_h * ph);
+
+/* Hash a bytestring, currently using siphash64 (for UT) */
+uint64_t cicn_hashtb_hash_bytestring (const uint8_t * key, uint32_t keylen);
+
+/* Hash a name, currently using siphash64 */
+uint64_t cicn_hashtb_hash_name (const uint8_t * key, uint32_t keylen);
+
+/*
+ * Hash a name, with incremental prefix hashing (for LPM, e.g.) If
+ * 'limit' > 0, limit computation of incrementals.
+ * if 'is_full_name', we expect that 'name' points to the beginning
+ * of the entire name TLV; otherwise, 'name' just points to the first
+ * name-comp sub-tlv, and we will _not_ compute the full-name hash.
+ * Note that 'name' and 'namelen' are captured in the prefix-hash
+ * context struct, to make the LPM/FIB apis a little cleaner.
+ */
+int cicn_hashtb_hash_prefixes (const uint8_t * name, uint16_t namelen,
+			       int is_full_name, cicn_prefix_hashinf_t * pfx,
+			       int limit);
+
+/*
+ * Prepare a hashtable node for insertion, supplying the key
+ * and computed hash info. This sets up the node->key relationship, possibly
+ * allocating overflow key buffers.
+ */
+int cicn_hashtb_init_node (cicn_hashtb_h h, cicn_hash_node_t * node,
+			   uint64_t hashval,
+			   const uint8_t * key, uint32_t keylen);
+
+/*
+ * Insert a node into the hashtable. We expect the caller has used the
+ * init api to set the node key and hash info, and populated the extra
+ * data area (if any) - or done the equivalent work itself.
+ */
+int cicn_hashtb_insert (cicn_hashtb_h h, cicn_hash_node_t * node);
+
+/*
+ * Basic api to lookup a specific hash+key tuple. This does the entire
+ * lookup operation, retrieving node structs and comparing keys,
+ * so it's not optimized for prefetching or high performance.
+ *
+ * Returns zero and mails back a node on success, errno otherwise.
+ */
+int cicn_hashtb_lookup_node (cicn_hashtb_h h, const uint8_t * key,
+			     uint32_t keylen, uint64_t hashval,
+			     cicn_hash_node_t ** nodep);
+
+/*
+ * Extended api to lookup a specific hash+key tuple. The implementation
+ * allows the caller to locate nodes that are marked for deletion; this
+ * is part of some hashtable applications, such as the FIB.
+ *
+ * This does the entire lookup operation, retrieving node structs
+ * and comparing keys, so it's not optimized for prefetching or high performance.
+ *
+ * Returns zero and mails back a node on success, errno otherwise.
+ */
+int
+cicn_hashtb_lookup_node_ex (cicn_hashtb_h h, const uint8_t * key,
+			    uint32_t keylen, uint64_t hashval,
+			    int include_deleted_p, cicn_hash_node_t ** nodep);
+
+/*
+ * Remove a node from a hashtable using the node itself. The internal
+ * data structs are cleaned up, but the node struct itself is not: the caller
+ * must free the node itself.
+ */
+int cicn_hashtb_remove_node (cicn_hashtb_h h, cicn_hash_node_t * node);
+
+/*
+ * Delete a node from a hashtable using the node itself, and delete/free
+ * the node.  Caller's pointer is cleared on success.
+ */
+int cicn_hashtb_delete (cicn_hashtb_h h, cicn_hash_node_t ** pnode);
+
+/*
+ * Utility to init a new entry in a hashtable bucket/row. We use this
+ * to add new a node+hash, and to clear out an entry during removal.
+ */
+void cicn_hashtb_init_entry (cicn_hash_entry_t * entry,
+			     uint32_t nodeidx, uint64_t hashval);
+
+/*
+ * Return data area embedded in a hash node struct. We maintain an 'offset'
+ * value in case the common node body struct doesn't leave the data area
+ * aligned properly.
+ */
+static inline void *
+cicn_hashtb_node_data (cicn_hash_node_t * node)
+{
+  return ((uint8_t *) (node) + ht_node_data_offset_aligned);
+}
+
+/*
+ * Use some bits of the low half of the hash to locate a row/bucket in the table
+ */
+static inline uint32_t
+cicn_hashtb_bucket_idx (cicn_hashtb_h h, uint64_t hashval)
+{
+  return ((uint32_t) (hashval & (h->ht_bucket_count - 1)));
+}
+
+/*
+ * Return a hash node struct from the free list, or NULL.
+ * Note that the returned struct is _not_ cleared/zeroed - init
+ * is up to the caller.
+ */
+static inline cicn_hash_node_t *
+cicn_hashtb_alloc_node (cicn_hashtb_h h)
+{
+  cicn_hash_node_t *p = NULL;
+
+  if (h->ht_nodes_used < h->ht_node_count)
+    {
+      pool_get_aligned (h->ht_nodes, p, 8);
+      h->ht_nodes_used++;
+    }
+
+  return (p);
+}
+
+/*
+ * Release a hashtable node back to the free list when an entry is cleared
+ */
+void cicn_hashtb_free_node (cicn_hashtb_h h, cicn_hash_node_t * node);
+
+/*
+ * Walk a hashtable, iterating through the nodes, keeping context
+ * in 'ctx' between calls.
+ *
+ * Set the context value to CICN_HASH_WALK_CTX_INITIAL to start an iteration.
+ */
+int cicn_hashtb_next_node (cicn_hashtb_h h, cicn_hash_node_t ** pnode,
+			   uint64_t * ctx);
+
+/*
+ * Update the per-entry compression expiration value and type
+ * for a hashtable node.
+ */
+int cicn_hashtb_entry_set_expiration (cicn_hashtb_h h,
+				      cicn_hash_node_t * node,
+				      uint16_t entry_timeout,
+				      uint8_t entry_flags);
+
+int cicn_hashtb_key_to_str (cicn_hashtb_h h, const cicn_hash_node_t * node,
+			    char *buf, int bufsize, int must_fit);
+
+#endif /* _CICN_HASHTB_H_ */