/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * cicn_hashtb.h: Fast-path, vpp-aware hashtable, the base for the PIT/CS and FIB
 * used in the cicn forwarder.
 *
 * - As is the case in other areas, we can't share headers between the vpp code
 *   and the cicn/cndn code: there are conflicting definitions.
 *
 */

#ifndef _CICN_HASHTB_H_
#define _CICN_HASHTB_H_ 1

#if !CICN_VPP_PLUGIN
#error "cicn-internal file included externally"
#endif

#include "cicn_std.h"
#include "cicn_params.h"

/* Handy abbreviations for success status, and for boolean values */
#ifndef TRUE
#define TRUE 1
#endif

#ifndef FALSE
#define FALSE 0
#endif

/*
 * Lookup is finding a hashtable record whose name matches the name
 * being looked up.  Most of the lookup work is based on the hash
 * value of the two names. Note that the intel cache line size is 64
 * bytes, and some platforms load in 2 cache lines together.
 * - first step is to match a record at the bucket/slot level
 *   (htab has an array of htbucket_t/htbc_elmt, where each bucket has
 *   7 slots to hold indices for entries.) Matching at this level implies
 *   - the hashes of the lookup name and the record map to the
 *     same bucket
 *   - the high 32 bits of the hashes (slot bce_hash_msb32s) match.
 *   Read cost (on the hash table size, i.e. ignoring reading the
 *   name being looked up):
 *   - First step normally requires 1 cache line load to pull in
 *     the 64-byte htbucket_t with the 7 element slot table holding the
 *     hash_msb32s.
 *   - In the event (hopefully rare for a hash table with
 *     appropriate number of buckets) that more than 7 elements
 *     hash to the same bucket, lookup may well need to look not
 *     only at the static htbc_elmt_t but at the chain of dynamically
 *     allocated htbc_elmt_t's linked to the static htbc_elmt_t, where
 *     each of these holds slot entries for additional elements.
 *   - Before reaching that point, it is initially required is to read in
 *     the hash table record fields (ht_bucket_buf, htnode buf, etc)
 *     holding pointers to the arrays, but these cache lines are common
 *     to all lookups so will likely already be in the cache.
 * - second step is to match at the record level (htnode/htkb level)
 *   once a slot-level match happens. Matching at this level implies
 *   the following match
 *   - the hash values (the full 64 bits vs. bucket+32 msb, above)
 *     With siphash, two names hashing to the same 64-bit value is
 *     quite rare.
 *   - the name which, on the hash table side, is stored as a list
 *     of htkb_t (key buffers). [In some cases, the full name is
 *     not compared, and a match is assumed based on hash value match.
 *   Read cost:
 *   - htnode_t, in one cache line, holds hash value and index for the
 *     htkb at the head of the key buffer list
 *   - each key buffer (htkb_t) is cache line aligned/sized, and holds
 *     60 bytes of the name and requires a cache line read.
 * Simplification is that a fib lookup requires 3 cache lines:
 * - bucket
 * - htnode
 * - single key buffer (for cases where a name comparision is done)
 *
 * Some hashtables (for which rare false positives are tolerable)
 * store hash values but no keys. (In ISM NDN forwarder, this was
 * used for dcm_dpf: data cache manager's dataplane filter, where
 * speed was critical and very rare false positives would be detected
 * in the full dcm check.)
 * - No key buffers are used (or even allocated at hash table creation).
 */

#define CICN_HASH_INVALID_IDX  ~0
/* for cicn_hashtb_next_node() iterator, this otherwise illegal context value
 * indicates first call of iteration.
 * Note: must not be 0, which is a legal context value.
 */
#define CICN_HASH_WALK_CTX_INITIAL (~((uint64_t)0))

/*
 * Key memory allocation scheme.
 *
 * The key is the bytestring that a hashtable entry is
 * storing, e.g. a fib prefix or packet name. The hash of the
 * name is used not just to pick the bucket, but also as a surrogate
 * for the actual key value.
 *
 * Client calls pass key/name as contiguous memory for lookup/add/delete
 * but hashable stores its copy of the key/name as a list of one or
 * more hash_key structs.
 * - key memory is managed as a list of keys (cache line
 *   sized/aligned buffers).
 *   - If (keysize < 128) then use key struct's full 128 bytes
 *   - If not, first key struct is head of a linked list of elements
 *     where the first bytes are used for the key and the last 4 bytes
 *     are the index of the next entry (or an end marker).
 * - key memory is generally the single largest use of memory
 *   in the hash table, especially for PIT, as names are bigger
 *   than node structs (which is also per name/entry).
 *
 */

#define CICN_HASH_KEY_BYTES   128
#define CICN_HASH_KEY_LIST_BYTES (CICN_HASH_KEY_BYTES - sizeof(uint32_t))
typedef struct
{
  union
  {
    struct
    {
      uint8_t key[CICN_HASH_KEY_BYTES];
    } ks;			/* Entire key in one block */
    struct
    {
      uint8_t key[CICN_HASH_KEY_LIST_BYTES];
      uint32_t idx_next;	/* Next keybuf idx */
    } kl;			/* Key in a list of blocks */
  };
} cicn_hash_key_t;

/* Ratio of extra key blocks to allocate, in case the embedded ones aren't
 * sufficient. This is the fraction of the number of entries allocated.
 */
#define CICN_HASHTB_KEY_RATIO 8

/*
 * hash node, used to store a hash table entry; indexed by an entry in a bucket.
 * the node contains an embedded key; long keys are stored as chains of keys.
 *
 * The memory block for a node includes space for client data,
 * additional memory located off the end of the htnode data structure.
 * Size of client-supplied data is fixed, so we can use vpp pools. The PIT
 * and FIB need to ensure that they fit within the available data area,
 * or change the size to accomodate their needs.
 *
 * NOTE: app_data_size currently applies to all apps, i.e. bigger FIB
 *       nodes means (leads to, requires) bigger PCS nodes
 */

/* Size this so that we can offer 64B aligned on 64-bits to the applications */
#define CICN_HASH_NODE_APP_DATA_SIZE 72	/* TODO -- big enough? */

typedef struct cicn_hash_node_s
{

  uint64_t hn_hash;		/* Complete hash value */

  /* Total size of the key (chained in several key structs if necessary) */
  uint16_t hn_keysize;

  /* 1 byte of flags for application use */
  uint8_t hn_flags;

  uint8_t _hn_reserved1;	/* TBD, to align what follows back to 32 */

  cicn_hash_key_t hn_key;	/* Key value embedded in the node, may
				 * chain to more key buffers if necessary
				 */
  /* TODO -- keep array of 'next keys' so we can prefetch better? */

  /* Followed by app-specific data (fib or pit or cs entry, e.g.) */
  uint8_t hn_data[CICN_HASH_NODE_APP_DATA_SIZE];

} cicn_hash_node_t;

#define CICN_HASH_NODE_FLAGS_DEFAULT   0x00


/*
 * cicn_hash_entry_t
 * Structure holding all or part of a hash value, a node index, and other
 * key pieces of info.
 *
 * - 128 bytes/bucket with 16 bytes/entry gives 8 entries,
 *    or 7 entries plus next bucket ptr if overflow
 */
typedef struct
{

  /* MSB of the hash value */
  uint64_t he_msb64;

  /* Index of node block */
  uint32_t he_node;

  /* Timeout value, units and scheme still TBD */
  uint16_t he_timeout;

  /* A few flags, including 'this points to a chain of buckets' */
  uint8_t he_flags;

  /* A byte for domain/application data (e.g. 'virtual fib entry' */
  uint8_t he_appval;

} cicn_hash_entry_t;

#define CICN_HASH_ENTRY_FLAGS_DEFAULT  0x00

/* This entry heads a chain of overflow buckets (we expect to see this
 * only in the last entry in a bucket.) In this case, the index is
 * to an overflow bucket rather than to a single node block.
 */
#define CICN_HASH_ENTRY_FLAG_OVERFLOW 0x01

/* This entry has been marked for deletion */
#define CICN_HASH_ENTRY_FLAG_DELETED  0x02

/* Use fast he_timeout units for expiration, slow if not */
#define CICN_HASH_ENTRY_FLAG_FAST_TIMEOUT 0x04

/*
 * hash bucket: Contains an array of entries.
 * Cache line sized/aligned, so no room for extra fields unless
 * bucket size is increased to 2 cache lines or the entry struct
 * shrinks.
 */

/*
 * Overflow bucket ratio as a fraction of the fixed/configured count;
 * a pool of hash buckets used if a row in the fixed table overflows.
 */
#define CICN_HASHTB_OVERFLOW_FRACTION 8

#define CICN_HASHTB_BUCKET_ENTRIES 8

typedef struct
{
  cicn_hash_entry_t hb_entries[CICN_HASHTB_BUCKET_ENTRIES];
} cicn_hash_bucket_t;

/* Overall target fill-factor for the hashtable */
#define CICN_HASHTB_FILL_FACTOR    4

#define CICN_HASHTB_MIN_ENTRIES  (1 << 4)	// includes dummy node 0 entry
#define CICN_HASHTB_MAX_ENTRIES  (1 << 24)

#define CICN_HASHTB_MIN_BUCKETS (1 << 10)

/*
 * htab_t
 *
 * Hash table main structure.
 *
 * Contains
 * - pointers to dynamically allocated arrays of cache-line
 *   sized/aligned structures (buckets, nodes, keys).
 * Put frequently accessed fields in the first cache line.
 */
typedef struct cicn_hashtb_s
{

  /* 8B - main array of hash buckets */
  cicn_hash_bucket_t *ht_buckets;

  /* 8B - just-in-case block of overflow buckets */
  cicn_hash_bucket_t *ht_overflow_buckets;

  /* 8B - block of nodes associated with entries in buckets */
  cicn_hash_node_t *ht_nodes;

  /*
   * 8B - just-in-case block of extra keys, used when a key is too
   *      large to fit in a node's embedded key area
   */
  cicn_hash_key_t *ht_extra_keys;

  /* Flags */
  uint32_t ht_flags;

  /* Count of buckets allocated in the main array */
  uint32_t ht_bucket_count;

  /* Count of overflow buckets allocated */
  uint32_t ht_overflow_bucket_count;
  uint32_t ht_overflow_buckets_used;

  /* Count of nodes allocated */
  uint32_t ht_node_count;
  uint32_t ht_nodes_used;

  /* Count of overflow key structs allocated */
  uint32_t ht_key_count;
  uint32_t ht_keys_used;

  /* TODO -- stats? */

} cicn_hashtb_t, *cicn_hashtb_h;

/* Offset to aligned start of additional data (PIT/CS, FIB)
 * embedded in each node.
 */
extern uint32_t ht_node_data_offset_aligned;

/* Flags for hashtable */

#define CICN_HASHTB_FLAGS_DEFAULT    0x00

/* Don't use the last/eighth entry in each bucket - only use it for overflow.
 * We use this for the FIB, currently, so that we can support in-place
 * FIB changes that would be difficult if there were hash entry copies
 * as part of overflow handling.
 */
#define CICN_HASHTB_FLAG_USE_SEVEN      0x01
#define CICN_HASHTB_FLAG_KEY_FMT_PFX    0x02
#define CICN_HASHTB_FLAG_KEY_FMT_NAME   0x04

/*
 * Max prefix name components we'll support in our incremental hashing;
 * currently used only for LPM in the FIB.
 */
#define CICN_HASHTB_MAX_NAME_COMPS CICN_PARAM_FIB_ENTRY_PFX_COMPS_MAX

/*
 * Info about an LPM hash computation on a prefix or name.
 */
typedef struct cicn_prefix_hashinf_s
{

  const uint8_t *pfx_ptr;
  uint16_t pfx_len;

  uint16_t pfx_count;		/* Number of prefix entries used */
  uint8_t pfx_overflow;		/* true if pfx has extra components (not hashed) */

  uint16_t pfx_lens[CICN_HASHTB_MAX_NAME_COMPS];
  uint64_t pfx_hashes[CICN_HASHTB_MAX_NAME_COMPS];

  uint64_t pfx_full_hash;

} cicn_prefix_hashinf_t;

/*
 * APIs and inlines
 */

/* Compute hash node index from node pointer */
static inline uint32_t
cicn_hashtb_node_idx_from_node (cicn_hashtb_h h, cicn_hash_node_t * p)
{
  return (p - h->ht_nodes);
}

/* Retrieve a hashtable node by node index */
static inline cicn_hash_node_t *
cicn_hashtb_node_from_idx (cicn_hashtb_h h, uint32_t idx)
{
  return (pool_elt_at_index (h->ht_nodes, idx));
}

/* Allocate a brand-new hashtable */
int cicn_hashtb_alloc (cicn_hashtb_h * ph, uint32_t max_elems,
		       size_t app_data_size);

/* Free a hashtable, including its embedded arrays */
int cicn_hashtb_free (cicn_hashtb_h * ph);

/* Hash a bytestring, currently using siphash64 (for UT) */
uint64_t cicn_hashtb_hash_bytestring (const uint8_t * key, uint32_t keylen);

/* Hash a name, currently using siphash64 */
uint64_t cicn_hashtb_hash_name (const uint8_t * key, uint32_t keylen);

/*
 * Hash a name, with incremental prefix hashing (for LPM, e.g.) If
 * 'limit' > 0, limit computation of incrementals.
 * if 'is_full_name', we expect that 'name' points to the beginning
 * of the entire name TLV; otherwise, 'name' just points to the first
 * name-comp sub-tlv, and we will _not_ compute the full-name hash.
 * Note that 'name' and 'namelen' are captured in the prefix-hash
 * context struct, to make the LPM/FIB apis a little cleaner.
 */
int cicn_hashtb_hash_prefixes (const uint8_t * name, uint16_t namelen,
			       int is_full_name, cicn_prefix_hashinf_t * pfx,
			       int limit);

/*
 * Prepare a hashtable node for insertion, supplying the key
 * and computed hash info. This sets up the node->key relationship, possibly
 * allocating overflow key buffers.
 */
int cicn_hashtb_init_node (cicn_hashtb_h h, cicn_hash_node_t * node,
			   uint64_t hashval,
			   const uint8_t * key, uint32_t keylen);

/*
 * Insert a node into the hashtable. We expect the caller has used the
 * init api to set the node key and hash info, and populated the extra
 * data area (if any) - or done the equivalent work itself.
 */
int cicn_hashtb_insert (cicn_hashtb_h h, cicn_hash_node_t * node);

/*
 * Basic api to lookup a specific hash+key tuple. This does the entire
 * lookup operation, retrieving node structs and comparing keys,
 * so it's not optimized for prefetching or high performance.
 *
 * Returns zero and mails back a node on success, errno otherwise.
 */
int cicn_hashtb_lookup_node (cicn_hashtb_h h, const uint8_t * key,
			     uint32_t keylen, uint64_t hashval,
			     cicn_hash_node_t ** nodep);

/*
 * Extended api to lookup a specific hash+key tuple. The implementation
 * allows the caller to locate nodes that are marked for deletion; this
 * is part of some hashtable applications, such as the FIB.
 *
 * This does the entire lookup operation, retrieving node structs
 * and comparing keys, so it's not optimized for prefetching or high performance.
 *
 * Returns zero and mails back a node on success, errno otherwise.
 */
int
cicn_hashtb_lookup_node_ex (cicn_hashtb_h h, const uint8_t * key,
			    uint32_t keylen, uint64_t hashval,
			    int include_deleted_p, cicn_hash_node_t ** nodep);

/*
 * Remove a node from a hashtable using the node itself. The internal
 * data structs are cleaned up, but the node struct itself is not: the caller
 * must free the node itself.
 */
int cicn_hashtb_remove_node (cicn_hashtb_h h, cicn_hash_node_t * node);

/*
 * Delete a node from a hashtable using the node itself, and delete/free
 * the node.  Caller's pointer is cleared on success.
 */
int cicn_hashtb_delete (cicn_hashtb_h h, cicn_hash_node_t ** pnode);

/*
 * Utility to init a new entry in a hashtable bucket/row. We use this
 * to add new a node+hash, and to clear out an entry during removal.
 */
void cicn_hashtb_init_entry (cicn_hash_entry_t * entry,
			     uint32_t nodeidx, uint64_t hashval);

/*
 * Return data area embedded in a hash node struct. We maintain an 'offset'
 * value in case the common node body struct doesn't leave the data area
 * aligned properly.
 */
static inline void *
cicn_hashtb_node_data (cicn_hash_node_t * node)
{
  return ((uint8_t *) (node) + ht_node_data_offset_aligned);
}

/*
 * Use some bits of the low half of the hash to locate a row/bucket in the table
 */
static inline uint32_t
cicn_hashtb_bucket_idx (cicn_hashtb_h h, uint64_t hashval)
{
  return ((uint32_t) (hashval & (h->ht_bucket_count - 1)));
}

/*
 * Return a hash node struct from the free list, or NULL.
 * Note that the returned struct is _not_ cleared/zeroed - init
 * is up to the caller.
 */
static inline cicn_hash_node_t *
cicn_hashtb_alloc_node (cicn_hashtb_h h)
{
  cicn_hash_node_t *p = NULL;

  if (h->ht_nodes_used < h->ht_node_count)
    {
      pool_get_aligned (h->ht_nodes, p, 8);
      h->ht_nodes_used++;
    }

  return (p);
}

/*
 * Release a hashtable node back to the free list when an entry is cleared
 */
void cicn_hashtb_free_node (cicn_hashtb_h h, cicn_hash_node_t * node);

/*
 * Walk a hashtable, iterating through the nodes, keeping context
 * in 'ctx' between calls.
 *
 * Set the context value to CICN_HASH_WALK_CTX_INITIAL to start an iteration.
 */
int cicn_hashtb_next_node (cicn_hashtb_h h, cicn_hash_node_t ** pnode,
			   uint64_t * ctx);

/*
 * Update the per-entry compression expiration value and type
 * for a hashtable node.
 */
int cicn_hashtb_entry_set_expiration (cicn_hashtb_h h,
				      cicn_hash_node_t * node,
				      uint16_t entry_timeout,
				      uint8_t entry_flags);

int cicn_hashtb_key_to_str (cicn_hashtb_h h, const cicn_hash_node_t * node,
			    char *buf, int bufsize, int must_fit);

#endif /* _CICN_HASHTB_H_ */