summaryrefslogtreecommitdiffstats
path: root/src/vppinfra/bihash_doc.h
blob: a7e70e9695cc03ae07c23c37a486a957a9ef095b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
/*
 * Copyright (c) 2014 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

#error do not #include this file!

/** \file

    Bounded-index extensible hashing. The basic algorithm performs
    thread-safe constant-time lookups in the face of a rational number
    of hash collisions. The computed hash code h(k) must have
    reasonable statistics with respect to the key space. It won't do
    to have h(k) = 0 or 1, for all values of k.

    Each bucket in the power-of-two bucket array contains the index
    (in a private vppinfra memory heap) of the "backing store" for the
    bucket, as well as a size field. The size field (log2_pages)
    corresponds to 1, 2, 4, ... contiguous "pages" containing the
    (key,value) pairs in the bucket.

    When a single page fills, we allocate two contiguous pages.  We
    recompute h(k) for each (key,value) pair, using an additional bit
    to deal the (key, value) pairs into the "top" and "bottom" pages.

    At lookup time, we compute h(k), using lg(bucket-array-size) to
    pick the bucket. We read the bucket to find the base of the
    backing pages.  We use an additional log2_pages' worth of bits
    from h(k) to compute the offset of the page which will contain the
    (key,value) pair we're trying to find.
*/

/** template key/value backing page structure */
typedef struct clib_bihash_value
{
  union
  {

    clib_bihash_kv kvp[BIHASH_KVP_PER_PAGE]; /**< the actual key/value pairs */
    clib_bihash_value *next_free;  /**< used when a KVP page (or block thereof) is on a freelist */
  };
} clib_bihash_value_t
/** bihash bucket structure */
  typedef struct
{
  union
  {
    struct
    {
      u32 offset;  /**< backing page offset in the clib memory heap */
      u8 pad[3];   /**< log2 (size of the packing page block) */
      u8 log2_pages;
    };
    u64 as_u64;
  };
} clib_bihash_bucket_t;

/** A bounded index extensible hash table */
typedef struct
{
  clib_bihash_bucket_t *buckets;  /**< Hash bucket vector, power-of-two in size */
  volatile u32 *writer_lock;  /**< Writer lock, in its own cache line */
    BVT (clib_bihash_value) ** working_copies;
					    /**< Working copies (various sizes), to avoid locking against readers */
  clib_bihash_bucket_t saved_bucket; /**< Saved bucket pointer */
  u32 nbuckets;			     /**< Number of hash buckets */
  u32 log2_nbuckets;		     /**< lg(nbuckets) */
  u8 *name;			     /**< hash table name */
    BVT (clib_bihash_value) ** freelists;
				      /**< power of two freelist vector */
  uword alloc_arena;		      /**< memory allocation arena  */
  uword alloc_arena_next;	      /**< first available mem chunk */
  uword alloc_arena_size;	      /**< size of the arena */
} clib_bihash_t;

/** Get pointer to value page given its clib mheap offset */
static inline void *clib_bihash_get_value (clib_bihash * h, uword offset);

/** Get clib mheap offset given a pointer */
static inline uword clib_bihash_get_offset (clib_bihash * h, void *v);

/** initialize a bounded index extensible hash table

    @param h - the bi-hash table to initialize
    @param name - name of the hash table
    @param nbuckets - the number of buckets, will be rounded up to
a power of two
    @param memory_size - clib mheap size, in bytes
*/

void clib_bihash_init
  (clib_bihash * h, char *name, u32 nbuckets, uword memory_size);

/** Destroy a bounded index extensible hash table
    @param h - the bi-hash table to free
*/

void clib_bihash_free (clib_bihash * h);

/** Add or delete a (key,value) pair from a bi-hash table

    @param h - the bi-hash table to search
    @param add_v - the (key,value) pair to add
    @param is_add - add=1, delete=0
    @returns 0 on success, < 0 on error
    @note This function will replace an existing (key,value) pair if the
    new key matches an existing key
*/
int clib_bihash_add_del (clib_bihash * h, clib_bihash_kv * add_v, int is_add);


/** Search a bi-hash table, use supplied hash code

    @param h - the bi-hash table to search
    @param hash - the hash code
    @param in_out_kv - (key,value) pair containing the search key
    @returns 0 on success (with in_out_kv set), < 0 on error
*/
int clib_bihash_search_inline_with_hash
  (clib_bihash * h, u64 hash, clib_bihash_kv * in_out_kv);

/** Search a bi-hash table

    @param h - the bi-hash table to search
    @param in_out_kv - (key,value) pair containing the search key
    @returns 0 on success (with in_out_kv set), < 0 on error
*/
int clib_bihash_search_inline (clib_bihash * h, clib_bihash_kv * in_out_kv);

/** Prefetch a bi-hash bucket given a hash code

    @param h - the bi-hash table to search
    @param hash - the hash code
    @note see also clib_bihash_hash to compute the code
*/
void clib_bihash_prefetch_bucket (clib_bihash * h, u64 hash);

/** Prefetch bi-hash (key,value) data given a hash code

    @param h - the bi-hash table to search
    @param hash - the hash code
    @note assumes that the bucket has been prefetched, see
     clib_bihash_prefetch_bucket
*/
void clib_bihash_prefetch_data (clib_bihash * h, u64 hash);

/** Search a bi-hash table

    @param h - the bi-hash table to search
    @param search_key - (key,value) pair containing the search key
    @param valuep - (key,value) set to search result
    @returns 0 on success (with valuep set), < 0 on error
    @note used in situations where key modification is not desired
*/
int clib_bihash_search_inline_2
  (clib_bihash * h, clib_bihash_kv * search_key, clib_bihash_kv * valuep);

/** Visit active (key,value) pairs in a bi-hash table

    @param h - the bi-hash table to search
    @param callback - function to call with each active (key,value) pair
    @param arg - arbitrary second argument passed to the callback function
    First argument is the (key,value) pair to visit
    @note Trying to supply a proper function prototype for the
    callback function appears to be a fool's errand.
*/
void clib_bihash_foreach_key_value_pair (clib_bihash * h,
					 void *callback, void *arg);

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
n> #include <vlibmemory/vl_memory_api_h.h> #undef vl_endianfun /* instantiate all the print functions we know about */ #define vl_print(handle, ...) clib_warning (__VA_ARGS__) #define vl_printfun #include <vlibmemory/vl_memory_api_h.h> #undef vl_printfun typedef struct { u8 rx_thread_jmpbuf_valid; u8 connected_to_vlib; jmp_buf rx_thread_jmpbuf; pthread_t rx_thread_handle; /* Plugin message base lookup scheme */ volatile u8 first_msg_id_reply_ready; u16 first_msg_id_reply; } memory_client_main_t; memory_client_main_t memory_client_main; static void * rx_thread_fn (void *arg) { unix_shared_memory_queue_t *q; memory_client_main_t *mm = &memory_client_main; api_main_t *am = &api_main; q = am->vl_input_queue; /* So we can make the rx thread terminate cleanly */ if (setjmp (mm->rx_thread_jmpbuf) == 0) { mm->rx_thread_jmpbuf_valid = 1; while (1) { vl_msg_api_queue_handler (q); } } pthread_exit (0); } static void vl_api_rx_thread_exit_t_handler (vl_api_rx_thread_exit_t * mp) { memory_client_main_t *mm = &memory_client_main; vl_msg_api_free (mp); longjmp (mm->rx_thread_jmpbuf, 1); } static void vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp) { serialize_main_t _sm, *sm = &_sm; api_main_t *am = &api_main; u8 *tblv; u32 nmsgs; int i; u8 *name_and_crc; u32 msg_index; am->my_client_index = mp->index; am->my_registration = (vl_api_registration_t *) (uword) mp->handle; /* Clean out any previous hash table (unlikely) */ if (am->msg_index_by_name_and_crc) { int i; u8 **keys = 0; hash_pair_t *hp; /* *INDENT-OFF* */ hash_foreach_pair (hp, am->msg_index_by_name_and_crc, ({ vec_add1 (keys, (u8 *) hp->key); })); /* *INDENT-ON* */ for (i = 0; i < vec_len (keys); i++) vec_free (keys[i]); vec_free (keys); } am->msg_index_by_name_and_crc = hash_create_string (0, sizeof (uword)); /* Recreate the vnet-side API message handler table */ tblv = uword_to_pointer (mp->message_table, u8 *); serialize_open_vector (sm, tblv); unserialize_integer (sm, &nmsgs, sizeof (u32)); for (i = 0; i < nmsgs; i++) { msg_index = unserialize_likely_small_unsigned_integer (sm); unserialize_cstring (sm, (char **) &name_and_crc); hash_set_mem (am->msg_index_by_name_and_crc, name_and_crc, msg_index); } } static void noop_handler (void *notused) { } int vl_client_connect (const char *name, int ctx_quota, int input_queue_size) { svm_region_t *svm; vl_api_memclnt_create_t *mp; vl_api_memclnt_create_reply_t *rp; unix_shared_memory_queue_t *vl_input_queue; vl_shmem_hdr_t *shmem_hdr; int rv = 0; void *oldheap; api_main_t *am = &api_main; if (am->my_registration) { clib_warning ("client %s already connected...", name); return -1; } if (am->vlib_rp == 0) { clib_warning ("am->vlib_rp NULL"); return -1; } svm = am->vlib_rp; shmem_hdr = am->shmem_hdr; if (shmem_hdr == 0 || shmem_hdr->vl_input_queue == 0) { clib_warning ("shmem_hdr / input queue NULL"); return -1; } pthread_mutex_lock (&svm->mutex); oldheap = svm_push_data_heap (svm); vl_input_queue = unix_shared_memory_queue_init (input_queue_size, sizeof (uword), getpid (), 0); pthread_mutex_unlock (&svm->mutex); svm_pop_heap (oldheap); am->my_client_index = ~0; am->my_registration = 0; am->vl_input_queue = vl_input_queue; mp = vl_msg_api_alloc (sizeof (vl_api_memclnt_create_t)); memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE); mp->ctx_quota = ctx_quota; mp->input_queue = (uword) vl_input_queue; strncpy ((char *) mp->name, name, sizeof (mp->name) - 1); vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & mp); while (1) { int qstatus; struct timespec ts, tsrem; int i; /* Wait up to 10 seconds */ for (i = 0; i < 1000; i++) { qstatus = unix_shared_memory_queue_sub (vl_input_queue, (u8 *) & rp, 1 /* nowait */ ); if (qstatus == 0) goto read_one_msg; ts.tv_sec = 0; ts.tv_nsec = 10000 * 1000; /* 10 ms */ while (nanosleep (&ts, &tsrem) < 0) ts = tsrem; } /* Timeout... */ clib_warning ("memclnt_create_reply timeout"); return -1; read_one_msg: if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_CREATE_REPLY) { clib_warning ("unexpected reply: id %d", ntohs (rp->_vl_msg_id)); continue; } rv = clib_net_to_host_u32 (rp->response); vl_msg_api_handler ((void *) rp); break; } return (rv); } static void vl_api_memclnt_delete_reply_t_handler (vl_api_memclnt_delete_reply_t * mp) { void *oldheap; api_main_t *am = &api_main; pthread_mutex_lock (&am->vlib_rp->mutex); oldheap = svm_push_data_heap (am->vlib_rp); unix_shared_memory_queue_free (am->vl_input_queue); pthread_mutex_unlock (&am->vlib_rp->mutex); svm_pop_heap (oldheap); am->my_client_index = ~0; am->my_registration = 0; am->vl_input_queue = 0; } void vl_client_disconnect (void) { vl_api_memclnt_delete_t *mp; vl_api_memclnt_delete_reply_t *rp; unix_shared_memory_queue_t *vl_input_queue; vl_shmem_hdr_t *shmem_hdr; time_t begin; api_main_t *am = &api_main; ASSERT (am->vlib_rp); shmem_hdr = am->shmem_hdr; ASSERT (shmem_hdr && shmem_hdr->vl_input_queue); vl_input_queue = am->vl_input_queue; mp = vl_msg_api_alloc (sizeof (vl_api_memclnt_delete_t)); memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_MEMCLNT_DELETE); mp->index = am->my_client_index; mp->handle = (uword) am->my_registration; vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & mp); /* * Have to be careful here, in case the client is disconnecting * because e.g. the vlib process died, or is unresponsive. */ begin = time (0); while (1) { time_t now; now = time (0); if (now >= (begin + 2)) { clib_warning ("peer unresponsive, give up"); am->my_client_index = ~0; am->my_registration = 0; am->shmem_hdr = 0; break; } if (unix_shared_memory_queue_sub (vl_input_queue, (u8 *) & rp, 1) < 0) continue; /* drain the queue */ if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_DELETE_REPLY) { vl_msg_api_handler ((void *) rp); continue; } vl_msg_api_handler ((void *) rp); break; } } #define foreach_api_msg \ _(RX_THREAD_EXIT, rx_thread_exit) \ _(MEMCLNT_CREATE_REPLY, memclnt_create_reply) \ _(MEMCLNT_DELETE_REPLY, memclnt_delete_reply) int vl_client_api_map (const char *region_name) { int rv; if ((rv = vl_map_shmem (region_name, 0 /* is_vlib */ )) < 0) { return rv; } #define _(N,n) \ vl_msg_api_set_handlers(VL_API_##N, #n, \ vl_api_##n##_t_handler, \ noop_handler, \ vl_api_##n##_t_endian, \ vl_api_##n##_t_print, \ sizeof(vl_api_##n##_t), 1); foreach_api_msg; #undef _ return 0; } void vl_client_api_unmap (void) { vl_unmap_shmem (); } static int connect_to_vlib_internal (const char *svm_name, const char *client_name, int rx_queue_size, int want_pthread) { int rv = 0; memory_client_main_t *mm = &memory_client_main; if ((rv = vl_client_api_map (svm_name))) { clib_warning ("vl_client_api map rv %d", rv); return rv; } if (vl_client_connect (client_name, 0 /* punt quota */ , rx_queue_size /* input queue */ ) < 0) { vl_client_api_unmap (); return -1; } /* Start the rx queue thread */ if (want_pthread) { rv = pthread_create (&mm->rx_thread_handle, NULL /*attr */ , rx_thread_fn, 0); if (rv) clib_warning ("pthread_create returned %d", rv); } mm->connected_to_vlib = 1; return 0; } int vl_client_connect_to_vlib (const char *svm_name, const char *client_name, int rx_queue_size) { return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, 1 /* want pthread */ ); } int vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, const char *client_name, int rx_queue_size) { return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, 0 /* want pthread */ ); } void vl_client_disconnect_from_vlib (void) { memory_client_main_t *mm = &memory_client_main; api_main_t *am = &api_main; uword junk; if (mm->rx_thread_jmpbuf_valid) { vl_api_rx_thread_exit_t *ep; ep = vl_msg_api_alloc (sizeof (*ep)); ep->_vl_msg_id = ntohs (VL_API_RX_THREAD_EXIT); vl_msg_api_send_shmem (am->vl_input_queue, (u8 *) & ep); pthread_join (mm->rx_thread_handle, (void **) &junk); } if (mm->connected_to_vlib) { vl_client_disconnect (); vl_client_api_unmap (); } memset (mm, 0, sizeof (*mm)); } static void vl_api_get_first_msg_id_reply_t_handler (vl_api_get_first_msg_id_reply_t * mp) { memory_client_main_t *mm = &memory_client_main; i32 retval = ntohl (mp->retval); mm->first_msg_id_reply = (retval >= 0) ? ntohs (mp->first_msg_id) : ~0; mm->first_msg_id_reply_ready = 1; } u16 vl_client_get_first_plugin_msg_id (const char *plugin_name) { vl_api_get_first_msg_id_t *mp; api_main_t *am = &api_main; memory_client_main_t *mm = &memory_client_main; f64 timeout; void *old_handler; clib_time_t clib_time; u16 rv = ~0; if (strlen (plugin_name) + 1 > sizeof (mp->name)) return (rv); memset (&clib_time, 0, sizeof (clib_time)); clib_time_init (&clib_time); /* Push this plugin's first_msg_id_reply handler */ old_handler = am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY]; am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = (void *) vl_api_get_first_msg_id_reply_t_handler; /* Ask the data-plane for the message-ID base of the indicated plugin */ mm->first_msg_id_reply_ready = 0; mp = vl_msg_api_alloc (sizeof (*mp)); memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_GET_FIRST_MSG_ID); mp->client_index = am->my_client_index; strncpy ((char *) mp->name, plugin_name, sizeof (mp->name) - 1); vl_msg_api_send_shmem (am->shmem_hdr->vl_input_queue, (u8 *) & mp); /* Synchronously wait for the answer */ do { timeout = clib_time_now (&clib_time) + 1.0; while (clib_time_now (&clib_time) < timeout) { if (mm->first_msg_id_reply_ready == 1) { rv = mm->first_msg_id_reply; goto result; } } /* Restore old handler */ am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = old_handler; return rv; } while (0); result: /* Restore the old handler */ am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = old_handler; if (rv == (u16) ~ 0) clib_warning ("plugin '%s' not registered", plugin_name); return rv; } void vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n) { clib_warning ("STUB called..."); } /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */